BitMagic-C++
bmstrsparsevec.h
Go to the documentation of this file.
1#ifndef BMSTRSPARSEVEC__H__INCLUDED__
2#define BMSTRSPARSEVEC__H__INCLUDED__
3/*
4Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5
6Licensed under the Apache License, Version 2.0 (the "License");
7you may not use this file except in compliance with the License.
8You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12Unless required by applicable law or agreed to in writing, software
13distributed under the License is distributed on an "AS IS" BASIS,
14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15See the License for the specific language governing permissions and
16limitations under the License.
17
18For more information please visit: http://bitmagic.io
19*/
20
21/*! \file bmstrsparsevec.h
22 \brief string sparse vector based on bit-transposed matrix
23*/
24
25#include <stddef.h>
26#include "bmconst.h"
27
28#ifndef BM_NO_STL
29#include <stdexcept>
30#endif
31
32#ifndef BM__H__INCLUDED__
33// BitMagic utility headers do not include main "bm.h" declaration
34// #include "bm.h" or "bm64.h" explicitly
35# error missing include (bm.h or bm64.h)
36#endif
37
38#include "bmtrans.h"
39#include "bmalgo.h"
40#include "bmbuffer.h"
41#include "bmbmatrix.h"
42#include "bmdef.h"
43
44namespace bm
45{
46
47/*!
48 \brief sparse vector for strings with compression using bit transposition method
49
50 Initial string is bit-transposed into bit-planes so collection may use less
51 memory due to prefix sum (GAP) compression in bit-plains.
52
53 @ingroup sv
54*/
55template<typename CharType, typename BV, unsigned MAX_STR_SIZE>
56class str_sparse_vector : public base_sparse_vector<CharType, BV, MAX_STR_SIZE>
57{
58public:
59 typedef BV bvector_type;
62 typedef CharType value_type;
64 typedef typename BV::allocator_type allocator_type;
67 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
70
71 /*! Statistical information about memory allocation details. */
72 struct statistics : public bv_statistics
73 {};
74
76 {
77 sv_octet_plains = MAX_STR_SIZE
78 };
79
80 typedef
81 bm::heap_matrix<unsigned char,
82 MAX_STR_SIZE, // ROWS
83 256, // COLS
86
87 struct is_remap_support { enum trait { value = true }; };
88 struct is_rsc_support { enum trait { value = false }; };
89
90 /**
91 Reference class to access elements via common [] operator
92 @ingroup sv
93 */
95 {
96 public:
99 : str_sv_(str_sv), idx_(idx)
100 {}
101
102 operator const value_type*() const BMNOEXCEPT
103 {
104 str_sv_.get(idx_, buf_, MAX_STR_SIZE);
105 return &(buf_[0]);
106 }
107
108 bool operator==(const const_reference& ref) const BMNOEXCEPT
109 { return bool(*this) == bool(ref); }
110 bool is_null() const BMNOEXCEPT { return str_sv_.is_null(idx_); }
111 private:
113 size_type idx_;
114 mutable CharType buf_[MAX_STR_SIZE];
115 };
116
117 /**
118 Reference class to access elements via common [] operator
119 @ingroup sv
120 */
122 {
123 public:
126 : str_sv_(str_sv), idx_(idx)
127 {}
128
129 operator const value_type*() const BMNOEXCEPT
130 {
131 str_sv_.get(idx_, buf_, MAX_STR_SIZE);
132 return &(buf_[0]);
133 }
134
136 {
137 // TO DO: implement element copy bit by bit
138 str_sv_.set(idx_, (const value_type*)ref);
139 return *this;
140 }
141
143 {
144 str_sv_.set(idx_, str);
145 return *this;
146 }
147 bool operator==(const reference& ref) const BMNOEXCEPT
148 { return bool(*this) == bool(ref); }
149 bool is_null() const BMNOEXCEPT { return str_sv_.is_null(idx_); }
150 private:
152 size_type idx_;
153 mutable CharType buf_[MAX_STR_SIZE];
154 };
155
156 /**
157 Const iterator to do quick traverse of the sparse vector.
158
159 Implementation uses buffer for decoding so, competing changes
160 to the original vector may not match the iterator returned values.
161
162 This iterator keeps an operational buffer of transposed elements,
163 so memory footprint is not negligable.
164
165 @ingroup sv
166 */
168 {
169 public:
170 friend class str_sparse_vector;
171#ifndef BM_NO_STL
172 typedef std::input_iterator_tag iterator_category;
173#endif
180 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
181
182 typedef long long difference_type;
183 typedef CharType* pointer;
184 typedef CharType*& reference;
185 public:
190
191 bool operator==(const const_iterator& it) const BMNOEXCEPT
192 { return (pos_ == it.pos_) && (sv_ == it.sv_); }
194 { return ! operator==(it); }
196 { return pos_ < it.pos_; }
198 { return pos_ <= it.pos_; }
200 { return pos_ > it.pos_; }
202 { return pos_ >= it.pos_; }
203
204 /// \brief Get current position (value)
205 const value_type* operator*() const BMNOEXCEPT { return this->value(); }
206
207 /// \brief Advance to the next available value
209 { this->advance(); return *this; }
210
211 /// \brief Advance to the next available value
213 { const_iterator tmp(*this);this->advance(); return tmp; }
214
215
216 /// \brief Get current position (value)
217 const value_type* value() const BMNOEXCEPT;
218
219 /// \brief Get NULL status
220 bool is_null() const BMNOEXCEPT { return sv_->is_null(this->pos_); }
221
222 /// Returns true if iterator is at a valid position
223 bool valid() const BMNOEXCEPT { return pos_ != bm::id_max; }
224
225 /// Invalidate current iterator
227
228 /// Current position (index) in the vector
229 size_type pos() const BMNOEXCEPT { return pos_; }
230
231 /// re-position to a specified position
233
234 /// advance iterator forward by one
235 void advance() BMNOEXCEPT;
236
237 protected:
238 typedef bm::heap_matrix<CharType,
239 1024, // ROWS: number of strings in one batch
240 MAX_STR_SIZE, // COLS
242
243 private:
244 const str_sparse_vector_type* sv_; ///!< ptr to parent
245 mutable size_type pos_; ///!< Position
246 mutable buffer_matrix_type buf_matrix_; ///!< decode value buffer
247 mutable size_type pos_in_buf_; ///!< buffer position
248 };
249
250
251 /**
252 Back insert iterator implements buffered insert, faster than generic
253 access assignment.
254
255 Limitations for buffered inserter:
256 1. Do not use more than one inserter (into one vector) at the same time
257 2. Use method flush() at the end to send the rest of accumulated buffer
258 flush is happening automatically on destruction, but if flush produces an
259 exception (for whatever reason) it will be an exception in destructor.
260 As such, explicit flush() is safer way to finilize the sparse vector load.
261
262 @ingroup sv
263 */
265 {
266 public:
267#ifndef BM_NO_STL
268 typedef std::output_iterator_tag iterator_category;
269#endif
276 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
277
278 typedef void difference_type;
279 typedef void pointer;
280 typedef void reference;
281
282 public:
286
288 {
289 BM_ASSERT(bi.empty());
290 this->flush(); sv_ = bi.sv_;
291 return *this;
292 }
293
295
296 /** push value to the vector */
298 { this->add(v); return *this; }
299
300
301 /** push value to the vector */
302 template<typename StrType>
304 {
305 this->add(v.c_str()); return *this; // TODO: avoid c_str()
306 }
307
308 /** noop */
309 back_insert_iterator& operator*() { return *this; }
310 /** noop */
311 back_insert_iterator& operator++() { return *this; }
312 /** noop */
313 back_insert_iterator& operator++( int ) { return *this; }
314
315 /** add value to the container*/
316 void add(const value_type* v);
317
318 /** add NULL (no-value) to the container */
319 void add_null();
320
321 /** add a series of consequitve NULLs (no-value) to the container */
322 void add_null(size_type count);
323
324 /** return true if insertion buffer is empty */
325 bool empty() const BMNOEXCEPT;
326
327 /** flush the accumulated buffer */
328 void flush();
329 protected:
331
332 /** add value to the buffer without changing the NULL vector
333 @param v - value to push back
334 @return index of added value in the internal buffer
335 @internal
336 */
337 size_type add_value(const value_type* v);
338
339 private:
340 enum buf_size_e
341 {
342 n_buf_size = 1024 * 8
343 };
344 typedef bm::heap_matrix<CharType,
345 n_buf_size, // ROWS: number of strings in one batch
346 MAX_STR_SIZE, // COLS
348
349 private:
350 str_sparse_vector_type* sv_; ///!< pointer on the parent vector
351 bvector_type* bv_null_; ///!< not NULL vector pointer
352 buffer_matrix_type buf_matrix_; ///!< value buffer
353 size_type pos_in_buf_; ///!< buffer position
354 block_idx_type prev_nb_; ///!< previous block added
355 };
356
357
358public:
359
360 /*!
361 \brief Sparse vector constructor
362
363 \param null_able - defines if vector supports NULL values flag
364 by default it is OFF, use bm::use_null to enable it
365 \param ap - allocation strategy for underlying bit-vectors
366 Default allocation policy uses BM_BIT setting (fastest access)
367 \param bv_max_size - maximum possible size of underlying bit-vectors
368 Please note, this is NOT size of svector itself, it is dynamic upper limit
369 which should be used very carefully if we surely know the ultimate size
370 \param alloc - allocator for bit-vectors
371
372 \sa bvector<>
373 \sa bm::bvector<>::allocation_policy
374 \sa bm::startegy
375 */
378 size_type bv_max_size = bm::id_max,
379 const allocator_type& alloc = allocator_type());
380
381 /*! copy-ctor */
383
384 /*! copy assignmment operator */
387 {
388 if (this != &str_sv)
390 remap_flags_ = str_sv.remap_flags_;
393 return *this;
394 }
395#ifndef BM_NO_CXX11
396 /*! move-ctor */
398 {
399 parent_type::swap(str_sv);
400 remap_flags_ = str_sv.remap_flags_;
401 remap_matrix1_.swap(str_sv.remap_matrix1_);
402 remap_matrix2_.swap(str_sv.remap_matrix2_);
403 }
404
405 /*! move assignmment operator */
408 {
409 if (this != &str_sv)
410 {
411 this->swap(str_sv);
412 }
413 return *this;
414 }
415#endif
416
417public:
418
419 // ------------------------------------------------------------
420 /*! @name String element access */
421 ///@{
422
423 /** \brief Operator to get write access to an element */
424 reference operator[](size_type idx) { return reference(*this, idx); }
425
426 /** \brief Operator to get read access to an element */
428 { return const_reference(*this, idx); }
429
430 /*!
431 \brief set specified element with bounds checking and automatic resize
432 \param idx - element index (vector auto-resized if needs to)
433 \param str - string to set (zero terminated)
434 */
435 void set(size_type idx, const value_type* str);
436
437 /*!
438 \brief set NULL status for the specified element
439 Vector is resized automatically
440 \param idx - element index (vector auto-resized if needs to)
441 */
442 void set_null(size_type idx);
443
444
445 /*!
446 \brief insert the specified element
447 \param idx - element index (vector auto-resized if needs to)
448 \param str - string to set (zero terminated)
449 */
450 void insert(size_type idx, const value_type* str);
451
452
453 /*!
454 \brief insert STL string
455 \param idx - element index (vector auto-resized if needs to)
456 \param str - STL string to set
457 */
458 template<typename StrType>
459 void insert(size_type idx, const StrType& str)
460 {
461 this->insert(idx, str.c_str()); // TODO: avoid c_str()
462 }
463
464 /*!
465 \brief erase the specified element
466 \param idx - element index
467 */
468 void erase(size_type idx);
469
470 /*!
471 \brief get specified element
472
473 \param idx - element index
474 \param str - string buffer
475 \param buf_size - string buffer size
476
477 @return string length
478 */
480 value_type* str, size_type buf_size) const BMNOEXCEPT;
481
482 /*!
483 \brief set specified element with bounds checking and automatic resize
484
485 This is an equivalent of set() method, but templetized to be
486 more compatible with the STL std::string and the likes
487
488 \param idx - element index (vector auto-resized if needs to)
489 \param str - input string
490 expected an STL class with size() support,
491 like basic_string<> or vector<char>
492 */
493 template<typename StrType>
494 void assign(size_type idx, const StrType& str)
495 {
496 if (idx >= this->size())
497 this->size_ = idx+1;
498
499 size_type str_size = size_type(str.size());
500 size_type sz = size_type((str_size < MAX_STR_SIZE) ? str_size : MAX_STR_SIZE-1);
501 if (!sz)
502 {
503 this->clear_value_plains_from(0, idx);
504 return;
505 }
506 unsigned i = 0;
507 for (; i < sz; ++i)
508 {
509 CharType ch = str[i];
510 if (remap_flags_) // compressional re-mapping is in effect
511 {
512 unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
513 BM_ASSERT(remap_value);
514 ch = CharType(remap_value);
515 }
516 this->bmatr_.set_octet(idx, i, (unsigned char)ch);
517 if (!ch)
518 break;
519 } // for i
520 if (idx > sz)
521 return;
522 this->bmatr_.set_octet(idx, sz, 0);
523 this->clear_value_plains_from(unsigned(sz*8+1), idx);
524 }
525
526 /*!
527 \brief push back a string
528 \param str - string to set
529 (STL class with size() support, like basic_string)
530 */
531 template<typename StrType>
532 void push_back(const StrType& str) { assign(this->size_, str); }
533
534 /*!
535 \brief push back a string (zero terminated)
536 \param str - string to set
537 */
538 void push_back(const value_type* str) { set(this->size_, str); }
539
540
541 /*!
542 \brief get specified string element
543 Template method expects an STL-compatible type basic_string<>
544 \param idx - element index (vector auto-resized if needs to)
545 \param str - string to get [out]
546 */
547 template<typename StrType>
548 void get(size_type idx, StrType& str) const
549 {
550 str.clear();
551 for (unsigned i = 0; i < MAX_STR_SIZE; ++i)
552 {
553 CharType ch = CharType(this->bmatr_.get_octet(idx, i));
554 if (ch == 0)
555 break;
556 if (remap_flags_)
557 {
558 const unsigned char* remap_row = remap_matrix1_.row(i);
559 unsigned char remap_value = remap_row[unsigned(ch)];
560 BM_ASSERT(remap_value);
561 if (!remap_value) // unknown dictionary element
562 {
564 break;
565 }
566 ch = CharType(remap_value);
567 }
568 str.push_back(ch);
569 } // for i
570 }
571
572 /*! Swap content */
573 void swap(str_sparse_vector& str_sv) BMNOEXCEPT;
574
575 ///@}
576
577 // ------------------------------------------------------------
578 /*! @name Element comparison functions */
579 ///@{
580
581 /**
582 \brief Compare vector element with argument lexicographically
583
584 NOTE: for a re-mapped container, input string may have no correct
585 remapping, in this case we have an ambiguity
586 (we know it is not equal (0) but LT or GT?).
587 Behavior is undefined.
588
589 \param idx - vactor element index
590 \param str - argument to compare with
591
592 \return 0 - equal, < 0 - vect[i] < str, >0 otherwise
593 */
594 int compare(size_type idx, const value_type* str) const BMNOEXCEPT;
595
596
597 /**
598 \brief Find size of common prefix between two vector elements in octets
599 \return size of common prefix
600 */
601 unsigned common_prefix_length(size_type idx1, size_type idx2) const BMNOEXCEPT;
602
603 ///@}
604
605
606 // ------------------------------------------------------------
607 /*! @name Clear */
608 ///@{
609
610 /*! \brief resize to zero, free memory */
611 void clear() BMNOEXCEPT;
612
613 /*!
614 \brief clear range (assign bit 0 for all plains)
615 \param left - interval start
616 \param right - interval end (closed interval)
617 \param set_null - set cleared values to unassigned (NULL)
618 */
619 str_sparse_vector<CharType, BV, MAX_STR_SIZE>&
620 clear_range(size_type left, size_type right, bool set_null = false)
621 {
623 return *this;
624 }
625
626
627 ///@}
628
629
630 // ------------------------------------------------------------
631 /*! @name Size, etc */
632 ///@{
633
634 /*! \brief return size of the vector
635 \return size of sparse vector
636 */
637 size_type size() const { return this->size_; }
638
639 /*! \brief return true if vector is empty
640 \return true if empty
641 */
642 bool empty() const { return (size() == 0); }
643
644 /*! \brief resize vector
645 \param sz - new size
646 */
647 void resize(size_type sz) { parent_type::resize(sz); }
648
649 /*! \brief get maximum string length capacity
650 \return maximum string length sparse vector can take
651 */
652 static size_type max_str() { return sv_octet_plains; }
653
654 /*! \brief get effective string length used in vector
655 Calculate and returns efficiency, how close are we
656 to the reserved maximum.
657 \return current string length maximum
658 */
660
661 /*! \brief get effective string length used in vector
662 \return current string length maximum
663 */
665 ///@}
666
667
668 // ------------------------------------------------------------
669 /*! @name Memory optimization/compression */
670 ///@{
671
672 /*!
673 \brief run memory optimization for all vector plains
674 \param temp_block - pre-allocated memory block to avoid unnecessary re-allocs
675 \param opt_mode - requested compression depth
676 \param stat - memory allocation statistics after optimization
677 */
678 void optimize(
679 bm::word_t* temp_block = 0,
682
683 /*!
684 @brief Calculates memory statistics.
685
686 Function fills statistics structure containing information about how
687 this vector uses memory and estimation of max. amount of memory
688 bvector needs to serialize itself.
689
690 @param st - pointer on statistics structure to be filled in.
691
692 @sa statistics
693 */
694 void calc_stat(
696 ) const BMNOEXCEPT;
697
698
699 ///@}
700
701 // ------------------------------------------------------------
702 /*! @name Iterator access */
703 //@{
704
705 /** Provide const iterator access to container content */
707
708 /** Provide const iterator access to the end */
710
711 /** Get const_itertor re-positioned to specific element
712 @param idx - position in the sparse vector
713 */
716
717 /** Provide back insert iterator
718 Back insert iterator implements buffered insertion, which is faster, than random access
719 or push_back
720 */
723
724 ///@}
725
726
727
728 // ------------------------------------------------------------
729 /*! @name Various traits */
730 ///@{
731
732 /** \brief trait if sparse vector is "compressed" (false)
733 */
734 static
735 bool is_compressed() BMNOEXCEPT { return false; }
736
737 ///@}
738
739 // ------------------------------------------------------------
740 /*! @name remapping, succinct utilities
741 Remapping implements reduction of dit-depth thus improves
742 search performance. Remapping limits farther modifications
743 of sparse vector.
744 */
745 ///@{
746
747 /**
748 Get remapping status (true|false)
749 */
750 bool is_remap() const BMNOEXCEPT { return remap_flags_ != 0; }
751
752 /**
753 Build remapping profile and load content from another sparse vector
754 \param str_sv - source sparse vector (assumed it is not remapped)
755 */
756 void remap_from(const str_sparse_vector& str_sv);
757
758 /*!
759 Calculate flags which octets are present on each byte-plain.
760 @internal
761 */
762 void calc_octet_stat(plain_octet_matrix_type& octet_matrix) const BMNOEXCEPT;
763
764 static
766 plain_octet_matrix_type& octet_remap_matrix1,
767 plain_octet_matrix_type& octet_remap_matrix2,
768 const plain_octet_matrix_type& octet_occupancy_matrix);
769 /*!
770 remap string from external (ASCII) system to matrix internal code
771 @return true if remapping was ok, false if found incorrect value
772 for the plain
773 @internal
774 */
775 static
776 bool remap_tosv(value_type* BMRESTRICT sv_str,
777 size_type buf_size,
778 const value_type* BMRESTRICT str,
779 const plain_octet_matrix_type& BMRESTRICT octet_remap_matrix2
780 ) BMNOEXCEPT;
781
782 /*!
783 remap string from external (ASCII) system to matrix internal code
784 @internal
785 */
786 bool remap_tosv(value_type* sv_str,
787 size_type buf_size,
788 const value_type* str) const BMNOEXCEPT
789 {
790 return remap_tosv(sv_str, buf_size, str, remap_matrix2_);
791 }
792 /*!
793 remap string from internal code to external (ASCII) system
794 @return true if remapping was ok, false if found incorrect value
795 for the plain
796 @internal
797 */
798 static
799 bool remap_fromsv(
801 size_type buf_size,
802 const value_type* BMRESTRICT sv_str,
803 const plain_octet_matrix_type& BMRESTRICT octet_remap_matrix1
804 ) BMNOEXCEPT;
805 /*!
806 re-calculate remap matrix2 based on matrix1
807 @internal
808 */
810
811 ///@}
812
813 // ------------------------------------------------------------
814 /*! @name Export content to C-style */
815 ///@{
816
817 /**
818 \brief Bulk export strings to a C-style matrix of chars
819
820 \param cmatr - dest matrix (bm::heap_matrix)
821 \param idx_from - index in the sparse vector to export from
822 \param dec_size - decoding size (matrix column allocation should match)
823 \param zero_mem - set to false if target array is pre-initialized
824 with 0s to avoid performance penalty
825
826 \return number of actually exported elements (can be less than requested)
827 */
828 template<typename CharMatrix>
829 size_type decode(CharMatrix& cmatr,
830 size_type idx_from, size_type dec_size,
831 bool zero_mem = true) const
832 {
833 BM_ASSERT(cmatr.is_init());
834 if (zero_mem)
835 cmatr.set_zero();
836
837 size_type rows = size_type(cmatr.rows());
838 BM_ASSERT(cmatr.cols() >= MAX_STR_SIZE);
839 size_type max_sz = this->size() - idx_from;
840 if (max_sz < dec_size)
841 dec_size = max_sz;
842 if (rows < dec_size)
843 dec_size = rows;
844 if (!dec_size)
845 return dec_size;
846
847 for (unsigned i = 0; i < MAX_STR_SIZE; ++i)
848 {
849 unsigned bi = 0;
850 for (unsigned k = i * 8; k < (i * 8) + 8; ++k, ++bi)
851 {
852 const bvector_type* bv = this->bmatr_.get_row(k);
853 if (!bv)
854 continue;
855 value_type mask = value_type(1u << bi);
856 typename bvector_type::enumerator en(bv, idx_from);
857 for ( ;en.valid(); ++en )
858 {
859 size_type idx = *en - idx_from;
860 if (idx >= dec_size)
861 break;
862 typename CharMatrix::value_type* str = cmatr.row(idx);
863 str[i] |= mask;
864 } // for en
865 } // for k
866 } // for i
867
868 if (remap_flags_)
869 {
870 for (unsigned i = 0; i < dec_size; ++i)
871 {
872 typename CharMatrix::value_type* str = cmatr.row(i);
873 remap_matrix1_.remapz(str);
874 } // for i
875 }
876 return dec_size;
877 }
878
879 /**
880 \brief Bulk import of strings from a C-style matrix of chars
881
882 \param cmatr - source matrix (bm::heap_matrix)
883 [in/out] parameter gets modified(corrupted)
884 in the process
885 \param idx_from - destination index in the sparse vector
886 \param imp_size - import size (number or rows to import)
887 */
888 template<typename CharMatrix>
889 void import(CharMatrix& cmatr, size_type idx_from, size_type imp_size)
890 {
891 if (!imp_size)
892 return;
893 if (idx_from < this->size_) // in case it touches existing elements
894 {
895 // clear all plains in the range to provide corrrect import of 0 values
896 this->clear_range(idx_from, idx_from + imp_size - 1);
897 }
898 import_no_check(cmatr, idx_from, imp_size);
899 }
900
901 /**
902 \brief Bulk push-back import of strings from a C-style matrix of chars
903
904 \param cmatr - source matrix (bm::heap_matrix)
905 [in/out] parameter gets modified(corrupted)
906 in the process
907 \param imp_size - import size (number or rows to import)
908 */
909 template<typename CharMatrix>
910 void import_back(CharMatrix& cmatr, size_type imp_size)
911 {
912 if (!imp_size)
913 return;
914 import_no_check(cmatr, this->size(), imp_size);
915 }
916
917
918 ///@}
919
920 // ------------------------------------------------------------
921 /*! @name Merge, split, partition data */
922 ///@{
923
924 /**
925 @brief copy range of values from another sparse vector
926
927 Copy [left..right] values from the source vector,
928 clear everything outside the range.
929
930 \param sv - source vector
931 \param left - index from in losed diapason of [left..right]
932 \param right - index to in losed diapason of [left..right]
933 \param splice_null - "use_null" copy range for NULL vector or
934 do not copy it
935 */
937 size_type left, size_type right,
938 bm::null_support splice_null = bm::use_null);
939
940 ///@}
941
942 // ------------------------------------------------------------
943
944 /*! \brief syncronize internal structures */
945 void sync(bool force);
946
947 /*!
948 \brief check if another sparse vector has the same content and size
949
950 \param sv - sparse vector for comparison
951 \param null_able - flag to consider NULL vector in comparison (default)
952 or compare only value content plains
953
954 \return true, if it is the same
955 */
957 bm::null_support null_able = bm::use_null) const BMNOEXCEPT;
958
959 /**
960 \brief find position of compressed element by its rank
961 */
962 static
964
965 /**
966 \brief size of sparse vector (may be different for RSC)
967 */
969
970protected:
971
972 /// @internal
973 template<typename CharMatrix>
974 void import_no_check(CharMatrix& cmatr,
975 size_type idx_from, size_type imp_size,
976 bool set_not_null = true)
977 {
978 BM_ASSERT (cmatr.is_init());
979
980 unsigned max_str_size = 0;
981 {
982 for (unsigned j = 0; j < imp_size; ++j)
983 {
984 typename CharMatrix::value_type* str = cmatr.row(j);
985 unsigned i;
986 for (i = 0; i < MAX_STR_SIZE; ++i)
987 {
988 value_type ch = str[i];
989 if (!ch)
990 {
991 max_str_size = (i > max_str_size) ? i : max_str_size;
992 break;
993 }
994 if (remap_flags_) // re-mapping is in effect
995 {
996 unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
997 BM_ASSERT(remap_value);
998 if (!remap_value) // unknown dictionary element
1000 str[i] = CharType(remap_value);
1001 }
1002 } // for i
1003 if (i == MAX_STR_SIZE)
1004 max_str_size = i;
1005 } // for j
1006 }
1007
1008 size_type bit_list[CharMatrix::n_rows];
1009 for (unsigned i = 0; i < max_str_size; ++i)
1010 {
1011 for (unsigned bi = 0; bi < 8; ++bi)
1012 {
1013 unsigned n_bits = 0;
1014 value_type mask = value_type(1u << bi);
1015 for (size_type j = 0; j < imp_size; ++j)
1016 {
1017 typename CharMatrix::value_type* str = cmatr.row(j);
1018 value_type ch = str[i];
1019 if (!ch)
1020 continue;
1021 if (ch & mask)
1022 {
1023 bit_list[n_bits++] = idx_from + j;
1024 str[i] ^= mask;
1025 }
1026 } // for j
1027 if (n_bits) // set transposed bits to the target plain
1028 {
1029 unsigned plain = i*8 + bi;
1030 bvector_type* bv = this->bmatr_.get_row(plain);
1031 if (!bv)
1032 {
1033 bv = this->bmatr_.construct_row(plain);
1034 bv->init();
1035 }
1036 bv->set(&bit_list[0], n_bits, BM_SORTED);
1037 }
1038 } // for k
1039 } // for i
1040
1041 size_type idx_to = idx_from + imp_size - 1;
1042 if (set_not_null)
1043 {
1044 bvector_type* bv_null = this->get_null_bvect();
1045 if (bv_null)
1046 bv_null->set_range(idx_from, idx_to);
1047 }
1048 if (idx_to >= this->size())
1049 this->size_ = idx_to+1;
1050
1051 }
1052
1053 // ------------------------------------------------------------
1054 /*! @name Errors and exceptions */
1055 ///@{
1056
1057 /**
1058 \brief throw range error
1059 \internal
1060 */
1061 static
1062 void throw_range_error(const char* err_msg);
1063
1064 /**
1065 \brief throw domain error
1066 \internal
1067 */
1068 static
1069 void throw_bad_value(const char* err_msg);
1070
1071 ///@}
1072
1073 /*! \brief set value without checking boundaries */
1074 void set_value(size_type idx, const value_type* str);
1075
1076 /*! \brief set value without checking boundaries or support of NULL */
1077 void set_value_no_null(size_type idx, const value_type* str);
1078
1079 /*! \brief insert value without checking boundaries */
1080 void insert_value(size_type idx, const value_type* str);
1081
1082 /*! \brief insert value without checking boundaries or support of NULL */
1083 void insert_value_no_null(size_type idx, const value_type* str);
1084
1085
1086 size_type size_internal() const { return size(); }
1088
1089 size_t remap_size() const { return remap_matrix1_.get_buffer().size(); }
1090 const unsigned char* get_remap_buffer() const
1091 { return remap_matrix1_.get_buffer().buf(); }
1092 unsigned char* init_remap_buffer()
1093 {
1094 remap_matrix1_.init();
1095 return remap_matrix1_.get_buffer().data();
1096 }
1097 void set_remap() { remap_flags_ = 1; }
1098
1099protected:
1100
1102 size_type* idx_from, size_type* idx_to) const
1103 {
1104 *idx_from = from; *idx_to = to; return true;
1105 }
1106
1107protected:
1108 template<class SVect> friend class sparse_vector_serializer;
1109 template<class SVect> friend class sparse_vector_deserializer;
1110
1111protected:
1112 unsigned remap_flags_; ///< remapping status
1113 plain_octet_matrix_type remap_matrix1_; ///< octet remap table 1
1114 plain_octet_matrix_type remap_matrix2_; ///< octet remap table 2
1115};
1116
1117//---------------------------------------------------------------------
1118//---------------------------------------------------------------------
1119
1120
1121template<class CharType, class BV, unsigned MAX_STR_SIZE>
1123 bm::null_support null_able,
1125 size_type bv_max_size,
1126 const allocator_type& alloc)
1127: parent_type(null_able, ap, bv_max_size, alloc),
1128 remap_flags_(0)
1129{}
1130
1131
1132//---------------------------------------------------------------------
1133
1134template<class CharType, class BV, unsigned MAX_STR_SIZE>
1136 const str_sparse_vector& str_sv)
1137: parent_type(str_sv),
1138 remap_flags_(str_sv.remap_flags_),
1139 remap_matrix1_(str_sv.remap_matrix1_),
1140 remap_matrix2_(str_sv.remap_matrix2_)
1141{}
1142
1143//---------------------------------------------------------------------
1144
1145template<class CharType, class BV, unsigned MAX_STR_SIZE>
1148{
1149 parent_type::swap(str_sv);
1150 bm::xor_swap(remap_flags_, str_sv.remap_flags_);
1151 remap_matrix1_.swap(str_sv.remap_matrix1_);
1152 remap_matrix2_.swap(str_sv.remap_matrix2_);
1153}
1154
1155//---------------------------------------------------------------------
1156
1157template<class CharType, class BV, unsigned MAX_STR_SIZE>
1159 size_type idx, const value_type* str)
1160{
1161 if (idx >= this->size())
1162 this->size_ = idx+1;
1163 set_value(idx, str);
1164}
1165
1166//---------------------------------------------------------------------
1167
1168template<class CharType, class BV, unsigned MAX_STR_SIZE>
1170 size_type idx, const value_type* str)
1171{
1172 if (idx >= this->size())
1173 {
1174 this->size_ = idx+1;
1175 set_value(idx, str);
1176 return;
1177 }
1178 insert_value(idx, str);
1179 this->size_++;
1180}
1181
1182//---------------------------------------------------------------------
1183
1184template<class CharType, class BV, unsigned MAX_STR_SIZE>
1186{
1187 BM_ASSERT(idx < this->size_);
1188 if (idx >= this->size_)
1189 return;
1190 this->erase_column(idx);
1191 this->size_--;
1192}
1193
1194//---------------------------------------------------------------------
1195
1196template<class CharType, class BV, unsigned MAX_STR_SIZE>
1198{
1199 bvector_type* bv_null = this->get_null_bvect();
1200 if (bv_null)
1201 bv_null->clear_bit_no_check(idx);
1202 if (idx >= this->size_)
1203 {
1204 this->size_ = idx + 1;
1205 }
1206}
1207
1208//---------------------------------------------------------------------
1209
1210template<class CharType, class BV, unsigned MAX_STR_SIZE>
1212 size_type idx, const value_type* str)
1213{
1214 set_value_no_null(idx, str);
1215 bvector_type* bv_null = this->get_null_bvect();
1216 if (bv_null)
1217 bv_null->set_bit_no_check(idx);
1218}
1219
1220//---------------------------------------------------------------------
1221
1222template<class CharType, class BV, unsigned MAX_STR_SIZE>
1224 size_type idx, const value_type* str)
1225{
1226 for (unsigned i = 0; i < MAX_STR_SIZE; ++i)
1227 {
1228 CharType ch = str[i];
1229 if (!ch)
1230 {
1231 this->clear_value_plains_from(i*8, idx);
1232 return;
1233 }
1234
1235 if (remap_flags_) // compressional re-mapping is in effect
1236 {
1237 unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
1238 BM_ASSERT(remap_value);
1239 if (!remap_value) // unknown dictionary element
1240 {
1241 this->clear_value_plains_from(i*8, idx);
1242 return;
1243 }
1244 ch = CharType(remap_value);
1245 }
1246 this->bmatr_.set_octet(idx, i, (unsigned char)ch);
1247 } // for i
1248}
1249
1250//---------------------------------------------------------------------
1251
1252template<class CharType, class BV, unsigned MAX_STR_SIZE>
1254 size_type idx, const value_type* str)
1255{
1256 insert_value_no_null(idx, str);
1257 this->insert_null(idx, true);
1258}
1259
1260//---------------------------------------------------------------------
1261
1262template<class CharType, class BV, unsigned MAX_STR_SIZE>
1264 size_type idx, const value_type* str)
1265{
1266 for (unsigned i = 0; i < MAX_STR_SIZE; ++i)
1267 {
1268 CharType ch = str[i];
1269 if (!ch)
1270 {
1271 this->insert_clear_value_plains_from(i*8, idx);
1272 return;
1273 }
1274
1275 if (remap_flags_) // compressional re-mapping is in effect
1276 {
1277 unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
1278 BM_ASSERT(remap_value);
1279 if (!remap_value) // unknown dictionary element
1280 {
1281 this->insert_clear_value_plains_from(i*8, idx);
1282 return;
1283 }
1284 ch = CharType(remap_value);
1285 }
1286 this->bmatr_.insert_octet(idx, i, (unsigned char)ch);
1287 } // for i
1288}
1289
1290
1291//---------------------------------------------------------------------
1292
1293template<class CharType, class BV, unsigned MAX_STR_SIZE>
1296 size_type idx, value_type* str, size_type buf_size) const BMNOEXCEPT
1297{
1298 size_type i = 0;
1299 for (; i < MAX_STR_SIZE; ++i)
1300 {
1301 if (i < buf_size)
1302 str[i] = 0;
1303 else
1304 break;
1305 CharType ch = CharType(this->bmatr_.get_octet(idx, i));
1306 if (ch == 0)
1307 {
1308 str[i] = 0;
1309 break;
1310 }
1311 str[i] = ch;
1312 } // for i
1313 if (remap_flags_)
1314 {
1315 remap_matrix1_.remap(str, i);
1316 }
1317 return i;
1318}
1319
1320//---------------------------------------------------------------------
1321
1322template<class CharType, class BV, unsigned MAX_STR_SIZE>
1324 bm::word_t* temp_block,
1325 typename bvector_type::optmode opt_mode,
1327{
1328 typename bvector_type::statistics stbv;
1329 parent_type::optimize(temp_block, opt_mode, &stbv);
1330
1331 if (st)
1332 st->add(stbv);
1333}
1334
1335//---------------------------------------------------------------------
1336
1337template<class CharType, class BV, unsigned MAX_STR_SIZE>
1340 ) const BMNOEXCEPT
1341{
1342 BM_ASSERT(st);
1343 typename bvector_type::statistics stbv;
1344 parent_type::calc_stat(&stbv);
1345
1346 st->reset();
1347
1348 st->bit_blocks += stbv.bit_blocks;
1349 st->gap_blocks += stbv.gap_blocks;
1350 st->ptr_sub_blocks += stbv.ptr_sub_blocks;
1351 st->bv_count += stbv.bv_count;
1352 st->max_serialize_mem += stbv.max_serialize_mem + 8;
1353 st->memory_used += stbv.memory_used;
1354 st->gap_cap_overhead += stbv.gap_cap_overhead;
1355
1356 size_t remap_mem_usage = sizeof(remap_flags_);
1357 remap_mem_usage += remap_matrix1_.get_buffer().mem_usage();
1358 remap_mem_usage += remap_matrix2_.get_buffer().mem_usage();
1359
1360 st->memory_used += remap_mem_usage;
1361 if (remap_flags_) // use of remapping requires some extra storage
1362 {
1363 st->max_serialize_mem += remap_mem_usage;
1364 }
1365}
1366
1367//---------------------------------------------------------------------
1368
1369template<class CharType, class BV, unsigned MAX_STR_SIZE>
1371 size_type idx,
1372 const value_type* str) const BMNOEXCEPT
1373{
1374 BM_ASSERT(str);
1375 int res = 0;
1376 for (unsigned i = 0; i < MAX_STR_SIZE; ++i)
1377 {
1378 CharType ch = str[i];
1379 if (remap_flags_ && ch)
1380 {
1381 unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
1382 if (!remap_value) // unknown dictionary element
1383 {
1384 throw_bad_value(0);
1385 }
1386 ch = CharType(remap_value);
1387 }
1388
1389 res = this->bmatr_.compare_octet(idx, i, ch);
1390 if (res || !ch)
1391 break;
1392 } // for
1393 return res;
1394}
1395
1396//---------------------------------------------------------------------
1397
1398template<class CharType, class BV, unsigned MAX_STR_SIZE>
1400 size_type idx1, size_type idx2) const BMNOEXCEPT
1401{
1402 unsigned i = 0;
1403 for (; i < MAX_STR_SIZE; ++i)
1404 {
1405 CharType ch1 = CharType(this->bmatr_.get_octet(idx1, i));
1406 CharType ch2 = CharType(this->bmatr_.get_octet(idx2, i));
1407 if (!ch1 || !ch2)
1408 {
1409 if (i)
1410 --i;
1411 break;
1412 }
1413 if (ch1 != ch2)
1414 {
1415 break;
1416 }
1417 } // for
1418
1419 return i;
1420}
1421
1422//---------------------------------------------------------------------
1423
1424template<class CharType, class BV, unsigned MAX_STR_SIZE>
1425bool
1427 size_type rank,
1428 size_type& pos) BMNOEXCEPT
1429{
1430 BM_ASSERT(rank);
1431 pos = rank - 1;
1432 return true;
1433}
1434
1435//---------------------------------------------------------------------
1436
1437template<class CharType, class BV, unsigned MAX_STR_SIZE>
1440 const BMNOEXCEPT
1441{
1442 for (int i = MAX_STR_SIZE-1; i >= 0; --i)
1443 {
1444 unsigned octet_plain = unsigned(i) * unsigned(sizeof(CharType)) * 8;
1445 for (unsigned j = 0; j < sizeof(CharType) * 8; ++j)
1446 {
1447 if (this->bmatr_.row(octet_plain+j))
1448 return unsigned(i)+1;
1449 } // for j
1450 } // for i
1451 return 0;
1452}
1453
1454//---------------------------------------------------------------------
1455
1456template<class CharType, class BV, unsigned MAX_STR_SIZE>
1458 plain_octet_matrix_type& octet_matrix) const BMNOEXCEPT
1459{
1460 octet_matrix.init();
1461 octet_matrix.set_zero();
1462
1463 size_type size = this->size();
1464
1465 for (unsigned i = 0; i < MAX_STR_SIZE; ++i)
1466 {
1467 unsigned char* row = octet_matrix.row(i);
1468
1469 // TODO: optimize partial transposition
1470 for (size_type j = 0; j < size; ++j)
1471 {
1472 unsigned char ch = this->bmatr_.get_octet(j, i);
1473 unsigned k = ch;
1474 if (k)
1475 row[k] = 1;
1476 } // for j
1477 } // for i
1478}
1479
1480//---------------------------------------------------------------------
1481
1482template<class CharType, class BV, unsigned MAX_STR_SIZE>
1484 plain_octet_matrix_type& octet_remap_matrix1,
1485 plain_octet_matrix_type& octet_remap_matrix2,
1486 const plain_octet_matrix_type& octet_occupancy_matrix)
1487{
1488 octet_remap_matrix1.init();
1489 octet_remap_matrix1.set_zero();
1490 octet_remap_matrix2.init();
1491 octet_remap_matrix2.set_zero();
1492
1493 for (unsigned i = 0; i < octet_occupancy_matrix.rows(); ++i)
1494 {
1495 const unsigned char* row = octet_occupancy_matrix.row(i);
1496 unsigned char* remap_row1 = octet_remap_matrix1.row(i);
1497 unsigned char* remap_row2 = octet_remap_matrix2.row(i);
1498 unsigned count = 1;
1499 for (unsigned j = 1; j < octet_occupancy_matrix.cols(); ++j)
1500 {
1501 if (row[j]) // octet is present
1502 {
1503 // set two remapping table values
1504 remap_row1[count] = (unsigned char)j;
1505 remap_row2[j] = (unsigned char)count;
1506 ++count;
1507 BM_ASSERT(count < 256);
1508 }
1509 } // for j
1510 } // for i
1511}
1512
1513//---------------------------------------------------------------------
1514
1515template<class CharType, class BV, unsigned MAX_STR_SIZE>
1517{
1518 BM_ASSERT(remap_flags_);
1519
1520 remap_matrix2_.init();
1521 remap_matrix2_.set_zero();
1522
1523 for (unsigned i = 0; i < remap_matrix1_.rows(); ++i)
1524 {
1525 const unsigned char* remap_row1 = remap_matrix1_.row(i);
1526 unsigned char* remap_row2 = remap_matrix2_.row(i);
1527 for (unsigned j = 1; j < remap_matrix1_.cols(); ++j)
1528 {
1529 if (remap_row1[j])
1530 {
1531 unsigned count = remap_row1[j];
1532 remap_row2[count] = (unsigned char)j;
1533 BM_ASSERT(count < 256);
1534 }
1535 } // for j
1536 } // for i
1537}
1538
1539//---------------------------------------------------------------------
1540
1541template<class CharType, class BV, unsigned MAX_STR_SIZE>
1543 value_type* BMRESTRICT sv_str,
1544 size_type buf_size,
1545 const value_type* BMRESTRICT str,
1546 const plain_octet_matrix_type& BMRESTRICT octet_remap_matrix2) BMNOEXCEPT
1547{
1548 for (unsigned i = 0; i < buf_size; ++i)
1549 {
1550 CharType ch = str[i];
1551 if (ch == 0)
1552 {
1553 sv_str[i] = ch;
1554 break;
1555 }
1556 const unsigned char* remap_row = octet_remap_matrix2.row(i);
1557 unsigned char remap_value = remap_row[unsigned(ch)];
1558 if (!remap_value) // unknown dictionary element
1559 {
1560 return false;
1561 }
1562 sv_str[i] = CharType(remap_value);
1563 } // for i
1564 return true;
1565}
1566
1567//---------------------------------------------------------------------
1568
1569template<class CharType, class BV, unsigned MAX_STR_SIZE>
1572 size_type buf_size,
1573 const value_type* BMRESTRICT sv_str,
1574 const plain_octet_matrix_type& BMRESTRICT octet_remap_matrix1
1575 ) BMNOEXCEPT
1576{
1577 for (unsigned i = 0; i < buf_size; ++i)
1578 {
1579 CharType ch = sv_str[i];
1580 if (ch == 0)
1581 {
1582 str[i] = ch;
1583 break;
1584 }
1585 const unsigned char* remap_row = octet_remap_matrix1.row(i);
1586 unsigned char remap_value = remap_row[unsigned(ch)];
1587 if (!remap_value) // unknown dictionary element
1588 {
1589 return false;
1590 }
1591 str[i] = CharType(remap_value);
1592 } // for i
1593 return true;
1594}
1595
1596//---------------------------------------------------------------------
1597
1598template<class CharType, class BV, unsigned MAX_STR_SIZE>
1599void
1601{
1602 if (str_sv.is_remap())
1603 {
1604 *this = str_sv;
1605 return;
1606 }
1607 this->clear();
1608 if (str_sv.empty()) // no content to remap
1609 {
1610 return;
1611 }
1612
1613 plain_octet_matrix_type omatrix; // occupancy map
1614 str_sv.calc_octet_stat(omatrix);
1615
1616 str_sv.build_octet_remap(remap_matrix1_, remap_matrix2_, omatrix);
1617 remap_flags_ = 1; // turn ON remapped mode
1618
1619 const unsigned buffer_size = 1024 * 8;
1620
1621 typedef bm::heap_matrix<CharType,
1622 buffer_size, // ROWS: number of strings in one batch
1623 MAX_STR_SIZE, // COLS
1624 allocator_type> remap_buffer_type;
1625
1626 remap_buffer_type cmatr(true);
1627 for (size_type i = 0; true; )
1628 {
1629 size_type dsize = str_sv.decode(cmatr, i, buffer_size, true);
1630 if (!dsize)
1631 break;
1632 this->import(cmatr, i, dsize);
1633 i += dsize;
1634 } // for i
1635}
1636
1637//---------------------------------------------------------------------
1638
1639template<class CharType, class BV, unsigned MAX_STR_SIZE>
1641{
1642 if (remap_flags_)
1643 {
1644 recalc_remap_matrix2();
1645 }
1646}
1647
1648//---------------------------------------------------------------------
1649
1650template<class CharType, class BV, unsigned MAX_STR_SIZE>
1653 bm::null_support null_able) const BMNOEXCEPT
1654{
1655 // at this point both vectors should have the same remap settings
1656 // to be considered "equal".
1657 // Strictly speaking this is incorrect, because re-map and non-remap
1658 // vectors may have the same content
1659
1660 if (remap_flags_ != sv.remap_flags_)
1661 return false;
1662 if (remap_flags_)
1663 {
1664 bool b;
1665 b = remap_matrix1_.get_buffer().equal(sv.remap_matrix1_.get_buffer());
1666 if (!b)
1667 return b;
1668 b = remap_matrix2_.get_buffer().equal(sv.remap_matrix2_.get_buffer());
1669 if (!b)
1670 return b;
1671 }
1672 return parent_type::equal(sv, null_able);
1673}
1674
1675//---------------------------------------------------------------------
1676
1677template<class CharType, class BV, unsigned MAX_STR_SIZE>
1680 size_type left, size_type right,
1681 bm::null_support splice_null)
1682{
1683 if (left > right)
1684 bm::xor_swap(left, right);
1685 this->clear();
1686
1687 remap_flags_ = sv.remap_flags_;
1688 remap_matrix1_ = sv.remap_matrix1_;
1689 remap_matrix2_ = sv.remap_matrix2_;
1690
1691 this->copy_range_plains(sv, left, right, splice_null);
1692 this->resize(sv.size());
1693}
1694
1695
1696//---------------------------------------------------------------------
1697
1698template<class CharType, class BV, unsigned MAX_STR_SIZE>
1701{
1702 typedef typename
1704 return it_type(this);
1705}
1706
1707//---------------------------------------------------------------------
1708
1709template<class CharType, class BV, unsigned MAX_STR_SIZE>
1711{
1712 parent_type::clear();
1713}
1714
1715//---------------------------------------------------------------------
1716
1717template<class CharType, class BV, unsigned MAX_STR_SIZE>
1719 const char* err_msg)
1720{
1721#ifndef BM_NO_STL
1722 throw std::range_error(err_msg);
1723#else
1724 BM_ASSERT_THROW(false, BM_ERR_RANGE);
1725#endif
1726}
1727
1728//---------------------------------------------------------------------
1729
1730template<class CharType, class BV, unsigned MAX_STR_SIZE>
1732 const char* err_msg)
1733{
1734#ifndef BM_NO_STL
1735 if (!err_msg)
1736 err_msg = "Unknown/incomparable dictionary character";
1737 throw std::domain_error(err_msg);
1738#else
1739 BM_ASSERT_THROW(false, BM_BAD_VALUE);
1740#endif
1741}
1742
1743
1744//---------------------------------------------------------------------
1745//
1746//---------------------------------------------------------------------
1747
1748
1749template<class CharType, class BV, unsigned MAX_STR_SIZE>
1753
1754//---------------------------------------------------------------------
1755
1756template<class CharType, class BV, unsigned MAX_STR_SIZE>
1759: sv_(it.sv_), pos_(it.pos_), pos_in_buf_(~size_type(0))
1760{}
1761
1762//---------------------------------------------------------------------
1763
1764template<class CharType, class BV, unsigned MAX_STR_SIZE>
1769
1770//---------------------------------------------------------------------
1771
1772template<class CharType, class BV, unsigned MAX_STR_SIZE>
1776: sv_(sv), pos_(pos >= sv->size() ? bm::id_max : pos), pos_in_buf_(~size_type(0))
1777{}
1778
1779//---------------------------------------------------------------------
1780
1781template<class CharType, class BV, unsigned MAX_STR_SIZE>
1784{
1785 BM_ASSERT(sv_);
1786 BM_ASSERT(this->valid());
1787 if (pos_in_buf_ == ~size_type(0))
1788 {
1789 if (!buf_matrix_.is_init())
1790 buf_matrix_.init();
1791 pos_in_buf_ = 0;
1792 size_type d = sv_->decode(buf_matrix_, pos_, buffer_matrix_type::n_rows);
1793 if (!d)
1794 {
1795 pos_ = bm::id_max;
1796 return 0;
1797 }
1798 }
1799 return buf_matrix_.row(pos_in_buf_);
1800}
1801
1802//---------------------------------------------------------------------
1803
1804template<class CharType, class BV, unsigned MAX_STR_SIZE>
1805void
1808 ) BMNOEXCEPT
1809{
1810 pos_ = (!sv_ || pos >= sv_->size()) ? bm::id_max : pos;
1811 pos_in_buf_ = ~size_type(0);
1812}
1813
1814//---------------------------------------------------------------------
1815
1816template<class CharType, class BV, unsigned MAX_STR_SIZE>
1817void
1819{
1820 if (pos_ == bm::id_max) // nothing to do, we are at the end
1821 return;
1822 ++pos_;
1823
1824 if (pos_ >= sv_->size())
1825 this->invalidate();
1826 else
1827 {
1828 if (pos_in_buf_ != ~size_type(0))
1829 {
1830 ++pos_in_buf_;
1831 if (pos_in_buf_ >= buffer_matrix_type::n_rows)
1832 pos_in_buf_ = ~size_type(0);
1833 }
1834 }
1835}
1836
1837//---------------------------------------------------------------------
1838//
1839//---------------------------------------------------------------------
1840
1841template<class CharType, class BV, unsigned MAX_STR_SIZE>
1845
1846//---------------------------------------------------------------------
1847
1848template<class CharType, class BV, unsigned MAX_STR_SIZE>
1851: sv_(sv), pos_in_buf_(~size_type(0))
1852{
1853 if (sv)
1854 {
1855 prev_nb_ = sv_->size() >> bm::set_block_shift;
1856 bv_null_ = sv_->get_null_bvect();
1857 }
1858 else
1859 {
1860 bv_null_ = 0; prev_nb_ = 0;
1861 }
1862}
1863
1864//---------------------------------------------------------------------
1865
1866template<class CharType, class BV, unsigned MAX_STR_SIZE>
1869: sv_(bi.sv_), bv_null_(bi.bv_null_), pos_in_buf_(~size_type(0)), prev_nb_(bi.prev_nb_)
1870{
1871 BM_ASSERT(bi.empty());
1872}
1873
1874//---------------------------------------------------------------------
1875
1876template<class CharType, class BV, unsigned MAX_STR_SIZE>
1881
1882//---------------------------------------------------------------------
1883
1884template<class CharType, class BV, unsigned MAX_STR_SIZE>
1885bool
1887 const BMNOEXCEPT
1888{
1889 return (pos_in_buf_ == ~size_type(0) || !sv_);
1890}
1891
1892//---------------------------------------------------------------------
1893
1894template<class CharType, class BV, unsigned MAX_STR_SIZE>
1896{
1897 if (this->empty())
1898 return;
1899
1900 sv_->import_no_check(buf_matrix_, sv_->size(), pos_in_buf_+1, false);
1901 pos_in_buf_ = ~size_type(0);
1902 block_idx_type nb = sv_->size() >> bm::set_block_shift;
1903 if (nb != prev_nb_)
1904 {
1905 // optimize all previous blocks in all planes
1906 sv_->optimize_block(prev_nb_);
1907 prev_nb_ = nb;
1908 }
1909}
1910
1911//---------------------------------------------------------------------
1912
1913template<class CharType, class BV, unsigned MAX_STR_SIZE>
1916{
1917 if (!v)
1918 {
1919 this->add_null();
1920 return;
1921 }
1922 size_type buf_idx = this->add_value(v);
1923 if (bv_null_)
1924 {
1925 size_type sz = sv_->size();
1926 bv_null_->set_bit_no_check(sz + buf_idx);
1927 }
1928}
1929
1930//---------------------------------------------------------------------
1931
1932template<class CharType, class BV, unsigned MAX_STR_SIZE>
1934{
1935 /*size_type buf_idx = */this->add_value("");
1936}
1937
1938//---------------------------------------------------------------------
1939
1940template<class CharType, class BV, unsigned MAX_STR_SIZE>
1943{
1944 for (size_type i = 0; i < count; ++i) // TODO: optimization
1945 this->add_value("");
1946}
1947
1948
1949//---------------------------------------------------------------------
1950
1951template<class CharType, class BV, unsigned MAX_STR_SIZE>
1955{
1956 BM_ASSERT(sv_);
1957 BM_ASSERT(v);
1958 if (pos_in_buf_ == ~size_type(0))
1959 {
1960 if (!buf_matrix_.is_init())
1961 buf_matrix_.init();
1962 pos_in_buf_ = 0;
1963 buf_matrix_.set_zero();
1964 }
1965 else
1966 if (pos_in_buf_ >= buffer_matrix_type::n_rows-1)
1967 {
1968 this->flush();
1969 pos_in_buf_ = 0;
1970 buf_matrix_.set_zero();
1971 }
1972 else
1973 {
1974 ++pos_in_buf_;
1975 }
1976 value_type* r = buf_matrix_.row(pos_in_buf_);
1977 for (unsigned i = 0; i < buffer_matrix_type::n_columns; ++i)
1978 {
1979 r[i] = v[i];
1980 if (!r[i])
1981 break;
1982 } // for i
1983 return pos_in_buf_;
1984}
1985
1986//---------------------------------------------------------------------
1987
1988
1989} // namespace
1990
1991#endif
Algorithms for bvector<> (main include)
basic bit-matrix class and utilities
Constants, tables and typedefs.
Definitions(internal)
#define BMRESTRICT
Definition bmdef.h:193
#define BMNOEXCEPT
Definition bmdef.h:79
#define BM_ASSERT
Definition bmdef.h:130
#define BM_ASSERT_THROW(x, xerrcode)
Definition bmdef.h:401
Utilities for bit transposition (internal) (experimental!)
Base class for bit-transposed sparse vector construction.
Definition bmbmatrix.h:254
void swap(base_sparse_vector< CharType, BV, MAX_SIZE > &bsv) BMNOEXCEPT
Definition bmbmatrix.h:1208
void copy_from(const base_sparse_vector< CharType, BV, MAX_SIZE > &bsv)
Definition bmbmatrix.h:1175
void clear_range(size_type left, size_type right, bool set_null)
Definition bmbmatrix.h:1239
bmatrix_type bmatr_
bit-transposed matrix
Definition bmbmatrix.h:495
bool is_null(size_type idx) const BMNOEXCEPT
test if specified element is NULL
Definition bmbmatrix.h:1285
bvector_type_ptr plain(unsigned i) BMNOEXCEPT
get access to bit-plain as is (can return NULL)
Definition bmbmatrix.h:376
void clear_value_plains_from(unsigned plain_idx, size_type idx)
Definition bmbmatrix.h:1406
Basic dense bit-matrix class.
Definition bmbmatrix.h:54
bvector_type_ptr construct_row(size_type row)
Definition bmbmatrix.h:715
void set_octet(size_type pos, size_type octet_idx, unsigned char octet)
Definition bmbmatrix.h:821
unsigned char get_octet(size_type pos, size_type octet_idx) const BMNOEXCEPT
Definition bmbmatrix.h:913
bvector_type_const_ptr get_row(size_type i) const BMNOEXCEPT
Definition bmbmatrix.h:570
Constant iterator designed to enumerate "ON" bits.
Definition bm.h:600
bool valid() const BMNOEXCEPT
Checks if iterator is still valid.
Definition bm.h:280
optmode
Optimization mode Every next level means additional checks (better compression vs time)
Definition bm.h:130
@ opt_compress
compress blocks when possible (GAP/prefix sum)
Definition bm.h:134
bm::id_t size_type
Definition bm.h:117
Alloc allocator_type
Definition bm.h:110
sparse vector de-serializer
Back insert iterator implements buffered insert, faster than generic access assignment.
str_sparse_vector< CharType, BV, MAX_STR_SIZE > str_sparse_vector_type
back_insert_iterator & operator=(const StrType &v)
push value to the vector
bool empty() const BMNOEXCEPT
return true if insertion buffer is empty
back_insert_iterator & operator++(int)
noop
str_sparse_vector_type::value_type value_type
void add(const value_type *v)
add value to the container
void add_null()
add NULL (no-value) to the container
str_sparse_vector_type * str_sparse_vector_type_ptr
back_insert_iterator & operator=(const value_type *v)
push value to the vector
void flush()
flush the accumulated buffer
bvector_type::block_idx_type block_idx_type
str_sparse_vector_type::size_type size_type
bvector_type::allocator_type allocator_type
allocator_type::allocator_pool_type allocator_pool_type
void add_null(size_type count)
add a series of consequitve NULLs (no-value) to the container
size_type add_value(const value_type *v)
add value to the buffer without changing the NULL vector
back_insert_iterator & operator++()
noop
str_sparse_vector_type::bvector_type bvector_type
Const iterator to do quick traverse of the sparse vector.
std::input_iterator_tag iterator_category
const_iterator & operator++(int) BMNOEXCEPT
Advance to the next available value.
const_iterator & operator++() BMNOEXCEPT
Advance to the next available value.
str_sparse_vector< CharType, BV, MAX_STR_SIZE > str_sparse_vector_type
void go_to(size_type pos) BMNOEXCEPT
re-position to a specified position
const value_type * value() const BMNOEXCEPT
Get current position (value)
size_type pos() const BMNOEXCEPT
Current position (index) in the vector.
str_sparse_vector_type::size_type size_type
bool operator>=(const const_iterator &it) const BMNOEXCEPT
void invalidate() BMNOEXCEPT
Invalidate current iterator.
str_sparse_vector_type::value_type value_type
bvector_type::allocator_type allocator_type
bool operator<=(const const_iterator &it) const BMNOEXCEPT
bm::heap_matrix< CharType, 1024, MAX_STR_SIZE, allocator_type > buffer_matrix_type
bool operator!=(const const_iterator &it) const BMNOEXCEPT
allocator_type::allocator_pool_type allocator_pool_type
bool operator==(const const_iterator &it) const BMNOEXCEPT
bool operator>(const const_iterator &it) const BMNOEXCEPT
void advance() BMNOEXCEPT
advance iterator forward by one
str_sparse_vector_type::bvector_type bvector_type
bool is_null() const BMNOEXCEPT
Get NULL status.
bool operator<(const const_iterator &it) const BMNOEXCEPT
const value_type * operator*() const BMNOEXCEPT
Get current position (value)
bool valid() const BMNOEXCEPT
Returns true if iterator is at a valid position.
str_sparse_vector_type * str_sparse_vector_type_ptr
Reference class to access elements via common [] operator.
bool operator==(const const_reference &ref) const BMNOEXCEPT
const_reference(const str_sparse_vector< CharType, BV, MAX_STR_SIZE > &str_sv, size_type idx) BMNOEXCEPT
Reference class to access elements via common [] operator.
reference(str_sparse_vector< CharType, BV, MAX_STR_SIZE > &str_sv, size_type idx) BMNOEXCEPT
bool operator==(const reference &ref) const BMNOEXCEPT
reference & operator=(const value_type *str)
bool is_null() const BMNOEXCEPT
reference & operator=(const reference &ref)
sparse vector for strings with compression using bit transposition method
void insert(size_type idx, const StrType &str)
insert STL string
void erase(size_type idx)
erase the specified element
void insert_value(size_type idx, const value_type *str)
insert value without checking boundaries
void set_value(size_type idx, const value_type *str)
set value without checking boundaries
void set_value_no_null(size_type idx, const value_type *str)
set value without checking boundaries or support of NULL
static bool find_rank(size_type rank, size_type &pos) BMNOEXCEPT
find position of compressed element by its rank
size_type effective_vector_max() const
get effective string length used in vector
bool is_remap() const BMNOEXCEPT
Get remapping status (true|false)
bool remap_tosv(value_type *sv_str, size_type buf_size, const value_type *str) const BMNOEXCEPT
bvector_type::size_type size_type
void remap_from(const str_sparse_vector &str_sv)
Build remapping profile and load content from another sparse vector.
base_sparse_vector< CharType, BV, MAX_STR_SIZE > parent_type
bvector_type::enumerator bvector_enumerator_type
str_sparse_vector< CharType, BV, MAX_STR_SIZE > & clear_range(size_type left, size_type right, bool set_null=false)
clear range (assign bit 0 for all plains)
size_t remap_size() const
static void throw_range_error(const char *err_msg)
throw range error
BV::allocator_type allocator_type
void resize_internal(size_type sz)
size_type size_internal() const
str_sparse_vector< CharType, BV, MAX_STR_SIZE > & operator=(const str_sparse_vector< CharType, BV, MAX_STR_SIZE > &str_sv)
void clear() BMNOEXCEPT
resize to zero, free memory
bool equal(const str_sparse_vector< CharType, BV, MAX_STR_SIZE > &sv, bm::null_support null_able=bm::use_null) const BMNOEXCEPT
check if another sparse vector has the same content and size
back_insert_iterator get_back_inserter()
Provide back insert iterator Back insert iterator implements buffered insertion, which is faster,...
static bool remap_tosv(value_type *BMRESTRICT sv_str, size_type buf_size, const value_type *BMRESTRICT str, const plain_octet_matrix_type &BMRESTRICT octet_remap_matrix2) BMNOEXCEPT
unsigned common_prefix_length(size_type idx1, size_type idx2) const BMNOEXCEPT
Find size of common prefix between two vector elements in octets.
int compare(size_type idx, const value_type *str) const BMNOEXCEPT
Compare vector element with argument lexicographically.
bool empty() const
return true if vector is empty
const_reference operator[](size_type idx) const
Operator to get read access to an element
bvector_type * bvector_type_ptr
void swap(str_sparse_vector &str_sv) BMNOEXCEPT
void calc_stat(struct str_sparse_vector< CharType, BV, MAX_STR_SIZE >::statistics *st) const BMNOEXCEPT
Calculates memory statistics.
const_iterator end() const BMNOEXCEPT
Provide const iterator access to the end
void push_back(const StrType &str)
push back a string
void calc_octet_stat(plain_octet_matrix_type &octet_matrix) const BMNOEXCEPT
const_iterator get_const_iterator(size_type idx) const BMNOEXCEPT
Get const_itertor re-positioned to specific element.
bm::basic_bmatrix< BV > bmatrix_type
bool resolve_range(size_type from, size_type to, size_type *idx_from, size_type *idx_to) const
unsigned remap_flags_
remapping status
bvector_type::allocation_policy allocation_policy_type
str_sparse_vector(str_sparse_vector< CharType, BV, MAX_STR_SIZE > &&str_sv) BMNOEXCEPT
size_type size() const
return size of the vector
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename str_sparse_vector< CharType, BV, MAX_STR_SIZE >::statistics *stat=0)
run memory optimization for all vector plains
void sync(bool force)
syncronize internal structures
void copy_range(const str_sparse_vector< CharType, BV, MAX_STR_SIZE > &sv, size_type left, size_type right, bm::null_support splice_null=bm::use_null)
copy range of values from another sparse vector
void insert(size_type idx, const value_type *str)
insert the specified element
void import_back(CharMatrix &cmatr, size_type imp_size)
Bulk push-back import of strings from a C-style matrix of chars.
void push_back(const value_type *str)
push back a string (zero terminated)
static size_type max_str()
get maximum string length capacity
size_type decode(CharMatrix &cmatr, size_type idx_from, size_type dec_size, bool zero_mem=true) const
Bulk export strings to a C-style matrix of chars.
void set(size_type idx, const value_type *str)
set specified element with bounds checking and automatic resize
void set_null(size_type idx)
set NULL status for the specified element Vector is resized automatically
const bvector_type * bvector_type_const_ptr
static bool is_compressed() BMNOEXCEPT
trait if sparse vector is "compressed" (false)
const unsigned char * get_remap_buffer() const
size_type effective_size() const BMNOEXCEPT
size of sparse vector (may be different for RSC)
size_type get(size_type idx, value_type *str, size_type buf_size) const BMNOEXCEPT
get specified element
static bool remap_fromsv(value_type *BMRESTRICT str, size_type buf_size, const value_type *BMRESTRICT sv_str, const plain_octet_matrix_type &BMRESTRICT octet_remap_matrix1) BMNOEXCEPT
void insert_value_no_null(size_type idx, const value_type *str)
insert value without checking boundaries or support of NULL
reference operator[](size_type idx)
Operator to get write access to an element
static void throw_bad_value(const char *err_msg)
throw domain error
void get(size_type idx, StrType &str) const
get specified string element Template method expects an STL-compatible type basic_string<>
bm::heap_matrix< unsigned char, MAX_STR_SIZE, 256, typename bvector_type::allocator_type > plain_octet_matrix_type
size_type effective_max_str() const BMNOEXCEPT
get effective string length used in vector Calculate and returns efficiency, how close are we to the ...
void resize(size_type sz)
resize vector
unsigned char * init_remap_buffer()
plain_octet_matrix_type remap_matrix2_
octet remap table 2
allocator_type::allocator_pool_type allocator_pool_type
void import_no_check(CharMatrix &cmatr, size_type idx_from, size_type imp_size, bool set_not_null=true)
const_iterator begin() const BMNOEXCEPT
Provide const iterator access to container content
str_sparse_vector(bm::null_support null_able=bm::no_null, allocation_policy_type ap=allocation_policy_type(), size_type bv_max_size=bm::id_max, const allocator_type &alloc=allocator_type())
Sparse vector constructor.
plain_octet_matrix_type remap_matrix1_
octet remap table 1
static void build_octet_remap(plain_octet_matrix_type &octet_remap_matrix1, plain_octet_matrix_type &octet_remap_matrix2, const plain_octet_matrix_type &octet_occupancy_matrix)
void assign(size_type idx, const StrType &str)
set specified element with bounds checking and automatic resize
unsigned bit_list(T w, B *bits) BMNOEXCEPT
Unpacks word into list of ON bit indexes.
Definition bmfunc.h:438
null_support
NULL-able value support.
Definition bmconst.h:214
@ BM_SORTED
input set is sorted (ascending order)
Definition bmconst.h:191
@ use_null
support "non-assigned" or "NULL" logic
Definition bmconst.h:215
@ no_null
do not support NULL values
Definition bmconst.h:216
Definition bm.h:77
void xor_swap(W &x, W &y) BMNOEXCEPT
XOR swap two scalar variables.
Definition bmfunc.h:909
const unsigned id_max
Definition bmconst.h:108
unsigned int word_t
Definition bmconst.h:38
const unsigned set_block_shift
Definition bmconst.h:55
Structure with statistical information about memory allocation footprint, serialization projection,...
Definition bmfunc.h:55
size_t gap_cap_overhead
gap memory overhead between length and capacity
Definition bmfunc.h:62
size_t ptr_sub_blocks
Number of sub-blocks.
Definition bmfunc.h:58
size_t gap_blocks
Number of GAP blocks.
Definition bmfunc.h:57
size_t bit_blocks
Number of bit blocks.
Definition bmfunc.h:56
size_t bv_count
Number of bit-vectors.
Definition bmfunc.h:59
size_t max_serialize_mem
estimated maximum memory for serialization
Definition bmfunc.h:60
size_t memory_used
memory usage for all blocks and service tables
Definition bmfunc.h:61
void add(const bv_statistics &st) BMNOEXCEPT
Sum data from another sttructure.
Definition bmfunc.h:105
memory allocation policy
Definition bm.h:820
Statistical information about bitset's memory allocation details.
Definition bm.h:122