BitMagic-C++
bmstrsparsevec.h
Go to the documentation of this file.
1#ifndef BMSTRSPARSEVEC__H__INCLUDED__
2#define BMSTRSPARSEVEC__H__INCLUDED__
3/*
4Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5
6Licensed under the Apache License, Version 2.0 (the "License");
7you may not use this file except in compliance with the License.
8You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12Unless required by applicable law or agreed to in writing, software
13distributed under the License is distributed on an "AS IS" BASIS,
14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15See the License for the specific language governing permissions and
16limitations under the License.
17
18For more information please visit: http://bitmagic.io
19*/
20
21/*! \file bmstrsparsevec.h
22 \brief string sparse vector based on bit-transposed matrix
23*/
24
25#include <stddef.h>
26#include "bmconst.h"
27
28#ifndef BM_NO_STL
29#include <stdexcept>
30#include <string_view>
31#endif
32
33#ifndef BM__H__INCLUDED__
34// BitMagic utility headers do not include main "bm.h" declaration
35// #include "bm.h" or "bm64.h" explicitly
36# error missing include (bm.h or bm64.h)
37#endif
38
39#include "bmtrans.h"
40#include "bmalgo.h"
41#include "bmbuffer.h"
42#include "bmbmatrix.h"
43#include "bmdef.h"
44
45namespace bm
46{
47
48enum class remap_setup
49{
50 COPY_RTABLES, //!< copy remap tables only (without data)
51};
52
53
54/*!
55 \brief succinct sparse vector for strings with compression using bit-slicing ( transposition) method
56
57 Initial string is bit-transposed into bit-slices so collection may use less
58 memory due to prefix sum (GAP) compression in bit-slices. In addition, the container
59 can use chracter re-mapping using char freaquencies to compute the minimal codes.
60 Re-mapping can reduce memory footprint, get better search performance and improve storage
61 compression.
62
63 Template parameters:
64 CharType - type of character (char or unsigned char) (wchar not tested)
65 BV - bit-vector for bit-slicing
66 STR_SIZE - initial string size (can dynamically increase on usage)
67
68 @ingroup sv
69*/
70template<typename CharType, typename BV, unsigned STR_SIZE>
71class str_sparse_vector : public base_sparse_vector<CharType, BV, STR_SIZE>
72{
73public:
74 typedef BV bvector_type;
77 typedef CharType value_type;
78 typedef CharType* value_type_prt;
80 typedef typename BV::allocator_type allocator_type;
83 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
87
88 /*! Statistical information about memory allocation details. */
89 struct statistics : public bv_statistics
90 {};
91
93 {
94 sv_octet_slices = STR_SIZE
95 };
96
97 /** Matrix of character remappings
98 @internal
99 */
100 typedef bm::dynamic_heap_matrix<unsigned char, allocator_type>
103
104 /** Matrix of character frequencies (for optimal code remap)
105 @internal
106 */
107 typedef
108 bm::dynamic_heap_matrix<size_t, allocator_type> octet_freq_matrix_type;
109
110 struct is_remap_support { enum trait { value = true }; };
111 struct is_rsc_support { enum trait { value = false }; };
112 struct is_dynamic_splices { enum trait { value = true }; };
113
115 {
116 public:
117 typedef
118 bm::heap_vector<CharType, typename bvector_type::allocator_type, true>
120 protected:
122 };
123
124 /**
125 Reference class to access elements via common [] operator
126 @ingroup sv
127 */
129 {
130 public:
133 size_type idx)
134 : str_sv_(str_sv), idx_(idx)
135 {
136 this->buf_.resize(str_sv.effective_max_str());
137 }
138
139 operator const value_type*() const BMNOEXCEPT
140 {
141 return get();
142 }
143
144 const value_type* get() const BMNOEXCEPT
145 {
146 str_sv_.get(idx_, this->buf_.data(), str_sv_.effective_max_str());
147 return this->buf_.data();
148 }
149
150 bool operator==(const const_reference& ref) const BMNOEXCEPT
151 { return bool(*this) == bool(ref); }
152 bool is_null() const BMNOEXCEPT { return str_sv_.is_null(idx_); }
153 private:
155 size_type idx_;
156 };
157
158 /**
159 Reference class to access elements via common [] operator
160 @ingroup sv
161 */
162 class reference : protected reference_base
163 {
164 public:
166 size_type idx)
167 : str_sv_(str_sv), idx_(idx)
168 {
169 this->buf_.resize(str_sv.effective_max_str());
170 }
171
172 operator const value_type*() const BMNOEXCEPT
173 {
174 return get();
175 }
176
177 const value_type* get() const BMNOEXCEPT
178 {
179 str_sv_.get(idx_, this->buf_.data(), str_sv_.effective_max_str());
180 return this->buf_.data();
181 }
182
184 {
185 // TO DO: implement element copy bit by bit
186 str_sv_.set(idx_, (const value_type*)ref);
187 return *this;
188 }
189
191 {
192 str_sv_.set(idx_, str);
193 return *this;
194 }
195 bool operator==(const reference& ref) const BMNOEXCEPT
196 { return bool(*this) == bool(ref); }
197 bool is_null() const BMNOEXCEPT { return str_sv_.is_null(idx_); }
198 private:
200 size_type idx_;
201 };
202
203 /**
204 Const iterator to do quick traverse of the sparse vector.
205
206 Implementation uses buffer for decoding so, competing changes
207 to the original vector may not match the iterator returned values.
208
209 This iterator keeps an operational buffer of decoded elements, so memory footprint is NOT negligable.
210
211 @ingroup sv
212 */
214 {
215 public:
216 friend class str_sparse_vector;
217#ifndef BM_NO_STL
218 typedef std::input_iterator_tag iterator_category;
219 typedef std::basic_string_view<CharType> string_view_type;
220#endif
227 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
228
229 typedef long long difference_type;
230 typedef CharType* pointer;
231 typedef CharType*& reference;
232 public:
233 /**
234 Construct iterator (not attached to any particular vector)
235 */
237 /**
238 Construct iterator (attached to sparse vector)
239 @param sv - pointer to sparse vector
240 */
242 /**
243 Construct iterator (attached to sparse vector) and positioned
244 @param sv - reference to sparse vector
245 @param pos - position in the vector to start
246 */
248
250
251 /**
252 setup iterator to retrieve a sub-string of a string
253 @param from - Position of the first character to be copied
254 @param len - length of a substring (defult: 0 read to the available end)
255 */
256 void set_substr(unsigned from, unsigned len = 0) BMNOEXCEPT;
257
258
259 bool operator==(const const_iterator& it) const BMNOEXCEPT
260 { return (pos_ == it.pos_) && (sv_ == it.sv_); }
262 { return ! operator==(it); }
264 { return pos_ < it.pos_; }
266 { return pos_ <= it.pos_; }
268 { return pos_ > it.pos_; }
270 { return pos_ >= it.pos_; }
271
272 /// \brief Get current position (value)
273 const value_type* operator*() const
274 { return this->value(); }
275
276 /// \brief Advance to the next available value
278 { this->advance(); return *this; }
279
280 /// \brief Advance to the next available value
282 { const_iterator tmp(*this);this->advance(); return tmp; }
283
284
285 /// \brief Get zero terminated string value at the current position
286 const value_type* value() const;
287
288 /// \brief Get current string as string_view
290
291 /// \brief Get NULL status
292 bool is_null() const BMNOEXCEPT { return sv_->is_null(this->pos_); }
293
294 /// Returns true if iterator is at a valid position
295 bool valid() const BMNOEXCEPT { return pos_ != bm::id_max; }
296
297 /// Invalidate current iterator
299
300 /// Current position (index) in the vector
301 size_type pos() const BMNOEXCEPT { return pos_; }
302
303 /// re-position to a specified position
305
306 /// advance iterator forward by one
308
309 protected:
311 {
312 n_rows = 1024
313 };
314 typedef dynamic_heap_matrix<CharType, allocator_type> buffer_matrix_type;
315
316 private:
317 const str_sparse_vector_type* sv_; ///!< ptr to parent
318 unsigned substr_from_; ///!< substring from
319 unsigned substr_to_; ///!< substring to
320 mutable size_type pos_; ///!< Position
321 mutable buffer_matrix_type buf_matrix_; ///!< decode value buffer
322 mutable size_type pos_in_buf_; ///!< buffer position
323 };
324
325
326 /**
327 Back insert iterator implements buffered insert, faster than generic
328 access assignment.
329
330 Limitations for buffered inserter:
331 1. Do not use more than one inserter (into one vector) at the same time
332 2. Use method flush() at the end to send the rest of accumulated buffer
333 flush is happening automatically on destruction, but if flush produces an
334 exception (for whatever reason) it will be an exception in destructor.
335 As such, explicit flush() is safer way to finilize the sparse vector load.
336
337 @ingroup sv
338 */
340 {
341 public:
342#ifndef BM_NO_STL
343 typedef std::output_iterator_tag iterator_category;
344#endif
351 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
352
353 typedef void difference_type;
354 typedef void pointer;
355 typedef void reference;
356
357 public:
361
363 {
364 BM_ASSERT(bi.empty());
365 buf_matrix_.init_resize(
366 bi.buf_matrix_.rows(), bi.buf_matrix_.cols());
367 this->flush_impl(); sv_ = bi.sv_;
368 return *this;
369 }
370
372
373 /**
374 Set optimization on load option (deafult: false)
375 */
377 { opt_mode_ = opt_mode; }
378
379 /**
380 Method to configure back inserter to collect statistics on optimal character codes.
381 This methos makes back inserter slower, but can be used to accelerate later remap() of
382 the sparse vector. Use flush at the end to apply the remapping.
383 By default inserter does not collect additional statistics.
384
385 Important! You should NOT use intermediate flush if you set remapping!
386
387 @sa flush
388 */
389 void set_remap(bool flag) BMNOEXCEPT { remap_flags_ = flag; }
390
391 /// Get curent remap state flags
392 unsigned get_remap() const BMNOEXCEPT { return remap_flags_; }
393
394 /** push value to the vector */
396 { this->add(v); return *this; }
397
398
399 /** push value to the vector */
400 template<typename StrType>
402 {
403 this->add(v.c_str()); return *this; // TODO: avoid c_str()
404 }
405
406 /** noop */
407 back_insert_iterator& operator*() { return *this; }
408 /** noop */
409 back_insert_iterator& operator++() { return *this; }
410 /** noop */
411 back_insert_iterator& operator++( int ) { return *this; }
412
413 /** add value to the container*/
414 void add(const value_type* v);
415
416 /** add NULL (no-value) to the container */
417 void add_null();
418
419 /** add a series of consequitve NULLs (no-value) to the container */
420 void add_null(size_type count);
421
422 /** flush the accumulated buffer. It is important to call flush at the end, before destruction of the
423 inserter. Flush may throw exceptions, which will be possible to intercept.
424 Otherwise inserter is flushed in the destructor.
425 */
426 void flush();
427
428 // access to internals
429 //
430
431 /// Get octet frequence matrix
432 /// @internal
434 { return omatrix_; }
435
436 protected:
437 /** return true if insertion buffer is empty */
438 bool empty() const BMNOEXCEPT;
439
441
442 /** add value to the buffer without changing the NULL vector
443 @param v - value to push back
444 @internal
445 */
446 void add_value(const value_type* v);
447
448 /**
449 account new value as remap statistics
450 */
452
454
455 private:
456 enum buf_size_e
457 {
458 n_buf_size = str_sparse_vector_type::ins_buf_size // 1024 * 8
459 };
460 typedef bm::dynamic_heap_matrix<CharType, allocator_type> buffer_matrix_type;
461 friend class str_sparse_vector;
462
463 protected:
464 str_sparse_vector_type* sv_; ///!< pointer on the parent vector
465 bvector_type* bv_null_; ///!< not NULL vector pointer
466 buffer_matrix_type buf_matrix_; ///!< value buffer
467 size_type pos_in_buf_; ///!< buffer position
468 block_idx_type prev_nb_ = 0;///!< previous block added
469 typename
471 ///
472 unsigned remap_flags_ = 0; ///< target remapping
473 octet_freq_matrix_type omatrix_; ///< octet frequency matrix
474 };
475
476
477public:
478
479 /*!
480 \brief Sparse vector constructor
481
482 \param null_able - defines if vector supports NULL values flag
483 by default it is OFF, use bm::use_null to enable it
484 \param ap - allocation strategy for underlying bit-vectors
485 Default allocation policy uses BM_BIT setting (fastest access)
486 \param bv_max_size - maximum possible size of underlying bit-vectors
487 Please note, this is NOT size of svector itself, it is dynamic upper limit
488 which should be used very carefully if we surely know the ultimate size
489 \param alloc - allocator for bit-vectors
490
491 \sa bvector<>
492 \sa bm::bvector<>::allocation_policy
493 \sa bm::startegy
494 */
497 size_type bv_max_size = bm::id_max,
498 const allocator_type& alloc = allocator_type());
499
500 /*! copy-ctor */
502
503 /*! construct empty sparse vector, copying the remap tables from another vector
504 \param str_sv - source vector to take the remap tables from (assumed to be remaped)
505 \param remap_mode - remap table copy param
506 */
508
509
510 /*! copy assignmment operator */
513 {
514 if (this != &str_sv)
516 remap_flags_ = str_sv.remap_flags_;
519 return *this;
520 }
521#ifndef BM_NO_CXX11
522 /*! move-ctor */
524 {
525 parent_type::swap(str_sv);
526 remap_flags_ = str_sv.remap_flags_;
527 remap_matrix1_.swap(str_sv.remap_matrix1_);
528 remap_matrix2_.swap(str_sv.remap_matrix2_);
529 }
530
531 /*! move assignmment operator */
534 {
535 if (this != &str_sv)
536 this->swap(str_sv);
537 return *this;
538 }
539#endif
540
541public:
542
543 // ------------------------------------------------------------
544 /*! @name String element access */
545 ///@{
546
547 /** \brief Operator to get read access to an element */
548 const const_reference operator[](size_type idx) const
549 { return const_reference(*this, idx); }
550
551 /** \brief Operator to get write access to an element */
552 reference operator[](size_type idx) { return reference(*this, idx); }
553
554 /*!
555 \brief set specified element with bounds checking and automatic resize
556 \param idx - element index (vector auto-resized if needs to)
557 \param str - string to set (zero terminated)
558 */
559 void set(size_type idx, const value_type* str);
560
561 /*!
562 \brief set NULL status for the specified element
563 Vector is resized automatically
564 \param idx - element index (vector auto-resized if needs to)
565 */
567
568 /**
569 Set NULL all elements set as 1 in the argument vector
570 \param bv_idx - index bit-vector for elements which needs to be turned to NULL
571 */
572 void set_null(const bvector_type& bv_idx)
573 { this->bit_sub_rows(bv_idx, true); }
574
575 /**
576 Set vector elements spcified by argument bit-vector to empty
577 Note that set to empty elements are NOT going to tuned to NULL (NULL qualifier is preserved)
578 \param bv_idx - index bit-vector for elements which to be set to 0
579 */
580 void clear(const bvector_type& bv_idx)
581 { this->bit_sub_rows(bv_idx, false); }
582
583
584 /**
585 Set NULL all elements NOT set as 1 in the argument vector
586 \param bv_idx - index bit-vector for elements which needs to be kept
587 */
588 void keep(const bvector_type& bv_idx) { this->bit_and_rows(bv_idx); }
589
590
591 /*!
592 \brief insert the specified element
593 \param idx - element index (vector auto-resized if needs to)
594 \param str - string to set (zero terminated)
595 */
596 void insert(size_type idx, const value_type* str);
597
598 /**
599 \brief swap two vector elements between each other
600 \param idx1 - element index 1
601 \param idx1 - element index 2
602 */
603 void swap(size_type idx1, size_type idx2);
604
605
606
607 /*!
608 \brief insert STL string
609 \param idx - element index (vector auto-resized if needs to)
610 \param str - STL string to set
611 */
612 template<typename StrType>
613 void insert(size_type idx, const StrType& str)
614 {
615 this->insert(idx, str.c_str()); // TODO: avoid c_str()
616 }
617
618 /*!
619 \brief erase the specified element
620 \param idx - element index
621 */
622 void erase(size_type idx);
623
624 /*!
625 \brief get specified element
626
627 \param idx - element index
628 \param str - string buffer
629 \param buf_size - string buffer size
630
631 @return string length
632 */
634 value_type* str, size_type buf_size) const BMNOEXCEPT;
635
636 /*!
637 \brief set specified element with bounds checking and automatic resize
638
639 This is an equivalent of set() method, but templetized to be
640 more compatible with the STL std::string and the likes
641
642 \param idx - element index (vector auto-resized if needs to)
643 \param str - input string
644 expected an STL class with size() support,
645 like basic_string<> or vector<char>
646 */
647 template<typename StrType>
648 void assign(size_type idx, const StrType& str)
649 {
650 if (idx >= this->size())
651 this->size_ = idx+1;
652
653 size_type str_size = size_type(str.size());
654 if (!str_size)
655 {
656 this->clear_value_planes_from(0, idx);
657 return;
658 }
659 for (size_type i=0; i < str_size; ++i)
660 {
661 CharType ch = str[i];
662 if (remap_flags_) // compressional re-mapping is in effect
663 {
664 unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
665 BM_ASSERT(remap_value);
666 ch = CharType(remap_value);
667 }
668 this->bmatr_.set_octet(idx, i, (unsigned char)ch);
669 if (!ch)
670 break;
671 } // for i
672 this->clear_value_planes_from(unsigned(str_size*8), idx);
673 if (bvector_type* bv_null = this->get_null_bvect())
674 bv_null->set_bit_no_check(idx);
675 }
676
677 /*!
678 \brief push back a string
679 \param str - string to set
680 (STL class with size() support, like basic_string)
681 */
682 template<typename StrType>
683 void push_back(const StrType& str) { assign(this->size_, str); }
684
685 /*!
686 \brief push back a string (zero terminated)
687 \param str - string to set
688 */
689 void push_back(const value_type* str) { set(this->size_, str); }
690
691 /*!
692 \brief push back specified amount of NULL values
693 \param count - number of NULLs to push back
694 */
696
697 /*!
698 \brief push back NULL value
699 */
701
702 /*!
703 \brief get specified string element if NOT NULL
704 Template method expects an STL-compatible type basic_string<>
705 \param idx - element index (vector auto-resized if needs to)
706 \param str - string to get [out]
707 \return true if element is not null and try-get successfull
708 */
709 template<typename StrType>
710 bool try_get(size_type idx, StrType& str) const
711 {
712 if (this->is_null(idx))
713 return false;
714 get(idx, str);
715 return true;
716 }
717
718
719 /*!
720 \brief get specified string element
721 Template method expects an STL-compatible type basic_string<>
722 \param idx - element index (vector auto-resized if needs to)
723 \param str - string to get [out]
724 */
725 template<typename StrType>
726 void get(size_type idx, StrType& str) const
727 {
728 str.clear();
729 for (unsigned i = 0; true; ++i)
730 {
731 CharType ch = CharType(this->bmatr_.get_octet(idx, i));
732 if (!ch)
733 break;
734 if (remap_flags_)
735 {
736 const unsigned char* remap_row = remap_matrix1_.row(i);
737 unsigned char remap_value = remap_row[unsigned(ch)];
738 BM_ASSERT(remap_value);
739 if (!remap_value) // unknown dictionary element
740 {
742 break;
743 }
744 ch = CharType(remap_value);
745 }
746 str.push_back(ch);
747 } // for i
748 }
749
750 /*! Swap content */
752
753 ///@}
754
755 // ------------------------------------------------------------
756 /*! @name Element comparison functions */
757 ///@{
758
759 /**
760 \brief Compare vector element with argument lexicographically
761
762 The function does not account for NULL values, NULL element is treated as an empty string
763
764 NOTE: for a re-mapped container, input string may have no correct
765 remapping, in this case we have an ambiguity
766 (we know it is not equal (0) but LT or GT?).
767 Behavior is undefined.
768
769 \param idx - vactor element index
770 \param str - argument to compare with
771
772 \return 0 - equal, < 0 - vect[idx] < str, >0 otherwise
773 */
774 int compare(size_type idx, const value_type* str) const BMNOEXCEPT;
775
776 static
777 int compare_str(const value_type* str1, const value_type* str2) BMNOEXCEPT;
778
779 static
780 int compare_str(const value_type* str1, const value_type* str2, size_t min_len) BMNOEXCEPT;
781
782 /**
783 \brief Compare two vector elements
784
785 The function does not account for NULL values, NULL element is treated as an empty string
786 \param idx1 - vactor element index 1
787 \param idx2 - vactor element index 2
788
789 \return 0 - equal, < 0 - vect[idx1] < vect[idx2], >0 otherwise
790 */
791 int compare(size_type idx1, size_type idx2) const BMNOEXCEPT;
792
793
794 /**
795 \brief Find size of common prefix between two vector elements in octets
796 @param prefix_buf - optional param for keeping the common prefix string (without remap decode)
797 \return size of common prefix
798 */
799 template<bool USE_PREFIX_BUF = false>
801 value_type* prefix_buf=0) const BMNOEXCEPT;
802
803 /**
804 Variant of compare for remapped vectors. Caller MUST guarantee vector is remapped.
805 */
806 int compare_remap(size_type idx, const value_type* str) const BMNOEXCEPT;
807
808 /**
809 Variant of compare for non-mapped vectors. Caller MUST guarantee vector is not remapped.
810 */
811 int compare_nomap(size_type idx, const value_type* str) const BMNOEXCEPT;
812
813 ///@}
814
815
816 // ------------------------------------------------------------
817 /*! @name Clear */
818 ///@{
819
820 /*! \brief resize to zero, free memory
821 @param free_mem - true - free all bit-vectors memory,
822 false - set bit-vecor to zero (memory remains reserved)
823 @param remap - 0 - set to no-remap (default), 1 - keep remap substitution matrix for possible re-use
824 (if remap() was ever called on this vector with the datawith same frequency profiles)
825 Note that feeding the data with disimilar frequency profile would cause undefined behavior.
826 @sa remap
827 */
828 void clear_all(bool free_mem, unsigned remap=0) BMNOEXCEPT;
829
830 /*! \brief resize to zero, free memory, reset remapping */
831 void clear() BMNOEXCEPT { clear_all(true, 0); }
832
833 /*!
834 \brief clear range (assign bit 0 for all planes)
835 \param left - interval start
836 \param right - interval end (closed interval)
837 \param set_null - set cleared values to unassigned (NULL)
838 */
840 clear_range(size_type left, size_type right, bool set_null = false)
841 {
843 return *this;
844 }
845
846
847 ///@}
848
849
850 // ------------------------------------------------------------
851 /*! @name Size, etc */
852 ///@{
853
854 /*! \brief return size of the vector
855 \return size of sparse vector
856 */
857 size_type size() const { return this->size_; }
858
859 /*! \brief return true if vector is empty
860 \return true if empty
861 */
862 bool empty() const { return (size() == 0); }
863
864 /*! \brief resize vector
865 \param sz - new size
866 */
867 void resize(size_type sz) { parent_type::resize(sz, true); }
868
869 /*! \brief get maximum string length capacity
870 \return maximum string length sparse vector can take
871 */
872 static size_type max_str() { return sv_octet_slices; }
873
874 /*! \brief get effective string length used in vector
875 Calculate and returns efficiency, how close are we
876 to the reserved maximum.
877 \return current string length maximum
878 */
880
881 /*! \brief get effective string length used in vector
882 \return current string length maximum
883 */
885
886 /**
887 \brief recalculate size to exclude tail NULL elements
888 After this call size() will return the true size of the vector
889 */
891
892 ///@}
893
894
895 // ------------------------------------------------------------
896 /*! @name Memory optimization/compression */
897 ///@{
898
899 /*!
900 \brief run memory optimization for all vector planes
901 \param temp_block - pre-allocated memory block to avoid unnecessary re-allocs
902 \param opt_mode - requested compression depth
903 \param stat - memory allocation statistics after optimization
904 */
906 bm::word_t* temp_block = 0,
907 typename bvector_type::optmode opt_mode = bvector_type::opt_compress,
908 typename str_sparse_vector<CharType, BV, STR_SIZE>::statistics* stat = 0);
909
910 /*!
911 @brief Calculates memory statistics.
912
913 Function fills statistics structure containing information about how
914 this vector uses memory and estimation of max. amount of memory
915 bvector needs to serialize itself.
916
917 @param st - pointer on statistics structure to be filled in.
918
919 @sa statistics
920 */
922 struct str_sparse_vector<CharType, BV, STR_SIZE>::statistics* st
923 ) const BMNOEXCEPT;
924
925 /**
926 @brief Turn sparse vector into immutable mode
927 Read-only (immutable) vector uses less memory and allows faster searches.
928 Before freezing it is recommenede to call optimize() to get full memory saving effect
929 @sa optimize, remap
930 */
931 void freeze() { this->freeze_matr(); }
932
933 /** Returns true if vector is read-only */
934 bool is_ro() const BMNOEXCEPT { return this->is_ro_; }
935
936 ///@}
937
938 // ------------------------------------------------------------
939 /*! @name Iterator access */
940 ///@{
941
942 /** Provide const iterator access to container content */
943 const_iterator begin() const BMNOEXCEPT;
944
945 /** Provide const iterator access to the end */
946 const_iterator end() const BMNOEXCEPT { return const_iterator(this, bm::id_max); }
947
948 /** Get const_itertor re-positioned to specific element
949 @param idx - position in the sparse vector
950 */
951 const_iterator get_const_iterator(size_type idx) const BMNOEXCEPT
952 { return const_iterator(this, idx); }
953
954 /** Provide back insert iterator
955 Back insert iterator implements buffered insertion, which is faster, than random access
956 or push_back
957 */
958 back_insert_iterator get_back_inserter()
959 { return back_insert_iterator(this); }
960
961 ///@}
962
963
964
965 // ------------------------------------------------------------
966 /*! @name Various traits */
967 ///@{
968
969 /** \brief various type traits
970 */
971 static constexpr
972 bool is_compressed() BMNOEXCEPT { return false; }
973
974 static constexpr
975 bool is_str() BMNOEXCEPT { return true; }
976
977 ///@}
978
979 // ------------------------------------------------------------
980 /*! @name Char remapping, succinct utilities
981
982 Remapping runs character usage analysis (frequency analysis)
983 based on that implements reduction of dit-depth thus improves
984 search performance and memory usage (both RAM and serialized).
985
986 Remapping limits farther modifications of sparse vector.
987 (Use remapped vector as read-only).
988 */
989
990 ///@{
991
992 /**
993 Get character remapping status (true | false)
994 */
995 bool is_remap() const BMNOEXCEPT { return remap_flags_ != 0; }
996
997 /**
998 Build remapping profile and load content from another sparse vector
999 Remapped vector likely saves memory (both RAM and disk) but
1000 should not be modified (should be read-only).
1001
1002 \param str_sv - source sparse vector (assumed it is not remapped)
1003 \param omatrix - pointer to externall computed char freaquency matrix (optional)
1004 \so remap, freeze
1005 */
1006 void remap_from(const str_sparse_vector& str_sv,
1007 octet_freq_matrix_type* omatrix = 0);
1008
1009 /**
1010 Build remapping profile and re-load content to save memory
1011 */
1012 void remap();
1013
1014 /*!
1015 Calculate flags which octets are present on each byte-plane.
1016 @internal
1017 */
1018 void calc_octet_stat(octet_freq_matrix_type& octet_matrix) const;
1019
1020 /*!
1021 Compute optimal remap codes
1022 @internal
1023 */
1025 slice_octet_matrix_type& octet_remap_matrix1,
1026 slice_octet_matrix_type& octet_remap_matrix2,
1027 octet_freq_matrix_type& octet_occupancy_matrix) const;
1028 /*!
1029 remap string from external (ASCII) system to matrix internal code
1030 @return true if remapping was ok, false if found incorrect value
1031 for the plane
1032 @internal
1033 */
1034 static
1036 size_type buf_size,
1037 const value_type* BMRESTRICT str,
1038 const slice_octet_matrix_type& BMRESTRICT octet_remap_matrix2
1039 ) BMNOEXCEPT;
1040 /*!
1041 remap string from external (ASCII) system to matrix internal code
1042 also creates a zero terminated copy string
1043 @return true if remapping was ok, false if found incorrect value
1044 for the plane
1045 @internal
1046 */
1047 static
1049 value_type* BMRESTRICT sv_str,
1050 value_type* BMRESTRICT str_cp,
1051 size_type buf_size,
1052 const value_type* BMRESTRICT str,
1053 size_t in_len,
1054 const slice_octet_matrix_type& BMRESTRICT octet_remap_matrix2) BMNOEXCEPT;
1055
1056 /*!
1057 remap string from external (ASCII) system to matrix internal code
1058 @internal
1059 */
1061 size_type buf_size,
1062 const value_type* str) const BMNOEXCEPT
1063 {
1064 return remap_tosv(sv_str, buf_size, str, remap_matrix2_);
1065 }
1066
1067 /*!
1068 remap string from external (ASCII) system to matrix internal code
1069 @internal
1070 */
1072 value_type* BMRESTRICT sv_str,
1073 value_type* BMRESTRICT str_cp,
1074 size_type buf_size,
1075 const value_type* BMRESTRICT str,
1076 size_t in_len) const BMNOEXCEPT
1077 {
1078 return remap_n_tosv_2way(
1079 sv_str, str_cp, buf_size, str, in_len, remap_matrix2_);
1080 }
1081
1082 /*!
1083 remap string from internal code to external (ASCII) system
1084 @return true if remapping was ok, false if found incorrect value
1085 for the plane
1086 @internal
1087 */
1088 static
1091 size_type buf_size,
1092 const value_type* BMRESTRICT sv_str,
1093 const slice_octet_matrix_type& BMRESTRICT octet_remap_matrix1
1094 ) BMNOEXCEPT;
1095 /*!
1096 re-calculate remap matrix2 based on matrix1
1097 @internal
1098 */
1100
1101 ///@}
1102
1103 // ------------------------------------------------------------
1104 /*! @name Export content to C-style */
1105 ///@{
1106
1107 /**
1108 \brief Bulk export strings to a C-style matrix of chars
1109
1110 \param cmatr - dest matrix (bm::heap_matrix)
1111 \param idx_from - index in the sparse vector to export from
1112 \param dec_size - decoding size (matrix column allocation should match)
1113 \param zero_mem - set to false if target array is pre-initialized
1114 with 0s to avoid performance penalty
1115
1116 \return number of actually exported elements (can be less than requested)
1117 */
1118 template<typename CharMatrix>
1119 size_type decode(CharMatrix& cmatr,
1120 size_type idx_from,
1121 size_type dec_size,
1122 bool zero_mem = true) const
1123 {
1124 size_type str_len = effective_max_str();
1125 return decode_substr(cmatr, idx_from, dec_size,
1126 0, unsigned(str_len-1), zero_mem);
1127 }
1128
1129 /**
1130 \brief Bulk export strings to a C-style matrix of chars
1131
1132 \param cmatr - dest matrix (bm::heap_matrix)
1133 \param idx_from - index in the sparse vector to export from
1134 \param dec_size - decoding size (matrix column allocation should match)
1135 \param substr_from - sub-string position from
1136 \param substr_to - sub-string position to
1137 \param zero_mem - set to false if target array is pre-initialized
1138 with 0s to avoid performance penalty
1139
1140 \return number of actually exported elements (can be less than requested)
1141 */
1142 template<typename CharMatrix>
1143 size_type decode_substr(CharMatrix& cmatr,
1144 size_type idx_from,
1145 size_type dec_size,
1146 unsigned substr_from,
1147 unsigned substr_to,
1148 bool zero_mem = true) const
1149 {
1150
1151 /// Decoder functor
1152 /// @internal
1153 ///
1154 struct sv_decode_visitor_func
1155 {
1156 sv_decode_visitor_func(CharMatrix& cmatr) BMNOEXCEPT2
1157 : cmatr_(cmatr)
1158 {}
1159
1160 int add_bits(size_type bv_offset,
1161 const unsigned char* BMRESTRICT bits,
1162 unsigned bits_size) BMNOEXCEPT
1163 {
1164 BM_ASSERT(bits_size);
1165
1166 // can be negative (-1) when bv base offset = 0 and sv = 1,2..
1167 size_type base = bv_offset - sv_off_;
1168 const unsigned_value_type m = mask_;
1169 const unsigned i = substr_i_;
1170 unsigned j = 0;
1171 do
1172 {
1173 size_type idx = bits[j] + base;
1174 value_type* BMRESTRICT str = cmatr_.row(idx);
1175 str[i] |= m;
1176 } while (++j < bits_size);
1177 return 0;
1178 }
1179
1180 int add_range(size_type bv_offset, size_type sz) BMNOEXCEPT
1181 {
1182 BM_ASSERT(sz);
1183
1184 auto base = bv_offset - sv_off_;
1185 const unsigned_value_type m = mask_;
1186 const unsigned i = substr_i_;
1187 size_type j = 0;
1188 do
1189 {
1190 size_type idx = j + base;
1191 value_type* BMRESTRICT str = cmatr_.row(idx);
1192 str[i] |= m;
1193 } while(++j < sz);
1194 return 0;
1195 }
1196
1197 CharMatrix& cmatr_; ///< target array for reverse transpose
1198 unsigned_value_type mask_ = 0; ///< bit-plane mask
1199 unsigned substr_i_= 0; ///< i
1200 size_type sv_off_ = 0; ///< SV read offset
1201 };
1202
1203
1204 BM_ASSERT(substr_from <= substr_to);
1205 BM_ASSERT(cmatr.is_init());
1206
1207 if (zero_mem)
1208 cmatr.set_zero(); // TODO: set zero based on requested capacity
1209
1210 size_type rows = size_type(cmatr.rows());
1211 size_type max_sz = this->size() - idx_from;
1212 if (max_sz < dec_size)
1213 dec_size = max_sz;
1214 if (rows < dec_size)
1215 dec_size = rows;
1216 if (!dec_size)
1217 return dec_size;
1218
1219 sv_decode_visitor_func func(cmatr);
1220
1221 for (unsigned i = substr_from; i <= substr_to; ++i)
1222 {
1223 unsigned bi = 0;
1224 func.substr_i_ = i - substr_from; // to put substr at the str[0]
1225
1226 auto rsize = this->bmatr_.rows_not_null();
1227 for (unsigned k = i * 8; k < (i * 8) + 8; ++k, ++bi)
1228 {
1229 if (k >= rsize)
1230 goto break2;
1231 const bvector_type* bv = this->bmatr_.get_row(k);
1232 if (!bv)
1233 continue;
1234 BM_ASSERT (bv != this->get_null_bvector());
1235
1236 func.mask_ = unsigned_value_type(1u << bi);
1237 func.sv_off_ = idx_from;
1238
1239 size_type end = idx_from + dec_size;
1240 bm::for_each_bit_range_no_check(*bv, idx_from, end-1, func);
1241
1242 } // for k
1243 } // for i
1244 break2:
1245
1246 if (remap_flags_)
1247 {
1248 for (unsigned i = 0; i < dec_size; ++i)
1249 {
1250 typename CharMatrix::value_type* str = cmatr.row(i);
1251 remap_matrix1_.remapz(str);
1252 } // for i
1253 }
1254 return dec_size;
1255 }
1256
1257 /**
1258 \brief Bulk import of strings from a C-style matrix of chars
1259
1260 \param cmatr - source matrix (bm::heap_matrix)
1261 [in/out] parameter gets modified(corrupted)
1262 in the process
1263 \param idx_from - destination index in the sparse vector
1264 \param imp_size - import size (number or rows to import)
1265 */
1266 template<typename CharMatrix>
1267 void import(CharMatrix& cmatr, size_type idx_from, size_type imp_size)
1268 {
1269 if (!imp_size)
1270 return;
1271 if (idx_from < this->size_) // in case it touches existing elements
1272 {
1273 // clear all planes in the range to provide corrrect import of 0 values
1274 this->clear_range(idx_from, idx_from + imp_size - 1);
1275 }
1276 import_no_check(cmatr, idx_from, imp_size);
1277 }
1278
1279 /**
1280 \brief Bulk push-back import of strings from a C-style matrix of chars
1281
1282 \param cmatr - source matrix (bm::heap_matrix)
1283 [in/out] parameter gets modified(corrupted)
1284 in the process
1285 \param imp_size - import size (number or rows to import)
1286 */
1287 template<typename CharMatrix>
1288 void import_back(CharMatrix& cmatr, size_type imp_size)
1289 {
1290 if (!imp_size)
1291 return;
1292 import_no_check(cmatr, this->size(), imp_size);
1293 }
1294
1295
1296 ///@}
1297
1298 // ------------------------------------------------------------
1299 /*! @name Merge, split, partition data */
1300 ///@{
1301
1302 /**
1303 @brief copy range of values from another sparse vector
1304
1305 Copy [left..right] values from the source vector,
1306 clear everything outside the range.
1307
1308 \param sv - source vector
1309 \param left - index from in losed diapason of [left..right]
1310 \param right - index to in losed diapason of [left..right]
1311 \param slice_null - "use_null" copy range for NULL vector or
1312 do not copy it
1313 */
1315 size_type left, size_type right,
1316 bm::null_support slice_null = bm::use_null);
1317
1318 /**
1319 \brief merge with another sparse vector using OR operation
1320 Merge is different from join(), because it borrows data from the source
1321 vector, so it gets modified (destructive join)
1322
1323 \param tr_sv - [in, out]argument vector to join with (vector mutates)
1324
1325 \return self reference
1326 */
1329
1330 /**
1331 Keep only specified interval in the sparse vector, clear all other
1332 elements.
1333
1334 \param left - interval start
1335 \param right - interval end (closed interval)
1336 \param slice_null - "use_null" copy range for NULL vector or not
1337 */
1339 bm::null_support slice_null = bm::use_null);
1340
1341 ///@}
1342
1343 // ------------------------------------------------------------
1344
1345 /*! \brief syncronize internal structures */
1346 void sync(bool force);
1347
1348 /*!
1349 \brief check if another sparse vector has the same content and size
1350
1351 \param sv - sparse vector for comparison
1352 \param null_able - flag to consider NULL vector in comparison (default)
1353 or compare only value content planes
1354
1355 \return true, if it is the same
1356 */
1358 bm::null_support null_able = bm::use_null) const BMNOEXCEPT;
1359
1360 /**
1361 \brief find position of compressed element by its rank
1362 */
1363 static
1365
1366 /**
1367 \brief size of sparse vector (may be different for RSC)
1368 */
1370
1371protected:
1373 {
1375 };
1376
1377 /// @internal
1378 template<typename CharMatrix, size_t BufSize = ins_buf_size>
1379 void import_no_check(CharMatrix& cmatr,
1380 size_type idx_from, size_type imp_size,
1381 bool set_not_null = true)
1382 {
1383 BM_ASSERT (cmatr.is_init());
1384
1385 unsigned max_str_size = 0;
1386 {
1387 for (unsigned j = 0; j < imp_size; ++j)
1388 {
1389 typename CharMatrix::value_type* str = cmatr.row(j);
1390 typename CharMatrix::size_type i;
1391 typename CharMatrix::size_type cols = cmatr.cols();
1392 for (i = 0; i < cols; ++i)
1393 {
1394 value_type ch = str[i];
1395 if (!ch)
1396 {
1397 max_str_size =
1398 (unsigned)((i > max_str_size) ? i : max_str_size);
1399 break;
1400 }
1401 if (remap_flags_) // re-mapping is in effect
1402 {
1403 unsigned char remap_value =
1404 remap_matrix2_.get(i, (unsigned char)(ch));
1405 BM_ASSERT(remap_value); // unknown ?!
1406 /*
1407 if (!remap_value) // unknown dictionary element
1408 throw_bad_value(0); */
1409 str[i] = CharType(remap_value);
1410 }
1411 } // for i
1412 } // for j
1413 }
1414
1415 this->bmatr_.allocate_rows((1+max_str_size) * 8 + this->is_nullable());
1416
1417 unsigned_value_type ch_slice[BufSize];
1418 for (unsigned i = 0; i < max_str_size; ++i)
1419 {
1420 unsigned ch_acc = 0;
1421#if defined(BMVECTOPT) || defined(BM_USE_GCC_BUILD)
1422 if (imp_size == ins_buf_size) /// full buffer import can use loop unrolling
1423 {
1424 for (size_type j = 0; j < imp_size; j+=4)
1425 {
1426 unsigned_value_type ch0 = (unsigned_value_type)cmatr.row(j)[i];
1427 unsigned_value_type ch1 = (unsigned_value_type)cmatr.row(j+1)[i];
1428 unsigned_value_type ch2 = (unsigned_value_type)cmatr.row(j+2)[i];
1429 unsigned_value_type ch3 = (unsigned_value_type)cmatr.row(j+3)[i];
1430
1431 ch_acc |= ch0 | ch1 | ch2 | ch3;
1432 ch_slice[j] = ch0; ch_slice[j+1] = ch1;
1433 ch_slice[j+2] = ch2; ch_slice[j+3] = ch3;
1434 }
1435 }
1436 else
1437#endif
1438 {
1439 for (size_type j = 0; j < imp_size; ++j)
1440 {
1441 unsigned_value_type ch = (unsigned_value_type)cmatr.row(j)[i];
1442 ch_acc |= ch;
1443 ch_slice[j] = ch;
1444 }
1445 }
1446 import_char_slice(ch_slice, ch_acc, i, idx_from, imp_size);
1447 }
1448
1449 size_type idx_to = idx_from + imp_size - 1;
1450 if (set_not_null)
1451 {
1452 if (bvector_type* bv_null = this->get_null_bvect())
1453 bv_null->set_range(idx_from, idx_to);
1454 }
1455 if (idx_to >= this->size())
1456 this->size_ = idx_to+1;
1457
1458 }
1459#ifdef _MSC_VER
1460#pragma warning( push )
1461#pragma warning( disable : 4146 )
1462#endif
1463 /// @internal
1464 template<size_t BufSize = ins_buf_size>
1466 unsigned ch_acc,
1467 size_type char_slice_idx,
1468 size_type idx_from, size_type imp_size)
1469 {
1470 size_type bit_list[BufSize];
1471 for ( ;ch_acc; ch_acc &= ch_acc - 1) // bit-scan
1472 {
1473 unsigned n_bits = 0;
1474 const unsigned bi = (bm::word_bitcount((ch_acc & -ch_acc) - 1));
1475 unsigned mask = 1u << bi;
1476#if defined(BMVECTOPT) || defined(BM_USE_GCC_BUILD)
1477 if (imp_size == ins_buf_size) /// full buffer import can use loop unrolling
1478 {
1479 mask |= (mask << 8) | (mask << 16) | (mask << 24);
1480 for (size_type j = 0; j < imp_size; j+=4)
1481 {
1482 unsigned ch0 = ((unsigned)ch_slice[j+0]) |
1483 ((unsigned)ch_slice[j+1] << 8) |
1484 ((unsigned)ch_slice[j+2] << 16) |
1485 ((unsigned)ch_slice[j+3] << 24);
1486 ch0 &= mask;
1487 ch0 = (ch0 >> bi) | (ch0 >> (bi+7)) |
1488 (ch0 >> (bi+14)) | (ch0 >> (bi+21));
1489 ch0 &= 15u;
1490 BM_ASSERT(bm::word_bitcount(ch0) <= 4);
1491 for (size_type base_idx = idx_from + j ;ch0; ch0 &= ch0 - 1) // bit-scan
1492 {
1493 const unsigned bit_idx =
1494 (bm::word_bitcount((ch0 & -ch0) - 1));
1495 bit_list[n_bits++] = base_idx + bit_idx;
1496 } // for ch0
1497 } // for j
1498 }
1499 else
1500#endif
1501 {
1502 for (size_type j = 0; j < imp_size; ++j)
1503 {
1504 unsigned ch = unsigned(ch_slice[j]);
1505 if (ch & mask)
1506 bit_list[n_bits++] = idx_from + j;
1507 } // for j
1508 }
1509
1510 if (n_bits) // set transposed bits to the target plane
1511 {
1512 bvector_type* bv =
1513 this->get_create_slice((unsigned)(char_slice_idx * 8) + bi);
1514 bv->import_sorted(&bit_list[0], n_bits, false);
1515 }
1516 } // for ch_acc
1517 }
1518#ifdef _MSC_VER
1519#pragma warning( pop )
1520#endif
1521 // ------------------------------------------------------------
1522 /*! @name Errors and exceptions */
1523 ///@{
1524
1525 /**
1526 \brief throw range error
1527 \internal
1528 */
1529 static
1530 void throw_range_error(const char* err_msg);
1531
1532 /**
1533 \brief throw domain error
1534 \internal
1535 */
1536 static
1537 void throw_bad_value(const char* err_msg);
1538
1539 ///@}
1540
1541 /*! \brief set value without checking boundaries */
1542 void set_value(size_type idx, const value_type* str);
1543
1544 /*! \brief set value without checking boundaries or support of NULL */
1546
1547 /*! \brief insert value without checking boundaries */
1548 void insert_value(size_type idx, const value_type* str);
1549
1550 /*! \brief insert value without checking boundaries or support of NULL */
1552
1553
1554 size_type size_internal() const { return size(); }
1556
1557 size_t remap_size() const { return remap_matrix1_.get_buffer().size(); }
1558 const unsigned char* get_remap_buffer() const
1559 { return remap_matrix1_.get_buffer().buf(); }
1560 unsigned char* init_remap_buffer()
1561 {
1562 remap_matrix1_.init(true);
1563 return remap_matrix1_.get_buffer().data();
1564 }
1565 void set_remap() { remap_flags_ = 1; }
1566
1567protected:
1568
1570 size_type* idx_from, size_type* idx_to) const
1571 {
1572 *idx_from = from; *idx_to = to; return true;
1573 }
1574
1576 { return &remap_matrix1_; }
1579
1580 /**
1581 reamp using statistics table from inserter
1582 */
1583 void remap(back_insert_iterator& iit);
1584
1585 /**
1586 Remap from implementation, please note that move_data flag can violate cosnt-ness
1587 */
1589 octet_freq_matrix_type* omatrix,
1590 bool move_data);
1591
1592protected:
1593 template<class SVect> friend class sparse_vector_serializer;
1594 template<class SVect> friend class sparse_vector_deserializer;
1595
1596protected:
1597 unsigned remap_flags_; ///< remapping status
1598 slice_octet_matrix_type remap_matrix1_; ///< octet remap table 1
1599 slice_octet_matrix_type remap_matrix2_; ///< octet remap table 2
1600};
1601
1602//---------------------------------------------------------------------
1603//---------------------------------------------------------------------
1604
1605
1606template<class CharType, class BV, unsigned STR_SIZE>
1608 bm::null_support null_able,
1610 size_type bv_max_size,
1611 const allocator_type& alloc)
1612: parent_type(null_able, true, ap, bv_max_size, alloc),
1613 remap_flags_(0)
1614{
1615 static_assert(STR_SIZE > 1,
1616 "BM:: String vector size must be > 1 (to accomodate 0 terminator)");
1617}
1618
1619
1620//---------------------------------------------------------------------
1621
1622template<class CharType, class BV, unsigned STR_SIZE>
1624 const str_sparse_vector& str_sv)
1625: parent_type(str_sv),
1626 remap_flags_(str_sv.remap_flags_),
1629{
1630 static_assert(STR_SIZE > 1,
1631 "BM:: String vector size must be > 1 (to accomodate 0 terminator)");
1632}
1633
1634//---------------------------------------------------------------------
1635
1636template<class CharType, class BV, unsigned STR_SIZE>
1638 const str_sparse_vector& str_sv, bm::remap_setup remap_mode)
1639: parent_type(str_sv.get_null_support(), true),
1640 remap_flags_(str_sv.remap_flags_),
1643{
1644 BM_ASSERT(str_sv.remap_flags_); // source vector should be remapped
1646 static_assert(STR_SIZE > 1,
1647 "BM:: String vector size must be > 1 (to accomodate 0 terminator)");
1648 (void) remap_mode;
1649}
1650
1651//---------------------------------------------------------------------
1652
1653template<class CharType, class BV, unsigned STR_SIZE>
1656{
1657 parent_type::swap(str_sv);
1658 bm::xor_swap(remap_flags_, str_sv.remap_flags_);
1659 remap_matrix1_.swap(str_sv.remap_matrix1_);
1660 remap_matrix2_.swap(str_sv.remap_matrix2_);
1661}
1662
1663//---------------------------------------------------------------------
1664
1665template<class CharType, class BV, unsigned STR_SIZE>
1667 size_type idx, const value_type* str)
1668{
1669 if (idx >= this->size())
1670 this->size_ = idx+1;
1671 set_value(idx, str);
1672}
1673
1674//---------------------------------------------------------------------
1675
1676template<class CharType, class BV, unsigned STR_SIZE>
1678 size_type idx, const value_type* str)
1679{
1680 if (idx >= this->size())
1681 {
1682 this->size_ = idx+1;
1683 set_value(idx, str);
1684 return;
1685 }
1686 insert_value(idx, str);
1687 this->size_++;
1688}
1689
1690//---------------------------------------------------------------------
1691
1692template<class CharType, class BV, unsigned STR_SIZE>
1694 size_type idx2)
1695{
1696 BM_ASSERT(idx1 < this->size());
1697 BM_ASSERT(idx2 < this->size());
1698
1699 this->swap_elements(idx1, idx2);
1700}
1701
1702//---------------------------------------------------------------------
1703
1704template<class CharType, class BV, unsigned STR_SIZE>
1706{
1707 BM_ASSERT(idx < this->size_);
1708 if (idx >= this->size_)
1709 return;
1710 this->erase_column(idx, true);
1711 this->size_--;
1712}
1713
1714//---------------------------------------------------------------------
1715
1716template<class CharType, class BV, unsigned STR_SIZE>
1718{
1719 if (idx >= this->size_)
1720 this->size_ = idx + 1; // assumed nothing todo outside current size
1721 else
1722 this->bmatr_.clear_column(idx, 0);
1723}
1724//---------------------------------------------------------------------
1725
1726template<class CharType, class BV, unsigned STR_SIZE>
1728{
1729 BM_ASSERT(count);
1730 BM_ASSERT(bm::id_max - count > this->size_);
1731 BM_ASSERT(this->is_nullable());
1732
1733 this->size_ += count;
1734}
1735
1736//---------------------------------------------------------------------
1737
1738template<class CharType, class BV, unsigned STR_SIZE>
1740 size_type idx, const value_type* str)
1741{
1742 set_value_no_null(idx, str);
1743 if (bvector_type* bv_null = this->get_null_bvect())
1744 bv_null->set_bit_no_check(idx);
1745}
1746
1747//---------------------------------------------------------------------
1748
1749template<class CharType, class BV, unsigned STR_SIZE>
1751 size_type idx, const value_type* str)
1752{
1753 for (unsigned i = 0; true; ++i)
1754 {
1755 CharType ch = str[i];
1756 if (!ch)
1757 {
1758 this->clear_value_planes_from(i*8, idx);
1759 return;
1760 }
1761 if (remap_flags_) // compressional re-mapping is in effect
1762 {
1763 auto r = remap_matrix2_.rows();
1764 if (i >= r)
1765 {
1766 remap_matrix1_.resize(i + 1, remap_matrix1_.cols(), true);
1767 remap_matrix2_.resize(i + 1, remap_matrix2_.cols(), true);
1768 }
1769 unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
1770 BM_ASSERT(remap_value);
1771 if (!remap_value) // unknown dictionary element
1772 {
1773 this->clear_value_planes_from(i*8, idx);
1774 return;
1775 }
1776 ch = CharType(remap_value);
1777 }
1778 this->bmatr_.set_octet(idx, i, (unsigned char)ch);
1779 } // for i
1780}
1781
1782//---------------------------------------------------------------------
1783
1784template<class CharType, class BV, unsigned STR_SIZE>
1786 size_type idx, const value_type* str)
1787{
1788 insert_value_no_null(idx, str);
1789 this->insert_null(idx, true);
1790}
1791
1792//---------------------------------------------------------------------
1793
1794template<class CharType, class BV, unsigned STR_SIZE>
1796 size_type idx, const value_type* str)
1797{
1798 for (unsigned i = 0; true; ++i)
1799 {
1800 CharType ch = str[i];
1801 if (!ch)
1802 {
1803 this->insert_clear_value_planes_from(i*8, idx);
1804 return;
1805 }
1806
1807 if (remap_flags_) // compressional re-mapping is in effect
1808 {
1809 unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
1810 BM_ASSERT(remap_value);
1811 if (!remap_value) // unknown dictionary element
1812 {
1813 this->insert_clear_value_planes_from(i*8, idx);
1814 return;
1815 }
1816 ch = CharType(remap_value);
1817 }
1818 this->bmatr_.insert_octet(idx, i, (unsigned char)ch);
1819 } // for i
1820}
1821
1822
1823//---------------------------------------------------------------------
1824
1825template<class CharType, class BV, unsigned STR_SIZE>
1828 size_type idx, value_type* str, size_type buf_size) const BMNOEXCEPT
1829{
1830 size_type i = 0;
1831 for (; true; ++i)
1832 {
1833 if (i >= buf_size)
1834 break;
1835 CharType ch = CharType(this->bmatr_.get_octet(idx, i));
1836 str[i] = ch;
1837 if (!ch)
1838 break;
1839 }
1840 if (remap_flags_)
1841 remap_matrix1_.remap(str, i);
1842 return i;
1843}
1844
1845//---------------------------------------------------------------------
1846
1847template<class CharType, class BV, unsigned STR_SIZE>
1849 bm::word_t* temp_block,
1850 typename bvector_type::optmode opt_mode,
1852{
1853 typename bvector_type::statistics stbv;
1854 parent_type::optimize(temp_block, opt_mode, &stbv);
1855
1856 if (st)
1857 st->add(stbv);
1858}
1859
1860//---------------------------------------------------------------------
1861
1862template<class CharType, class BV, unsigned STR_SIZE>
1865 ) const BMNOEXCEPT
1866{
1867 BM_ASSERT(st);
1868 typename bvector_type::statistics stbv;
1870
1871 st->reset();
1872
1873 st->bit_blocks += stbv.bit_blocks;
1874 st->gap_blocks += stbv.gap_blocks;
1875 st->ptr_sub_blocks += stbv.ptr_sub_blocks;
1876 st->bv_count += stbv.bv_count;
1877 st->max_serialize_mem += stbv.max_serialize_mem + 8;
1878 st->memory_used += stbv.memory_used;
1879 st->gap_cap_overhead += stbv.gap_cap_overhead;
1880
1881 size_t remap_mem_usage = sizeof(remap_flags_);
1882 remap_mem_usage += remap_matrix1_.get_buffer().mem_usage();
1883 remap_mem_usage += remap_matrix2_.get_buffer().mem_usage();
1884
1885 st->memory_used += remap_mem_usage;
1886 if (remap_flags_) // use of remapping requires some extra storage
1887 {
1888 st->max_serialize_mem += (remap_mem_usage * 2);
1889 }
1890}
1891
1892//---------------------------------------------------------------------
1893
1894template<class CharType, class BV, unsigned STR_SIZE>
1896 const value_type* str1, const value_type* str2) BMNOEXCEPT
1897{
1898 BM_ASSERT(str1 && str2);
1899 int res = 0;
1900 for (unsigned i = 0; true; ++i)
1901 {
1902 CharType octet2 = str2[i];
1903 CharType octet1 = str1[i];
1904 if (!octet1)
1905 {
1906 res = -octet2; // -1 || 0
1907 break;
1908 }
1909 res = (octet1 > octet2) - (octet1 < octet2);
1910 if (res || !octet2)
1911 break;
1912 } // for i
1913 return res;
1914}
1915
1916//---------------------------------------------------------------------
1917
1918template<class CharType, class BV, unsigned STR_SIZE>
1920 const value_type* str1, const value_type* str2, size_t min_len) BMNOEXCEPT
1921{
1922 BM_ASSERT(str1 && str2);
1923
1924 int res = 0;
1925 size_t i = 0;
1926
1927 CharType octet2, octet1;
1928 if (min_len >= 4)
1929 {
1930 for (; i < min_len-3; i+=4)
1931 {
1932 unsigned i2, i1;
1933 ::memcpy(&i2, &str2[i], sizeof(i2));
1934 ::memcpy(&i1, &str1[i], sizeof(i1));
1936 if (i1 != i2)
1937 {
1938 octet2 = str2[i];
1939 octet1 = str1[i];
1940 res = (octet1 > octet2) - (octet1 < octet2);
1941 if (res)
1942 return res;
1943 octet2 = str2[i+1];
1944 octet1 = str1[i+1];
1945 res = (octet1 > octet2) - (octet1 < octet2);
1946 if (res)
1947 return res;
1948 octet2 = str2[i+2];
1949 octet1 = str1[i+2];
1950 res = (octet1 > octet2) - (octet1 < octet2);
1951 if (res)
1952 return res;
1953 octet2 = str2[i+3];
1954 octet1 = str1[i+3];
1955 res = (octet1 > octet2) - (octet1 < octet2);
1956 if (res)
1957 return res;
1958 BM_ASSERT(0);
1959 break;
1960 }
1961 } // for i
1962 }
1963
1964
1965 for (; i < min_len; ++i)
1966 {
1967 octet2 = str2[i];
1968 octet1 = str1[i];
1969 BM_ASSERT(octet1 && octet2);
1970 res = (octet1 > octet2) - (octet1 < octet2);
1971 if (res)
1972 return res;
1973 } // for i
1974
1975 for (; true; ++i)
1976 {
1977 octet2 = str2[i];
1978 octet1 = str1[i];
1979 if (!octet1)
1980 {
1981 res = -octet2; // -1 || 0
1982 break;
1983 }
1984 res = (octet1 > octet2) - (octet1 < octet2);
1985 if (res || !octet2)
1986 break;
1987 } // for i
1988 return res;
1989}
1990
1991
1992//---------------------------------------------------------------------
1993
1994template<class CharType, class BV, unsigned STR_SIZE>
1996 size_type idx, const value_type* str) const BMNOEXCEPT
1997{
1998 BM_ASSERT(str);
1999 BM_ASSERT(is_remap()); // MUST guarantee remapping
2000
2001 int res = 0;
2002 for (unsigned i = 0; true; ++i)
2003 {
2004 CharType octet2 = str[i];
2005 CharType octet1 = (CharType)this->bmatr_.get_octet(idx, i);
2006 if (!octet1)
2007 {
2008 res = -octet2; // -1 || 0
2009 break;
2010 }
2011 const unsigned char* remap_row = remap_matrix1_.row(i);
2012 CharType remap_value1 = (CharType)remap_row[unsigned(octet1)];
2013 BM_ASSERT(remap_value1);
2014 res = (remap_value1 > octet2) - (remap_value1 < octet2);
2015 if (res || !octet2)
2016 break;
2017 } // for i
2018 return res;
2019}
2020
2021//---------------------------------------------------------------------
2022
2023template<class CharType, class BV, unsigned STR_SIZE>
2025 const value_type* str) const BMNOEXCEPT
2026{
2027 BM_ASSERT(str);
2028 BM_ASSERT(!is_remap()); // MUST guarantee remapping
2029
2030 int res = 0;
2031 for (unsigned i = 0; true; ++i)
2032 {
2033 CharType octet2 = str[i];
2034 CharType octet1 = (CharType)this->bmatr_.get_octet(idx, i);
2035 if (!octet1)
2036 {
2037 res = -octet2; // -1 || 0
2038 break;
2039 }
2040 res = (octet1 > octet2) - (octet1 < octet2);
2041 if (res || !octet2)
2042 break;
2043 } // for i
2044 return res;
2045}
2046
2047
2048//---------------------------------------------------------------------
2049
2050template<class CharType, class BV, unsigned STR_SIZE>
2052 size_type idx,
2053 const value_type* str) const BMNOEXCEPT
2054{
2055 BM_ASSERT(str);
2056 int res = remap_flags_ ? compare_remap(idx, str)
2057 : compare_nomap(idx, str);
2058 return res;
2059}
2060
2061//---------------------------------------------------------------------
2062
2063template<class CharType, class BV, unsigned STR_SIZE>
2065 size_type idx1,
2066 size_type idx2) const BMNOEXCEPT
2067{
2068 BM_ASSERT(idx1 < size() && idx2 < size());
2069 int res = 0;
2070 if (idx1 == idx2)
2071 return 0;
2072 if (remap_flags_)
2073 {
2074 for (unsigned i = 0; true; ++i)
2075 {
2076 // TODO: implement function to return two octets at once
2077 CharType octet2 = (CharType)this->bmatr_.get_octet(idx2, i);
2078 CharType octet1 = (CharType)this->bmatr_.get_octet(idx1, i);
2079 if (!octet1)
2080 {
2081 res = -octet2; // -1 || 0
2082 break;
2083 }
2084 const unsigned char* remap_row = remap_matrix1_.row(i);
2085 unsigned char remap_value1 = remap_row[unsigned(octet1)];
2086 //BM_ASSERT(remap_value1);
2087 unsigned char remap_value2 = remap_row[unsigned(octet2)];
2088 //BM_ASSERT(remap_value2);
2089 res = (remap_value1 > remap_value2) - (remap_value1 < remap_value2);
2090 if (res || !octet2)
2091 break;
2092 } // for i
2093 }
2094 else
2095 {
2096 for (unsigned i = 0; true; ++i)
2097 {
2098 CharType octet2 = (CharType)this->bmatr_.get_octet(idx2, i);
2099 CharType octet1 = (CharType)this->bmatr_.get_octet(idx1, i);
2100 if (!octet1)
2101 {
2102 res = -octet2; // -1 || 0
2103 break;
2104 }
2105 res = (octet1 > octet2) - (octet1 < octet2);
2106 if (res || !octet2)
2107 break;
2108 } // for i
2109 }
2110 return res;
2111
2112}
2113
2114//---------------------------------------------------------------------
2115
2116template<class CharType, class BV, unsigned STR_SIZE>
2117template<bool USE_PREFIX_BUF>
2119 size_type idx1, size_type idx2,
2120 value_type* prefix_buf) const BMNOEXCEPT
2121{
2122 BM_ASSERT (!(prefix_buf && !USE_PREFIX_BUF));
2123 unsigned i = 0;
2124 CharType ch1 = CharType(this->bmatr_.get_octet(idx1, i));
2125 CharType ch2 = CharType(this->bmatr_.get_octet(idx2, i));
2126 if (ch1 == ch2 && (ch1|ch2))
2127 {
2128 if constexpr(USE_PREFIX_BUF)
2129 {
2130 BM_ASSERT(prefix_buf);
2131 *prefix_buf++ = ch1;
2132 }
2133 for (++i; true; ++i)
2134 {
2135 ch1 = CharType(this->bmatr_.get_octet(idx1, i));
2136 ch2 = CharType(this->bmatr_.get_octet(idx2, i));
2137 if (ch1 != ch2 || (!(ch1|ch2))) // chs not the same or both zero
2138 return i;
2139 if constexpr(USE_PREFIX_BUF)
2140 *prefix_buf++ = ch1;
2141 } // for i
2142 }
2143 return i;
2144}
2145
2146
2147//---------------------------------------------------------------------
2148
2149template<class CharType, class BV, unsigned STR_SIZE>
2150bool
2152 size_type rank,
2153 size_type& pos) BMNOEXCEPT
2154{
2155 BM_ASSERT(rank);
2156 pos = rank - 1;
2157 return true;
2158}
2159
2160//---------------------------------------------------------------------
2161
2162template<class CharType, class BV, unsigned STR_SIZE>
2165 const BMNOEXCEPT
2166{
2167 return this->bmatr_.octet_size();
2168}
2169
2170//---------------------------------------------------------------------
2171
2172template<class CharType, class BV, unsigned STR_SIZE>
2174 octet_freq_matrix_type& octet_matrix) const
2175{
2176 size_type max_str_len = effective_max_str();
2177 octet_matrix.resize(max_str_len, 256, false);
2178 octet_matrix.set_zero(); //init(true);
2179 {
2180 const_iterator it(this);
2181 for(; it.valid(); ++it)
2182 {
2183 const value_type* s = *it; // get asciiz char*
2184 if(!s)
2185 continue;
2186 for (unsigned i = 0; true; ++i) // for each char in str
2187 {
2188 value_type ch = s[i];
2189 if (!ch)
2190 break;
2191 typename
2192 octet_freq_matrix_type::value_type* row = octet_matrix.row(i);
2193 unsigned ch_idx = (unsigned char)ch;
2194 row[ch_idx] += 1;
2195 } // for i
2196 } // for it
2197 }
2198}
2199
2200//---------------------------------------------------------------------
2201
2202template<class CharType, class BV, unsigned STR_SIZE>
2204 slice_octet_matrix_type& octet_remap_matrix1,
2205 slice_octet_matrix_type& octet_remap_matrix2,
2206 octet_freq_matrix_type& octet_occupancy_matrix) const
2207{
2208 size_type max_str_len = effective_max_str();
2209 octet_remap_matrix1.resize(max_str_len, 256, false);
2210 octet_remap_matrix1.set_zero();
2211 octet_remap_matrix2.resize(max_str_len, 256, false);
2212 octet_remap_matrix2.set_zero();
2213
2214 for (unsigned i = 0; i < octet_occupancy_matrix.rows(); ++i)
2215 {
2216 typename octet_freq_matrix_type::value_type* frq_row =
2217 octet_occupancy_matrix.row(i);
2218
2219 unsigned char* remap_row1 = octet_remap_matrix1.row(i);
2220 unsigned char* remap_row2 = octet_remap_matrix2.row(i);
2221
2222 const typename slice_octet_matrix_type::size_type row_size =
2223 octet_occupancy_matrix.cols();
2224 for (unsigned remap_code = 1; true; ++remap_code)
2225 {
2226 typename octet_freq_matrix_type::size_type char_idx;
2227 bool found = bm::find_max_nz(frq_row, row_size, &char_idx);
2228 #if 0
2229 bool found = bm::find_first_nz(frq_row, row_size, &char_idx);
2230 #endif
2231 if (!found)
2232 break;
2233 BM_ASSERT(char_idx);
2234 unsigned char ch = (unsigned char)char_idx;
2235 remap_row1[remap_code] = ch;
2236 remap_row2[ch] = (unsigned char)remap_code;
2237 frq_row[char_idx] = 0; // clear the picked char
2238 } // for
2239 } // for i
2240}
2241
2242//---------------------------------------------------------------------
2243
2244template<class CharType, class BV, unsigned STR_SIZE>
2246{
2248
2249 auto rows = remap_matrix1_.rows();
2250 remap_matrix2_.resize(rows, remap_matrix1_.cols(), false);
2251 if (rows)
2252 {
2253 remap_matrix2_.set_zero();
2254 //remap_matrix2_.init(true);
2255 for (unsigned i = 0; i < remap_matrix1_.rows(); ++i)
2256 {
2257 const unsigned char* remap_row1 = remap_matrix1_.row(i);
2258 unsigned char* remap_row2 = remap_matrix2_.row(i);
2259 for (unsigned j = 1; j < remap_matrix1_.cols(); ++j)
2260 {
2261 if (remap_row1[j])
2262 {
2263 unsigned ch_code = remap_row1[j];
2264 remap_row2[ch_code] = (unsigned char)j;
2265 BM_ASSERT(ch_code < 256);
2266 }
2267 } // for j
2268 } // for i
2269 } // if rows
2270}
2271
2272//---------------------------------------------------------------------
2273
2274template<class CharType, class BV, unsigned STR_SIZE>
2276 value_type* BMRESTRICT sv_str,
2277 size_type buf_size,
2278 const value_type* BMRESTRICT str,
2279 const slice_octet_matrix_type& BMRESTRICT octet_remap_matrix2) BMNOEXCEPT
2280{
2281 if (!octet_remap_matrix2.rows())
2282 return false;
2283
2284 const unsigned char* remap_row = octet_remap_matrix2.row(0);
2285 for (unsigned i = 0; i < buf_size; ++i, remap_row += 256)
2286 {
2287 CharType ch = str[i];
2288 if (!ch)
2289 {
2290 sv_str[i] = ch;
2291 break;
2292 }
2293 unsigned char remap_value = remap_row[unsigned(ch)];
2294 sv_str[i] = CharType(remap_value);
2295 if (!remap_value) // unknown dictionary element
2296 return false;
2297 } // for i
2298 return true;
2299}
2300
2301//---------------------------------------------------------------------
2302
2303template<class CharType, class BV, unsigned STR_SIZE>
2305 value_type* BMRESTRICT sv_str,
2306 value_type* BMRESTRICT str_cp,
2307 size_type buf_size,
2308 const value_type* BMRESTRICT str,
2309 size_t in_len,
2310 const slice_octet_matrix_type& BMRESTRICT octet_remap_matrix2) BMNOEXCEPT
2311{
2312 BM_ASSERT(in_len <= buf_size); (void) buf_size;
2313
2314 const unsigned char* remap_row = octet_remap_matrix2.row(0);
2315 for (unsigned i = 0; i < in_len; ++i, remap_row += 256)
2316 {
2317 CharType ch = str[i];
2318 str_cp[i] = value_type(ch);
2319 BM_ASSERT(ch);
2320 unsigned char remap_value = remap_row[unsigned(ch)];
2321 sv_str[i] = CharType(remap_value);
2322 if (!remap_value) // unknown dictionary element
2323 return false;
2324 } // for i
2325 sv_str[in_len] = str_cp[in_len] = 0; // gurantee zero termination
2326 return true;
2327}
2328
2329
2330//---------------------------------------------------------------------
2331
2332template<class CharType, class BV, unsigned MAX_STR_SIZE>
2335 size_type buf_size,
2336 const value_type* BMRESTRICT sv_str,
2337 const slice_octet_matrix_type& BMRESTRICT octet_remap_matrix1
2338 ) BMNOEXCEPT
2339{
2340 const unsigned char* remap_row = octet_remap_matrix1.row(0);
2341 for (unsigned i = 0; i < buf_size; ++i, remap_row += 256)
2342 {
2343 CharType ch = sv_str[i];
2344 if (!ch)
2345 {
2346 str[i] = ch;
2347 break;
2348 }
2349 unsigned char remap_value = remap_row[unsigned(ch)];
2350 str[i] = CharType(remap_value);
2351 if (!remap_value) // unknown dictionary element
2352 return false;
2353 } // for i
2354 return true;
2355}
2356
2357//---------------------------------------------------------------------
2358
2359template<class CharType, class BV, unsigned MAX_STR_SIZE>
2361{
2363 sv_tmp(this->get_null_support());
2364 sv_tmp.remap_from_impl(*this, 0, true /*move data*/);
2365 sv_tmp.swap(*this);
2366}
2367
2368//---------------------------------------------------------------------
2369
2370template<class CharType, class BV, unsigned MAX_STR_SIZE>
2373{
2374 if (iit.remap_flags_ && iit.omatrix_.rows())
2375 {
2377 sv_tmp(this->get_null_support());
2378 sv_tmp.remap_from_impl(*this, &iit.omatrix_, true /*move data*/);
2379 sv_tmp.swap(*this);
2380 }
2381 else
2382 remap();
2383}
2384
2385//---------------------------------------------------------------------
2386
2387template<class CharType, class BV, unsigned STR_SIZE>
2388void
2390 const str_sparse_vector& str_sv,
2391 octet_freq_matrix_type* omatrix)
2392{
2393 remap_from_impl(str_sv, omatrix, false);
2394}
2395
2396//---------------------------------------------------------------------
2397
2398template<class CharType, class BV, unsigned STR_SIZE>
2399void
2401 const str_sparse_vector& str_sv,
2402 octet_freq_matrix_type* omatrix,
2403 bool move_data)
2404{
2405 const unsigned buffer_size = ins_buf_size; // bm::gap_max_bits; // 65536;
2406
2407 if (str_sv.is_remap())
2408 {
2409 *this = str_sv;
2410 return;
2411 }
2412
2414 typename
2415 bm::alloc_pool_guard<typename bvector_type::allocator_pool_type, str_sparse_vector> g1, g2;
2416 if (move_data)
2417 {
2418 str_sparse_vector& sv = const_cast<str_sparse_vector&>(str_sv);
2419 g1.assign_if_not_set(pool, *this);
2420 g2.assign_if_not_set(pool, sv);
2421
2422 auto r = sv.get_bmatrix().rows();
2423 pool.set_block_limit(r + 10);
2424 }
2425
2426 this->clear_all(true);
2427 if (str_sv.empty()) // no content to remap
2428 return;
2429
2430 octet_freq_matrix_type occ_matrix; // occupancy map
2431 if (!omatrix)
2432 {
2433 str_sv.calc_octet_stat(occ_matrix);
2434 omatrix = &occ_matrix;
2435 }
2437 remap_flags_ = 1; // turn ON remapped mode
2438
2439 typedef bm::dynamic_heap_matrix<CharType, allocator_type> buffer_matrix_type;
2440
2441 size_type str_len = str_sv.effective_max_str()+1;
2442 buffer_matrix_type cmatr(buffer_size, str_len);
2443 cmatr.init(true); // init and set zero
2444
2445 for (size_type i{0}, dsize; true; i += dsize)
2446 {
2447 dsize = str_sv.decode(cmatr, i, buffer_size, true);
2448 if (!dsize)
2449 break;
2450 if (move_data && (dsize == ins_buf_size)) // free the src.vect blocks
2451 {
2452 // here const_cast is OK, because we violate cosnt-ness only
2453 // in internal safe cases controlled by the upper level call
2454 //
2455 str_sparse_vector& sv = const_cast<str_sparse_vector&>(str_sv);
2456 sv.clear_range(i, i+dsize-1, false);
2457 }
2458
2459 this->import(cmatr, i, dsize);
2460 } // for i
2461
2462 if (bvector_type* bv_null = this->get_null_bvect())
2463 {
2464 if (const bvector_type* bv_null_arg = str_sv.get_null_bvector())
2465 if (move_data)
2466 {
2467 bvector_type* bv = const_cast<bvector_type*>(bv_null_arg);
2468 bv_null->swap(*bv);
2469 }
2470 else
2471 *bv_null = *bv_null_arg;
2472 else
2473 {
2474 // TODO: exception? assert? maybe it is OK...
2475 }
2476 }
2477
2478}
2479
2480//---------------------------------------------------------------------
2481
2482template<class CharType, class BV, unsigned STR_SIZE>
2484{
2485 if (remap_flags_)
2487 this->sync_ro();
2488}
2489
2490//---------------------------------------------------------------------
2491
2492template<class CharType, class BV, unsigned STR_SIZE>
2495 bm::null_support null_able) const BMNOEXCEPT
2496{
2497 // at this point both vectors should have the same remap settings
2498 // to be considered "equal".
2499 // Strictly speaking this is incorrect, because re-map and non-remap
2500 // vectors may have the same content
2501
2502 if (remap_flags_ != sv.remap_flags_)
2503 return false;
2504 if (remap_flags_)
2505 {
2506 // TODO: equal matrix dimention overlap may be not enough
2507 // (check the non-overlap to be zero)
2508 // dimentionality shrink is a result of de-serialization
2509 bool b;
2510 b = remap_matrix1_.equal_overlap(sv.remap_matrix1_);
2511 if (!b)
2512 return b;
2513 b = remap_matrix2_.equal_overlap(sv.remap_matrix2_);
2514 if (!b)
2515 return b;
2516 }
2517 return parent_type::equal(sv, null_able);
2518}
2519
2520//---------------------------------------------------------------------
2521
2522template<class CharType, class BV, unsigned STR_SIZE>
2525 size_type left, size_type right,
2526 bm::null_support slice_null)
2527{
2528 if (left > right)
2529 bm::xor_swap(left, right);
2530 this->clear_all(true);
2531
2535
2536 this->copy_range_slices(sv, left, right, slice_null);
2537 this->resize(sv.size());
2538}
2539
2540//---------------------------------------------------------------------
2541
2542template<class CharType, class BV, unsigned STR_SIZE>
2546{
2547 size_type arg_size = str_sv.size();
2548 if (this->size_ < arg_size)
2549 resize(arg_size);
2550
2551 // there is an assumption here that we only need to copy remap flags once
2552 // because we merge matrices with the same remaps
2553 // otherwise - undefined behavior
2554 //
2555 if (remap_flags_ != str_sv.remap_flags_)
2556 {
2557 remap_flags_ = str_sv.remap_flags_;
2560 }
2561 bvector_type* bv_null = this->get_null_bvect();
2562
2563 this->merge_matr(str_sv.bmatr_);
2564
2565 // our vector is NULL-able but argument is not (assumed all values are real)
2566 if (bv_null && !str_sv.is_nullable())
2567 bv_null->set_range(0, arg_size-1);
2568
2569 return *this;
2570
2571}
2572
2573//---------------------------------------------------------------------
2574
2575template<class CharType, class BV, unsigned STR_SIZE>
2577 size_type left, size_type right,
2578 bm::null_support slice_null)
2579{
2580 if (right < left)
2581 bm::xor_swap(left, right);
2582 this->keep_range_no_check(left, right, slice_null);
2583}
2584
2585//---------------------------------------------------------------------
2586
2587template<class CharType, class BV, unsigned STR_SIZE>
2590{
2591 typedef typename
2593 return it_type(this);
2594}
2595
2596//---------------------------------------------------------------------
2597
2598template<class CharType, class BV, unsigned STR_SIZE>
2600 bool free_mem, unsigned remap) BMNOEXCEPT
2601{
2602 parent_type::clear_all(free_mem);
2603 if (remap_flags_ && (remap == 0))
2604 {
2605 remap_flags_ = 0;
2606 remap_matrix1_.free();
2607 remap_matrix2_.free();
2608 }
2609}
2610
2611//---------------------------------------------------------------------
2612
2613template<class CharType, class BV, unsigned STR_SIZE>
2615 const char* err_msg)
2616{
2617#ifndef BM_NO_STL
2618 throw std::range_error(err_msg);
2619#else
2620 BM_ASSERT_THROW(false, BM_ERR_RANGE);
2621#endif
2622}
2623
2624//---------------------------------------------------------------------
2625
2626template<class CharType, class BV, unsigned STR_SIZE>
2628 const char* err_msg)
2629{
2630#ifndef BM_NO_STL
2631 if (!err_msg)
2632 err_msg = "Unknown/incomparable dictionary character";
2633 throw std::domain_error(err_msg);
2634#else
2635 BM_ASSERT_THROW(false, BM_BAD_VALUE);
2636#endif
2637}
2638
2639
2640//---------------------------------------------------------------------
2641//
2642//---------------------------------------------------------------------
2643
2644
2645template<class CharType, class BV, unsigned STR_SIZE>
2647: sv_(0), substr_from_(0), substr_to_(STR_SIZE),
2648 pos_(bm::id_max), pos_in_buf_(~size_type(0))
2649{
2650}
2651
2652//---------------------------------------------------------------------
2653
2654template<class CharType, class BV, unsigned STR_SIZE>
2657: sv_(it.sv_),
2658 substr_from_(it.substr_from_), substr_to_(it.substr_to_),
2659 pos_(it.pos_), pos_in_buf_(~size_type(0))
2660{
2661}
2662
2663//---------------------------------------------------------------------
2664
2665template<class CharType, class BV, unsigned STR_SIZE>
2668: sv_(sv), pos_(sv->empty() ? bm::id_max : 0), pos_in_buf_(~size_type(0))
2669{
2670 substr_from_ = 0;
2671 substr_to_ = (unsigned) sv_->effective_max_str();
2672 buf_matrix_.resize(n_rows, substr_to_+1);
2673}
2674
2675//---------------------------------------------------------------------
2676
2677template<class CharType, class BV, unsigned STR_SIZE>
2681: sv_(sv), pos_(pos >= sv->size() ? bm::id_max : pos), pos_in_buf_(~size_type(0))
2682{
2683 substr_from_ = 0;
2684 substr_to_ = (unsigned) sv_->effective_max_str();
2685 buf_matrix_.resize(n_rows, substr_to_+1);
2686}
2687
2688//---------------------------------------------------------------------
2689
2690template<class CharType, class BV, unsigned STR_SIZE>
2692 unsigned from, unsigned len) BMNOEXCEPT
2693{
2694 unsigned max_str = sv_->effective_max_str();
2695 substr_from_ = from;
2696 if (!len)
2697 {
2698 len = 1 + max_str - from;
2699 substr_to_ = from + len;
2700 }
2701 else
2702 {
2703 // TODO: check for overflow
2704 substr_to_ = substr_from_ + (len - 1);
2705 }
2706 if (max_str < substr_to_)
2707 substr_to_ = max_str;
2708 buf_matrix_.resize(n_rows, len+1, false/*no content copy*/);
2709}
2710
2711//---------------------------------------------------------------------
2712
2713template<class CharType, class BV, unsigned STR_SIZE>
2716{
2717 BM_ASSERT(sv_);
2718 BM_ASSERT(this->valid());
2719
2720 if (pos_in_buf_ == ~size_type(0))
2721 {
2722 if (!buf_matrix_.is_init())
2723 buf_matrix_.init();
2724 pos_in_buf_ = 0;
2725 size_type d = sv_->decode_substr(buf_matrix_, pos_, n_rows,
2726 substr_from_, substr_to_);
2727 if (!d)
2728 {
2729 pos_ = bm::id_max;
2730 return 0;
2731 }
2732 }
2733 if (is_null())
2734 return 0;
2735 return buf_matrix_.row(pos_in_buf_);
2736}
2737
2738//---------------------------------------------------------------------
2739
2740template<class CharType, class BV, unsigned STR_SIZE>
2743{
2744 BM_ASSERT(sv_);
2745 BM_ASSERT(this->valid());
2746
2747 if (pos_in_buf_ == ~size_type(0))
2748 {
2749 if (!buf_matrix_.is_init())
2750 buf_matrix_.init();
2751 pos_in_buf_ = 0;
2752 size_type d = sv_->decode_substr(buf_matrix_, pos_, n_rows,
2753 substr_from_, substr_to_);
2754 if (!d)
2755 {
2756 pos_ = bm::id_max;
2757 return string_view_type();
2758 }
2759 }
2760 if (is_null())
2761 return string_view_type();
2762 return string_view_type(buf_matrix_.row(pos_in_buf_));
2763}
2764
2765
2766//---------------------------------------------------------------------
2767
2768template<class CharType, class BV, unsigned STR_SIZE>
2769void
2772 ) BMNOEXCEPT
2773{
2774 pos_ = (!sv_ || pos >= sv_->size()) ? bm::id_max : pos;
2775 pos_in_buf_ = ~size_type(0);
2776}
2777
2778//---------------------------------------------------------------------
2779
2780template<class CharType, class BV, unsigned STR_SIZE>
2781void
2783{
2784 if (pos_ == bm::id_max) // nothing to do, we are at the end
2785 return;
2786 ++pos_;
2787
2788 if (pos_ >= sv_->size())
2789 this->invalidate();
2790 else
2791 {
2792 if (pos_in_buf_ != ~size_type(0))
2793 {
2794 ++pos_in_buf_;
2795 if (pos_in_buf_ >= n_rows)
2796 pos_in_buf_ = ~size_type(0);
2797 }
2798 }
2799}
2800
2801//---------------------------------------------------------------------
2802//
2803//---------------------------------------------------------------------
2804
2805template<class CharType, class BV, unsigned STR_SIZE>
2809
2810//---------------------------------------------------------------------
2811
2812template<class CharType, class BV, unsigned STR_SIZE>
2815: sv_(sv), pos_in_buf_(~size_type(0))
2816{
2817 if (sv)
2818 {
2819 prev_nb_ = sv_->size() >> bm::set_block_shift;
2820 bv_null_ = sv_->get_null_bvect();
2821 unsigned esize = (unsigned) sv_->effective_max_str();
2822 if (esize < STR_SIZE)
2823 esize = STR_SIZE;
2824 buf_matrix_.init_resize(n_buf_size, esize);
2825 }
2826 else
2827 {
2828 bv_null_ = 0; prev_nb_ = 0;
2829 }
2830}
2831
2832//---------------------------------------------------------------------
2833
2834template<class CharType, class BV, unsigned STR_SIZE>
2837: sv_(bi.sv_), bv_null_(bi.bv_null_), buf_matrix_(bi.buf_matrix_.rows(), bi.buf_matrix_.cols()),
2838 pos_in_buf_(~size_type(0)), prev_nb_(bi.prev_nb_), opt_mode_(bi.opt_mode_),
2839 remap_flags_(bi.remap_flags_), omatrix_(bi.omatrix_)
2840{
2841 BM_ASSERT(bi.empty());
2842}
2843
2844//---------------------------------------------------------------------
2845
2846template<class CharType, class BV, unsigned STR_SIZE>
2851
2852//---------------------------------------------------------------------
2853
2854template<class CharType, class BV, unsigned STR_SIZE>
2855bool
2861
2862//---------------------------------------------------------------------
2863
2864template<class CharType, class BV, unsigned STR_SIZE>
2866{
2867 flush_impl();
2868 if (remap_flags_)
2869 {
2870 buf_matrix_.free();
2871 sv_->remap(*this);
2872 remap_flags_ = 0;
2873 }
2874}
2875
2876//---------------------------------------------------------------------
2877
2878template<class CharType, class BV, unsigned STR_SIZE>
2880{
2881 if (this->empty())
2882 return;
2883
2884 size_type imp_idx = sv_->size();
2885 sv_->import_no_check(buf_matrix_, imp_idx, pos_in_buf_+1, false);
2887 block_idx_type nb = sv_->size() >> bm::set_block_shift;
2888 if (nb != prev_nb_)
2889 {
2890 sv_->optimize_block(prev_nb_, opt_mode_);
2891 prev_nb_ = nb;
2892 }
2893}
2894
2895
2896//---------------------------------------------------------------------
2897
2898template<class CharType, class BV, unsigned STR_SIZE>
2901{
2902 if (!v)
2903 {
2904 this->add_null();
2905 return;
2906 }
2907 size_type buf_idx = this->pos_in_buf_; // offset in
2908 size_type sz = sv_->size();
2909
2910 this->add_value(v);
2911 if (bv_null_)
2912 bv_null_->set_bit_no_check(sz + buf_idx + 1);
2913}
2914
2915//---------------------------------------------------------------------
2916
2917template<class CharType, class BV, unsigned STR_SIZE>
2919{
2920 /*size_type buf_idx = */this->add_value("");
2921}
2922
2923//---------------------------------------------------------------------
2924
2925template<class CharType, class BV, unsigned STR_SIZE>
2928{
2929 for (size_type i = 0; i < count; ++i) // TODO: optimization
2930 this->add_value("");
2931}
2932
2933//---------------------------------------------------------------------
2934
2935
2936template<class CharType, class BV, unsigned STR_SIZE>
2937void
2940{
2942
2943 size_t slen = ::strlen(v);
2944
2945 auto orows = omatrix_.rows();
2946 if (slen > orows)
2947 {
2948 if (!orows)
2949 {
2950 omatrix_.resize(slen, 256, false);
2951 omatrix_.set_zero();
2952 }
2953 else
2954 {
2955 omatrix_.resize(slen, 256, true);
2956 for (; orows < omatrix_.rows(); ++orows)
2957 {
2958 typename
2959 octet_freq_matrix_type::value_type* r = omatrix_.row(orows);
2960 ::memset(r, 0, 256 * sizeof(r[0]));
2961 } // for orows
2962 }
2963 }
2964 for (size_t i = 0; i < slen; ++i)
2965 {
2966 value_type ch = v[i];
2967 typename
2968 octet_freq_matrix_type::value_type* row = omatrix_.row(i);
2969 unsigned ch_idx = (unsigned char)ch;
2970 row[ch_idx] += 1;
2971 } // for i
2972}
2973
2974//---------------------------------------------------------------------
2975
2976template<class CharType, class BV, unsigned STR_SIZE>
2977void
2980{
2981 BM_ASSERT(sv_);
2982 BM_ASSERT(v);
2983 BM_ASSERT(buf_matrix_.rows()>0);
2984
2985 if (pos_in_buf_ >= buf_matrix_.rows()-1)
2986 {
2987 if (pos_in_buf_ == ~size_type(0) && (!buf_matrix_.is_init()))
2988 buf_matrix_.init();
2989 else
2990 this->flush_impl();
2991 pos_in_buf_ = 0; buf_matrix_.set_zero();
2992 }
2993 else
2994 {
2995 ++pos_in_buf_;
2996 }
2997
2998 if (remap_flags_)
2999 add_remap_stat(v);
3000
3002
3003 typename buffer_matrix_type::size_type i;
3004 typename buffer_matrix_type::size_type cols = buf_matrix_.cols();
3005 for (i = 0; i < cols; ++i)
3006 {
3007 r[i] = v[i];
3008 if (!r[i])
3009 return;
3010 } // for i
3011
3012 // string is longer than the initial size, matrix resize is needed
3013 for (cols = i; true; ++cols) // find the new length
3014 {
3015 if (!v[cols])
3016 break;
3017 } // for cols
3018
3019 // cols is now string length and the new mattrix size parameter
3020 buf_matrix_.resize(buf_matrix_.rows(), cols + 1);
3021
3022 r = buf_matrix_.row(pos_in_buf_);
3023 cols = buf_matrix_.cols();
3024 for (; i < cols; ++i)
3025 {
3026 r[i] = v[i];
3027 if (!r[i])
3028 return;
3029 } // for i
3030 BM_ASSERT(0);
3031}
3032
3033//---------------------------------------------------------------------
3034
3035template<class CharType, class BV, unsigned STR_SIZE>
3037{
3038 const bvector_type* bv_null = this->get_null_bvector();
3039 if (!bv_null)
3040 return;
3041 bool found = bv_null->find_reverse(this->size_);
3042 this->size_ += found;
3043}
3044
3045//---------------------------------------------------------------------
3046
3047} // namespace
3048
3049#endif
Algorithms for bvector<> (main include).
basic bit-matrix class and utilities
Constants, lookup tables and typedefs.
Definitions(internal).
#define BMRESTRICT
Definition bmdef.h:203
#define BMNOEXCEPT
Definition bmdef.h:82
#define BM_ASSERT
Definition bmdef.h:139
#define BM_ASSERT_THROW(x, xerrcode)
Definition bmdef.h:338
#define BMNOEXCEPT2
Definition bmdef.h:85
Utilities for bit transposition (internal) (experimental!).
void swap(base_sparse_vector< CharType, BV, MAX_SIZE > &bsv) BMNOEXCEPT
void resize(size_type new_size, bool set_null)
bm::null_support get_null_support() const BMNOEXCEPT
Definition bmbmatrix.h:444
const bvector_type * get_null_bvector() const BMNOEXCEPT
Definition bmbmatrix.h:451
void copy_from(const base_sparse_vector< CharType, BV, MAX_SIZE > &bsv)
void clear_range(size_type left, size_type right, bool set_null)
bvector_type_ptr get_create_slice(unsigned i)
bmatrix_type bmatr_
bit-transposed matrix
Definition bmbmatrix.h:701
void erase_column(size_type idx, bool erase_null)
void swap_elements(size_type idx1, size_type idx2)
Definition bmbmatrix.h:420
bool is_ro_
read-only
Definition bmbmatrix.h:705
void copy_range_slices(const base_sparse_vector< CharType, BV, MAX_SIZE > &bsv, typename base_sparse_vector< CharType, BV, MAX_SIZE >::size_type left, typename base_sparse_vector< CharType, BV, MAX_SIZE >::size_type right, bm::null_support slice_null)
bool is_null(size_type idx) const BMNOEXCEPT
size_type size_
array size
Definition bmbmatrix.h:703
void bit_and_rows(const bvector_type &bv)
Definition bmbmatrix.h:672
void keep_range_no_check(size_type left, size_type right, bm::null_support slice_null)
std::make_unsigned< value_type >::type unsigned_value_type
Definition bmbmatrix.h:376
const bmatrix_type & get_bmatrix() const BMNOEXCEPT
Definition bmbmatrix.h:553
bvector_type * get_null_bvect() BMNOEXCEPT
Definition bmbmatrix.h:525
void bit_sub_rows(const bvector_type &bv, bool use_null)
Definition bmbmatrix.h:665
void clear_all(bool free_mem=true) BMNOEXCEPT
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename bvector_type::statistics *stat=0)
void insert_null(size_type idx, bool not_null)
void clear_value_planes_from(unsigned plane_idx, size_type idx)
void calc_stat(typename bvector_type::statistics *st) const BMNOEXCEPT
bool equal(const base_sparse_vector< CharType, BV, MAX_SIZE > &sv, bm::null_support null_able=bm::use_null) const BMNOEXCEPT
void insert_clear_value_planes_from(unsigned plane_idx, size_type idx)
Basic dense bit-matrix class.
Definition bmbmatrix.h:56
size_type rows() const BMNOEXCEPT
Definition bmbmatrix.h:140
Constant iterator designed to enumerate "ON" bits.
Definition bm.h:603
optmode
Optimization mode Every next level means additional checks (better compression vs time).
Definition bm.h:133
@ opt_compress
compress blocks when possible (GAP/prefix sum)
Definition bm.h:137
allocator_type::allocator_pool_type allocator_pool_type
Definition bm.h:118
bvector_size_type size_type
Definition bm.h:121
void import_sorted(const size_type *ids, const size_type ids_size, bool opt_flag)
Import sorted integers (set bits).
Definition bm.h:4297
Alloc allocator_type
Definition bm.h:117
Back insert iterator implements buffered insert, faster than generic access assignment.
bvector_type * bv_null_
!< pointer on the parent vector
back_insert_iterator & operator=(const StrType &v)
push value to the vector
void add(const value_type *v)
add value to the container
back_insert_iterator & operator++(int)
noop
unsigned get_remap() const BMNOEXCEPT
Get curent remap state flags.
octet_freq_matrix_type omatrix_
octet frequency matrix
void flush()
flush the accumulated buffer.
back_insert_iterator & operator=(const value_type *v)
push value to the vector
back_insert_iterator & operator++()
noop
buffer_matrix_type buf_matrix_
!< not NULL vector pointer
str_sparse_vector_type * str_sparse_vector_type_ptr
bvector_type::block_idx_type block_idx_type
str_sparse_vector_type::value_type value_type
str_sparse_vector_type::size_type size_type
void add_null()
add NULL (no-value) to the container
void set_optimize(typename bvector_type::optmode opt_mode) BMNOEXCEPT
Set optimization on load option (deafult: false).
bvector_type::allocator_type allocator_type
bvector_type::optmode opt_mode_
!< previous block added
void set_remap(bool flag) BMNOEXCEPT
Method to configure back inserter to collect statistics on optimal character codes.
const octet_freq_matrix_type & get_octet_matrix() const noexcept
Get octet frequence matrix.
block_idx_type prev_nb_
!< buffer position
str_sparse_vector< CharType, BV, STR_SIZE > str_sparse_vector_type
bool empty() const BMNOEXCEPT
return true if insertion buffer is empty
void add_null(size_type count)
add a series of consequitve NULLs (no-value) to the container
allocator_type::allocator_pool_type allocator_pool_type
str_sparse_vector_type::bvector_type bvector_type
Const iterator to do quick traverse of the sparse vector.
str_sparse_vector_type * str_sparse_vector_type_ptr
std::input_iterator_tag iterator_category
size_type pos() const BMNOEXCEPT
Current position (index) in the vector.
void invalidate() BMNOEXCEPT
Invalidate current iterator.
bool operator<(const const_iterator &it) const BMNOEXCEPT
const value_type * operator*() const
Get current position (value).
bool operator!=(const const_iterator &it) const BMNOEXCEPT
dynamic_heap_matrix< CharType, allocator_type > buffer_matrix_type
bool is_null() const BMNOEXCEPT
Get NULL status.
const value_type * value() const
Get zero terminated string value at the current position.
allocator_type::allocator_pool_type allocator_pool_type
bvector_type::allocator_type allocator_type
void go_to(size_type pos) BMNOEXCEPT
re-position to a specified position
void advance() BMNOEXCEPT
advance iterator forward by one
str_sparse_vector< CharType, BV, STR_SIZE > str_sparse_vector_type
const_iterator() BMNOEXCEPT
Construct iterator (not attached to any particular vector).
bool operator<=(const const_iterator &it) const BMNOEXCEPT
str_sparse_vector_type::bvector_type bvector_type
bool valid() const BMNOEXCEPT
Returns true if iterator is at a valid position.
str_sparse_vector_type::size_type size_type
str_sparse_vector_type::value_type value_type
const_iterator & operator++(int) BMNOEXCEPT
Advance to the next available value.
string_view_type get_string_view() const
Get current string as string_view.
bool operator>=(const const_iterator &it) const BMNOEXCEPT
const_iterator & operator++() BMNOEXCEPT
Advance to the next available value.
void set_substr(unsigned from, unsigned len=0) BMNOEXCEPT
std::basic_string_view< CharType > string_view_type
bool operator==(const const_iterator &it) const BMNOEXCEPT
bool operator>(const const_iterator &it) const BMNOEXCEPT
const_reference(const str_sparse_vector< CharType, BV, STR_SIZE > &str_sv, size_type idx)
const value_type * get() const BMNOEXCEPT
bool operator==(const const_reference &ref) const BMNOEXCEPT
bm::heap_vector< CharType, typename bvector_type::allocator_type, true > bufffer_type
reference & operator=(const reference &ref)
reference(str_sparse_vector< CharType, BV, STR_SIZE > &str_sv, size_type idx)
bool is_null() const BMNOEXCEPT
reference & operator=(const value_type *str)
const value_type * get() const BMNOEXCEPT
bool operator==(const reference &ref) const BMNOEXCEPT
succinct sparse vector for strings with compression using bit-slicing ( transposition) method
void insert(size_type idx, const value_type *str)
insert the specified element
void swap(str_sparse_vector &str_sv) BMNOEXCEPT
void calc_octet_stat(octet_freq_matrix_type &octet_matrix) const
bool is_ro() const BMNOEXCEPT
Returns true if vector is read-only.
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename str_sparse_vector< char, bm::bvector<>, STR_SIZE >::statistics *stat=0)
int compare(size_type idx1, size_type idx2) const BMNOEXCEPT
Compare two vector elements.
void set_value(size_type idx, const value_type *str)
set value without checking boundaries
void resize(size_type sz)
resize vector
void calc_stat(struct str_sparse_vector< char, bm::bvector<>, STR_SIZE >::statistics *st) const BMNOEXCEPT
bool empty() const
return true if vector is empty
static size_type max_str()
get maximum string length capacity
void set_null(size_type idx)
set NULL status for the specified element Vector is resized automatically
unsigned char * init_remap_buffer()
void set_null(const bvector_type &bv_idx)
Set NULL all elements set as 1 in the argument vector.
void remap_from_impl(const str_sparse_vector &str_sv, octet_freq_matrix_type *omatrix, bool move_data)
Remap from implementation, please note that move_data flag can violate cosnt-ness.
void sync(bool force)
syncronize internal structures
str_sparse_vector< CharType, BV, STR_SIZE > & clear_range(size_type left, size_type right, bool set_null=false)
clear range (assign bit 0 for all planes)
const bvector_type * bvector_type_const_ptr
str_sparse_vector< CharType, BV, STR_SIZE > & merge(str_sparse_vector< CharType, BV, STR_SIZE > &str_sv)
merge with another sparse vector using OR operation Merge is different from join(),...
static constexpr bool is_compressed() BMNOEXCEPT
various type traits
void assign(size_type idx, const StrType &str)
set specified element with bounds checking and automatic resize
str_sparse_vector(bm::null_support null_able=bm::no_null, allocation_policy_type ap=allocation_policy_type(), size_type bv_max_size=bm::id_max, const allocator_type &alloc=allocator_type())
Sparse vector constructor.
bm::dynamic_heap_matrix< unsigned char, allocator_type > slice_octet_matrix_type
Matrix of character remappings.
bool equal(const str_sparse_vector< CharType, BV, STR_SIZE > &sv, bm::null_support null_able=bm::use_null) const BMNOEXCEPT
check if another sparse vector has the same content and size
void remap_from(const str_sparse_vector &str_sv, octet_freq_matrix_type *omatrix=0)
Build remapping profile and load content from another sparse vector Remapped vector likely saves memo...
size_type size() const
return size of the vector
int compare_nomap(size_type idx, const value_type *str) const BMNOEXCEPT
void build_octet_remap(slice_octet_matrix_type &octet_remap_matrix1, slice_octet_matrix_type &octet_remap_matrix2, octet_freq_matrix_type &octet_occupancy_matrix) const
const remap_matrix_type * get_remap_matrix() const
void keep(const bvector_type &bv_idx)
Set NULL all elements NOT set as 1 in the argument vector.
const_iterator begin() const BMNOEXCEPT
Provide const iterator access to container content.
void import_char_slice(const unsigned_value_type *ch_slice, unsigned ch_acc, size_type char_slice_idx, size_type idx_from, size_type imp_size)
bm::basic_bmatrix< BV > bmatrix_type
unsigned common_prefix_length(size_type idx1, size_type idx2, value_type *prefix_buf=0) const BMNOEXCEPT
Find size of common prefix between two vector elements in octets.
bool is_remap() const BMNOEXCEPT
Get character remapping status (true | false).
void insert_value(size_type idx, const value_type *str)
insert value without checking boundaries
void clear_all(bool free_mem, unsigned remap=0) BMNOEXCEPT
remap_matrix_type * get_remap_matrix()
void push_back(const StrType &str)
push back a string
void import_no_check(CharMatrix &cmatr, size_type idx_from, size_type imp_size, bool set_not_null=true)
bvector_type::allocation_policy allocation_policy_type
void clear(const bvector_type &bv_idx)
Set vector elements spcified by argument bit-vector to empty Note that set to empty elements are NOT ...
BV::allocator_type allocator_type
void insert_value_no_null(size_type idx, const value_type *str)
insert value without checking boundaries or support of NULL
int compare(size_type idx, const value_type *str) const BMNOEXCEPT
Compare vector element with argument lexicographically.
bm::dynamic_heap_matrix< size_t, allocator_type > octet_freq_matrix_type
Matrix of character frequencies (for optimal code remap).
size_type get(size_type idx, value_type *str, size_type buf_size) const BMNOEXCEPT
get specified element
bool resolve_range(size_type from, size_type to, size_type *idx_from, size_type *idx_to) const
parent_type::unsigned_value_type unsigned_value_type
void import_back(CharMatrix &cmatr, size_type imp_size)
Bulk push-back import of strings from a C-style matrix of chars.
static int compare_str(const value_type *str1, const value_type *str2) BMNOEXCEPT
size_type decode(CharMatrix &cmatr, size_type idx_from, size_type dec_size, bool zero_mem=true) const
Bulk export strings to a C-style matrix of chars.
void resize_internal(size_type sz)
static bool find_rank(size_type rank, size_type &pos) BMNOEXCEPT
find position of compressed element by its rank
str_sparse_vector(const str_sparse_vector &str_sv)
bool remap_tosv(value_type *sv_str, size_type buf_size, const value_type *str) const BMNOEXCEPT
bool remap_n_tosv_2way(value_type *BMRESTRICT sv_str, value_type *BMRESTRICT str_cp, size_type buf_size, const value_type *BMRESTRICT str, size_t in_len) const BMNOEXCEPT
bool try_get(size_type idx, StrType &str) const
get specified string element if NOT NULL Template method expects an STL-compatible type basic_string<...
void erase(size_type idx)
erase the specified element
int compare_remap(size_type idx, const value_type *str) const BMNOEXCEPT
size_type effective_size() const BMNOEXCEPT
size of sparse vector (may be different for RSC)
bvector_type * bvector_type_ptr
const unsigned char * get_remap_buffer() const
str_sparse_vector(str_sparse_vector< CharType, BV, STR_SIZE > &&str_sv) BMNOEXCEPT
void insert(size_type idx, const StrType &str)
insert STL string
static void throw_bad_value(const char *err_msg)
throw domain error
void get(size_type idx, StrType &str) const
get specified string element Template method expects an STL-compatible type basic_string<>
reference operator[](size_type idx)
Operator to get write access to an element.
static constexpr bool is_str() BMNOEXCEPT
void push_back(const value_type *str)
push back a string (zero terminated)
size_type decode_substr(CharMatrix &cmatr, size_type idx_from, size_type dec_size, unsigned substr_from, unsigned substr_to, bool zero_mem=true) const
Bulk export strings to a C-style matrix of chars.
base_sparse_vector< CharType, BV, STR_SIZE > parent_type
void push_back_null()
push back NULL value
void remap(back_insert_iterator &iit)
reamp using statistics table from inserter
back_insert_iterator get_back_inserter()
Provide back insert iterator Back insert iterator implements buffered insertion, which is faster,...
str_sparse_vector< CharType, BV, STR_SIZE > & operator=(const str_sparse_vector< CharType, BV, STR_SIZE > &str_sv)
size_type effective_max_str() const BMNOEXCEPT
get effective string length used in vector Calculate and returns efficiency, how close are we to the ...
void sync_size() BMNOEXCEPT
recalculate size to exclude tail NULL elements After this call size() will return the true size of th...
slice_octet_matrix_type remap_matrix_type
bvector_type::enumerator bvector_enumerator_type
const_iterator get_const_iterator(size_type idx) const BMNOEXCEPT
Get const_itertor re-positioned to specific element.
void set_value_no_null(size_type idx, const value_type *str)
set value without checking boundaries or support of NULL
void push_back_null(size_type count)
push back specified amount of NULL values
void copy_range(const str_sparse_vector< CharType, BV, STR_SIZE > &sv, size_type left, size_type right, bm::null_support slice_null=bm::use_null)
copy range of values from another sparse vector
size_t remap_size() const
static bool remap_n_tosv_2way(value_type *BMRESTRICT sv_str, value_type *BMRESTRICT str_cp, size_type buf_size, const value_type *BMRESTRICT str, size_t in_len, const slice_octet_matrix_type &BMRESTRICT octet_remap_matrix2) BMNOEXCEPT
static void throw_range_error(const char *err_msg)
throw range error
void swap(size_type idx1, size_type idx2)
swap two vector elements between each other
size_type size_internal() const
str_sparse_vector(const str_sparse_vector &str_sv, bm::remap_setup remap_mode)
static bool remap_fromsv(value_type *BMRESTRICT str, size_type buf_size, const value_type *BMRESTRICT sv_str, const slice_octet_matrix_type &BMRESTRICT octet_remap_matrix1) BMNOEXCEPT
void set(size_type idx, const value_type *str)
set specified element with bounds checking and automatic resize
static bool remap_tosv(value_type *BMRESTRICT sv_str, size_type buf_size, const value_type *BMRESTRICT str, const slice_octet_matrix_type &BMRESTRICT octet_remap_matrix2) BMNOEXCEPT
const const_reference operator[](size_type idx) const
Operator to get read access to an element.
void keep_range(size_type left, size_type right, bm::null_support slice_null=bm::use_null)
Keep only specified interval in the sparse vector, clear all other elements.
static int compare_str(const value_type *str1, const value_type *str2, size_t min_len) BMNOEXCEPT
allocator_type::allocator_pool_type allocator_pool_type
bvector_type::size_type size_type
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w) BMNOEXCEPT
Definition bmutil.h:582
unsigned bit_list(T w, B *bits) BMNOEXCEPT
Unpacks word into list of ON bit indexes.
Definition bmfunc.h:595
null_support
NULL-able value support.
Definition bmconst.h:229
@ use_null
support "non-assigned" or "NULL" logic
Definition bmconst.h:230
@ no_null
do not support NULL values
Definition bmconst.h:231
Definition bm.h:78
const unsigned id_max
Definition bmconst.h:109
bool find_first_nz(const VT *arr, SZ arr_size, SZ *found_idx) BMNOEXCEPT
Find max non-zero value in an array.
Definition bmfunc.h:10135
unsigned int word_t
Definition bmconst.h:39
int for_each_bit_range_no_check(const BV &bv, typename BV::size_type left, typename BV::size_type right, Func &bit_functor)
Implementation of for_each_bit_range without boilerplave checks.
BMFORCEINLINE bool has_zero_byte_u64(bm::id64_t v) BMNOEXCEPT
Returns true if INT64 contains 0 octet.
Definition bmutil.h:571
BMFORCEINLINE void xor_swap(W &x, W &y) BMNOEXCEPT
XOR swap two variables.
Definition bmutil.h:534
unsigned long long int id64_t
Definition bmconst.h:35
@ COPY_RTABLES
copy remap tables only (without data)
bool find_max_nz(const VT *arr, SZ arr_size, SZ *found_idx) BMNOEXCEPT
Find max non-zero value in an array.
Definition bmfunc.h:10114
const unsigned gap_max_bits
Definition bmconst.h:81
const unsigned set_block_shift
Definition bmconst.h:56
size_t gap_cap_overhead
gap memory overhead between length and capacity
Definition bmfunc.h:63
size_t ptr_sub_blocks
Number of sub-blocks.
Definition bmfunc.h:59
bv_statistics() BMNOEXCEPT
Definition bmfunc.h:67
size_t gap_blocks
Number of GAP blocks.
Definition bmfunc.h:58
size_t bit_blocks
Number of bit blocks.
Definition bmfunc.h:57
size_t bv_count
Number of bit-vectors.
Definition bmfunc.h:60
size_t max_serialize_mem
estimated maximum memory for serialization
Definition bmfunc.h:61
size_t memory_used
memory usage for all blocks and service tables
Definition bmfunc.h:62
void add(const bv_statistics &st) BMNOEXCEPT
Sum data from another sttructure.
Definition bmfunc.h:103
memory allocation policy
Definition bm.h:805
Statistical information about bitset's memory allocation details.
Definition bm.h:125
bool is_remap
Definition xsample05.cpp:93