BitMagic-C++
bm::str_sparse_vector< CharType, BV, STR_SIZE > Class Template Reference

succinct sparse vector for strings with compression using the bit-slicing (transposition) method More...

#include <bmstrsparsevec.h>

Inheritance diagram for bm::str_sparse_vector< CharType, BV, STR_SIZE >:
Collaboration diagram for bm::str_sparse_vector< CharType, BV, STR_SIZE >:

Data Structures

struct  statistics
struct  is_remap_support
struct  is_rsc_support
struct  is_dynamic_splices
class  reference_base
class  const_reference
 Reference class to access elements via common [] operator. More...
class  reference
 Reference class to access elements via common [] operator. More...
class  const_iterator
 Const iterator to do quick traverse of the sparse vector. More...
class  back_insert_iterator
 Back insert iterator implements buffered insert, faster than generic access assignment. More...

Public Types

enum  octet_slices { sv_octet_slices = STR_SIZE }
typedef BV bvector_type
typedef bvector_type * bvector_type_ptr
typedef const bvector_type * bvector_type_const_ptr
typedef CharType value_type
typedef CharType * value_type_prt
typedef bvector_type::size_type size_type
typedef BV::allocator_type allocator_type
typedef bvector_type::allocation_policy allocation_policy_type
typedef bvector_type::enumerator bvector_enumerator_type
typedef allocator_type::allocator_pool_type allocator_pool_type
typedef bm::basic_bmatrix< BV > bmatrix_type
typedef base_sparse_vector< CharType, BV, STR_SIZE > parent_type
typedef parent_type::unsigned_value_type unsigned_value_type
typedef bm::dynamic_heap_matrix< unsigned char, allocator_type > slice_octet_matrix_type
 Matrix of character remappings.
typedef slice_octet_matrix_type remap_matrix_type
typedef bm::dynamic_heap_matrix< size_t, allocator_type > octet_freq_matrix_type
 Matrix of character frequencies (for optimal code remap).
Public Types inherited from bm::base_sparse_vector< CharType, BV, STR_SIZE >
enum  bit_planes
enum  vector_capacity
typedef CharType value_type
typedef BV bvector_type
typedef BV::size_type size_type
typedef bvector_type * bvector_type_ptr
typedef const bvector_type * bvector_type_const_ptr
typedef const value_type & const_reference
typedef BV::allocator_type allocator_type
typedef bvector_type::allocation_policy allocation_policy_type
typedef bvector_type::enumerator bvector_enumerator_type
typedef allocator_type::allocator_pool_type allocator_pool_type
typedef bm::basic_bmatrix< BV > bmatrix_type
typedef std::make_unsigned< value_type >::type unsigned_value_type

Public Member Functions

 str_sparse_vector (bm::null_support null_able=bm::no_null, allocation_policy_type ap=allocation_policy_type(), size_type bv_max_size=bm::id_max, const allocator_type &alloc=allocator_type())
 Sparse vector constructor.
 str_sparse_vector (const str_sparse_vector &str_sv)
 str_sparse_vector (const str_sparse_vector &str_sv, bm::remap_setup remap_mode)
str_sparse_vector< CharType, BV, STR_SIZE > & operator= (const str_sparse_vector< CharType, BV, STR_SIZE > &str_sv)
 str_sparse_vector (str_sparse_vector< CharType, BV, STR_SIZE > &&str_sv) BMNOEXCEPT
str_sparse_vector< CharType, BV, STR_SIZE > & operator= (str_sparse_vector< CharType, BV, STR_SIZE > &&str_sv) BMNOEXCEPT
void sync (bool force)
 synchronize internal structures
bool equal (const str_sparse_vector< CharType, BV, STR_SIZE > &sv, bm::null_support null_able=bm::use_null) const BMNOEXCEPT
 check if another sparse vector has the same content and size
size_type effective_size () const BMNOEXCEPT
 size of sparse vector (may be different for RSC)
String element access
const const_reference operator[] (size_type idx) const
 Operator to get read access to an element.
reference operator[] (size_type idx)
 Operator to get write access to an element.
void set (size_type idx, const value_type *str)
 set specified element with bounds checking and automatic resize
void set_null (size_type idx)
 set NULL status for the specified element. Vector is resized automatically.
void set_null (const bvector_type &bv_idx)
 Set NULL all elements set as 1 in the argument vector.
void clear (const bvector_type &bv_idx)
 Set vector elements specified by argument bit-vector to empty. Note that elements set to empty are NOT going to be turned to NULL (NULL qualifier is preserved).
void keep (const bvector_type &bv_idx)
 Set NULL all elements NOT set as 1 in the argument vector.
void insert (size_type idx, const value_type *str)
 insert the specified element
void swap (size_type idx1, size_type idx2)
 swap two vector elements between each other
template<typename StrType>
void insert (size_type idx, const StrType &str)
 insert STL string
void erase (size_type idx)
 erase the specified element
size_type get (size_type idx, value_type *str, size_type buf_size) const BMNOEXCEPT
 get specified element
template<typename StrType>
void assign (size_type idx, const StrType &str)
 set specified element with bounds checking and automatic resize
template<typename StrType>
void push_back (const StrType &str)
 push back a string
void push_back (const value_type *str)
 push back a string (zero terminated)
void push_back_null (size_type count)
 push back specified amount of NULL values
void push_back_null ()
 push back NULL value
template<typename StrType>
bool try_get (size_type idx, StrType &str) const
 get specified string element if NOT NULL. Template method expects an STL-compatible type basic_string<>
template<typename StrType>
void get (size_type idx, StrType &str) const
 get specified string element. Template method expects an STL-compatible type basic_string<>
void swap (str_sparse_vector &str_sv) BMNOEXCEPT
Clear
void clear_all (bool free_mem, unsigned remap=0) BMNOEXCEPT
 resize to zero, free memory
void clear () BMNOEXCEPT
 resize to zero, free memory, reset remapping
str_sparse_vector< CharType, BV, STR_SIZE > & clear_range (size_type left, size_type right, bool set_null=false)
 clear range (assign bit 0 for all planes)
Memory optimization/compression
void optimize (bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename str_sparse_vector< CharType, BV, STR_SIZE >::statistics *stat=0)
 run memory optimization for all vector planes
void calc_stat (struct str_sparse_vector< CharType, BV, STR_SIZE >::statistics *st) const BMNOEXCEPT
 Calculates memory statistics.
void freeze ()
 Turn sparse vector into immutable mode. Read-only (immutable) vector uses less memory and allows faster searches.
bool is_ro () const BMNOEXCEPT
 Returns true if vector is read-only.
Iterator access
const_iterator begin () const BMNOEXCEPT
 Provide const iterator access to container content.
const_iterator end () const BMNOEXCEPT
 Provide const iterator access to the end.
const_iterator get_const_iterator (size_type idx) const BMNOEXCEPT
 Get const_iterator re-positioned to specific element.
back_insert_iterator get_back_inserter ()
 Provide back insert iterator. Back insert iterator implements buffered insertion, which is faster than random access or push_back.
Export content to C-style
template<typename CharMatrix>
size_type decode (CharMatrix &cmatr, size_type idx_from, size_type dec_size, bool zero_mem=true) const
 Bulk export strings to a C-style matrix of chars.
template<typename CharMatrix>
size_type decode_substr (CharMatrix &cmatr, size_type idx_from, size_type dec_size, unsigned substr_from, unsigned substr_to, bool zero_mem=true) const
 Bulk export strings to a C-style matrix of chars.
template<typename CharMatrix>
void import (CharMatrix &cmatr, size_type idx_from, size_type imp_size)
 Bulk import of strings from a C-style matrix of chars.
template<typename CharMatrix>
void import_back (CharMatrix &cmatr, size_type imp_size)
 Bulk push-back import of strings from a C-style matrix of chars.
Merge, split, partition data
void copy_range (const str_sparse_vector< CharType, BV, STR_SIZE > &sv, size_type left, size_type right, bm::null_support slice_null=bm::use_null)
 copy range of values from another sparse vector
str_sparse_vector< CharType, BV, STR_SIZE > & merge (str_sparse_vector< CharType, BV, STR_SIZE > &str_sv)
 merge with another sparse vector using OR operation. Merge is different from join(), because it borrows data from the source vector, so the source gets modified (destructive join)
void keep_range (size_type left, size_type right, bm::null_support slice_null=bm::use_null)
 Keep only specified interval in the sparse vector, clear all other elements.
Public Member Functions inherited from bm::base_sparse_vector< CharType, BV, STR_SIZE >
 base_sparse_vector ()
 base_sparse_vector (bm::null_support null_able, bool is_dynamic, allocation_policy_type ap=allocation_policy_type(), size_type bv_max_size=bm::id_max, const allocator_type &alloc=allocator_type())
 base_sparse_vector (const base_sparse_vector< CharType, BV, MAX_SIZE > &bsv)
 base_sparse_vector (base_sparse_vector< CharType, BV, MAX_SIZE > &&bsv) BMNOEXCEPT
void swap (base_sparse_vector< CharType, BV, MAX_SIZE > &bsv) BMNOEXCEPT
size_type size () const BMNOEXCEPT
void resize (size_type new_size, bool set_null)
void clear_range (size_type left, size_type right, bool set_null)
void keep_range_no_check (size_type left, size_type right, bm::null_support slice_null)
void clear_all (bool free_mem=true) BMNOEXCEPT
 resize to zero, free memory
bool empty () const BMNOEXCEPT
void swap_elements (size_type idx1, size_type idx2)
 swap two vector elements
void optimize (bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename bvector_type::statistics *stat=0)
 run memory optimization for all bit-vector rows
void calc_stat (typename bvector_type::statistics *st) const BMNOEXCEPT
 Calculates memory statistics.
bool equal (const base_sparse_vector< CharType, BV, MAX_SIZE > &sv, bm::null_support null_able=bm::use_null) const BMNOEXCEPT
 check if another sparse vector has the same content and size
bool is_nullable () const BMNOEXCEPT
 check if container supports NULL(unassigned) values
bm::null_support get_null_support () const BMNOEXCEPT
 check if container supports NULL (unassigned) values
const bvector_type * get_null_bvector () const BMNOEXCEPT
 Get bit-vector of assigned values or NULL (if not constructed that way).
bool is_null (size_type idx) const BMNOEXCEPT
 test if specified element is NULL
void set_allocator_pool (allocator_pool_type *pool_ptr) BMNOEXCEPT
 Set allocation pool.
allocator_pool_type * get_allocator_pool () const BMNOEXCEPT
 Get allocation pool.
bvector_type_ptr get_create_slice (unsigned i)
 get access to bit-plane, function checks and creates a plane
bvector_type_const_ptr get_slice (unsigned i) const BMNOEXCEPT
 get read-only access to bit-plane
unsigned effective_slices () const BMNOEXCEPT
 Number of effective bit-planes in the value type.
bvector_type_ptr slice (unsigned i) BMNOEXCEPT
 get access to bit-plane as is (can return NULL)
bvector_type_const_ptr slice (unsigned i) const BMNOEXCEPT
bvector_type * get_null_bvect () BMNOEXCEPT
void free_slice (unsigned i)
 free memory in bit-plane
bm::id64_t get_slice_mask (unsigned element_idx) const BMNOEXCEPT
const bmatrix_type & get_bmatrix () const BMNOEXCEPT
bmatrix_type & get_bmatrix () BMNOEXCEPT
 access to internal bit-matrix
void mark_null_idx (unsigned null_idx) BMNOEXCEPT
 Set NULL plane index.

Static Public Member Functions

static bool find_rank (size_type rank, size_type &pos) BMNOEXCEPT
 find position of compressed element by its rank
Various traits
static constexpr bool is_compressed () BMNOEXCEPT
 various type traits
static constexpr bool is_str () BMNOEXCEPT
static constexpr bool is_signed () BMNOEXCEPT
 returns true if value type is signed integral type
static unsigned slices () BMNOEXCEPT
 get total number of bit-planes in the vector
static unsigned stored_slices () BMNOEXCEPT
 Number of stored bit-planes (value planes + extra).
static unsigned_value_type s2u (value_type v) BMNOEXCEPT
 Convert signed value type to unsigned representation.
static value_type u2s (unsigned_value_type v) BMNOEXCEPT
 Convert unsigned value type to signed representation.

Protected Types

enum  insert_buf_size_e { ins_buf_size = bm::gap_max_bits }
Protected Types inherited from bm::base_sparse_vector< CharType, BV, STR_SIZE >
typedef bvector_type::block_idx_type block_idx_type

Protected Member Functions

template<typename CharMatrix, size_t BufSize = ins_buf_size>
void import_no_check (CharMatrix &cmatr, size_type idx_from, size_type imp_size, bool set_not_null=true)
template<size_t BufSize = ins_buf_size>
void import_char_slice (const unsigned_value_type *ch_slice, unsigned ch_acc, size_type char_slice_idx, size_type idx_from, size_type imp_size)
void set_value (size_type idx, const value_type *str)
 set value without checking boundaries
void set_value_no_null (size_type idx, const value_type *str)
 set value without checking boundaries or support of NULL
void insert_value (size_type idx, const value_type *str)
 insert value without checking boundaries
void insert_value_no_null (size_type idx, const value_type *str)
 insert value without checking boundaries or support of NULL
size_type size_internal () const
void resize_internal (size_type sz)
size_t remap_size () const
const unsigned char * get_remap_buffer () const
unsigned char * init_remap_buffer ()
void set_remap ()
bool resolve_range (size_type from, size_type to, size_type *idx_from, size_type *idx_to) const
const remap_matrix_type * get_remap_matrix () const
remap_matrix_type * get_remap_matrix ()
void remap (back_insert_iterator &iit)
 remap using statistics table from inserter
void remap_from_impl (const str_sparse_vector &str_sv, octet_freq_matrix_type *omatrix, bool move_data)
 Remap from implementation, please note that move_data flag can violate const-ness.
Protected Member Functions inherited from bm::base_sparse_vector< CharType, BV, STR_SIZE >
void copy_from (const base_sparse_vector< CharType, BV, MAX_SIZE > &bsv)
void merge_matr (bmatrix_type &bmatr)
 Merge plane bvectors from an outside base matrix. Note: outside base matrix gets destroyed.
void freeze_matr ()
 Turn on RO mode.
void clear_value_planes_from (unsigned plane_idx, size_type idx)
void insert_clear_value_planes_from (unsigned plane_idx, size_type idx)
void erase_column (size_type idx, bool erase_null)
void insert_null (size_type idx, bool not_null)
void bit_sub_rows (const bvector_type &bv, bool use_null)
 Set SUB (MINUS) operation on all existing bit-slices.
void bit_and_rows (const bvector_type &bv)
 Set AND (intersect) operation on all existing bit-slices.
void optimize_block (block_idx_type nb, typename BV::optmode opt_mode)
 plane index for the "NOT NULL" flags plane
void sync_ro () BMNOEXCEPT
 Sync read-only state.
void copy_range_slices (const base_sparse_vector< CharType, BV, MAX_SIZE > &bsv, typename base_sparse_vector< CharType, BV, MAX_SIZE >::size_type left, typename base_sparse_vector< CharType, BV, MAX_SIZE >::size_type right, bm::null_support slice_null)
 Perform copy_range() on a set of planes.

Static Protected Member Functions

Errors and exceptions
static void throw_range_error (const char *err_msg)
 throw range error
static void throw_bad_value (const char *err_msg)
 throw domain error
Static Protected Member Functions inherited from bm::base_sparse_vector< CharType, BV, STR_SIZE >
static constexpr unsigned value_bits () BMNOEXCEPT
 Number of total bit-planes in the value type.

Protected Attributes

unsigned remap_flags_
 remapping status
slice_octet_matrix_type remap_matrix1_
 octet remap table 1
slice_octet_matrix_type remap_matrix2_
 octet remap table 2
Protected Attributes inherited from bm::base_sparse_vector< CharType, BV, STR_SIZE >
bmatrix_type bmatr_
 bit-transposed matrix
unsigned_value_type slice_mask_
 slice presence bit-mask
size_type size_
 array size
unsigned effective_slices_
 number of bit slices actually allocated
bool is_ro_
 read-only

Friends

template<class SVect>
class sparse_vector_serializer
template<class SVect>
class sparse_vector_deserializer

Element comparison functions

int compare (size_type idx, const value_type *str) const BMNOEXCEPT
 Compare vector element with argument lexicographically.
int compare (size_type idx1, size_type idx2) const BMNOEXCEPT
 Compare two vector elements.
template<bool USE_PREFIX_BUF = false>
unsigned common_prefix_length (size_type idx1, size_type idx2, value_type *prefix_buf=0) const BMNOEXCEPT
 Find size of common prefix between two vector elements in octets.
int compare_remap (size_type idx, const value_type *str) const BMNOEXCEPT
 Variant of compare for remapped vectors.
int compare_nomap (size_type idx, const value_type *str) const BMNOEXCEPT
 Variant of compare for non-mapped vectors.
static int compare_str (const value_type *str1, const value_type *str2) BMNOEXCEPT
static int compare_str (const value_type *str1, const value_type *str2, size_t min_len) BMNOEXCEPT

Size, etc

size_type size () const
 return size of the vector
bool empty () const
 return true if vector is empty
void resize (size_type sz)
 resize vector
size_type effective_max_str () const BMNOEXCEPT
 get effective string length used in vector. Calculates and returns efficiency: how close we are to the reserved maximum.
size_type effective_vector_max () const
 get effective string length used in vector
void sync_size () BMNOEXCEPT
 recalculate size to exclude tail NULL elements. After this call size() will return the true size of the vector
static size_type max_str ()
 get maximum string length capacity

Char remapping, succinct utilities

Remapping runs character usage analysis (frequency analysis) and, based on that, implements reduction of bit-depth, which improves search performance and memory usage (both RAM and serialized).

Remapping limits further modifications of sparse vector. (Use remapped vector as read-only).

bool is_remap () const BMNOEXCEPT
 Get character remapping status (true | false).
void remap_from (const str_sparse_vector &str_sv, octet_freq_matrix_type *omatrix=0)
 Build remapping profile and load content from another sparse vector. Remapped vector likely saves memory (both RAM and disk) but should not be modified (should be read-only).
void remap ()
 Build remapping profile and re-load content to save memory.
void calc_octet_stat (octet_freq_matrix_type &octet_matrix) const
void build_octet_remap (slice_octet_matrix_type &octet_remap_matrix1, slice_octet_matrix_type &octet_remap_matrix2, octet_freq_matrix_type &octet_occupancy_matrix) const
bool remap_tosv (value_type *sv_str, size_type buf_size, const value_type *str) const BMNOEXCEPT
bool remap_n_tosv_2way (value_type *BMRESTRICT sv_str, value_type *BMRESTRICT str_cp, size_type buf_size, const value_type *BMRESTRICT str, size_t in_len) const BMNOEXCEPT
void recalc_remap_matrix2 ()
static bool remap_tosv (value_type *BMRESTRICT sv_str, size_type buf_size, const value_type *BMRESTRICT str, const slice_octet_matrix_type &BMRESTRICT octet_remap_matrix2) BMNOEXCEPT
static bool remap_n_tosv_2way (value_type *BMRESTRICT sv_str, value_type *BMRESTRICT str_cp, size_type buf_size, const value_type *BMRESTRICT str, size_t in_len, const slice_octet_matrix_type &BMRESTRICT octet_remap_matrix2) BMNOEXCEPT
static bool remap_fromsv (value_type *BMRESTRICT str, size_type buf_size, const value_type *BMRESTRICT sv_str, const slice_octet_matrix_type &BMRESTRICT octet_remap_matrix1) BMNOEXCEPT

Detailed Description

template<typename CharType, typename BV, unsigned STR_SIZE>
class bm::str_sparse_vector< CharType, BV, STR_SIZE >

succinct sparse vector for strings with compression using the bit-slicing (transposition) method

Initial string is bit-transposed into bit-slices so collection may use less memory due to prefix sum (GAP) compression in bit-slices. In addition, the container can use character re-mapping using char frequencies to compute the minimal codes. Re-mapping can reduce memory footprint, get better search performance and improve storage compression.

Template parameters: CharType - type of character (char or unsigned char; wchar not tested); BV - bit-vector for bit-slicing; STR_SIZE - initial string size (can dynamically increase on usage).

Examples
strsvsample01.cpp, strsvsample02.cpp, strsvsample02a.cpp, strsvsample03.cpp, strsvsample04.cpp, strsvsample05.cpp, strsvsample06.cpp, strsvsample07.cpp, strsvsample08.cpp, strsvsample09.cpp, and xsample05.cpp.

Definition at line 71 of file bmstrsparsevec.h.

Member Typedef Documentation

◆ allocation_policy_type

template<typename CharType, typename BV, unsigned STR_SIZE>
typedef bvector_type::allocation_policy bm::str_sparse_vector< CharType, BV, STR_SIZE >::allocation_policy_type

Definition at line 81 of file bmstrsparsevec.h.

◆ allocator_pool_type

template<typename CharType, typename BV, unsigned STR_SIZE>
typedef allocator_type::allocator_pool_type bm::str_sparse_vector< CharType, BV, STR_SIZE >::allocator_pool_type

Definition at line 83 of file bmstrsparsevec.h.

◆ allocator_type

template<typename CharType, typename BV, unsigned STR_SIZE>
typedef BV::allocator_type bm::str_sparse_vector< CharType, BV, STR_SIZE >::allocator_type

Definition at line 80 of file bmstrsparsevec.h.

◆ bmatrix_type

template<typename CharType, typename BV, unsigned STR_SIZE>
typedef bm::basic_bmatrix<BV> bm::str_sparse_vector< CharType, BV, STR_SIZE >::bmatrix_type

Definition at line 84 of file bmstrsparsevec.h.

◆ bvector_enumerator_type

template<typename CharType, typename BV, unsigned STR_SIZE>
typedef bvector_type::enumerator bm::str_sparse_vector< CharType, BV, STR_SIZE >::bvector_enumerator_type

Definition at line 82 of file bmstrsparsevec.h.

◆ bvector_type

template<typename CharType, typename BV, unsigned STR_SIZE>
typedef BV bm::str_sparse_vector< CharType, BV, STR_SIZE >::bvector_type

Definition at line 74 of file bmstrsparsevec.h.

◆ bvector_type_const_ptr

template<typename CharType, typename BV, unsigned STR_SIZE>
typedef const bvector_type* bm::str_sparse_vector< CharType, BV, STR_SIZE >::bvector_type_const_ptr

Definition at line 76 of file bmstrsparsevec.h.

◆ bvector_type_ptr

template<typename CharType, typename BV, unsigned STR_SIZE>
typedef bvector_type* bm::str_sparse_vector< CharType, BV, STR_SIZE >::bvector_type_ptr

Definition at line 75 of file bmstrsparsevec.h.

◆ octet_freq_matrix_type

template<typename CharType, typename BV, unsigned STR_SIZE>
typedef bm::dynamic_heap_matrix<size_t, allocator_type> bm::str_sparse_vector< CharType, BV, STR_SIZE >::octet_freq_matrix_type

Matrix of character frequencies (for optimal code remap).

Definition at line 108 of file bmstrsparsevec.h.

◆ parent_type

template<typename CharType, typename BV, unsigned STR_SIZE>
typedef base_sparse_vector<CharType, BV, STR_SIZE> bm::str_sparse_vector< CharType, BV, STR_SIZE >::parent_type

Definition at line 85 of file bmstrsparsevec.h.

◆ remap_matrix_type

template<typename CharType, typename BV, unsigned STR_SIZE>
typedef slice_octet_matrix_type bm::str_sparse_vector< CharType, BV, STR_SIZE >::remap_matrix_type

Definition at line 102 of file bmstrsparsevec.h.

◆ size_type

template<typename CharType, typename BV, unsigned STR_SIZE>
typedef bvector_type::size_type bm::str_sparse_vector< CharType, BV, STR_SIZE >::size_type

Definition at line 79 of file bmstrsparsevec.h.

◆ slice_octet_matrix_type

template<typename CharType, typename BV, unsigned STR_SIZE>
typedef bm::dynamic_heap_matrix<unsigned char, allocator_type> bm::str_sparse_vector< CharType, BV, STR_SIZE >::slice_octet_matrix_type

Matrix of character remappings.

Definition at line 101 of file bmstrsparsevec.h.

◆ unsigned_value_type

template<typename CharType, typename BV, unsigned STR_SIZE>
typedef parent_type::unsigned_value_type bm::str_sparse_vector< CharType, BV, STR_SIZE >::unsigned_value_type

Definition at line 86 of file bmstrsparsevec.h.

◆ value_type

template<typename CharType, typename BV, unsigned STR_SIZE>
typedef CharType bm::str_sparse_vector< CharType, BV, STR_SIZE >::value_type

Definition at line 77 of file bmstrsparsevec.h.

◆ value_type_prt

template<typename CharType, typename BV, unsigned STR_SIZE>
typedef CharType* bm::str_sparse_vector< CharType, BV, STR_SIZE >::value_type_prt

Definition at line 78 of file bmstrsparsevec.h.

Member Enumeration Documentation

◆ insert_buf_size_e

template<typename CharType, typename BV, unsigned STR_SIZE>
enum bm::str_sparse_vector::insert_buf_size_e
protected
Enumerator
ins_buf_size 

Definition at line 1372 of file bmstrsparsevec.h.

◆ octet_slices

template<typename CharType, typename BV, unsigned STR_SIZE>
enum bm::str_sparse_vector::octet_slices
Enumerator
sv_octet_slices 

Definition at line 92 of file bmstrsparsevec.h.

Constructor & Destructor Documentation

◆ str_sparse_vector() [1/4]

template<class CharType, class BV, unsigned STR_SIZE>
bm::str_sparse_vector< CharType, BV, STR_SIZE >::str_sparse_vector ( bm::null_support null_able = bm::no_null,
allocation_policy_type ap = allocation_policy_type(),
size_type bv_max_size = bm::id_max,
const allocator_type & alloc = allocator_type() )

Sparse vector constructor.

Parameters
null_able- defines if vector supports NULL values flag by default it is OFF, use bm::use_null to enable it
ap- allocation strategy for underlying bit-vectors Default allocation policy uses BM_BIT setting (fastest access)
bv_max_size- maximum possible size of underlying bit-vectors Please note, this is NOT size of svector itself, it is dynamic upper limit which should be used very carefully if we surely know the ultimate size
alloc- allocator for bit-vectors
See also
bvector<>
bm::bvector<>::allocation_policy
bm::startegy

Definition at line 1607 of file bmstrsparsevec.h.

References remap_flags_.

Referenced by bm::str_sparse_vector< CharType, BV, STR_SIZE >::const_reference::const_reference(), copy_range(), equal(), merge(), bm::str_sparse_vector< CharType, BV, STR_SIZE >::reference::reference(), remap(), remap(), remap_from(), remap_from_impl(), str_sparse_vector(), str_sparse_vector(), and swap().

◆ str_sparse_vector() [2/4]

template<class CharType, class BV, unsigned STR_SIZE>
bm::str_sparse_vector< CharType, BV, STR_SIZE >::str_sparse_vector ( const str_sparse_vector< CharType, BV, STR_SIZE > & str_sv)

copy-ctor

Definition at line 1623 of file bmstrsparsevec.h.

References remap_flags_, remap_matrix1_, remap_matrix2_, and str_sparse_vector().

◆ str_sparse_vector() [3/4]

template<class CharType, class BV, unsigned STR_SIZE>
bm::str_sparse_vector< CharType, BV, STR_SIZE >::str_sparse_vector ( const str_sparse_vector< CharType, BV, STR_SIZE > & str_sv,
bm::remap_setup remap_mode )

construct empty sparse vector, copying the remap tables from another vector

Parameters
str_sv- source vector to take the remap tables from (assumed to be remapped)
remap_mode- remap table copy param

Definition at line 1637 of file bmstrsparsevec.h.

References BM_ASSERT, bm::COPY_RTABLES, bm::base_sparse_vector< CharType, BV, STR_SIZE >::get_null_support(), remap_flags_, remap_matrix1_, remap_matrix2_, and str_sparse_vector().

◆ str_sparse_vector() [4/4]

template<typename CharType, typename BV, unsigned STR_SIZE>
bm::str_sparse_vector< CharType, BV, STR_SIZE >::str_sparse_vector ( str_sparse_vector< CharType, BV, STR_SIZE > && str_sv)
inline

move-ctor

Definition at line 523 of file bmstrsparsevec.h.

Member Function Documentation

◆ assign()

template<typename CharType, typename BV, unsigned STR_SIZE>
template<typename StrType>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::assign ( size_type idx,
const StrType & str )
inline

set specified element with bounds checking and automatic resize

This is an equivalent of the set() method, but templatized to be more compatible with the STL std::string and the like

Parameters
idx- element index (vector auto-resized if needs to)
str- input string expected an STL class with size() support, like basic_string<> or vector<char>

Definition at line 648 of file bmstrsparsevec.h.

Referenced by main(), and bm::str_sparse_vector< char, bm::bvector<>, 64 >::push_back().

◆ begin()

template<class CharType, class BV, unsigned STR_SIZE>
str_sparse_vector< CharType, BV, STR_SIZE >::const_iterator bm::str_sparse_vector< CharType, BV, STR_SIZE >::begin ( ) const

Provide const iterator access to container content.

Definition at line 2589 of file bmstrsparsevec.h.

References BMNOEXCEPT.

Referenced by main().

◆ build_octet_remap()

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::build_octet_remap ( slice_octet_matrix_type & octet_remap_matrix1,
slice_octet_matrix_type & octet_remap_matrix2,
octet_freq_matrix_type & octet_occupancy_matrix ) const

Compute optimal remap codes

Definition at line 2203 of file bmstrsparsevec.h.

References BM_ASSERT, effective_max_str(), bm::find_first_nz(), and bm::find_max_nz().

Referenced by remap_from_impl().

◆ calc_octet_stat()

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::calc_octet_stat ( octet_freq_matrix_type & octet_matrix) const

Calculate flags which octets are present on each byte-plane.

Definition at line 2173 of file bmstrsparsevec.h.

References effective_max_str(), and bm::str_sparse_vector< CharType, BV, STR_SIZE >::const_iterator::valid().

Referenced by remap_from_impl().

◆ calc_stat()

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::calc_stat ( struct str_sparse_vector< CharType, BV, STR_SIZE >::statistics * st) const

Calculates memory statistics.

Function fills statistics structure containing information about how this vector uses memory and estimation of max. amount of memory bvector needs to serialize itself.

Parameters
st- pointer on statistics structure to be filled in.
See also
statistics

Definition at line 1863 of file bmstrsparsevec.h.

References bm::bv_statistics::bit_blocks, BM_ASSERT, BMNOEXCEPT, bm::bv_statistics::bv_count, bm::base_sparse_vector< CharType, BV, STR_SIZE >::calc_stat(), bm::bv_statistics::gap_blocks, bm::bv_statistics::gap_cap_overhead, bm::bv_statistics::max_serialize_mem, bm::bv_statistics::memory_used, bm::bv_statistics::ptr_sub_blocks, remap_flags_, remap_matrix1_, and remap_matrix2_.

Referenced by main().

◆ clear() [1/2]

template<typename CharType, typename BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::clear ( )
inline

resize to zero, free memory, reset remapping

Definition at line 831 of file bmstrsparsevec.h.

◆ clear() [2/2]

template<typename CharType, typename BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::clear ( const bvector_type & bv_idx)
inline

Set vector elements specified by argument bit-vector to empty. Note that elements set to empty are NOT going to be turned to NULL (NULL qualifier is preserved).

Parameters
bv_idx- index bit-vector for elements which to be set to 0

Definition at line 580 of file bmstrsparsevec.h.

◆ clear_all()

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::clear_all ( bool free_mem,
unsigned remap = 0 )

resize to zero, free memory

Parameters
free_mem- true - free all bit-vectors memory, false - set bit-vector to zero (memory remains reserved)
remap- 0 - set to no-remap (default), 1 - keep remap substitution matrix for possible re-use (if remap() was ever called on this vector with data of the same frequency profile). Note that feeding data with a dissimilar frequency profile would cause undefined behavior.
See also
remap

Definition at line 2599 of file bmstrsparsevec.h.

References BMNOEXCEPT, bm::base_sparse_vector< CharType, BV, STR_SIZE >::clear_all(), remap(), remap_flags_, remap_matrix1_, and remap_matrix2_.

Referenced by bm::str_sparse_vector< char, bm::bvector<>, 64 >::clear(), copy_range(), and remap_from_impl().

◆ clear_range()

template<typename CharType, typename BV, unsigned STR_SIZE>
str_sparse_vector< CharType, BV, STR_SIZE > & bm::str_sparse_vector< CharType, BV, STR_SIZE >::clear_range ( size_type left,
size_type right,
bool set_null = false )
inline

clear range (assign bit 0 for all planes)

Parameters
left- interval start
right- interval end (closed interval)
set_null- set cleared values to unassigned (NULL)

Definition at line 840 of file bmstrsparsevec.h.

Referenced by bm::str_sparse_vector< char, bm::bvector<>, 64 >::import(), main(), and remap_from_impl().

◆ common_prefix_length()

template<class CharType, class BV, unsigned STR_SIZE>
template<bool USE_PREFIX_BUF>
unsigned bm::str_sparse_vector< CharType, BV, STR_SIZE >::common_prefix_length ( size_type idx1,
size_type idx2,
value_type * prefix_buf = 0 ) const

Find size of common prefix between two vector elements in octets.

Parameters
prefix_buf- optional param for keeping the common prefix string (without remap decode)
Returns
size of common prefix

Definition at line 2118 of file bmstrsparsevec.h.

References BM_ASSERT, bm::base_sparse_vector< CharType, BV, STR_SIZE >::bmatr_, and BMNOEXCEPT.

◆ compare() [1/2]

template<class CharType, class BV, unsigned STR_SIZE>
int bm::str_sparse_vector< CharType, BV, STR_SIZE >::compare ( size_type idx,
const value_type * str ) const

Compare vector element with argument lexicographically.

The function does not account for NULL values, NULL element is treated as an empty string

NOTE: for a re-mapped container, input string may have no correct remapping, in this case we have an ambiguity (we know it is not equal (0) but LT or GT?). Behavior is undefined.

Parameters
idx- vector element index
str- argument to compare with
Returns
0 - equal, < 0 - vect[idx] < str, >0 otherwise

Definition at line 2051 of file bmstrsparsevec.h.

References BM_ASSERT, BMNOEXCEPT, compare_nomap(), compare_remap(), and remap_flags_.

Referenced by main(), quicksort(), and quicksort2().

◆ compare() [2/2]

template<class CharType, class BV, unsigned STR_SIZE>
int bm::str_sparse_vector< CharType, BV, STR_SIZE >::compare ( size_type idx1,
size_type idx2 ) const

Compare two vector elements.

The function does not account for NULL values, NULL element is treated as an empty string

Parameters
idx1- vector element index 1
idx2- vector element index 2
Returns
0 - equal, < 0 - vect[idx1] < vect[idx2], >0 otherwise

Definition at line 2064 of file bmstrsparsevec.h.

References BM_ASSERT, bm::base_sparse_vector< CharType, BV, STR_SIZE >::bmatr_, BMNOEXCEPT, remap_flags_, remap_matrix1_, and size().

◆ compare_nomap()

template<class CharType, class BV, unsigned STR_SIZE>
int bm::str_sparse_vector< CharType, BV, STR_SIZE >::compare_nomap ( size_type idx,
const value_type * str ) const

Variant of compare for non-mapped vectors.

Caller MUST guarantee vector is not remapped.

Definition at line 2024 of file bmstrsparsevec.h.

References BM_ASSERT, bm::base_sparse_vector< CharType, BV, STR_SIZE >::bmatr_, BMNOEXCEPT, and is_remap.

Referenced by compare().

◆ compare_remap()

template<class CharType, class BV, unsigned STR_SIZE>
int bm::str_sparse_vector< CharType, BV, STR_SIZE >::compare_remap ( size_type idx,
const value_type * str ) const

Variant of compare for remapped vectors.

Caller MUST guarantee vector is remapped.

Definition at line 1995 of file bmstrsparsevec.h.

References BM_ASSERT, bm::base_sparse_vector< CharType, BV, STR_SIZE >::bmatr_, BMNOEXCEPT, is_remap, and remap_matrix1_.

Referenced by compare().

◆ compare_str() [1/2]

template<class CharType, class BV, unsigned STR_SIZE>
int bm::str_sparse_vector< CharType, BV, STR_SIZE >::compare_str ( const value_type * str1,
const value_type * str2 )
static

Definition at line 1895 of file bmstrsparsevec.h.

References BM_ASSERT, and BMNOEXCEPT.

◆ compare_str() [2/2]

template<class CharType, class BV, unsigned STR_SIZE>
int bm::str_sparse_vector< CharType, BV, STR_SIZE >::compare_str ( const value_type * str1,
const value_type * str2,
size_t min_len )
static

Definition at line 1919 of file bmstrsparsevec.h.

References BM_ASSERT, BMNOEXCEPT, and bm::has_zero_byte_u64().

◆ copy_range()

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::copy_range ( const str_sparse_vector< CharType, BV, STR_SIZE > & sv,
size_type left,
size_type right,
bm::null_support slice_null = bm::use_null )

copy range of values from another sparse vector

Copy [left..right] values from the source vector, clear everything outside the range.

Parameters
sv- source vector
left- start index of the closed range [left..right]
right- end index of the closed range [left..right]
slice_null- "use_null" copy range for NULL vector or do not copy it

Definition at line 2523 of file bmstrsparsevec.h.

References clear_all(), bm::base_sparse_vector< CharType, BV, STR_SIZE >::copy_range_slices(), remap_flags_, remap_matrix1_, remap_matrix2_, resize(), size(), str_sparse_vector(), and bm::xor_swap().

◆ decode()

template<typename CharType, typename BV, unsigned STR_SIZE>
template<typename CharMatrix>
size_type bm::str_sparse_vector< CharType, BV, STR_SIZE >::decode ( CharMatrix & cmatr,
size_type idx_from,
size_type dec_size,
bool zero_mem = true ) const
inline

Bulk export strings to a C-style matrix of chars.

Parameters
cmatr- dest matrix (bm::heap_matrix)
idx_from- index in the sparse vector to export from
dec_size- decoding size (matrix column allocation should match)
zero_mem- set to false if target array is pre-initialized with 0s to avoid performance penalty
Returns
number of actually exported elements (can be less than requested)

Definition at line 1119 of file bmstrsparsevec.h.

Referenced by remap_from_impl().

◆ decode_substr()

template<typename CharType, typename BV, unsigned STR_SIZE>
template<typename CharMatrix>
size_type bm::str_sparse_vector< CharType, BV, STR_SIZE >::decode_substr ( CharMatrix & cmatr,
size_type idx_from,
size_type dec_size,
unsigned substr_from,
unsigned substr_to,
bool zero_mem = true ) const
inline

Bulk export strings to a C-style matrix of chars.

Parameters
cmatr- dest matrix (bm::heap_matrix)
idx_from- index in the sparse vector to export from
dec_size- decoding size (matrix column allocation should match)
substr_from- sub-string position from
substr_to- sub-string position to
zero_mem- set to false if target array is pre-initialized with 0s to avoid performance penalty
Returns
number of actually exported elements (can be less than requested)

Decoder functor

< target array for reverse transpose

< bit-plane mask

< i

< SV read offset

Definition at line 1143 of file bmstrsparsevec.h.

Referenced by bm::str_sparse_vector< char, bm::bvector<>, 64 >::decode().

◆ effective_max_str()

template<class CharType, class BV, unsigned STR_SIZE>
str_sparse_vector< CharType, BV, STR_SIZE >::size_type bm::str_sparse_vector< CharType, BV, STR_SIZE >::effective_max_str ( ) const

◆ effective_size()

template<typename CharType, typename BV, unsigned STR_SIZE>
size_type bm::str_sparse_vector< CharType, BV, STR_SIZE >::effective_size ( ) const
inline

size of sparse vector (may be different for RSC)

Definition at line 1369 of file bmstrsparsevec.h.

◆ effective_vector_max()

template<typename CharType, typename BV, unsigned STR_SIZE>
size_type bm::str_sparse_vector< CharType, BV, STR_SIZE >::effective_vector_max ( ) const
inline

get effective string length used in vector

Returns
current string length maximum

Definition at line 884 of file bmstrsparsevec.h.

◆ empty()

template<typename CharType, typename BV, unsigned STR_SIZE>
bool bm::str_sparse_vector< CharType, BV, STR_SIZE >::empty ( ) const
inline

return true if vector is empty

Returns
true if empty

Definition at line 862 of file bmstrsparsevec.h.

Referenced by bm::str_sparse_vector< CharType, BV, STR_SIZE >::back_insert_iterator::flush_impl(), main(), and remap_from_impl().

◆ end()

template<typename CharType, typename BV, unsigned STR_SIZE>
const_iterator bm::str_sparse_vector< CharType, BV, STR_SIZE >::end ( ) const
inline

Provide const iterator access to the end.

Definition at line 946 of file bmstrsparsevec.h.

Referenced by main().

◆ equal()

template<class CharType, class BV, unsigned STR_SIZE>
bool bm::str_sparse_vector< CharType, BV, STR_SIZE >::equal ( const str_sparse_vector< CharType, BV, STR_SIZE > & sv,
bm::null_support null_able = bm::use_null ) const

check if another sparse vector has the same content and size

Parameters
sv- sparse vector for comparison
null_able- flag to consider NULL vector in comparison (default) or compare only value content planes
Returns
true, if it is the same

Definition at line 2493 of file bmstrsparsevec.h.

References BMNOEXCEPT, bm::base_sparse_vector< CharType, BV, STR_SIZE >::equal(), remap_flags_, remap_matrix1_, remap_matrix2_, and str_sparse_vector().

Referenced by main(), and main().

◆ erase()

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::erase ( size_type idx)

erase the specified element

Parameters
idx- element index

Definition at line 1705 of file bmstrsparsevec.h.

References BM_ASSERT, bm::base_sparse_vector< CharType, BV, STR_SIZE >::erase_column(), and bm::base_sparse_vector< CharType, BV, STR_SIZE >::size_.

Referenced by main().

◆ find_rank()

template<class CharType, class BV, unsigned STR_SIZE>
bool bm::str_sparse_vector< CharType, BV, STR_SIZE >::find_rank ( size_type rank,
size_type & pos )
static

find position of compressed element by its rank

Definition at line 2151 of file bmstrsparsevec.h.

References BM_ASSERT, and BMNOEXCEPT.

◆ freeze()

template<typename CharType, typename BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::freeze ( )
inline

Turn sparse vector into immutable mode. Read-only (immutable) vector uses less memory and allows faster searches.

Before freezing it is recommended to call optimize() to get the full memory saving effect

See also
optimize, remap

Definition at line 931 of file bmstrsparsevec.h.

Referenced by main().

◆ get() [1/2]

template<typename CharType, typename BV, unsigned STR_SIZE>
template<typename StrType>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::get ( size_type idx,
StrType & str ) const
inline

get specified string element. Template method expects an STL-compatible type basic_string<>

Parameters
idx- element index (vector auto-resized if needs to)
str- string to get [out]

Definition at line 726 of file bmstrsparsevec.h.

◆ get() [2/2]

template<class CharType, class BV, unsigned STR_SIZE>
str_sparse_vector< CharType, BV, STR_SIZE >::size_type bm::str_sparse_vector< CharType, BV, STR_SIZE >::get ( size_type idx,
value_type * str,
size_type buf_size ) const

get specified element

Parameters
idx- element index
str- string buffer
buf_size- string buffer size
Returns
string length

Definition at line 1827 of file bmstrsparsevec.h.

References bm::base_sparse_vector< CharType, BV, STR_SIZE >::bmatr_, BMNOEXCEPT, remap_flags_, and remap_matrix1_.

Referenced by check_sparse(), main(), main(), quicksort2(), and bm::str_sparse_vector< char, bm::bvector<>, 64 >::try_get().

◆ get_back_inserter()

template<typename CharType, typename BV, unsigned STR_SIZE>
back_insert_iterator bm::str_sparse_vector< CharType, BV, STR_SIZE >::get_back_inserter ( )
inline

Provide back insert iterator. Back insert iterator implements buffered insertion, which is faster than random access or push_back.

Definition at line 958 of file bmstrsparsevec.h.

Referenced by GenerateTestData(), main(), and main().

◆ get_const_iterator()

template<typename CharType, typename BV, unsigned STR_SIZE>
const_iterator bm::str_sparse_vector< CharType, BV, STR_SIZE >::get_const_iterator ( size_type idx) const
inline

Get const_iterator re-positioned to specific element.

Parameters
idx- position in the sparse vector

Definition at line 951 of file bmstrsparsevec.h.

◆ get_remap_buffer()

template<typename CharType, typename BV, unsigned STR_SIZE>
const unsigned char * bm::str_sparse_vector< CharType, BV, STR_SIZE >::get_remap_buffer ( ) const
inlineprotected

Definition at line 1558 of file bmstrsparsevec.h.

◆ get_remap_matrix() [1/2]

template<typename CharType, typename BV, unsigned STR_SIZE>
remap_matrix_type * bm::str_sparse_vector< CharType, BV, STR_SIZE >::get_remap_matrix ( )
inlineprotected

Definition at line 1577 of file bmstrsparsevec.h.

◆ get_remap_matrix() [2/2]

template<typename CharType, typename BV, unsigned STR_SIZE>
const remap_matrix_type * bm::str_sparse_vector< CharType, BV, STR_SIZE >::get_remap_matrix ( ) const
inlineprotected

Definition at line 1575 of file bmstrsparsevec.h.

◆ import()

template<typename CharType, typename BV, unsigned STR_SIZE>
template<typename CharMatrix>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::import ( CharMatrix & cmatr,
size_type idx_from,
size_type imp_size )
inline

Bulk import of strings from a C-style matrix of chars.

Parameters
cmatr- source matrix (bm::heap_matrix) [in/out] parameter gets modified(corrupted) in the process
idx_from- destination index in the sparse vector
imp_size- import size (number of rows to import)

Definition at line 1267 of file bmstrsparsevec.h.

◆ import_back()

template<typename CharType, typename BV, unsigned STR_SIZE>
template<typename CharMatrix>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::import_back ( CharMatrix & cmatr,
size_type imp_size )
inline

Bulk push-back import of strings from a C-style matrix of chars.

Parameters
cmatr- source matrix (bm::heap_matrix) [in/out] parameter gets modified(corrupted) in the process
imp_size- import size (number of rows to import)

Definition at line 1288 of file bmstrsparsevec.h.

◆ import_char_slice()

template<typename CharType, typename BV, unsigned STR_SIZE>
template<size_t BufSize = ins_buf_size>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::import_char_slice ( const unsigned_value_type * ch_slice,
unsigned ch_acc,
size_type char_slice_idx,
size_type idx_from,
size_type imp_size )
inlineprotected

full buffer import can use loop unrolling

Definition at line 1465 of file bmstrsparsevec.h.

Referenced by bm::str_sparse_vector< char, bm::bvector<>, 64 >::import_no_check().

◆ import_no_check()

template<typename CharType, typename BV, unsigned STR_SIZE>
template<typename CharMatrix, size_t BufSize = ins_buf_size>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::import_no_check ( CharMatrix & cmatr,
size_type idx_from,
size_type imp_size,
bool set_not_null = true )
inlineprotected

full buffer import can use loop unrolling

Definition at line 1379 of file bmstrsparsevec.h.

Referenced by bm::str_sparse_vector< char, bm::bvector<>, 64 >::import(), and bm::str_sparse_vector< char, bm::bvector<>, 64 >::import_back().

◆ init_remap_buffer()

template<typename CharType, typename BV, unsigned STR_SIZE>
unsigned char * bm::str_sparse_vector< CharType, BV, STR_SIZE >::init_remap_buffer ( )
inlineprotected

Definition at line 1560 of file bmstrsparsevec.h.

◆ insert() [1/2]

template<typename CharType, typename BV, unsigned STR_SIZE>
template<typename StrType>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::insert ( size_type idx,
const StrType & str )
inline

insert STL string

Parameters
idx- element index (vector auto-resized if needs to)
str- STL string to set

Definition at line 613 of file bmstrsparsevec.h.

◆ insert() [2/2]

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::insert ( size_type idx,
const value_type * str )

insert the specified element

Parameters
idx- element index (vector auto-resized if needs to)
str- string to set (zero terminated)

Definition at line 1677 of file bmstrsparsevec.h.

References insert_value(), set_value(), size(), and bm::base_sparse_vector< CharType, BV, STR_SIZE >::size_.

Referenced by bm::str_sparse_vector< char, bm::bvector<>, 64 >::insert(), insertion_sort(), insertion_sort(), and main().

◆ insert_value()

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::insert_value ( size_type idx,
const value_type * str )
protected

insert value without checking boundaries

Definition at line 1785 of file bmstrsparsevec.h.

References bm::base_sparse_vector< CharType, BV, STR_SIZE >::insert_null(), and insert_value_no_null().

Referenced by insert().

◆ insert_value_no_null()

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::insert_value_no_null ( size_type idx,
const value_type * str )
protected

◆ is_compressed()

template<typename CharType, typename BV, unsigned STR_SIZE>
constexpr bool bm::str_sparse_vector< CharType, BV, STR_SIZE >::is_compressed ( )
inlinestaticconstexpr

various type traits

Definition at line 972 of file bmstrsparsevec.h.

◆ is_remap()

template<typename CharType, typename BV, unsigned STR_SIZE>
bool bm::str_sparse_vector< CharType, BV, STR_SIZE >::is_remap ( ) const
inline

Get character remapping status (true | false).

Definition at line 995 of file bmstrsparsevec.h.

Referenced by remap_from_impl().

◆ is_ro()

template<typename CharType, typename BV, unsigned STR_SIZE>
bool bm::str_sparse_vector< CharType, BV, STR_SIZE >::is_ro ( ) const
inline

Returns true if vector is read-only.

Definition at line 934 of file bmstrsparsevec.h.

Referenced by main().

◆ is_str()

template<typename CharType, typename BV, unsigned STR_SIZE>
constexpr bool bm::str_sparse_vector< CharType, BV, STR_SIZE >::is_str ( )
inlinestaticconstexpr

Definition at line 975 of file bmstrsparsevec.h.

◆ keep()

template<typename CharType, typename BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::keep ( const bvector_type & bv_idx)
inline

Set NULL all elements NOT set as 1 in the argument vector.

Parameters
bv_idx- index bit-vector for elements which needs to be kept

Definition at line 588 of file bmstrsparsevec.h.

◆ keep_range()

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::keep_range ( size_type left,
size_type right,
bm::null_support slice_null = bm::use_null )

Keep only specified interval in the sparse vector, clear all other elements.

Parameters
left- interval start
right- interval end (closed interval)
slice_null- "use_null" copy range for NULL vector or not

Definition at line 2576 of file bmstrsparsevec.h.

References bm::base_sparse_vector< CharType, BV, STR_SIZE >::keep_range_no_check(), and bm::xor_swap().

◆ max_str()

template<typename CharType, typename BV, unsigned STR_SIZE>
size_type bm::str_sparse_vector< CharType, BV, STR_SIZE >::max_str ( )
inlinestatic

get maximum string length capacity

Returns
maximum string length sparse vector can take

Definition at line 872 of file bmstrsparsevec.h.

Referenced by bm::str_sparse_vector< CharType, BV, STR_SIZE >::const_iterator::set_substr().

◆ merge()

template<class CharType, class BV, unsigned STR_SIZE>
str_sparse_vector< CharType, BV, STR_SIZE > & bm::str_sparse_vector< CharType, BV, STR_SIZE >::merge ( str_sparse_vector< CharType, BV, STR_SIZE > & str_sv)

merge with another sparse vector using OR operation. Merge is different from join(), because it borrows data from the source vector, so it gets modified (destructive join)

Parameters
str_sv- [in, out] argument vector to join with (vector mutates)
Returns
self reference

Definition at line 2544 of file bmstrsparsevec.h.

References bm::base_sparse_vector< Val, BV, MAX_SIZE >::bmatr_, bm::base_sparse_vector< CharType, BV, STR_SIZE >::get_null_bvect(), bm::base_sparse_vector< Val, BV, MAX_SIZE >::is_nullable(), bm::base_sparse_vector< CharType, BV, STR_SIZE >::merge_matr(), remap_flags_, remap_matrix1_, remap_matrix2_, resize(), size(), bm::base_sparse_vector< CharType, BV, STR_SIZE >::size_, and str_sparse_vector().

◆ operator=() [1/2]

template<typename CharType, typename BV, unsigned STR_SIZE>
str_sparse_vector< CharType, BV, STR_SIZE > & bm::str_sparse_vector< CharType, BV, STR_SIZE >::operator= ( const str_sparse_vector< CharType, BV, STR_SIZE > & str_sv)
inline

copy assignment operator

Definition at line 511 of file bmstrsparsevec.h.

◆ operator=() [2/2]

template<typename CharType, typename BV, unsigned STR_SIZE>
str_sparse_vector< CharType, BV, STR_SIZE > & bm::str_sparse_vector< CharType, BV, STR_SIZE >::operator= ( str_sparse_vector< CharType, BV, STR_SIZE > && str_sv)
inline

move assignment operator

Definition at line 532 of file bmstrsparsevec.h.

◆ operator[]() [1/2]

template<typename CharType, typename BV, unsigned STR_SIZE>
reference bm::str_sparse_vector< CharType, BV, STR_SIZE >::operator[] ( size_type idx)
inline

Operator to get write access to an element.

Definition at line 552 of file bmstrsparsevec.h.

◆ operator[]() [2/2]

template<typename CharType, typename BV, unsigned STR_SIZE>
const const_reference bm::str_sparse_vector< CharType, BV, STR_SIZE >::operator[] ( size_type idx) const
inline

Operator to get read access to an element.

Definition at line 548 of file bmstrsparsevec.h.

◆ optimize()

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::optimize ( bm::word_t * temp_block = 0,
typename bvector_type::optmode opt_mode = bvector_type::opt_compress,
typename str_sparse_vector< CharType, BV, STR_SIZE >::statistics * stat = 0 )

run memory optimization for all vector planes

Parameters
temp_block- pre-allocated memory block to avoid unnecessary re-allocs
opt_mode- requested compression depth
stat- memory allocation statistics after optimization

Definition at line 1848 of file bmstrsparsevec.h.

References bm::bv_statistics::add(), and bm::base_sparse_vector< CharType, BV, STR_SIZE >::optimize().

Referenced by main(), and main().

◆ push_back() [1/2]

template<typename CharType, typename BV, unsigned STR_SIZE>
template<typename StrType>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::push_back ( const StrType & str)
inline

push back a string

Parameters
str- string to set (STL class with size() support, like basic_string)

Definition at line 683 of file bmstrsparsevec.h.

Referenced by main().

◆ push_back() [2/2]

template<typename CharType, typename BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::push_back ( const value_type * str)
inline

push back a string (zero terminated)

Parameters
str- string to set

Definition at line 689 of file bmstrsparsevec.h.

◆ push_back_null() [1/2]

template<typename CharType, typename BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::push_back_null ( )
inline

push back NULL value

Definition at line 700 of file bmstrsparsevec.h.

Referenced by bm::str_sparse_vector< char, bm::bvector<>, 64 >::push_back_null().

◆ push_back_null() [2/2]

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::push_back_null ( size_type count)

push back specified amount of NULL values

Parameters
count- number of NULLs to push back

Definition at line 1727 of file bmstrsparsevec.h.

References BM_ASSERT, bm::id_max, bm::base_sparse_vector< CharType, BV, STR_SIZE >::is_nullable(), and bm::base_sparse_vector< CharType, BV, STR_SIZE >::size_.

Referenced by main().

◆ recalc_remap_matrix2()

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::recalc_remap_matrix2 ( )

re-calculate remap matrix2 based on matrix1

Definition at line 2245 of file bmstrsparsevec.h.

References BM_ASSERT, remap_flags_, remap_matrix1_, and remap_matrix2_.

Referenced by sync().

◆ remap() [1/2]

template<class CharType, class BV, unsigned MAX_STR_SIZE>
void bm::str_sparse_vector< CharType, BV, MAX_STR_SIZE >::remap ( )

Build remapping profile and re-load content to save memory.

Definition at line 2360 of file bmstrsparsevec.h.

References bm::base_sparse_vector< CharType, BV, STR_SIZE >::get_null_support(), remap_from_impl(), str_sparse_vector(), and swap().

Referenced by clear_all(), main(), main(), and remap().

◆ remap() [2/2]

◆ remap_from()

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::remap_from ( const str_sparse_vector< CharType, BV, STR_SIZE > & str_sv,
octet_freq_matrix_type * omatrix = 0 )

Build remapping profile and load content from another sparse vector. Remapped vector likely saves memory (both RAM and disk) but should not be modified (should be read-only).

Parameters
str_sv- source sparse vector (assumed it is not remapped)
omatrix- pointer to externally computed char frequency matrix (optional). See also: remap, freeze

Definition at line 2389 of file bmstrsparsevec.h.

References remap_from_impl(), and str_sparse_vector().

Referenced by main(), and main().

◆ remap_from_impl()

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::remap_from_impl ( const str_sparse_vector< CharType, BV, STR_SIZE > & str_sv,
octet_freq_matrix_type * omatrix,
bool move_data )
protected

◆ remap_fromsv()

template<class CharType, class BV, unsigned MAX_STR_SIZE>
bool bm::str_sparse_vector< CharType, BV, MAX_STR_SIZE >::remap_fromsv ( value_type *BMRESTRICT str,
size_type buf_size,
const value_type *BMRESTRICT sv_str,
const slice_octet_matrix_type &BMRESTRICT octet_remap_matrix1 )
static

remap string from internal code to external (ASCII) system

Returns
true if remapping was ok, false if found incorrect value for the plane

Definition at line 2333 of file bmstrsparsevec.h.

References BMNOEXCEPT, and BMRESTRICT.

◆ remap_n_tosv_2way() [1/2]

template<typename CharType, typename BV, unsigned STR_SIZE>
bool bm::str_sparse_vector< CharType, BV, STR_SIZE >::remap_n_tosv_2way ( value_type *BMRESTRICT sv_str,
value_type *BMRESTRICT str_cp,
size_type buf_size,
const value_type *BMRESTRICT str,
size_t in_len ) const
inline

remap string from external (ASCII) system to matrix internal code

Definition at line 1071 of file bmstrsparsevec.h.

◆ remap_n_tosv_2way() [2/2]

template<class CharType, class BV, unsigned STR_SIZE>
bool bm::str_sparse_vector< CharType, BV, STR_SIZE >::remap_n_tosv_2way ( value_type *BMRESTRICT sv_str,
value_type *BMRESTRICT str_cp,
size_type buf_size,
const value_type *BMRESTRICT str,
size_t in_len,
const slice_octet_matrix_type &BMRESTRICT octet_remap_matrix2 )
static

remap string from external (ASCII) system to matrix internal code; also creates a zero-terminated copy of the string

Returns
true if remapping was ok, false if found incorrect value for the plane

Definition at line 2304 of file bmstrsparsevec.h.

References BM_ASSERT, BMNOEXCEPT, and BMRESTRICT.

Referenced by bm::str_sparse_vector< char, bm::bvector<>, 64 >::remap_n_tosv_2way().

◆ remap_size()

template<typename CharType, typename BV, unsigned STR_SIZE>
size_t bm::str_sparse_vector< CharType, BV, STR_SIZE >::remap_size ( ) const
inlineprotected

Definition at line 1557 of file bmstrsparsevec.h.

◆ remap_tosv() [1/2]

template<class CharType, class BV, unsigned STR_SIZE>
bool bm::str_sparse_vector< CharType, BV, STR_SIZE >::remap_tosv ( value_type *BMRESTRICT sv_str,
size_type buf_size,
const value_type *BMRESTRICT str,
const slice_octet_matrix_type &BMRESTRICT octet_remap_matrix2 )
static

remap string from external (ASCII) system to matrix internal code

Returns
true if remapping was ok, false if found incorrect value for the plane

Definition at line 2275 of file bmstrsparsevec.h.

References BMNOEXCEPT, and BMRESTRICT.

Referenced by bm::str_sparse_vector< char, bm::bvector<>, 64 >::remap_tosv().

◆ remap_tosv() [2/2]

template<typename CharType, typename BV, unsigned STR_SIZE>
bool bm::str_sparse_vector< CharType, BV, STR_SIZE >::remap_tosv ( value_type * sv_str,
size_type buf_size,
const value_type * str ) const
inline

remap string from external (ASCII) system to matrix internal code

Definition at line 1060 of file bmstrsparsevec.h.

◆ resize()

template<typename CharType, typename BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::resize ( size_type sz)
inline

resize vector

Parameters
sz- new size

Definition at line 867 of file bmstrsparsevec.h.

Referenced by copy_range(), merge(), and bm::str_sparse_vector< char, bm::bvector<>, 64 >::resize_internal().

◆ resize_internal()

template<typename CharType, typename BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::resize_internal ( size_type sz)
inlineprotected

Definition at line 1555 of file bmstrsparsevec.h.

◆ resolve_range()

template<typename CharType, typename BV, unsigned STR_SIZE>
bool bm::str_sparse_vector< CharType, BV, STR_SIZE >::resolve_range ( size_type from,
size_type to,
size_type * idx_from,
size_type * idx_to ) const
inlineprotected

Definition at line 1569 of file bmstrsparsevec.h.

◆ set()

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::set ( size_type idx,
const value_type * str )

set specified element with bounds checking and automatic resize

Parameters
idx- element index (vector auto-resized if needs to)
str- string to set (zero terminated)

Definition at line 1666 of file bmstrsparsevec.h.

References set_value(), size(), and bm::base_sparse_vector< CharType, BV, STR_SIZE >::size_.

Referenced by main(), and bm::str_sparse_vector< char, bm::bvector<>, 64 >::push_back().

◆ set_null() [1/2]

template<typename CharType, typename BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::set_null ( const bvector_type & bv_idx)
inline

Set NULL all elements set as 1 in the argument vector.

Parameters
bv_idx- index bit-vector for elements which needs to be turned to NULL

Definition at line 572 of file bmstrsparsevec.h.

◆ set_null() [2/2]

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::set_null ( size_type idx)

set NULL status for the specified element. Vector is resized automatically

Parameters
idx- element index (vector auto-resized if needs to)

Definition at line 1717 of file bmstrsparsevec.h.

References bm::base_sparse_vector< CharType, BV, STR_SIZE >::bmatr_, and bm::base_sparse_vector< CharType, BV, STR_SIZE >::size_.

Referenced by main().

◆ set_remap()

template<typename CharType, typename BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::set_remap ( )
inlineprotected

Definition at line 1565 of file bmstrsparsevec.h.

◆ set_value()

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::set_value ( size_type idx,
const value_type * str )
protected

set value without checking boundaries

Definition at line 1739 of file bmstrsparsevec.h.

References bm::base_sparse_vector< CharType, BV, STR_SIZE >::get_null_bvect(), and set_value_no_null().

Referenced by insert(), and set().

◆ set_value_no_null()

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::set_value_no_null ( size_type idx,
const value_type * str )
protected

◆ size()

◆ size_internal()

template<typename CharType, typename BV, unsigned STR_SIZE>
size_type bm::str_sparse_vector< CharType, BV, STR_SIZE >::size_internal ( ) const
inlineprotected

Definition at line 1554 of file bmstrsparsevec.h.

◆ swap() [1/2]

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::swap ( size_type idx1,
size_type idx2 )

swap two vector elements between each other

Parameters
idx1- element index 1
idx2- element index 2

Definition at line 1693 of file bmstrsparsevec.h.

References BM_ASSERT, size(), and bm::base_sparse_vector< CharType, BV, STR_SIZE >::swap_elements().

Referenced by main(), bm::str_sparse_vector< char, bm::bvector<>, 64 >::operator=(), quicksort(), quicksort2(), remap(), and remap().

◆ swap() [2/2]

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::swap ( str_sparse_vector< CharType, BV, STR_SIZE > & str_sv)

◆ sync()

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::sync ( bool force)

synchronize internal structures

Definition at line 2483 of file bmstrsparsevec.h.

References recalc_remap_matrix2(), remap_flags_, and bm::base_sparse_vector< CharType, BV, STR_SIZE >::sync_ro().

◆ sync_size()

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::sync_size ( )

recalculate size to exclude tail NULL elements. After this call, size() will return the true size of the vector.

Definition at line 3036 of file bmstrsparsevec.h.

References BMNOEXCEPT, bm::base_sparse_vector< CharType, BV, STR_SIZE >::get_null_bvector(), and bm::base_sparse_vector< CharType, BV, STR_SIZE >::size_.

◆ throw_bad_value()

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::throw_bad_value ( const char * err_msg)
static protected

throw domain error

Definition at line 2627 of file bmstrsparsevec.h.

References BM_ASSERT_THROW.

Referenced by bm::str_sparse_vector< char, bm::bvector<>, 64 >::get().

◆ throw_range_error()

template<class CharType, class BV, unsigned STR_SIZE>
void bm::str_sparse_vector< CharType, BV, STR_SIZE >::throw_range_error ( const char * err_msg)
static protected

throw range error

Definition at line 2614 of file bmstrsparsevec.h.

References BM_ASSERT_THROW.

◆ try_get()

template<typename CharType, typename BV, unsigned STR_SIZE>
template<typename StrType>
bool bm::str_sparse_vector< CharType, BV, STR_SIZE >::try_get ( size_type idx,
StrType & str ) const
inline

get specified string element if NOT NULL. This template method expects an STL-compatible type such as basic_string<>.

Parameters
idx - element index (vector auto-resized if needed)
str- string to get [out]
Returns
true if the element is not null and the try-get was successful

Definition at line 710 of file bmstrsparsevec.h.

Referenced by main().

◆ sparse_vector_deserializer

template<typename CharType, typename BV, unsigned STR_SIZE>
template<class SVect>
friend class sparse_vector_deserializer
friend

Definition at line 1594 of file bmstrsparsevec.h.

◆ sparse_vector_serializer

template<typename CharType, typename BV, unsigned STR_SIZE>
template<class SVect>
friend class sparse_vector_serializer
friend

Definition at line 1593 of file bmstrsparsevec.h.

Field Documentation

◆ remap_flags_

template<typename CharType, typename BV, unsigned STR_SIZE>
unsigned bm::str_sparse_vector< CharType, BV, STR_SIZE >::remap_flags_
protected

◆ remap_matrix1_

template<typename CharType, typename BV, unsigned STR_SIZE>
slice_octet_matrix_type bm::str_sparse_vector< CharType, BV, STR_SIZE >::remap_matrix1_
protected

◆ remap_matrix2_

template<typename CharType, typename BV, unsigned STR_SIZE>
slice_octet_matrix_type bm::str_sparse_vector< CharType, BV, STR_SIZE >::remap_matrix2_
protected

The documentation for this class was generated from the following file: