|
BitMagic-C++
|
Utility to compress test sets of inverted lists. More...
#include <iostream>
#include <chrono>
#include <thread>
#include <time.h>
#include <stdio.h>
#include <cstdlib>
#include <vector>
#include <map>
#include "bm.h"
#include "bmalgo.h"
#include "bmserial.h"
#include "bmsparsevec.h"
#include "bmsparsevec_compr.h"
#include "bmsparsevec_algo.h"
#include "bmsparsevec_serial.h"
#include "bmalgo_similarity.h"
#include "bmdbg.h"
#include "bmtimer.h"
#include "bmundef.h"
Go to the source code of this file.
Typedefs | |
| typedef bm::sparse_vector< unsigned, bm::bvector<> > | sparse_vector_u32 |
| typedef bm::rsc_sparse_vector< unsigned, sparse_vector_u32 > | rsc_sparse_vector_u32 |
Functions | |
| static void | show_help () |
| static int | parse_args (int argc, char *argv[]) |
| template<class VT> | |
| int | io_read_u32_coll (std::ifstream &fin, VT &vec) |
| Read 32-bit vector size-prefix format (length:0, 1, 2, 3, ....). | |
| template<typename VT> | |
| int | validate_inp_vec (const VT &vec, typename VT::value_type &min_delta, typename VT::value_type &min_delta_cnt) |
| Check if the input vector is monotonically sorted (a true inverted list); along the way it computes the minimal delta between values. | |
| template<typename VT, typename BV> | |
| int | compare_vect (const VT &vec, const BV &bv) |
| Verification check if integer vector is equivalent to a bit-vector. | |
| template<typename BV> | |
| bool | is_super_sparse (const BV &bv) |
| Debug utility to detect super sparse bit-vectors, which probably get a bad compression rate. | |
| template<typename VT> | |
| bool | write_as_bvector (std::ofstream &bv_file, const VT &vec, bm::serializer< bm::bvector<> > &bvs, bm::serializer< bm::bvector<> >::buffer &sbuf) |
| convert vector into bit-vector and append to the file | |
| template<typename VT> | |
| void | write_as_svector (std::ofstream &sv_file, const VT &vec, unsigned min_delta, bm::sparse_vector_serial_layout< sparse_vector_u32 > &sv_lay) |
| convert vector into delta coded bit-transposed vector and append to the file | |
| template<typename VT> | |
| void | write_as_rsc_svector (std::ofstream &sv_file, const VT &vec, unsigned min_delta, bm::sparse_vector_serial_layout< rsc_sparse_vector_u32 > &sv_lay) |
| convert vector into delta coded bit-transposed vector and append to the file | |
| static void | compress_inv_dump_file (const std::string &fname, const std::string &bv_out_fname, const std::string &sv_out_fname) |
| read the input collection sequence, write using various compression schemes | |
| static int | read_bvector (std::ifstream &bv_file, bm::bvector<> &bv, bm::serializer< bm::bvector<> >::buffer &sbuf) |
| read and deserialize bit-vector from the dump file | |
| static void | verify_inv_dump_file (const std::string &fname, const std::string &bv_in_fname) |
| read the input collection sequence and dump file, verify correctness | |
| static void | decode_test_dump_file (const std::string &bv_in_fname) |
| read and decode the compressed dump file | |
| int | main (int argc, char *argv[]) |
Variables | |
| std::string | bv_in_file |
| std::string | bv_out_file |
| std::string | sv_in_file |
| std::string | sv_out_file |
| std::string | u32_in_file |
| std::string | u32_out_file |
| bool | is_diag = false |
| bool | is_timing = false |
| bool | is_verify = false |
| bool | is_silent = false |
| bool | is_decode = false |
| unsigned | c_level = bm::set_compression_default |
| bm::chrono_taker ::duration_map_type | timing_map |
Utility to compress test sets of inverted lists.
Definition in file inv_list.cpp.
| typedef bm::rsc_sparse_vector<unsigned, sparse_vector_u32> rsc_sparse_vector_u32 |
Definition at line 239 of file inv_list.cpp.
| typedef bm::sparse_vector<unsigned, bm::bvector<> > sparse_vector_u32 |
Definition at line 238 of file inv_list.cpp.
| int compare_vect | ( | const VT & | vec, |
| const BV & | bv ) |
Verification check if integer vector is equivalent to a bit-vector.
Definition at line 305 of file inv_list.cpp.
Referenced by verify_inv_dump_file().
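| static void compress_inv_dump_file | ( | const std::string & | fname, |
| const std::string & | bv_out_fname, | ||
| const std::string & | sv_out_fname ) |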
read the input collection sequence, write using various compression schemes
Definition at line 460 of file inv_list.cpp.
References bm::serializer< BV >::byte_order_serialization(), c_level, bm::serializer< BV >::gap_length_serialization(), io_read_u32_coll(), is_silent, bm::serializer< BV >::set_compression_level(), bm::sparse_vector_serial_layout< SV >::size(), timing_map, validate_inp_vec(), write_as_bvector(), and write_as_rsc_svector().
Referenced by main().
| static void decode_test_dump_file | ( | const std::string & | bv_in_fname | ) |
read and decode the compressed dump file
Definition at line 763 of file inv_list.cpp.
References is_silent, read_bvector(), and timing_map.
Referenced by main().
| int io_read_u32_coll | ( | std::ifstream & | fin, |
| VT & | vec ) |
Read 32-bit vector size-prefix format (length:0, 1, 2, 3, ....).
Definition at line 248 of file inv_list.cpp.
Referenced by compress_inv_dump_file(), and verify_inv_dump_file().
| bool is_super_sparse | ( | const BV & | bv | ) |
Debug utility to detect super sparse bit-vectors, which probably get a bad compression rate.
Definition at line 325 of file inv_list.cpp.
| int main | ( | int | argc, |
| char * | argv[] ) |
Definition at line 817 of file inv_list.cpp.
References bv_in_file, bv_out_file, compress_inv_dump_file(), decode_test_dump_file(), is_decode, is_timing, is_verify, parse_args(), bm::chrono_taker< TOut >::print_duration_map(), show_help(), sv_out_file, timing_map, u32_in_file, and verify_inv_dump_file().
| static int parse_args | ( | int | argc, |
| char * | argv[] ) |
Definition at line 103 of file inv_list.cpp.
References bv_in_file, bv_out_file, c_level, is_decode, is_diag, is_silent, is_timing, is_verify, show_help(), sv_in_file, sv_out_file, and u32_in_file.
Referenced by main().
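| static int read_bvector | ( | std::ifstream & | bv_file, |
| bm::bvector<> & | bv, | ||
| bm::serializer< bm::bvector<> >::buffer & | sbuf ) |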
read and deserialize bit-vector from the dump file
Definition at line 650 of file inv_list.cpp.
References bm::deserialize().
Referenced by decode_test_dump_file(), and verify_inv_dump_file().
| static void show_help | ( | ) |
Definition at line 62 of file inv_list.cpp.
Referenced by main(), and parse_args().
| int validate_inp_vec | ( | const VT & | vec, |
| typename VT::value_type & | min_delta, | ||
| typename VT::value_type & | min_delta_cnt ) |
Check if the input vector is monotonically sorted (a true inverted list); along the way it computes the minimal delta between values.
Definition at line 273 of file inv_list.cpp.
Referenced by compress_inv_dump_file().
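| static void verify_inv_dump_file | ( | const std::string & | fname, |
| const std::string & | bv_in_fname ) |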
read the input collection sequence and dump file, verify correctness
Definition at line 677 of file inv_list.cpp.
References compare_vect(), io_read_u32_coll(), is_silent, read_bvector(), and timing_map.
Referenced by main().
| bool write_as_bvector | ( | std::ofstream & | bv_file, |
| const VT & | vec, | ||
| bm::serializer< bm::bvector<> > & | bvs, | ||
| bm::serializer< bm::bvector<> >::buffer & | sbuf ) |
convert vector into bit-vector and append to the file
Definition at line 343 of file inv_list.cpp.
References BM_DECLARE_TEMP_BLOCK, bm::BM_SORTED, bm::bvector< Alloc >::optimize(), and bm::bvector< Alloc >::set().
Referenced by compress_inv_dump_file().
| void write_as_rsc_svector | ( | std::ofstream & | sv_file, |
| const VT & | vec, | ||
| unsigned | min_delta, | ||
| bm::sparse_vector_serial_layout< rsc_sparse_vector_u32 > & | sv_lay ) |
convert vector into delta coded bit-transposed vector and append to the file
Definition at line 409 of file inv_list.cpp.
References BM_DECLARE_TEMP_BLOCK, bm::sparse_vector_serial_layout< SV >::data(), bm::sparse_vector< Val, BV >::get_back_inserter(), bm::rsc_sparse_vector< Val, SV >::load_from(), bm::rsc_sparse_vector< Val, SV >::optimize(), bm::sparse_vector_serial_layout< SV >::size(), bm::sparse_vector_serialize(), and bm::use_null.
Referenced by compress_inv_dump_file().
| void write_as_svector | ( | std::ofstream & | sv_file, |
| const VT & | vec, | ||
| unsigned | min_delta, | ||
| bm::sparse_vector_serial_layout< sparse_vector_u32 > & | sv_lay ) |
convert vector into delta coded bit-transposed vector and append to the file
Definition at line 366 of file inv_list.cpp.
References BM_DECLARE_TEMP_BLOCK, bm::sparse_vector_serial_layout< SV >::data(), bm::sparse_vector< Val, BV >::get_back_inserter(), bm::sparse_vector< Val, BV >::optimize(), bm::sparse_vector_serial_layout< SV >::size(), and bm::sparse_vector_serialize().
| std::string bv_in_file |
Definition at line 85 of file inv_list.cpp.
Referenced by main(), and parse_args().
| std::string bv_out_file |
Definition at line 86 of file inv_list.cpp.
Referenced by main(), and parse_args().
| unsigned c_level = bm::set_compression_default |
Definition at line 99 of file inv_list.cpp.
Referenced by compress_inv_dump_file(), and parse_args().
| bool is_decode = false |
Definition at line 97 of file inv_list.cpp.
Referenced by main(), and parse_args().
| bool is_diag = false |
Definition at line 93 of file inv_list.cpp.
| bool is_silent = false |
Definition at line 96 of file inv_list.cpp.
Referenced by compress_inv_dump_file(), decode_test_dump_file(), parse_args(), and verify_inv_dump_file().
| bool is_timing = false |
Definition at line 94 of file inv_list.cpp.
| bool is_verify = false |
Definition at line 95 of file inv_list.cpp.
Referenced by main(), and parse_args().
| std::string sv_in_file |
| std::string sv_out_file |
Definition at line 89 of file inv_list.cpp.
Referenced by main(), and parse_args().
| bm::chrono_taker ::duration_map_type timing_map |
Definition at line 242 of file inv_list.cpp.
| std::string u32_in_file |
Definition at line 90 of file inv_list.cpp.
Referenced by main(), and parse_args().
| std::string u32_out_file |
Definition at line 91 of file inv_list.cpp.