BitMagic-C++
xsample02.cpp
Go to the documentation of this file.
1/*
2Copyright(c) 2018 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15
16For more information please visit: http://bitmagic.io
17*/
18
19/** \example xsample02.cpp
20 Counting sort using bit-vector and sparse vector to build histogram of unsigned ints.
21 Benchmark compares different histogram buiding techniques using BitMagic and std::sort()
22
23 Histogram construction, based on integer events is a common problem,
24 this demo studies different approaches, potential for parallelization and other
25 aspects.
26*/
27
28/*! \file xsample02.cpp
29 \brief Example: sparse_vector<> used for counting sort / historgam construction
30*/
31
32
33#include <iostream>
34#include <memory>
35#include <map>
36#include <vector>
37#include <chrono>
38#include <algorithm>
39#include <random>
40#include <stdexcept>
41
42#include <future>
43#include <thread>
44
45using namespace std;
46
47#include "bm.h"
48#include "bmtimer.h"
49#include "bmsparsevec.h"
50#include "bmundef.h" /* clear the pre-proc defines from BM */
51
52// ----------------------------------------------------
53// Global parameters and types
54// ----------------------------------------------------
55
56const unsigned value_max = 1250000; // range of variants of events [0..max]
57const unsigned test_size = 250000000; // number of events (ints) to generate
58
59// -------------------------------------------
60// Random generator
61// -------------------------------------------
62
63std::random_device rand_dev;
64std::mt19937 gen(rand_dev());
65std::uniform_int_distribution<> rand_dis(1, value_max); // generate uniform numebrs for [1, vector_max]
66
67
69typedef std::map<unsigned, unsigned> map_u32;
70
71
72// timing storage for benchmarking
74
75
76// -------------------------------------------
77// Counting sort / histogram construction (std::map)
78// -------------------------------------------
79
80static
81void sort_map(map_u32& hmap, const std::vector<unsigned>& vin)
82{
83 for (auto v : vin)
84 {
85 hmap[v]++;
86 }
87}
88
89
90// -------------------------------------------
91// Counting sort / histogram construction (naive)
92// -------------------------------------------
93
94// This sorting method uses sparse_vector<> as a storage but implements increment
95// as an get-inc-put operations (decoding-encoding every value in the sum)
96//
97static
98void counting_sort_naive(sparse_vector_u32& sv_out, const std::vector<unsigned>& vin)
99{
100 for (auto v : vin)
101 {
102 auto count = sv_out.get(v);
103 sv_out.set(v, count + 1);
104 }
105}
106
107// -------------------------------------------
108// Counting sort / histogram construction
109// -------------------------------------------
110
111// This sorting method uses sparse_vector<> as a storage but implements increment
112// but increment was implemented as a bm::sparse_vector::inc() method
113// which in turn is based on uses bvector<>::inc()
114//
115// This approach is faster than decoding-encoding used in naive counting sort
116//
117static
118void counting_sort(sparse_vector_u32& sv_out, const std::vector<unsigned>& vin)
119{
120 for(auto v : vin)
121 sv_out.inc(v);
122}
123
124// --------------------------------------------------
125// Counting sort / histogram construction (parallel)
126// --------------------------------------------------
127
128// parallel subproblem for all even numbers: (v & 1) == 0
129inline
130unsigned counting_sort_subbatch(sparse_vector_u32* sv_out, const std::vector<unsigned>* vin)
131{
132 for (size_t i = 0; i < vin->size(); i++)
133 {
134 auto v = (*vin)[i];
135 if ((v & 1) == 0)
136 sv_out->inc(v);
137 }
138 return 0;
139}
140
141// Parallel histogram construction uses a very simple divide and conquer technique
142// splitting by even/odd numbers, uses std::async() for parallelization
143//
144// (should be possible to do a lot better than that)
145//
146static
147void counting_sort_parallel(sparse_vector_u32& sv_out, const std::vector<unsigned>& vin)
148{
150 // process evens in parallel
151 std::future<unsigned> f1 = std::async(std::launch::async, counting_sort_subbatch, &sv_out2, &vin);
152
153 // process all odd elements
154 for (size_t i = 0; i < vin.size(); i++)
155 {
156 auto v = vin[i];
157 if (v & 1)
158 sv_out.inc(v);
159 }
160 f1.wait();
161
162 // merge effectively performs logical OR on all plains, only it
163 // borrows memory blocks from the argument vector, so it gets changed
164 // (which is ok here, since sv_out2 is a temporary)
165 //
166 sv_out.merge(sv_out2);
167}
168
169// Test utility. It also illustrates histogram access method
170//
171static
173{
175 sparse_vector_u32::bvector_type::enumerator en = bv_null->first();
176
177 for (; en.valid(); ++en)
178 {
179 unsigned v = *en;
180 unsigned cnt = sv.get(v);
181 for (unsigned j = 0; j < cnt; ++j)
182 {
183 std::cout << v << ", ";
184 } // for
185 } // for en
186 std::cout << std::endl;
187}
188
189// Test utility for std::map
190//
191static
192void print_sorted(const map_u32& hmap)
193{
194 map_u32::const_iterator it = hmap.begin();
195 map_u32::const_iterator it_end = hmap.end();
196
197 for (; it != it_end; ++it)
198 {
199 unsigned v = it->first;
200 unsigned cnt = it->second;
201 for (unsigned j = 0; j < cnt; ++j)
202 {
203 std::cout << v << ", ";
204 } // for
205 } // for en
206 std::cout << std::endl;
207}
208
209
210// build histogram using sorted vector
211//
212static
213void build_histogram(sparse_vector_u32& sv_out, const std::vector<unsigned>& vin)
214{
215 if (vin.empty())
216 return;
217 unsigned start = vin[0];
218 unsigned count = 0; // histogram counter
219 for (auto v : vin)
220 {
221 if (v == start)
222 {
223 ++count;
224 }
225 else
226 {
227 sv_out.set(start, count);
228 start = v; count = 1;
229 }
230 }
231 if (count)
232 {
233 sv_out.set(start, count);
234 }
235}
236
237
238
239
240int main(void)
241{
242 try
243 {
244 // try simple input vector as a model
245 //
246 {
247 std::vector<unsigned> v {10, 1, 5, 4, 8, 8, 8} ;
248 sparse_vector_u32 r_sv(bm::use_null); // result vector
249
250 counting_sort(r_sv, v);
251
252 print_sorted(r_sv); // 1, 4, 5, 8, 8, 8, 10,
253
255 counting_sort_parallel(p_sv, v);
256 print_sorted(r_sv);
257
258 map_u32 h_map;
259 sort_map(h_map, v);
260 print_sorted(h_map);
261
262
263 std::sort(v.begin(), v.end());
264 sparse_vector_u32 h_sv(bm::use_null); // histogram vector
265 build_histogram(h_sv, v);
266 if (!r_sv.equal(h_sv))
267 {
268 std::cerr << "Error: Histogram comparison failed!" << std::endl;
269 print_sorted(h_sv);
270 return 1;
271 }
272
273 }
274
275 // run benchmarks
276 //
277 std::vector<unsigned> v;
278
279 // generate vector of random numbers
280 for (unsigned i = 0; i < test_size; ++i)
281 {
282 v.push_back(unsigned(rand_dis(gen)));
283 }
284 std::cout << "test vector generation ok" << std::endl;
285
290 map_u32 h_map;
291
292 {
293 bm::chrono_taker tt1(cout, "1. counting sort ", 1, &timing_map);
294 counting_sort(r_sv, v);
295 }
296
297 {
298 bm::chrono_taker tt1(cout, "3. counting sort (naive) ", 1, &timing_map);
299 counting_sort_naive(n_sv, v);
300 }
301
302 {
303 bm::chrono_taker tt1(cout, "4. counting sort (parallel) ", 1, &timing_map);
304 counting_sort_parallel(p_sv, v);
305 }
306
307 {
308 bm::chrono_taker tt1(cout, "5. counting sort (map) ", 1, &timing_map);
309 sort_map(h_map, v);
310 }
311
312 {
313 bm::chrono_taker tt1(cout, "2. std::sort() + histogram", 1, &timing_map);
314 std::sort(v.begin(), v.end());
315 build_histogram(h_sv, v);
316 }
317
318
319 // quality assurance checks
320 //
321 if (!r_sv.equal(h_sv) || !n_sv.equal(h_sv))
322 {
323 std::cerr << "Error: Histogram comparison failed!" << std::endl;
324 return 1;
325 }
326 if (!r_sv.equal(p_sv))
327 {
328 std::cerr << "Error: Histogram comparison failed for parallel sort!" << std::endl;
329 return 1;
330 }
331
332 // compute memory consumption of sparse vector
333 {
334 std::cout << std::endl;
335
337 sparse_vector_u32::statistics st;
338 r_sv.optimize(tb, sparse_vector_u32::bvector_type::opt_compress, &st);
339
340 std::cout << "Sparse vector memory usage:" << st.memory_used / (1024*1024)<< "MB" << std::endl;
341 std::cout << "vector<unsigned> usage:" << v.size() * sizeof(v[0]) / (1024 * 1024) << "MB" << std::endl << std::endl;
342 }
343
344
346
347 }
348 catch(std::exception& ex)
349 {
350 std::cerr << ex.what() << std::endl;
351 return 1;
352 }
353
354 return 0;
355}
356
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
#define BM_DECLARE_TEMP_BLOCK(x)
Definition bm.h:47
Sparse constainer sparse_vector<> for integer types using bit-transposition transform.
Timing utilities for benchmarking (internal).
pre-processor un-defines to avoid global space pollution (internal)
const bvector_type * get_null_bvector() const BMNOEXCEPT
Get bit-vector of assigned values or NULL (if not constructed that way).
Definition bmbmatrix.h:451
enumerator first() const
Returns enumerator pointing on the first non-zero bit.
Definition bm.h:1871
Utility class to collect performance measurements and statistics.
Definition bmtimer.h:41
std::map< std::string, statistics > duration_map_type
test name to duration map
Definition bmtimer.h:66
static void print_duration_map(TOut &tout, const duration_map_type &dmap, format fmt=ct_time)
Definition bmtimer.h:150
succinct sparse vector with runtime compression using bit-slicing / transposition method
Definition bmsparsevec.h:87
void inc(size_type idx)
increment specified element by one
value_type get(size_type idx) const BMNOEXCEPT
get specified element without bounds checking
bool equal(const sparse_vector< Val, BV > &sv, bm::null_support null_able=bm::use_null) const BMNOEXCEPT
check if another sparse vector has the same content and size
void set(size_type idx, value_type v)
set specified element with bounds checking and automatic resize
sparse_vector< Val, BV > & merge(sparse_vector< Val, BV > &sv)
merge with another sparse vector using OR operation Merge is different from join(),...
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename sparse_vector< Val, BV >::statistics *stat=0)
run memory optimization for all vector planes
@ use_null
support "non-assigned" or "NULL" logic
Definition bmconst.h:230
std::random_device rand_dev
Definition sample11.cpp:51
bm::chrono_taker ::duration_map_type timing_map
Definition sample11.cpp:46
std::mt19937 gen(rand_dev())
const unsigned test_size
Definition xsample02.cpp:57
static void counting_sort_parallel(sparse_vector_u32 &sv_out, const std::vector< unsigned > &vin)
std::map< unsigned, unsigned > map_u32
Definition xsample02.cpp:69
static void counting_sort(sparse_vector_u32 &sv_out, const std::vector< unsigned > &vin)
std::random_device rand_dev
Definition xsample02.cpp:63
static void build_histogram(sparse_vector_u32 &sv_out, const std::vector< unsigned > &vin)
bm::sparse_vector< unsigned, bm::bvector<> > sparse_vector_u32
Definition xsample02.cpp:68
int main(void)
static void counting_sort_naive(sparse_vector_u32 &sv_out, const std::vector< unsigned > &vin)
Definition xsample02.cpp:98
static void sort_map(map_u32 &hmap, const std::vector< unsigned > &vin)
Definition xsample02.cpp:81
unsigned counting_sort_subbatch(sparse_vector_u32 *sv_out, const std::vector< unsigned > *vin)
std::mt19937 gen(rand_dev())
static void print_sorted(const sparse_vector_u32 &sv)
const unsigned value_max
Definition xsample02.cpp:56
std::uniform_int_distribution rand_dis(1, value_max)