BitMagic-C++
bmalgo.h
Go to the documentation of this file.
1#ifndef BMALGO__H__INCLUDED__
2#define BMALGO__H__INCLUDED__
3/*
4Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5
6Licensed under the Apache License, Version 2.0 (the "License");
7you may not use this file except in compliance with the License.
8You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12Unless required by applicable law or agreed to in writing, software
13distributed under the License is distributed on an "AS IS" BASIS,
14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15See the License for the specific language governing permissions and
16limitations under the License.
17
18For more information please visit: http://bitmagic.io
19*/
20
21/*! \file bmalgo.h
22 \brief Algorithms for bvector<> (main include)
23*/
24
25#ifndef BM__H__INCLUDED__
26// BitMagic utility headers do not include main "bm.h" declaration
27// #include "bm.h" or "bm64.h" explicitly
28# error missing include (bm.h or bm64.h)
29#endif
30
31#include "bmfunc.h"
32#include "bmdef.h"
33
34#include "bmalgo_impl.h"
35
36
37
38namespace bm
39{
40
41/*!
42 \brief Computes bitcount of AND operation of two bitsets
43 \param bv1 - Argument bit-vector.
44 \param bv2 - Argument bit-vector.
45 \return bitcount of the result
46 \ingroup setalgo
47*/
48template<class BV>
49typename BV::size_type count_and(const BV& bv1, const BV& bv2) BMNOEXCEPT
50{
51 return bm::distance_and_operation(bv1, bv2);
52}
53
54/*!
55 \brief Computes if there is any bit in AND operation of two bitsets
56 \param bv1 - Argument bit-vector.
57 \param bv2 - Argument bit-vector.
58 \return non zero value if there is any bit
59 \ingroup setalgo
60*/
61template<class BV>
62typename BV::size_type any_and(const BV& bv1, const BV& bv2) BMNOEXCEPT
63{
65
66 distance_operation_any(bv1, bv2, &dmd, &dmd + 1);
67 return dmd.result;
68}
69
70
71
72/*!
73 \brief Computes bitcount of XOR operation of two bitsets
74 \param bv1 - Argument bit-vector.
75 \param bv2 - Argument bit-vector.
76 \return bitcount of the result
77 \ingroup setalgo
78*/
79template<class BV>
81count_xor(const BV& bv1, const BV& bv2) BMNOEXCEPT
82{
84
85 distance_operation(bv1, bv2, &dmd, &dmd + 1);
86 return dmd.result;
87}
88
89/*!
90 \brief Computes if there is any bit in XOR operation of two bitsets
91 \param bv1 - Argument bit-vector.
92 \param bv2 - Argument bit-vector.
93 \return non zero value if there is any bit
94 \ingroup setalgo
95*/
96template<class BV>
97typename BV::size_type any_xor(const BV& bv1, const BV& bv2) BMNOEXCEPT
98{
100
101 distance_operation_any(bv1, bv2, &dmd, &dmd + 1);
102 return dmd.result;
103}
104
105
106
107/*!
108 \brief Computes bitcount of SUB operation of two bitsets
109 \param bv1 - Argument bit-vector.
110 \param bv2 - Argument bit-vector.
111 \return bitcount of the result
112 \ingroup setalgo
113*/
114template<class BV>
115typename BV::size_type count_sub(const BV& bv1, const BV& bv2) BMNOEXCEPT
116{
118
119 distance_operation(bv1, bv2, &dmd, &dmd + 1);
120 return dmd.result;
121}
122
123
124/*!
125 \brief Computes if there is any bit in SUB operation of two bitsets
126 \param bv1 - Argument bit-vector.
127 \param bv2 - Argument bit-vector.
128 \return non zero value if there is any bit
129 \ingroup setalgo
130*/
131template<class BV>
132typename BV::size_type any_sub(const BV& bv1, const BV& bv2) BMNOEXCEPT
133{
135
136 distance_operation_any(bv1, bv2, &dmd, &dmd + 1);
137 return dmd.result;
138}
139
140
141/*!
142 \brief Computes bitcount of OR operation of two bitsets
143 \param bv1 - Argument bit-vector.
144 \param bv2 - Argument bit-vector.
145 \return bitcount of the result
146 \ingroup setalgo
147*/
148template<class BV>
149typename BV::size_type count_or(const BV& bv1, const BV& bv2) BMNOEXCEPT
150{
152
153 distance_operation(bv1, bv2, &dmd, &dmd + 1);
154 return dmd.result;
155}
156
157/*!
158 \brief Computes if there is any bit in OR operation of two bitsets
159 \param bv1 - Argument bit-vector.
160 \param bv2 - Argument bit-vector.
161 \return non zero value if there is any bit
162 \ingroup setalgo
163*/
164template<class BV>
165typename BV::size_type any_or(const BV& bv1, const BV& bv2) BMNOEXCEPT
166{
168
169 distance_operation_any(bv1, bv2, &dmd, &dmd + 1);
170 return dmd.result;
171}
172
173
174
175#define BM_SCANNER_OP(x) \
176if (0 != (block = blk_blk[j+x])) \
177{ \
178 if (BM_IS_GAP(block)) \
179 { \
180 bm::for_each_gap_blk(BMGAP_PTR(block), (r+j+x)*bm::bits_in_block,\
181 bit_functor); \
182 } \
183 else \
184 { \
185 bm::for_each_bit_blk(block, (r+j+x)*bm::bits_in_block,bit_functor); \
186 } \
187}
188
189
190/**
191 @brief bit-vector visitor scanner to traverse each 1 bit using C++ visitor
192
193 @param bv - bit vector to scan
194 @param bit_functor - visitor: should support add_bits(), add_range()
195
196 \ingroup setalgo
197 @sa for_each_bit_range visit_each_bit
198*/
199template<class BV, class Func>
200void for_each_bit(const BV& bv,
201 Func& bit_functor)
202{
203 typedef typename BV::size_type size_type;
204
205 const typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
206 bm::word_t*** blk_root = bman.top_blocks_root();
207
208 if (!blk_root)
209 return;
210
211 unsigned tsize = bman.top_block_size();
212 for (unsigned i = 0; i < tsize; ++i)
213 {
214 bm::word_t** blk_blk = blk_root[i];
215 if (!blk_blk)
216 continue;
217
218 if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
219 blk_blk = FULL_SUB_BLOCK_REAL_ADDR;
220
221 const bm::word_t* block;
222 size_type r = size_type(i) * bm::set_sub_array_size;
223 unsigned j = 0;
224 do
225 {
226 #ifdef BM64_AVX2
227 if (!avx2_test_all_zero_wave(blk_blk + j))
228 {
233 }
234 j += 4;
235 #elif defined(BM64_SSE4)
236 if (!sse42_test_all_zero_wave(blk_blk + j))
237 {
240 }
241 j += 2;
242 #else
244 ++j;
245 #endif
246
247 } while (j < bm::set_sub_array_size);
248
249 } // for i
250}
251
252/**
253 @brief bit-vector range visitor to traverse each 1 bit
254
255 @param bv - bit vector to scan
256 @param right - start of closed interval [from..to]
257 @param left - end of close interval [from..to]
258 @param bit_functor - visitor: should support add_bits(), add_range()
259
260 \ingroup setalgo
261 @sa for_each_bit
262*/
263template<class BV, class Func>
264void for_each_bit_range(const BV& bv,
265 typename BV::size_type left,
266 typename BV::size_type right,
267 Func& bit_functor)
268{
269 if (left > right)
270 bm::xor_swap(left, right);
271 if (right == bm::id_max)
272 --right;
273 BM_ASSERT(left < bm::id_max && right < bm::id_max);
274
275 bm::for_each_bit_range_no_check(bv, left, right, bit_functor);
276}
277
278
279#undef BM_SCANNER_OP
280
281
282/// private adaptor for C-style callbacks
283///
284/// @internal
285///
286template <class VCBT, class size_type>
288{
290
292 : handle_(h), func_(cb_func)
293 {}
294
295 void add_bits(size_type offset, const unsigned char* bits, unsigned size)
296 {
297 for (unsigned i = 0; i < size; ++i)
298 func_(handle_, offset + bits[i]);
299 }
300 void add_range(size_type offset, size_type size)
301 {
302 for (size_type i = 0; i < size; ++i)
303 func_(handle_, offset + i);
304 }
305
306 void* handle_;
308};
309
310
311/// Functor for bit-copy (for testing)
312///
313/// @internal
314///
315template <class BV>
317{
318 typedef typename BV::size_type size_type;
319
321 : bv_(bv)
322 {
323 bv_.init();
324 }
325
326 void add_bits(size_type offset, const unsigned char* bits, unsigned size)
327 {
328 BM_ASSERT(size);
329 for (unsigned i = 0; i < size; ++i)
330 bv_.set_bit_no_check(offset + bits[i]);
331 }
332 void add_range(size_type offset, size_type size)
333 {
334 BM_ASSERT(size);
335 bv_.set_range(offset, offset + size - 1);
336 }
337
338 BV& bv_;
340};
341
342
343
344/**
345 @brief bvector visitor scanner to traverse each 1 bit using C callback
346
347 @param bv - bit vector to scan
348 @param handle_ptr - handle to private memory used by callback
349 @param callback_ptr - callback function
350
351 \ingroup setalgo
352
353 @sa bit_visitor_callback_type
354*/
355template<class BV>
356void visit_each_bit(const BV& bv,
357 void* handle_ptr,
358 bit_visitor_callback_type callback_ptr)
359{
360 typedef typename BV::size_type size_type;
362 func(handle_ptr, callback_ptr);
363 bm::for_each_bit(bv, func);
364}
365
366/**
367 @brief bvector visitor scanner to traverse each bits in range (C callback)
368
369 @param bv - bit vector to scan
370 @param left - from [left..right]
371 @param right - to [left..right]
372 @param handle_ptr - handle to private memory used by callback
373 @param callback_ptr - callback function
374
375 \ingroup setalgo
376
377 @sa bit_visitor_callback_type for_each_bit
378*/
379template<class BV>
380void visit_each_bit_range(const BV& bv,
381 typename BV::size_type left,
382 typename BV::size_type right,
383 void* handle_ptr,
384 bit_visitor_callback_type callback_ptr)
385{
386 typedef typename BV::size_type size_type;
388 func(handle_ptr, callback_ptr);
389 bm::for_each_bit_range(bv, left, right, func);
390}
391
392
393
394/**
395 Algorithms for rank compression of bit-vector
396
397 1. Source vector (bv_src) is a subset of index vector (bv_idx)
398 2. As a subset it can be collapsed using bit-rank method, where each position
399 in the source vector is defined by population count (range)
400 [0..index_position] (count_range())
401 As a result all integer set of source vector gets re-mapped in
402 accord with the index vector.
403
404 \ingroup setalgo
405*/
406template<typename BV>
408{
409public:
410 typedef BV bvector_type;
411 typedef typename BV::size_type size_type;
412 typedef typename BV::rs_index_type rs_index_type;
414 {
415 n_buffer_cap = 1024
416 };
417public:
418
419 /**
420 Rank decompression
421 */
422 void decompress(BV& bv_target, const BV& bv_idx, const BV& bv_src);
423
424 /**
425 Rank compression algorithm based on two palallel iterators/enumerators
426 set of source vector gets re-mapped in accord with the index/rank vector.
427
428 \param bv_target - target bit-vector
429 \param bv_idx - index (rank) vector used for address recalculation
430 \param bv_src - source vector for re-mapping
431 */
432 void compress(BV& bv_target, const BV& bv_idx, const BV& bv_src);
433
434 /**
435 \brief Source vector priority + index based rank
436
437 @sa compress
438 */
439 void compress_by_source(BV& bv_target,
440 const BV& bv_idx,
441 const rs_index_type& bc_idx,
442 const BV& bv_src);
443};
444
445
446// ------------------------------------------------------------------------
447//
448// ------------------------------------------------------------------------
449
450
451template<class BV>
453 const BV& bv_idx,
454 const BV& bv_src)
455{
456 bv_target.clear();
457 bv_target.init();
458
459 if (&bv_idx == &bv_src)
460 {
461 bv_target = bv_src;
462 return;
463 }
464 size_type ibuffer[n_buffer_cap];
465 size_type b_size;
466
467 typedef typename BV::enumerator enumerator_t;
468 enumerator_t en_s = bv_src.first();
469 enumerator_t en_i = bv_idx.first();
470
471 size_type r_idx = b_size = 0;
472 size_type i, s;
473
474 for (; en_i.valid(); )
475 {
476 if (!en_s.valid())
477 break;
478 i = *en_i; s = *en_s;
479
480 BM_ASSERT(s >= i);
481 BM_ASSERT(bv_idx.test(i));
482
483 if (i == s)
484 {
485 ibuffer[b_size++] = r_idx++;
486 if (b_size == n_buffer_cap)
487 {
488 bv_target.set(ibuffer, b_size, bm::BM_SORTED);
489 b_size = 0;
490 }
491 ++en_i; ++en_s;
492 continue;
493 }
494 BM_ASSERT(s > i);
495
496 size_type dist = s - i;
497 if (dist >= 64) // sufficiently far away, jump
498 {
499 size_type r_dist = bv_idx.count_range(i + 1, s);
500 r_idx += r_dist;
501 en_i.go_to(s);
502 BM_ASSERT(en_i.valid());
503 }
504 else // small distance, iterate to close the gap
505 {
506 for (; s > i; ++r_idx)
507 {
508 ++en_i;
509 i = *en_i;
510 } // for
511 BM_ASSERT(en_i.valid());
512 }
513 } // for
514
515 if (b_size)
516 {
517 bv_target.set(ibuffer, b_size, bm::BM_SORTED);
518 }
519
520}
521
522// ------------------------------------------------------------------------
523
524template<class BV>
526 const BV& bv_idx,
527 const BV& bv_src)
528{
529 bv_target.clear();
530 bv_target.init();
531
532 if (&bv_idx == &bv_src)
533 {
534 bv_target = bv_src;
535 return;
536 }
537
538 size_type r_idx, i, s, b_size;
539 size_type ibuffer[n_buffer_cap];
540
541 b_size = r_idx = 0;
542
543 typedef typename BV::enumerator enumerator_t;
544 enumerator_t en_s = bv_src.first();
545 enumerator_t en_i = bv_idx.first();
546 for (; en_i.valid(); )
547 {
548 if (!en_s.valid())
549 break;
550 s = *en_s;
551 i = *en_i;
552 if (s == r_idx)
553 {
554 ibuffer[b_size++] = i;
555 if (b_size == n_buffer_cap)
556 {
557 bv_target.set(ibuffer, b_size, bm::BM_SORTED);
558 b_size = 0;
559 }
560 ++en_i; ++en_s; ++r_idx;
561 continue;
562 }
563 // source is "faster" than index, need to re-align
564 BM_ASSERT(s > r_idx);
565 size_type rank = s - r_idx + 1u;
566 size_type new_pos = 0;
567
568 if (rank < 256)
569 {
570 en_i.skip(s - r_idx);
571 BM_ASSERT(en_i.valid());
572 new_pos = *en_i;
573 }
574 else
575 {
576 bv_idx.find_rank(rank, i, new_pos);
577 BM_ASSERT(new_pos);
578 en_i.go_to(new_pos);
579 BM_ASSERT(en_i.valid());
580 }
581
582 r_idx = s;
583 ibuffer[b_size++] = new_pos;
584 if (b_size == n_buffer_cap)
585 {
586 bv_target.set(ibuffer, b_size, bm::BM_SORTED);
587 b_size = 0;
588 }
589 ++en_i; ++en_s; ++r_idx;
590
591 } // for en
592
593 if (b_size)
594 {
595 bv_target.set(ibuffer, b_size, bm::BM_SORTED);
596 }
597}
598
599// ------------------------------------------------------------------------
600
601template<class BV>
603 const BV& bv_idx,
604 const rs_index_type& bc_idx,
605 const BV& bv_src)
606{
607 /// Rank compressor visitor (functor)
608 /// @internal
609 struct visitor_func
610 {
611 visitor_func(bvector_type& bv_out,
612 const bvector_type& bv_index,
613 const rs_index_type& bc_index)
614 : bv_target_(bv_out),
615 bv_index_(bv_index),
616 bc_index_(bc_index)
617 {}
618
619 void add_bits(size_type arr_offset, const unsigned char* bits, unsigned bits_size)
620 {
621 for (unsigned i = 0; i < bits_size; ++i)
622 {
623 size_type idx = arr_offset + bits[i];
624 BM_ASSERT(bv_index_.test(idx));
625
626 size_type r_idx = bv_index_.count_to(idx, bc_index_) - 1;
627 bv_target_.set_bit_no_check(r_idx);
628 }
629 }
630 void add_range(size_type arr_offset, size_type sz)
631 {
632 for (size_type i = 0; i < sz; ++i)
633 {
634 size_type idx = i + arr_offset;
635 BM_ASSERT(bv_index_.test(idx));
636
637 size_type r_idx = bv_index_.count_to(idx, bc_index_) - 1;
638 bv_target_.set_bit_no_check(r_idx);
639 }
640 }
641
642 bvector_type& bv_target_;
643 const bvector_type& bv_index_;
644 const rs_index_type& bc_index_;
645 };
646 // ------------------------------------
647
648 bv_target.clear();
649 bv_target.init();
650
651 if (&bv_idx == &bv_src)
652 {
653 bv_target = bv_src;
654 return;
655 }
656 visitor_func func(bv_target, bv_idx, bc_idx);
657 bm::for_each_bit(bv_src, func);
658}
659
660
661
662} // bm
663
664#include "bmundef.h"
665
666#endif
#define BM_SCANNER_OP(x)
Definition bmalgo.h:175
Algorithms for bvector<>
Definitions(internal)
#define BMNOEXCEPT
Definition bmdef.h:79
#define BM_ASSERT
Definition bmdef.h:130
#define FULL_BLOCK_FAKE_ADDR
Definition bmdef.h:149
#define FULL_SUB_BLOCK_REAL_ADDR
Definition bmdef.h:150
Bit manipulation primitives (internal)
pre-processor un-defines to avoid global space pollution (internal)
Algorithms for rank compression of bit-vector.
Definition bmalgo.h:408
void decompress(BV &bv_target, const BV &bv_idx, const BV &bv_src)
Rank decompression.
Definition bmalgo.h:525
void compress_by_source(BV &bv_target, const BV &bv_idx, const rs_index_type &bc_idx, const BV &bv_src)
Source vector priority + index based rank.
Definition bmalgo.h:602
BV::size_type size_type
Definition bmalgo.h:411
void compress(BV &bv_target, const BV &bv_idx, const BV &bv_src)
Rank compression algorithm based on two palallel iterators/enumerators set of source vector gets re-m...
Definition bmalgo.h:452
BV::rs_index_type rs_index_type
Definition bmalgo.h:412
BMFORCEINLINE bool sse42_test_all_zero_wave(const void *ptr)
check if wave of pointers is all NULL
Definition bmsse4.h:595
int(* bit_visitor_callback_type)(void *handle_ptr, bm::id_t bit_idx)
Callback type to visit (callback style) bits in bit-vector(s)
Definition bm.h:71
@ BM_SORTED
input set is sorted (ascending order)
Definition bmconst.h:191
void distance_operation(const BV &bv1, const BV &bv2, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end) BMNOEXCEPT
Distance computing template function.
void distance_operation_any(const BV &bv1, const BV &bv2, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end) BMNOEXCEPT
Distance screening template function.
BV::size_type distance_and_operation(const BV &bv1, const BV &bv2) BMNOEXCEPT
Distance AND computing template function.
@ COUNT_XOR
(A ^ B).count()
Definition bmalgo_impl.h:60
@ COUNT_SUB_AB
(A - B).count()
Definition bmalgo_impl.h:62
@ COUNT_AND
(A & B).count()
Definition bmalgo_impl.h:59
@ COUNT_OR
(A | B).count()
Definition bmalgo_impl.h:61
void for_each_bit_range(const BV &bv, typename BV::size_type left, typename BV::size_type right, Func &bit_functor)
bit-vector range visitor to traverse each 1 bit
Definition bmalgo.h:264
BV::size_type any_or(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes if there is any bit in OR operation of two bitsets.
Definition bmalgo.h:165
BV::size_type any_xor(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes if there is any bit in XOR operation of two bitsets.
Definition bmalgo.h:97
void visit_each_bit(const BV &bv, void *handle_ptr, bit_visitor_callback_type callback_ptr)
bvector visitor scanner to traverse each 1 bit using C callback
Definition bmalgo.h:356
void visit_each_bit_range(const BV &bv, typename BV::size_type left, typename BV::size_type right, void *handle_ptr, bit_visitor_callback_type callback_ptr)
bvector visitor scanner to traverse each bits in range (C callback)
Definition bmalgo.h:380
BV::size_type count_and(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes bitcount of AND operation of two bitsets.
Definition bmalgo.h:49
BV::size_type count_or(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes bitcount of OR operation of two bitsets.
Definition bmalgo.h:149
BV::size_type any_and(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes if there is any bit in AND operation of two bitsets.
Definition bmalgo.h:62
BV::size_type count_sub(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes bitcount of SUB operation of two bitsets.
Definition bmalgo.h:115
BV::size_type any_sub(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes if there is any bit in SUB operation of two bitsets.
Definition bmalgo.h:132
bm::distance_metric_descriptor::size_type count_xor(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes bitcount of XOR operation of two bitsets.
Definition bmalgo.h:81
void for_each_bit(const BV &bv, Func &bit_functor)
bit-vector visitor scanner to traverse each 1 bit using C++ visitor
Definition bmalgo.h:200
Definition bm.h:77
void xor_swap(W &x, W &y) BMNOEXCEPT
XOR swap two scalar variables.
Definition bmfunc.h:909
const unsigned id_max
Definition bmconst.h:108
unsigned int word_t
Definition bmconst.h:38
const unsigned set_sub_array_size
Definition bmconst.h:94
void for_each_bit_range_no_check(const BV &bv, typename BV::size_type left, typename BV::size_type right, Func &bit_functor)
Implementation of for_each_bit_range without boilerplave checks.
Functor for bit-copy (for testing)
Definition bmalgo.h:317
void add_bits(size_type offset, const unsigned char *bits, unsigned size)
Definition bmalgo.h:326
BV::size_type size_type
Definition bmalgo.h:318
bit_visitor_callback_type func_
Definition bmalgo.h:339
void add_range(size_type offset, size_type size)
Definition bmalgo.h:332
private adaptor for C-style callbacks
Definition bmalgo.h:288
bit_visitor_callback_type func_
Definition bmalgo.h:307
void add_range(size_type offset, size_type size)
Definition bmalgo.h:300
bit_vitor_callback_adaptor(void *h, bit_visitor_callback_type cb_func)
Definition bmalgo.h:291
void add_bits(size_type offset, const unsigned char *bits, unsigned size)
Definition bmalgo.h:295
Distance metric descriptor, holds metric code and result.
Definition bmalgo_impl.h:88