BitMagic-C++
bmalgo_impl.h
Go to the documentation of this file.
1#ifndef BMALGO_IMPL__H__INCLUDED__
2#define BMALGO_IMPL__H__INCLUDED__
3/*
4Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5
6Licensed under the Apache License, Version 2.0 (the "License");
7you may not use this file except in compliance with the License.
8You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12Unless required by applicable law or agreed to in writing, software
13distributed under the License is distributed on an "AS IS" BASIS,
14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15See the License for the specific language governing permissions and
16limitations under the License.
17
18For more information please visit: http://bitmagic.io
19*/
20
21/*! \file bmalgo_impl.h
22 \brief Algorithms for bvector<>
23*/
24
25#ifdef _MSC_VER
26#pragma warning( push )
27#pragma warning( disable : 4311 4312 4127)
28#endif
29
30#include "bmdef.h"
31#include "bmutil.h"
32
33namespace bm
34{
35
36/*!
37 @defgroup setalgo bvector<> algorithms
38
39 Set algebra algorithms using bit-vectors, arrays.
40 Binary metrics and distances. Random sub-sets.
41
42 @ingroup bvector
43 */
44
45/*!
46 @defgroup distance Binary-distance metrics
47
48 Distance metrics and algorithms to compute binary distances
49 @ingroup setalgo
50 */
51
52
53/*!
54 \brief Distance metrics codes defined for vectors A and B
55 \ingroup distance
56*/
58{
59 COUNT_AND = set_COUNT_AND, //!< (A & B).count()
60 COUNT_XOR = set_COUNT_XOR, //!< (A ^ B).count()
61 COUNT_OR = set_COUNT_OR, //!< (A | B).count()
62 COUNT_SUB_AB = set_COUNT_SUB_AB, //!< (A - B).count()
63 COUNT_SUB_BA = set_COUNT_SUB_BA, //!< (B - A).count()
64 COUNT_A = set_COUNT_A, //!< A.count()
65 COUNT_B = set_COUNT_B //!< B.count()
66};
67
68/**
69 Convert set operation into compatible distance metric
70 \ingroup distance
71*/
72inline
74{
76 if (op == set_COUNT) op = set_COUNT_B;
77 // distance metric is created as a set operation sub-class
78 // simple cast will work to convert
79 return (distance_metric) op;
80}
81
82/*!
83 \brief Distance metric descriptor, holds metric code and result.
84 \sa distance_operation
85*/
86
88{
89#ifdef BM64ADDR
90 typedef bm::id64_t size_type;
91#else
93#endif
94
97
106
107 /*!
108 \brief Sets metric result to 0
109 */
111 {
112 result = 0;
113 }
114};
115
116
117
118/*!
119 \brief Internal function computes different distance metrics.
120 \internal
121 \ingroup distance
122
123*/
124inline
126 const bm::word_t* arg_blk,
129
130{
131 gap_word_t* g1 = BMGAP_PTR(blk);
132 gap_word_t* g2 = BMGAP_PTR(arg_blk);
133
134 unsigned gap = BM_IS_GAP(blk);
135 unsigned arg_gap = BM_IS_GAP(arg_blk);
136
137 if (gap) // first block GAP-type
138 {
139 if (arg_gap) // both blocks GAP-type
140 {
141 for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
142 {
144
145 switch (dmd.metric)
146 {
147 case bm::COUNT_AND:
148 dmd.result += gap_count_and(g1, g2);
149 break;
150 case bm::COUNT_OR:
151 dmd.result += gap_count_or(g1, g2);
152 break;
153 case bm::COUNT_SUB_AB:
154 dmd.result += gap_count_sub(g1, g2);
155 break;
156 case bm::COUNT_SUB_BA:
157 dmd.result += gap_count_sub(g2, g1);
158 break;
159 case bm::COUNT_XOR:
160 dmd.result += gap_count_xor(g1, g2);
161 break;
162 case bm::COUNT_A:
163 dmd.result += gap_bit_count_unr(g1);
164 break;
165 case bm::COUNT_B:
166 dmd.result += gap_bit_count_unr(g2);
167 break;
168 default:
169 BM_ASSERT(0);
170 } // switch
171
172 } // for it
173
174 return;
175
176 }
177 else // first block - GAP, argument - BITSET
178 {
179 for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
180 {
182
183 switch (dmd.metric)
184 {
185 case bm::COUNT_AND:
186 if (arg_blk)
187 dmd.result += gap_bitset_and_count(arg_blk, g1);
188 break;
189 case bm::COUNT_OR:
190 if (!arg_blk)
191 dmd.result += gap_bit_count_unr(g1);
192 else
193 dmd.result += gap_bitset_or_count(arg_blk, g1);
194 break;
195 case bm::COUNT_SUB_AB:
196 {
198
199 gap_convert_to_bitset((bm::word_t*) temp_bit_blk, g1);
200 dmd.result +=
201 bit_operation_sub_count((bm::word_t*)temp_bit_blk, arg_blk);
202 }
203 break;
204 case bm::COUNT_SUB_BA:
205 dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB
207 blk,
208 it, it+1);
209 dmd.metric = bm::COUNT_SUB_BA; // restore status quo
210 break;
211 case bm::COUNT_XOR:
212 if (!arg_blk)
213 dmd.result += gap_bit_count_unr(g1);
214 else
215 dmd.result += gap_bitset_xor_count(arg_blk, g1);
216 break;
217 case bm::COUNT_A:
218 if (g1)
219 dmd.result += gap_bit_count_unr(g1);
220 break;
221 case bm::COUNT_B:
222 if (arg_blk)
223 {
224 dmd.result +=
225 bit_block_count(arg_blk);
226 }
227 break;
228 default:
229 BM_ASSERT(0);
230 } // switch
231
232 } // for it
233
234 return;
235
236 }
237 }
238 else // first block is BITSET-type
239 {
240 if (arg_gap) // second argument block is GAP-type
241 {
242 for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
243 {
245
246 switch (dmd.metric)
247 {
248 case bm::COUNT_AND:
249 if (blk)
250 dmd.result += gap_bitset_and_count(blk, g2);
251 break;
252 case bm::COUNT_OR:
253 if (!blk)
254 dmd.result += gap_bit_count_unr(g2);
255 else
256 dmd.result += gap_bitset_or_count(blk, g2);
257 break;
258 case bm::COUNT_SUB_AB:
259 if (blk)
260 dmd.result += gap_bitset_sub_count(blk, g2);
261 break;
262 case bm::COUNT_SUB_BA:
263 dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB
265 //arg_gap,
266 blk,
267 //gap,
268 it, it+1);
269 dmd.metric = bm::COUNT_SUB_BA; // restore status quo
270 break;
271 case bm::COUNT_XOR:
272 if (!blk)
273 dmd.result += gap_bit_count_unr(g2);
274 else
275 dmd.result += gap_bitset_xor_count(blk, g2);
276 break;
277 case bm::COUNT_A:
278 if (blk)
279 {
280 dmd.result +=
281 bit_block_count(blk);
282 }
283 break;
284 case bm::COUNT_B:
285 if (g2)
286 dmd.result += gap_bit_count_unr(g2);
287 break;
288 default:
289 BM_ASSERT(0);
290 } // switch
291
292 } // for it
293
294 return;
295 }
296 }
297
298 // --------------------------------------------
299 //
300 // Here we combine two plain bitblocks
301
302 for (distance_metric_descriptor* it = dmit; it < dmit_end; ++it)
303 {
307 if (gfunc)
308 {
309 dmd.result += (*gfunc)(blk, arg_blk);
310 }
311 else
312 {
313 switch (dmd.metric)
314 {
315 case bm::COUNT_A:
316 if (blk)
317 dmd.result += bm::bit_block_count(blk);
318 break;
319 case bm::COUNT_B:
320 if (arg_blk)
321 dmd.result += bm::bit_block_count(arg_blk);
322 break;
323 case bm::COUNT_AND:
324 case bm::COUNT_XOR:
325 case bm::COUNT_OR:
326 case bm::COUNT_SUB_AB:
327 case bm::COUNT_SUB_BA:
328 default:
329 BM_ASSERT(0);
330 } // switch
331 }
332
333 } // for it
334}
335
336/*!
337\brief Internal function computes AND distance.
338\internal
339\ingroup distance
340*/
341inline
343 const bm::word_t* arg_blk) BMNOEXCEPT
344{
345 unsigned gap = BM_IS_GAP(blk);
346 unsigned arg_gap = BM_IS_GAP(arg_blk);
347 if (gap) // first block GAP-type
348 {
349 if (arg_gap) // both blocks GAP-type
350 {
351 return gap_count_and(BMGAP_PTR(blk), BMGAP_PTR(arg_blk));
352 }
353 else // first block - GAP, argument - BITSET
354 {
355 return gap_bitset_and_count(arg_blk, BMGAP_PTR(blk));
356 }
357 }
358 else // first block is BITSET-type
359 {
360 if (arg_gap) // second argument block is GAP-type
361 {
362 return gap_bitset_and_count(blk, BMGAP_PTR(arg_blk));
363 }
364 }
365
366 // --------------------------------------------
367 // Here we combine two plain bitblocks
368
369 return bit_operation_and_count(blk, arg_blk);
370}
371
372
373/*!
374 \brief Internal function computes different existense of distance metric.
375 \internal
376 \ingroup distance
377*/
378inline
380 unsigned gap,
381 const bm::word_t* arg_blk,
382 unsigned arg_gap,
385
386{
387 gap_word_t* res=0;
388
389 gap_word_t* g1 = BMGAP_PTR(blk);
390 gap_word_t* g2 = BMGAP_PTR(arg_blk);
391
392 if (gap) // first block GAP-type
393 {
394 if (arg_gap) // both blocks GAP-type
395 {
396 gap_word_t tmp_buf[bm::gap_max_buff_len * 3]; // temporary result
397
398 for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
399 {
401 if (dmd.result)
402 {
403 continue;
404 }
405 res = 0;
406 unsigned dsize = 0;
407 switch (dmd.metric)
408 {
409 case bm::COUNT_AND:
410 dmd.result += gap_operation_any_and(g1, g2);
411 break;
412 case bm::COUNT_OR:
413 res = gap_operation_or(g1, g2, tmp_buf, dsize);
414 break;
415 case bm::COUNT_SUB_AB:
416 dmd.result += gap_operation_any_sub(g1, g2);
417 break;
418 case bm::COUNT_SUB_BA:
419 dmd.result += gap_operation_any_sub(g2, g1);
420 break;
421 case bm::COUNT_XOR:
422 dmd.result += gap_operation_any_xor(g1, g2);
423 break;
424 case bm::COUNT_A:
425 res = g1;
426 break;
427 case bm::COUNT_B:
428 res = g2;
429 break;
430 default:
431 BM_ASSERT(0);
432 } // switch
433 if (res)
434 dmd.result += !gap_is_all_zero(res);
435
436 } // for it
437
438 return;
439
440 }
441 else // first block - GAP, argument - BITSET
442 {
443 for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
444 {
446 if (dmd.result)
447 {
448 continue;
449 }
450
451 switch (dmd.metric)
452 {
453 case bm::COUNT_AND:
454 if (arg_blk)
455 dmd.result += gap_bitset_and_any(arg_blk, g1);
456 break;
457 case bm::COUNT_OR:
458 if (!arg_blk)
459 dmd.result += !gap_is_all_zero(g1);
460 else
461 dmd.result += gap_bitset_or_any(arg_blk, g1);
462 break;
463 case bm::COUNT_SUB_AB:
464 {
466 gap_convert_to_bitset((bm::word_t*) temp_blk, g1);
467 dmd.result +=
468 bit_operation_sub_any((bm::word_t*)temp_blk, arg_blk);
469 }
470 break;
471 case bm::COUNT_SUB_BA:
472 dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB
474 arg_gap,
475 blk,
476 gap,
477 it, it+1);
478 dmd.metric = bm::COUNT_SUB_BA; // restore status quo
479 break;
480 case bm::COUNT_XOR:
481 if (!arg_blk)
482 dmd.result += !gap_is_all_zero(g1);
483 else
484 dmd.result += gap_bitset_xor_any(arg_blk, g1);
485 break;
486 case bm::COUNT_A:
487 if (g1)
488 dmd.result += !gap_is_all_zero(g1);
489 break;
490 case bm::COUNT_B:
491 if (arg_blk)
492 {
493 dmd.result +=
494 !bit_is_all_zero(arg_blk);
495 }
496 break;
497 default:
498 BM_ASSERT(0);
499 } // switch
500
501 } // for it
502
503 return;
504
505 }
506 }
507 else // first block is BITSET-type
508 {
509 if (arg_gap) // second argument block is GAP-type
510 {
511 for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
512 {
514 if (dmd.result)
515 {
516 continue;
517 }
518
519 switch (dmd.metric)
520 {
521 case bm::COUNT_AND:
522 if (blk)
523 dmd.result += gap_bitset_and_any(blk, g2);
524 break;
525 case bm::COUNT_OR:
526 if (!blk)
527 dmd.result += !gap_is_all_zero(g2);
528 else
529 dmd.result += gap_bitset_or_any(blk, g2);
530 break;
531 case bm::COUNT_SUB_AB:
532 if (blk)
533 dmd.result += gap_bitset_sub_any(blk, g2);
534 break;
535 case bm::COUNT_SUB_BA:
536 dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB
538 arg_gap,
539 blk,
540 gap,
541 it, it+1);
542 dmd.metric = bm::COUNT_SUB_BA; // restore status quo
543 break;
544 case bm::COUNT_XOR:
545 if (!blk)
546 dmd.result += !gap_is_all_zero(g2);
547 else
548 dmd.result += gap_bitset_xor_any(blk, g2);
549 break;
550 case bm::COUNT_A:
551 if (blk)
552 {
553 dmd.result+=
555 }
556 break;
557 case bm::COUNT_B:
558 if (g2)
559 dmd.result += !gap_is_all_zero(g2);
560 break;
561 default:
562 BM_ASSERT(0);
563 } // switch
564
565 } // for it
566
567 return;
568 }
569 }
570
571 // --------------------------------------------
572 //
573 // Here we combine two plain bitblocks
574
575 for (distance_metric_descriptor* it = dmit; it < dmit_end; ++it)
576 {
578 if (dmd.result)
579 {
580 continue;
581 }
582
583 switch (dmd.metric)
584 {
585 case bm::COUNT_AND:
586 dmd.result +=
587 bit_operation_and_any(blk, arg_blk);
588 break;
589 case bm::COUNT_OR:
590 dmd.result +=
591 bit_operation_or_any(blk, arg_blk);
592 break;
593 case bm::COUNT_SUB_AB:
594 dmd.result +=
595 bit_operation_sub_any(blk, arg_blk);
596 break;
597 case bm::COUNT_SUB_BA:
598 dmd.result +=
599 bit_operation_sub_any(arg_blk, blk);
600 break;
601 case bm::COUNT_XOR:
602 dmd.result +=
603 bit_operation_xor_any(blk, arg_blk);
604 break;
605 case bm::COUNT_A:
606 if (blk)
607 dmd.result += !bit_is_all_zero(blk);
608 break;
609 case bm::COUNT_B:
610 if (arg_blk)
611 dmd.result += !bit_is_all_zero(arg_blk);
612 break;
613 default:
614 BM_ASSERT(0);
615 } // switch
616
617 } // for it
618}
619
620
621
622/*!
623 Convenience internal function to compute combine count for one metric
624 \internal
625 \ingroup distance
626*/
627inline
628unsigned
630 const bm::word_t* arg_blk,
632{
633 distance_metric_descriptor dmd(metric);
635 arg_blk, //arg_gap,
636 &dmd, &dmd+1);
637 return unsigned(dmd.result);
638}
639
640
641/*!
642 Convenience internal function to compute combine any for one metric
643 \internal
644 \ingroup distance
645*/
646inline
649 unsigned gap,
650 const bm::word_t* arg_blk,
651 unsigned arg_gap,
653{
654 distance_metric_descriptor dmd(metric);
656 arg_blk, arg_gap,
657 &dmd, &dmd+1);
658 return dmd.result;
659}
660
661/*!
662 \brief Staging function for distance operation
663
664 \return temp block allocated (or NULL)
665
666 \internal
667*/
668inline
670 const distance_metric_descriptor* dmit_end,
671 bool* is_all_and) BMNOEXCEPT
672{
673 for (const distance_metric_descriptor* it = dmit; it < dmit_end; ++it)
674 {
675 if (it->metric != bm::COUNT_AND)
676 {
677 *is_all_and = false;
678 }
679 } // for
680}
681
682/*!
683 \brief Distance computing template function.
684
685 Function receives two bitvectors and an array of distance metrics
686 (metrics pipeline). Function computes all metrics saves result into
687 corresponding pipeline results (distance_metric_descriptor::result)
688 An important detail is that function reuses metric descriptors,
689 incrementing received values. It allows you to accumulate results
690 from different calls in the pipeline.
691
692 \param bv1 - argument bitvector 1 (A)
693 \param bv2 - argument bitvector 2 (B)
694 \param dmit - pointer to first element of metric descriptors array
695 Input-Output parameter, receives metric code as input,
696 computation is added to "result" field
697 \param dmit_end - pointer to (last+1) element of metric descriptors array
698 \ingroup distance
699
700*/
701template<class BV>
702void distance_operation(const BV& bv1,
703 const BV& bv2,
706{
707 const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
708 const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
709
710 bool is_all_and = true; // flag is distance operation is just COUNT_AND
711 distance_stage(dmit, dmit_end, &is_all_and);
712
713 bm::word_t*** blk_root = bman1.top_blocks_root();
714 typename BV::size_type block_idx = 0;
715 unsigned i, j;
716
717 const bm::word_t* blk;
718 const bm::word_t* arg_blk;
719
720 unsigned top_block_size = bman1.top_block_size();
721 unsigned ebs2 = bman2.top_block_size();
722 unsigned top_size;
723 if (ebs2 > top_block_size)
724 top_size = ebs2;
725 else
726 top_size = top_block_size;
727
728 for (i = 0; i < top_size; ++i)
729 {
730 bm::word_t** blk_blk = (blk_root && (i < top_block_size)) ? blk_root[i] : 0;
731 if (!blk_blk)
732 {
733 // AND operation requested - we can skip this portion here
734 if (is_all_and)
735 {
736 block_idx += bm::set_sub_array_size;
737 continue;
738 }
739 const bm::word_t* const* bvbb = bman2.get_topblock(i);
740 if (bvbb == 0)
741 {
742 block_idx += bm::set_sub_array_size;
743 continue;
744 }
745
746 blk = 0;
747 for (j = 0; j < bm::set_sub_array_size; ++j,++block_idx)
748 {
749 arg_blk = bman2.get_block(i, j);
750 if (!arg_blk)
751 continue;
753 arg_blk,
754 dmit, dmit_end);
755 } // for j
756 continue;
757 }
758
759 for (j = 0; j < bm::set_sub_array_size; ++j, ++block_idx)
760 {
761 blk = bman1.get_block(i, j);
762 if (blk == 0 && is_all_and)
763 continue;
764
765 arg_blk = bman2.get_block(i, j);
766
767 if (!blk & !arg_blk)
768 continue;
769
771 arg_blk,
772 dmit, dmit_end);
773 } // for j
774
775 } // for i
776}
777
778
779/*!
780\brief Distance AND computing template function.
781
782
783\param bv1 - argument bitvector 1 (A)
784\param bv2 - argument bitvector 2 (B)
785\ingroup distance
786
787*/
788template<class BV>
789typename BV::size_type distance_and_operation(const BV& bv1,
790 const BV& bv2) BMNOEXCEPT
791{
792 const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
793 const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
794
795 if (!bman1.is_init() || !bman2.is_init())
796 return 0;
797
798 bm::word_t*** blk_root = bman1.top_blocks_root();
799 bm::word_t*** blk_root_arg = bman2.top_blocks_root();
800 typename BV::size_type count = 0;
801
802 unsigned top_block_size =
803 bm::min_value(bman1.top_block_size(),bman2.top_block_size());
804
805 for (unsigned i = 0; i < top_block_size; ++i)
806 {
807 bm::word_t** blk_blk;
808 bm::word_t** blk_blk_arg;
809 if ((blk_blk = blk_root[i]) == 0 || (blk_blk_arg= blk_root_arg[i]) == 0)
810 continue;
811
812 if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
813 blk_blk = FULL_SUB_BLOCK_REAL_ADDR;
814 if ((bm::word_t*)blk_blk_arg == FULL_BLOCK_FAKE_ADDR)
815 blk_blk_arg = FULL_SUB_BLOCK_REAL_ADDR;
816
817 for (unsigned j = 0; j < bm::set_sub_array_size; j+=4)
818 {
819 (blk_blk[j] && blk_blk_arg[j]) ?
820 count += combine_count_and_operation_with_block(BLOCK_ADDR_SAN(blk_blk[j]), BLOCK_ADDR_SAN(blk_blk_arg[j]))
821 :0;
822 (blk_blk[j+1] && blk_blk_arg[j+1]) ?
823 count += combine_count_and_operation_with_block(BLOCK_ADDR_SAN(blk_blk[j+1]), BLOCK_ADDR_SAN(blk_blk_arg[j+1]))
824 :0;
825 (blk_blk[j+2] && blk_blk_arg[j+2]) ?
826 count += combine_count_and_operation_with_block(BLOCK_ADDR_SAN(blk_blk[j+2]), BLOCK_ADDR_SAN(blk_blk_arg[j+2]))
827 :0;
828 (blk_blk[j+3] && blk_blk_arg[j+3]) ?
829 count += combine_count_and_operation_with_block(BLOCK_ADDR_SAN(blk_blk[j+3]), BLOCK_ADDR_SAN(blk_blk_arg[j+3]))
830 :0;
831 } // for j
832
833 } // for i
834 return count;
835}
836
837
838/*!
839 \brief Distance screening template function.
840
841 Function receives two bitvectors and an array of distance metrics
842 (metrics pipeline). Function computes possybility of a metric(any bit)
843 saves result into corresponding pipeline results
844 (distance_metric_descriptor::result)
845 An important detail is that function reuses metric descriptors,
846 incrementing received values. It allows you to accumulate results
847 from different calls in the pipeline.
848
849 \param bv1 - argument bitvector 1 (A)
850 \param bv2 - argument bitvector 2 (B)
851 \param dmit - pointer to first element of metric descriptors array
852 Input-Output parameter, receives metric code as input,
853 computation is added to "result" field
854 \param dmit_end - pointer to (last+1) element of metric descriptors array
855 \ingroup distance
856*/
857template<class BV>
858void distance_operation_any(const BV& bv1,
859 const BV& bv2,
862{
863 const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
864 const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
865
866 bool is_all_and = true; // flag is distance operation is just COUNT_AND
867 distance_stage(dmit, dmit_end, &is_all_and);
868
869 bm::word_t*** blk_root = bman1.top_blocks_root();
870 unsigned block_idx = 0;
871 unsigned i, j;
872
873 const bm::word_t* blk;
874 const bm::word_t* arg_blk;
875 bool blk_gap;
876 bool arg_gap;
877
878 unsigned top_block_size = bman1.top_block_size();
879 unsigned ebs2 = bman2.top_block_size();
880 unsigned top_size;
881 if (ebs2 > top_block_size)
882 top_size = ebs2;
883 else
884 top_size = top_block_size;
885
886 for (i = 0; i < top_size; ++i)
887 {
888 bm::word_t** blk_blk = (blk_root && (i < top_block_size)) ? blk_root[i] : 0;
889 if (blk_blk == 0) // not allocated
890 {
891 // AND operation requested - we can skip this portion here
892 if (is_all_and)
893 {
894 block_idx += bm::set_sub_array_size;
895 continue;
896 }
897
898 const bm::word_t* const* bvbb = bman2.get_topblock(i);
899 if (bvbb == 0)
900 {
901 block_idx += bm::set_sub_array_size;
902 continue;
903 }
904
905 blk = 0;
906 blk_gap = false;
907
908 for (j = 0; j < bm::set_sub_array_size; ++j,++block_idx)
909 {
910 arg_blk = bman2.get_block(i, j);
911 if (!arg_blk)
912 continue;
913 arg_gap = BM_IS_GAP(arg_blk);
914
916 arg_blk, arg_gap,
917 dmit, dmit_end);
918
919 // check if all distance requests alredy resolved
920 bool all_resolved = false;
922 do
923 {
924 if (!it->result)
925 {
926 all_resolved = false;
927 break;
928 }
929 ++it;
930 } while (it < dmit_end);
931 if (all_resolved)
932 return;
933 } // for j
934
935 continue;
936 }
937
938 for (j = 0; j < bm::set_sub_array_size; ++j, ++block_idx)
939 {
940 blk = bman1.get_block(i, j);
941 if (blk == 0 && is_all_and)
942 continue;
943
944 arg_blk = bman2.get_block(i, j);
945
946 if (blk == 0 && arg_blk == 0)
947 continue;
948
949 arg_gap = BM_IS_GAP(arg_blk);
950 blk_gap = BM_IS_GAP(blk);
951
953 arg_blk, arg_gap,
954 dmit, dmit_end);
955
956 // check if all distance requests alredy resolved
957 bool all_resolved = true;
959 do
960 {
961 if (!it->result)
962 {
963 all_resolved = false;
964 break;
965 }
966 ++it;
967 } while (it < dmit_end);
968 if (all_resolved)
969 return;
970
971 } // for j
972
973 } // for i
974}
975
976
977
978/**
979 \brief Internal algorithms scans the input for the block range limit
980 \internal
981*/
982template<typename It, typename SIZE_TYPE>
983It block_range_scan(It first, It last,
984 SIZE_TYPE nblock, SIZE_TYPE* max_id) BMNOEXCEPT
985{
986 SIZE_TYPE m = *max_id;
987 It right;
988 for (right = first; right != last; ++right)
989 {
990 SIZE_TYPE v = SIZE_TYPE(*right);
992 if (v >= m)
993 m = v;
994 SIZE_TYPE nb = v >> bm::set_block_shift;
995 if (nb != nblock)
996 break;
997 }
998 *max_id = m;
999 return right;
1000}
1001
1002/**
1003 \brief OR Combine bitvector and the iterable sequence
1004
1005 Algorithm combines bvector with sequence of integers
1006 (represents another set). When the agrument set is sorted
1007 this method can give serious increase in performance.
1008
1009 \param bv - destination bitvector
1010 \param first - first element of the iterated sequence
1011 \param last - last element of the iterated sequence
1012
1013 \ingroup setalgo
1014*/
1015template<class BV, class It>
1016void combine_or(BV& bv, It first, It last)
1017{
1018 typedef typename BV::size_type size_type;
1019 typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1020 if (!bman.is_init())
1021 bman.init_tree();
1022
1023 size_type max_id = 0;
1024
1025 while (first < last)
1026 {
1027 typename BV::block_idx_type nblock = (*first) >> bm::set_block_shift;
1028 It right = bm::block_range_scan(first, last, nblock, &max_id);
1029 if (max_id >= bv.size())
1030 {
1031 BM_ASSERT(max_id < bm::id_max);
1032 bv.resize(max_id + 1);
1033 }
1034
1035 // now we have one in-block array of bits to set
1036
1037 label1:
1038
1039 int block_type;
1040 bm::word_t* blk =
1041 bman.check_allocate_block(nblock,
1042 true,
1043 bv.get_new_blocks_strat(),
1044 &block_type);
1045 if (!blk)
1046 continue;
1047
1048 if (block_type == 1) // gap
1049 {
1050 bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
1051 unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
1052
1053 for (; first < right; ++first)
1054 {
1055 unsigned is_set;
1056 unsigned nbit = (*first) & bm::set_block_mask;
1057
1058 unsigned new_block_len =
1059 bm::gap_set_value(true, gap_blk, nbit, &is_set);
1060 if (new_block_len > threshold)
1061 {
1062 bman.extend_gap_block(nblock, gap_blk);
1063 ++first;
1064 goto label1; // block pointer changed - goto reset
1065 }
1066 }
1067 }
1068 else // bit block
1069 {
1070 for (; first < right; ++first)
1071 {
1072 size_type pos = *first;
1073 unsigned nbit = unsigned(pos & bm::set_block_mask);
1074 unsigned nword = unsigned(nbit >> bm::set_word_shift);
1075 nbit &= bm::set_word_mask;
1076 blk[nword] |= (1u << nbit);
1077 } // for
1078 }
1079 } // while
1080}
1081
1082
1083/**
1084 \brief XOR Combine bitvector and the iterable sequence
1085
1086 Algorithm combines bvector with sequence of integers
1087 (represents another set). When the agrument set is sorted
1088 this method can give serious increase in performance.
1089
1090 \param bv - destination bitvector
1091 \param first - first element of the iterated sequence
1092 \param last - last element of the iterated sequence
1093
1094 \ingroup setalgo
1095*/
1096template<class BV, class It>
1097void combine_xor(BV& bv, It first, It last)
1098{
1099 typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1100 if (!bman.is_init())
1101 bman.init_tree();
1102
1103 typename BV::size_type max_id = 0;
1104
1105 while (first < last)
1106 {
1107 typename BV::block_idx_type nblock = ((*first) >> bm::set_block_shift);
1108 It right = block_range_scan(first, last, nblock, &max_id);
1109
1110 if (max_id >= bv.size())
1111 {
1112 BM_ASSERT(max_id < bm::id_max);
1113 bv.resize(max_id + 1);
1114 }
1115
1116 // now we have one in-block array of bits to set
1117
1118 label1:
1119
1120 int block_type;
1121 bm::word_t* blk =
1122 bman.check_allocate_block(nblock,
1123 true,
1124 bv.get_new_blocks_strat(),
1125 &block_type,
1126 false /* no null return */);
1127 BM_ASSERT(blk);
1128
1129 if (block_type == 1) // gap
1130 {
1131 bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
1132 unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
1133
1134 for (; first < right; ++first)
1135 {
1136 unsigned is_set;
1137 unsigned nbit = (*first) & bm::set_block_mask;
1138
1139 is_set = bm::gap_test_unr(gap_blk, nbit);
1140 BM_ASSERT(is_set <= 1);
1141 is_set ^= 1;
1142
1143 unsigned new_block_len =
1144 gap_set_value(is_set, gap_blk, nbit, &is_set);
1145 if (new_block_len > threshold)
1146 {
1147 bman.extend_gap_block(nblock, gap_blk);
1148 ++first;
1149 goto label1; // block pointer changed - goto reset
1150 }
1151 }
1152 }
1153 else // bit block
1154 {
1155 for (; first < right; ++first)
1156 {
1157 unsigned nbit = unsigned(*first & bm::set_block_mask);
1158 unsigned nword = unsigned(nbit >> bm::set_word_shift);
1159 nbit &= bm::set_word_mask;
1160 blk[nword] ^= (1u << nbit);
1161 } // for
1162 }
1163 } // while
1164
1165 bv.forget_count();
1166}
1167
1168
1169
1170/**
1171 \brief SUB Combine bitvector and the iterable sequence
1172
1173 Algorithm combines bvector with sequence of integers
1174 (represents another set). When the agrument set is sorted
1175 this method can give serious increase in performance.
1176
1177 \param bv - destination bitvector
1178 \param first - first element of the iterated sequence
1179 \param last - last element of the iterated sequence
1180
1181 \ingroup setalgo
1182*/
1183template<class BV, class It>
1184void combine_sub(BV& bv, It first, It last)
1185{
1186 typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1187 if (!bman.is_init())
1188 bman.init_tree();
1189
1190 typename BV::size_type max_id = 0;
1191
1192 while (first < last)
1193 {
1194 typename BV::block_idx_type nblock = (*first) >> bm::set_block_shift;
1195 It right = bm::block_range_scan(first, last, nblock, &max_id);
1196
1197 if (max_id >= bv.size())
1198 {
1199 BM_ASSERT(max_id < bm::id_max);
1200 bv.resize(max_id + 1);
1201 }
1202
1203 // now we have one in-block array of bits to set
1204
1205 label1:
1206
1207 int block_type;
1208 bm::word_t* blk =
1209 bman.check_allocate_block(nblock,
1210 false,
1211 bv.get_new_blocks_strat(),
1212 &block_type);
1213
1214 if (!blk)
1215 continue;
1216
1217 if (block_type == 1) // gap
1218 {
1219 bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
1220 unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
1221
1222 for (; first < right; ++first)
1223 {
1224 unsigned is_set;
1225 unsigned nbit = (*first) & bm::set_block_mask;
1226
1227 is_set = bm::gap_test_unr(gap_blk, nbit);
1228 if (!is_set)
1229 continue;
1230
1231 unsigned new_block_len =
1232 gap_set_value(false, gap_blk, nbit, &is_set);
1233 if (new_block_len > threshold)
1234 {
1235 bman.extend_gap_block(nblock, gap_blk);
1236 ++first;
1237 goto label1; // block pointer changed - goto reset
1238 }
1239 }
1240 }
1241 else // bit block
1242 {
1243 for (; first < right; ++first)
1244 {
1245 unsigned nbit = unsigned(*first & bm::set_block_mask);
1246 unsigned nword = unsigned(nbit >> bm::set_word_shift);
1247 nbit &= bm::set_word_mask;
1248 blk[nword] &= ~((bm::word_t)1 << nbit);
1249 } // for
1250 }
1251 } // while
1252
1253 bv.forget_count();
1254}
1255
1256/**
1257 \brief AND Combine bitvector and the iterable sequence
1258
1259 Algorithm combines bvector with sorted sequence of integers
1260 (represents another set).
1261
1262 \param bv - destination bitvector
1263 \param first - first element of the iterated sequence
1264 \param last - last element of the iterated sequence
1265
1266 \ingroup setalgo
1267*/
1268template<class BV, class It>
1269void combine_and_sorted(BV& bv, It first, It last)
1270{
1271 typename BV::size_type prev = 0;
1272 for ( ;first < last; ++first)
1273 {
1274 typename BV::size_type id = *first;
1275 BM_ASSERT(id >= prev); // make sure it's sorted
1276 bv.set_bit_and(id, true);
1277 if (++prev < id)
1278 {
1279 bv.set_range(prev, id-1, false);
1280 }
1281 prev = id;
1282 }
1283}
1284
1285
1286/**
1287 \brief AND Combine bitvector and the iterable sequence
1288
1289 Algorithm combines bvector with sequence of integers
1290 (represents another set). When the agrument set is sorted
1291 this method can give serious increase in performance.
1292
1293 \param bv - destination bitvector
1294 \param first - first element of the iterated sequence
1295 \param last - last element of the iterated sequence
1296
1297 \ingroup setalgo
1298 \sa combine_and_sorted
1299*/
1300template<class BV, class It>
1301void combine_and(BV& bv, It first, It last)
1302{
1303 BV bv_tmp;
1304 combine_or(bv_tmp, first, last);
1305 bv &= bv_tmp;
1306}
1307
1308/*!
1309 \brief Compute number of bit intervals (GAPs) in the bitvector
1310
1311 Algorithm traverses bit vector and count number of uninterrupted
1312 intervals of 1s and 0s.
1313 <pre>
1314 For example:
1315 empty vector - 1 interval
1316 00001111100000 - gives us 3 intervals
1317 10001111100000 - 4 intervals
1318 00000000000000 - 1 interval
1319 11111111111111 - 1 interval
1320 </pre>
1321 \return Number of intervals
1322 \ingroup setalgo
1323*/
1324template<class BV>
1325typename BV::size_type count_intervals(const BV& bv)
1326{
1327 const typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1328
1329 if (!bman.is_init())
1330 return 1;
1331
1332 bm::word_t*** blk_root = bman.top_blocks_root();
1333 typename BV::blocks_manager_type::block_count_change_func func(bman);
1334 typename BV::blocks_manager_type::block_idx_type st = 0;
1335 bm::for_each_block(blk_root, bman.top_block_size(), func, st);
1336
1337 typename BV::size_type intervals = func.count();
1338 bool last_bit_set = bv.test(bm::id_max-1);
1339
1340 intervals -= last_bit_set; // correct last (out of range) interval
1341 return intervals;
1342}
1343
1344/*!
1345 \brief Export bitset from an array of binary data representing
1346 the bit vector.
1347
1348 Input array can be array of ints, chars or other native C types.
1349 Method works with iterators, which makes it compatible with
1350 STL containers and C arrays
1351
1352 \param bv - destination bitvector
1353 \param first - first element of the iterated sequence
1354 \param last - last element of the iterated sequence
1355
1356 \ingroup setalgo
1357*/
1358template<typename BV, class It>
1359void export_array(BV& bv, It first, It last)
1360{
1361 typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1362 if (!bman.is_init())
1363 bman.init_tree();
1364
1365 unsigned inp_word_size = sizeof(*first);
1366 size_t array_size = size_t(last - first);
1367 size_t bit_cnt = array_size * inp_word_size * 8;
1368 int block_type;
1369 bm::word_t tmp;
1370 bm::word_t b1, b2, b3, b4;
1371
1372 if (bit_cnt >= bv.size())
1373 bv.resize((bm::id_t)bit_cnt + 1);
1374 else
1375 bv.set_range((typename BV::size_type)bit_cnt, bv.size() - 1, false);
1376 switch (inp_word_size)
1377 {
1378 case 1:
1379 {
1380 size_t word_cnt = array_size / 4;
1381 for (typename BV::block_idx_type i = 0; i < bm::set_total_blocks; ++i)
1382 {
1383 bm::word_t* blk =
1384 bman.check_allocate_block(i,
1385 false,
1386 BM_BIT,
1387 &block_type,
1388 false /*no NULL ret*/);
1389 if (block_type == 1) // gap
1390 {
1391 blk = bman.deoptimize_block(i);
1392 }
1393
1394 bm::word_t* wrd_ptr = blk;
1395 if (word_cnt >= bm::set_block_size) {
1396 bm::word_t* wrd_end = blk + bm::set_block_size;
1397 do {
1398 b1 = bm::word_t(*first++); b2 = bm::word_t(*first++);
1399 b3 = bm::word_t(*first++); b4 = bm::word_t(*first++);
1400 tmp = b1 | (b2 << 8u) | (b3 << 16u) | (b4 << 24u);
1401 *wrd_ptr++ = tmp;
1402 } while (wrd_ptr < wrd_end);
1403 word_cnt -= bm::set_block_size;
1404 }
1405 else
1406 {
1407 size_t to_convert = size_t(last - first);
1408 for (size_t j = 0; j < to_convert / 4; ++j)
1409 {
1410 b1 = bm::word_t(*first++); b2 = bm::word_t(*first++);
1411 b3 = bm::word_t(*first++); b4 = bm::word_t(*first++);
1412 tmp = b1 | (b2 << 8u) | (b3 << 16u) | (b4 << 24u);
1413 *wrd_ptr++ = tmp;
1414 }
1415 while (1)
1416 {
1417 if (first == last) break;
1418 *wrd_ptr = bm::word_t(*first++);
1419 if (first == last) break;
1420 *wrd_ptr |= bm::word_t(*first++) << 8u;
1421 if (first == last) break;
1422 *wrd_ptr |= bm::word_t(*first++) << 16u;
1423 if (first == last) break;
1424 *wrd_ptr |= bm::word_t(*first++) << 24u;
1425 ++wrd_ptr;
1426 }
1427 }
1428 if (first == last) break;
1429 } // for
1430 }
1431 break;
1432 case 2:
1433 {
1434 size_t word_cnt = array_size / 2;
1435 for (typename BV::block_idx_type i = 0; i < bm::set_total_blocks; ++i)
1436 {
1437 bm::word_t* blk =
1438 bman.check_allocate_block(i,
1439 false,
1440 BM_BIT,
1441 &block_type,
1442 false /*no NULL ret*/);
1443 if (block_type) // gap
1444 blk = bman.deoptimize_block(i);
1445
1446 bm::word_t* wrd_ptr = blk;
1447 if (word_cnt >= bm::set_block_size) {
1448 bm::word_t* wrd_end = blk + bm::set_block_size;
1449 do {
1450 b1 = bm::word_t(*first++); b2 = bm::word_t(*first++);
1451 tmp = b1 | (b2 << 16);
1452 *wrd_ptr++ = tmp;
1453 } while (wrd_ptr < wrd_end);
1454 word_cnt -= bm::set_block_size;
1455 }
1456 else
1457 {
1458 size_t to_convert = size_t(last - first);
1459 for (unsigned j = 0; j < to_convert / 2; ++j)
1460 {
1461 b1 = bm::word_t(*first++); b2 = bm::word_t(*first++);
1462 tmp = b1 | (b2 << 16u);
1463 *wrd_ptr++ = tmp;
1464 }
1465 while (1)
1466 {
1467 if (first == last) break;
1468 *wrd_ptr = bm::word_t(*first++);
1469 if (first == last) break;
1470 *wrd_ptr |= bm::word_t((*first++) << 16u);
1471 ++wrd_ptr;
1472 }
1473 }
1474 if (first == last) break;
1475 } // for
1476 }
1477 break;
1478 case 4:
1479 {
1480 size_t word_cnt = array_size;
1481 for (typename BV::block_idx_type i = 0; i < bm::set_total_blocks; ++i)
1482 {
1483 bm::word_t* blk =
1484 bman.check_allocate_block(i,
1485 false,
1486 BM_BIT,
1487 &block_type,
1488 false /*no NULL ret*/);
1489 if (block_type == 1) // gap
1490 blk = bman.deoptimize_block(i);
1491
1492 bm::word_t* wrd_ptr = blk;
1493 if (word_cnt >= bm::set_block_size) {
1494 bm::word_t* wrd_end = blk + bm::set_block_size;
1495 do
1496 {
1497 *wrd_ptr++ = bm::word_t(*first++);
1498 } while (wrd_ptr < wrd_end);
1499 word_cnt -= bm::set_block_size;
1500 }
1501 else
1502 {
1503 while (1)
1504 {
1505 if (first == last) break;
1506 *wrd_ptr = bm::word_t(*first++);
1507 ++wrd_ptr;
1508 }
1509 }
1510 if (first == last) break;
1511 } // for
1512 }
1513 break;
1514 default:
1515 BM_ASSERT(0);
1516 } // switch
1517
1518}
1519
1520
1521/*!
1522 \brief for-each visitor, calls a visitor functor for each 1 bit group
1523
1524 \param block - bit block buffer pointer
1525 \param offset - global block offset (number of bits)
1526 \param bit_functor - functor must support .add_bits(offset, bits_ptr, size)
1527
1528 @ingroup bitfunc
1529 @internal
1530*/
1531template<typename Func, typename SIZE_TYPE>
1532void for_each_bit_blk(const bm::word_t* block, SIZE_TYPE offset,
1533 Func& bit_functor)
1534{
1535 BM_ASSERT(block);
1536 if (IS_FULL_BLOCK(block))
1537 {
1538 bit_functor.add_range(offset, bm::gap_max_bits);
1539 return;
1540 }
1541 unsigned char bits[bm::set_bitscan_wave_size*32];
1542
1543 SIZE_TYPE offs = offset;
1544 unsigned cnt;
1545 const word_t* block_end = block + bm::set_block_size;
1546 do
1547 {
1548 cnt = bm::bitscan_wave(block, bits);
1549 if (cnt)
1550 bit_functor.add_bits(offs, bits, cnt);
1551 offs += bm::set_bitscan_wave_size * 32;
1553 } while (block < block_end);
1554}
1555
1556/*!
1557 \brief for-each range visitor, calls a visitor functor for each 1 bit group
1558
1559 \param block - bit block buffer pointer
1560 \param offset - global block offset (number of bits)
1561 \param left - bit addredd in block from [from..to]
1562 \param right - bit addredd in block to [from..to]
1563 \param bit_functor - functor must support .add_bits(offset, bits_ptr, size)
1564
1565 @ingroup bitfunc
1566 @internal
1567*/
1568template<typename Func, typename SIZE_TYPE>
1569void for_each_bit_blk(const bm::word_t* block, SIZE_TYPE offset,
1570 unsigned left, unsigned right,
1571 Func& bit_functor)
1572{
1573 BM_ASSERT(block);
1574 BM_ASSERT(left <= right);
1576
1577 if (IS_FULL_BLOCK(block))
1578 {
1579 unsigned sz = right - left + 1;
1580 bit_functor.add_range(offset + left, sz);
1581 return;
1582 }
1583 unsigned char bits[bm::set_bitscan_wave_size*32];
1584
1585 unsigned cnt, nword, nbit, bitcount, temp;
1586 nbit = left & bm::set_word_mask;
1587 const bm::word_t* word =
1588 block + (nword = unsigned(left >> bm::set_word_shift));
1589 if (left == right) // special case (only 1 bit to check)
1590 {
1591 if ((*word >> nbit) & 1u)
1592 {
1593 bits[0] = (unsigned char)nbit;
1594 bit_functor.add_bits(offset + (nword * 32), bits, 1);
1595 }
1596 return;
1597 }
1598
1599 bitcount = right - left + 1u;
1600 if (nbit) // starting position is not aligned
1601 {
1602 unsigned right_margin = nbit + right - left;
1603 if (right_margin < 32)
1604 {
1605 unsigned mask =
1607 block_set_table<true>::_left[right_margin];
1608 temp = (*word & mask);
1609 cnt = bm::bitscan_popcnt(temp, bits);
1610 if (cnt)
1611 bit_functor.add_bits(offset + (nword * 32), bits, cnt);
1612
1613 return;
1614 }
1615 temp = *word & block_set_table<true>::_right[nbit];
1616 cnt = bm::bitscan_popcnt(temp, bits);
1617 if (cnt)
1618 bit_functor.add_bits(offset + (nword * 32), bits, cnt);
1619 bitcount -= 32 - nbit;
1620 ++word; ++nword;
1621 }
1622 else
1623 {
1624 bitcount = right - left + 1u;
1625 }
1627 // now when we are word aligned, we can scan the bit-stream
1628 // loop unrolled to evaluate 4 words at a time
1629 for ( ;bitcount >= 128;
1630 bitcount-=128, word+=bm::set_bitscan_wave_size,
1632 {
1633 cnt = bm::bitscan_wave(word, bits);
1634 if (cnt)
1635 bit_functor.add_bits(offset + (nword * 32), bits, cnt);
1636 } // for
1637
1638 for ( ;bitcount >= 32; bitcount-=32, ++word)
1639 {
1640 temp = *word;
1641 cnt = bm::bitscan_popcnt(temp, bits);
1642 if (cnt)
1643 bit_functor.add_bits(offset + (nword * 32), bits, cnt);
1644 ++nword;
1645 } // for
1646
1647 BM_ASSERT(bitcount < 32);
1648
1649 if (bitcount) // we have a tail to count
1650 {
1651 temp = *word & block_set_table<true>::_left[bitcount-1];
1652 cnt = bm::bitscan_popcnt(temp, bits);
1653 if (cnt)
1654 bit_functor.add_bits(offset + (nword * 32), bits, cnt);
1655 }
1656
1657}
1658
1659
1660
1661/*!
1662 \brief for-each visitor, calls a special visitor functor for each 1 bit range
1663
1664 \param buf - bit block buffer pointer
1665 \param offset - global block offset (number of bits)
1666 \param bit_functor - functor must support .add_range(offset, bits_ptr, size)
1667
1668 @ingroup gapfunc
1669 @internal
1670*/
1671template<typename T, typename Func, typename SIZE_TYPE>
1672void for_each_gap_blk(const T* buf, SIZE_TYPE offset,
1673 Func& bit_functor)
1674{
1675 const T* pcurr = buf + 1;
1676 const T* pend = buf + (*buf >> 3);
1677
1678 if (*buf & 1)
1679 {
1680 bit_functor.add_range(offset, *pcurr + 1);
1681 ++pcurr;
1682 }
1683 for (++pcurr; pcurr <= pend; pcurr += 2)
1684 {
1685 T prev = *(pcurr-1);
1686 bit_functor.add_range(offset + prev + 1, *pcurr - prev);
1687 }
1688}
1689
1690/*!
1691 \brief for-each visitor, calls a special visitor functor for each 1 bit range
1692
1693 \param buf - bit block buffer pointer
1694 \param offset - global block offset (number of bits)
1695 \param left - interval start [left..right]
1696 \param right - intreval end [left..right]
1697 \param bit_functor - functor must support .add_range(offset, bits_ptr, size)
1698
1699 @ingroup gapfunc
1700 @internal
1701*/
1702template<typename T, typename Func, typename SIZE_TYPE>
1704 SIZE_TYPE offset,
1705 unsigned left, unsigned right,
1706 Func& bit_functor)
1707{
1708 BM_ASSERT(left <= right);
1710
1711 unsigned is_set;
1712 unsigned start_pos = bm::gap_bfind(buf, left, &is_set);
1713 const T* BMRESTRICT pcurr = buf + start_pos;
1714
1715 if (is_set)
1716 {
1717 if (right <= *pcurr)
1718 {
1719 bit_functor.add_range(offset + left, (right + 1)-left);
1720 return;
1721 }
1722 bit_functor.add_range(offset + left, (*pcurr + 1)-left);
1723 ++pcurr;
1724 }
1725
1726 const T* BMRESTRICT pend = buf + (*buf >> 3);
1727 for (++pcurr; pcurr <= pend; pcurr += 2)
1728 {
1729 T prev = *(pcurr-1);
1730 if (right <= *pcurr)
1731 {
1732 int sz = int(right) - int(prev);
1733 if (sz > 0)
1734 bit_functor.add_range(offset + prev + 1, unsigned(sz));
1735 return;
1736 }
1737 bit_functor.add_range(offset + prev + 1, *pcurr - prev);
1738 } // for
1739}
1740
1741
1742
1743/*! For each non-zero block in [from, to] executes supplied functor
1744 \internal
1745*/
1746template<typename T, typename N, typename F>
1748 N top_size, N nb_from, N nb_to, F& f)
1749{
1750 BM_ASSERT(top_size);
1751 if (nb_from > nb_to)
1752 return;
1753 unsigned i_from = unsigned(nb_from >> bm::set_array_shift);
1754 unsigned j_from = unsigned(nb_from & bm::set_array_mask);
1755 unsigned i_to = unsigned(nb_to >> bm::set_array_shift);
1756 unsigned j_to = unsigned(nb_to & bm::set_array_mask);
1757
1758 if (i_from >= top_size)
1759 return;
1760 if (i_to >= top_size)
1761 {
1762 i_to = unsigned(top_size-1);
1763 j_to = bm::set_sub_array_size-1;
1764 }
1765
1766 for (unsigned i = i_from; i <= i_to; ++i)
1767 {
1768 T** blk_blk = root[i];
1769 if (!blk_blk)
1770 continue;
1771 if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
1772 {
1773 unsigned j = (i == i_from) ? j_from : 0;
1774 if (!j && (i != i_to)) // full sub-block
1775 {
1776 N base_idx = bm::get_super_block_start<N>(i);
1777 f.add_range(base_idx, bm::set_sub_total_bits);
1778 }
1779 else
1780 {
1781 do
1782 {
1783 N base_idx = bm::get_block_start<N>(i, j);
1784 f.add_range(base_idx, bm::gap_max_bits);
1785 if ((i == i_to) && (j == j_to))
1786 return;
1787 } while (++j < bm::set_sub_array_size);
1788 }
1789 }
1790 else
1791 {
1792 unsigned j = (i == i_from) ? j_from : 0;
1793 do
1794 {
1795 const T* block;
1796 if (blk_blk[j])
1797 {
1798 N base_idx = bm::get_block_start<N>(i, j);
1799 if (0 != (block = blk_blk[j]))
1800 {
1801 if (BM_IS_GAP(block))
1802 {
1803 bm::for_each_gap_blk(BMGAP_PTR(block), base_idx, f);
1804 }
1805 else
1806 {
1807 bm::for_each_bit_blk(block, base_idx, f);
1808 }
1809 }
1810 }
1811
1812 if ((i == i_to) && (j == j_to))
1813 return;
1814 } while (++j < bm::set_sub_array_size);
1815 }
1816 } // for i
1817}
1818
1819
1820/**
1821 Implementation of for_each_bit_range without boilerplave checks
1822 @internal
1823*/
1824template<class BV, class Func>
1826 typename BV::size_type left,
1827 typename BV::size_type right,
1828 Func& bit_functor)
1829{
1830 typedef typename BV::size_type size_type;
1831 typedef typename BV::block_idx_type block_idx_type;
1832
1833 const typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1834 bm::word_t*** blk_root = bman.top_blocks_root();
1835 if (!blk_root)
1836 return;
1837
1838 block_idx_type nblock_left = (left >> bm::set_block_shift);
1839 block_idx_type nblock_right = (right >> bm::set_block_shift);
1840
1841 unsigned i0, j0;
1842 bm::get_block_coord(nblock_left, i0, j0);
1843 const bm::word_t* block = bman.get_block_ptr(i0, j0);
1844 unsigned nbit_left = unsigned(left & bm::set_block_mask);
1845 size_type offset = nblock_left * bm::bits_in_block;
1846
1847 if (nblock_left == nblock_right) // hit in the same block
1848 {
1849 if (!block)
1850 return;
1851 unsigned nbit_right = unsigned(right & bm::set_block_mask);
1852 if (BM_IS_GAP(block))
1853 {
1855 nbit_left, nbit_right, bit_functor);
1856 }
1857 else
1858 {
1859 bm::for_each_bit_blk(block, offset, nbit_left, nbit_right,
1860 bit_functor);
1861 }
1862 return;
1863 }
1864 // process left block
1865 if (nbit_left && block)
1866 {
1867 if (BM_IS_GAP(block))
1868 {
1870 nbit_left, bm::bits_in_block-1, bit_functor);
1871 }
1872 else
1873 {
1874 bm::for_each_bit_blk(block, offset, nbit_left, bm::bits_in_block-1,
1875 bit_functor);
1876 }
1877 ++nblock_left;
1878 }
1879
1880 // process all complete blocks in the middle
1881 {
1882 block_idx_type top_blocks_size = bman.top_block_size();
1883 bm::for_each_bit_block_range(blk_root, top_blocks_size,
1884 nblock_left, nblock_right-1, bit_functor);
1885 }
1886
1887 unsigned nbit_right = unsigned(right & bm::set_block_mask);
1888 bm::get_block_coord(nblock_right, i0, j0);
1889 block = bman.get_block_ptr(i0, j0);
1890
1891 if (block)
1892 {
1893 offset = nblock_right * bm::bits_in_block;
1894 if (BM_IS_GAP(block))
1895 {
1897 0, nbit_right, bit_functor);
1898 }
1899 else
1900 {
1901 bm::for_each_bit_blk(block, offset, 0, nbit_right, bit_functor);
1902 }
1903 }
1904}
1905
1906
1907
1908} // namespace bm
1909
1910#ifdef _MSC_VER
1911#pragma warning( pop )
1912#endif
1913
1914#endif
Definitions(internal)
#define IS_FULL_BLOCK(addr)
Definition bmdef.h:153
#define BMRESTRICT
Definition bmdef.h:193
#define BMNOEXCEPT
Definition bmdef.h:79
#define BMGAP_PTR(ptr)
Definition bmdef.h:179
#define BM_IS_GAP(ptr)
Definition bmdef.h:181
#define BLOCK_ADDR_SAN(addr)
Definition bmdef.h:151
#define BM_ASSERT
Definition bmdef.h:130
#define FULL_BLOCK_FAKE_ADDR
Definition bmdef.h:149
#define BM_VECT_ALIGN_ATTR
Definition bmdef.h:360
#define FULL_SUB_BLOCK_REAL_ADDR
Definition bmdef.h:150
#define BM_VECT_ALIGN
Definition bmdef.h:359
Bit manipulation primitives (internal)
bm::id_t bit_operation_and_any(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
Performs bitblock AND operation test.
Definition bmfunc.h:6564
bm::id_t bit_operation_or_any(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
Performs bitblock OR operation test.
Definition bmfunc.h:6716
bm::id_t bit_block_count(const bm::word_t *block) BMNOEXCEPT
Bitcount for bit block.
Definition bmfunc.h:4379
bm::id_t bit_operation_xor_any(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
Performs bitblock XOR operation test.
Definition bmfunc.h:7381
bm::id_t bit_operation_sub_any(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
Performs bitblock test of SUB operation.
Definition bmfunc.h:6644
unsigned short bitscan_wave(const bm::word_t *BMRESTRICT w_ptr, unsigned char *BMRESTRICT bits) BMNOEXCEPT
Unpacks word wave (Nx 32-bit words)
Definition bmfunc.h:8433
void for_each_bit_blk(const bm::word_t *block, SIZE_TYPE offset, Func &bit_functor)
for-each visitor, calls a visitor functor for each 1 bit group
bool bit_is_all_zero(const bm::word_t *BMRESTRICT start) BMNOEXCEPT
Returns "true" if all bits in the block are 0.
Definition bmfunc.h:1045
unsigned short bitscan_popcnt(bm::id_t w, B *bits, unsigned short offs) BMNOEXCEPT
Unpacks word into list of ON bit indexes using popcnt method.
Definition bmfunc.h:475
bm::id_t bit_operation_and_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
Performs bitblock AND operation and calculates bitcount of the result.
Definition bmfunc.h:6540
bm::id_t bit_operation_sub_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
Performs bitblock SUB operation and calculates bitcount of the result.
Definition bmfunc.h:6589
set_operation
Codes of set operations.
Definition bmconst.h:153
@ set_COUNT_SUB_AB
Definition bmconst.h:163
@ set_COUNT_XOR
Definition bmconst.h:161
@ set_COUNT_OR
Definition bmconst.h:162
@ set_COUNT_B
Definition bmconst.h:166
@ set_COUNT_SUB_BA
Definition bmconst.h:164
@ set_COUNT_AND
Definition bmconst.h:160
@ set_COUNT
Definition bmconst.h:159
@ set_COUNT_A
Definition bmconst.h:165
@ BM_BIT
No GAP compression strategy. All new blocks are bit blocks.
Definition bmconst.h:143
void distance_operation(const BV &bv1, const BV &bv2, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end) BMNOEXCEPT
Distance computing template function.
void distance_operation_any(const BV &bv1, const BV &bv2, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end) BMNOEXCEPT
Distance screening template function.
distance_metric
Distance metrics codes defined for vectors A and B.
Definition bmalgo_impl.h:58
BV::size_type distance_and_operation(const BV &bv1, const BV &bv2) BMNOEXCEPT
Distance AND computing template function.
distance_metric operation2metric(set_operation op) BMNOEXCEPT
Convert set operation into compatible distance metric.
Definition bmalgo_impl.h:73
@ COUNT_XOR
(A ^ B).count()
Definition bmalgo_impl.h:60
@ COUNT_SUB_AB
(A - B).count()
Definition bmalgo_impl.h:62
@ COUNT_A
A.count()
Definition bmalgo_impl.h:64
@ COUNT_B
B.count()
Definition bmalgo_impl.h:65
@ COUNT_AND
(A & B).count()
Definition bmalgo_impl.h:59
@ COUNT_OR
(A | B).count()
Definition bmalgo_impl.h:61
@ COUNT_SUB_BA
(B - A).count()
Definition bmalgo_impl.h:63
gap_word_t * gap_operation_or(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2, gap_word_t *BMRESTRICT tmp_buf, unsigned &dsize) BMNOEXCEPT
GAP OR operation.
Definition bmfunc.h:5790
unsigned gap_test_unr(const T *BMRESTRICT buf, const unsigned pos) BMNOEXCEPT
Tests if bit = pos is true. Analog of bm::gap_test with SIMD unrolling.
Definition bmfunc.h:1298
BMFORCEINLINE unsigned gap_operation_any_xor(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2) BMNOEXCEPT
GAP XOR operation test.
Definition bmfunc.h:5749
void for_each_gap_blk_range(const T *BMRESTRICT buf, SIZE_TYPE offset, unsigned left, unsigned right, Func &bit_functor)
for-each visitor, calls a special visitor functor for each 1 bit range
unsigned gap_bit_count_unr(const T *buf) BMNOEXCEPT
Calculates number of bits ON in GAP buffer. Loop unrolled version.
Definition bmfunc.h:1801
unsigned gap_set_value(unsigned val, T *BMRESTRICT buf, unsigned pos, unsigned *BMRESTRICT is_set) BMNOEXCEPT
Sets or clears bit in the GAP buffer.
Definition bmfunc.h:2661
bm::id_t gap_bitset_or_any(const unsigned *BMRESTRICT block, const T *BMRESTRICT buf) BMNOEXCEPT
Compute bitcount test of bit block OR masked by GAP block.
Definition bmfunc.h:3822
bm::id_t gap_bitset_xor_count(const unsigned *BMRESTRICT block, const T *BMRESTRICT buf) BMNOEXCEPT
Compute bitcount of bit block XOR masked by GAP block.
Definition bmfunc.h:3714
bm::id_t gap_bitset_and_count(const unsigned *BMRESTRICT block, const T *BMRESTRICT pcurr) BMNOEXCEPT
Compute bitcount of bit block AND masked by GAP block.
Definition bmfunc.h:3583
BMFORCEINLINE unsigned gap_operation_any_sub(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2) BMNOEXCEPT
GAP SUB operation test.
Definition bmfunc.h:5860
bm::id_t gap_bitset_sub_count(const unsigned *BMRESTRICT block, const T *BMRESTRICT buf) BMNOEXCEPT
Compute bitcount of bit block SUB masked by GAP block.
Definition bmfunc.h:3641
BMFORCEINLINE unsigned gap_operation_any_and(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2) BMNOEXCEPT
GAP AND operation test.
Definition bmfunc.h:5670
BMFORCEINLINE unsigned gap_count_xor(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2) BMNOEXCEPT
GAP bitcount XOR operation test.
Definition bmfunc.h:5765
bm::id_t gap_bitset_sub_any(const unsigned *BMRESTRICT block, const T *BMRESTRICT buf) BMNOEXCEPT
Compute bitcount test of bit block SUB masked by GAP block.
Definition bmfunc.h:3676
void for_each_gap_blk(const T *buf, SIZE_TYPE offset, Func &bit_functor)
for-each visitor, calls a special visitor functor for each 1 bit range
bm::id_t gap_bitset_xor_any(const unsigned *BMRESTRICT block, const T *BMRESTRICT buf) BMNOEXCEPT
Compute bitcount test of bit block XOR masked by GAP block.
Definition bmfunc.h:3752
bm::id_t gap_bitset_or_count(const unsigned *BMRESTRICT block, const T *BMRESTRICT buf) BMNOEXCEPT
Compute bitcount of bit block OR masked by GAP block.
Definition bmfunc.h:3790
unsigned gap_count_and(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2) BMNOEXCEPT
GAP bitcount AND operation test.
Definition bmfunc.h:5687
BMFORCEINLINE bool gap_is_all_zero(const bm::gap_word_t *BMRESTRICT buf) BMNOEXCEPT
Checks if GAP block is all-zero.
Definition bmfunc.h:1072
void gap_convert_to_bitset(unsigned *BMRESTRICT dest, const T *BMRESTRICT buf) BMNOEXCEPT
GAP block to bitblock conversion.
Definition bmfunc.h:3859
BMFORCEINLINE unsigned gap_count_sub(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2) BMNOEXCEPT
GAP bitcount SUB (AND NOT) operation test.
Definition bmfunc.h:5877
bm::id_t gap_bitset_and_any(const unsigned *BMRESTRICT block, const T *BMRESTRICT pcurr) BMNOEXCEPT
Bitcount test of bit block AND masked by GAP block.
Definition bmfunc.h:3611
unsigned gap_limit(const T *BMRESTRICT buf, const T *BMRESTRICT glevel_len) BMNOEXCEPT
Returs GAP block capacity limit.
Definition bmfunc.h:1130
BMFORCEINLINE unsigned gap_count_or(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2) BMNOEXCEPT
GAP bitcount OR operation test.
Definition bmfunc.h:5810
void combine_and_sorted(BV &bv, It first, It last)
AND Combine bitvector and the iterable sequence.
void export_array(BV &bv, It first, It last)
Export bitset from an array of binary data representing the bit vector.
void combine_and(BV &bv, It first, It last)
AND Combine bitvector and the iterable sequence.
void combine_sub(BV &bv, It first, It last)
SUB Combine bitvector and the iterable sequence.
void combine_xor(BV &bv, It first, It last)
XOR Combine bitvector and the iterable sequence.
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
BV::size_type count_intervals(const BV &bv)
Compute number of bit intervals (GAPs) in the bitvector.
Definition bm.h:77
const unsigned set_array_mask
Definition bmconst.h:96
It block_range_scan(It first, It last, SIZE_TYPE nblock, SIZE_TYPE *max_id) BMNOEXCEPT
Internal algorithms scans the input for the block range limit.
bm::id_t(* bit_operation_count_func_type)(const bm::word_t *BMRESTRICT, const bm::word_t *BMRESTRICT)
Definition bmfunc.h:8345
void for_each_bit_block_range(T ***root, N top_size, N nb_from, N nb_to, F &f)
const unsigned id_max
Definition bmconst.h:108
unsigned int word_t
Definition bmconst.h:38
const unsigned set_block_mask
Definition bmconst.h:56
unsigned gap_bfind(const T *BMRESTRICT buf, unsigned pos, unsigned *BMRESTRICT is_set) BMNOEXCEPT
Definition bmfunc.h:1220
const unsigned set_sub_array_size
Definition bmconst.h:94
void combine_count_operation_with_block(const bm::word_t *blk, const bm::word_t *arg_blk, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end) BMNOEXCEPT
Internal function computes different distance metrics.
const unsigned set_total_blocks
Definition bmconst.h:110
void combine_any_operation_with_block(const bm::word_t *blk, unsigned gap, const bm::word_t *arg_blk, unsigned arg_gap, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end) BMNOEXCEPT
Internal function computes different existense of distance metric.
const unsigned set_word_shift
Definition bmconst.h:71
const unsigned set_sub_total_bits
Definition bmconst.h:99
const unsigned set_block_size
Definition bmconst.h:54
unsigned long long int id64_t
Definition bmconst.h:34
void distance_stage(const distance_metric_descriptor *dmit, const distance_metric_descriptor *dmit_end, bool *is_all_and) BMNOEXCEPT
Staging function for distance operation.
unsigned int id_t
Definition bmconst.h:37
const unsigned gap_max_buff_len
Definition bmconst.h:79
T min_value(T v1, T v2) BMNOEXCEPT
Get minimum of 2 values.
Definition bmutil.h:101
const unsigned short set_bitscan_wave_size
Size of bit decode wave in words.
Definition bmfunc.h:8421
const unsigned set_array_shift
Definition bmconst.h:95
unsigned short gap_word_t
Definition bmconst.h:77
const unsigned gap_max_bits
Definition bmconst.h:80
void get_block_coord(BI_TYPE nb, unsigned &i, unsigned &j) BMNOEXCEPT
Recalc linear bvector block index into 2D matrix coordinates.
Definition bmfunc.h:147
void for_each_block(T ***root, unsigned size1, F &f, BLOCK_IDX start)
Definition bmfunc.h:1647
const unsigned set_block_shift
Definition bmconst.h:55
unsigned combine_count_and_operation_with_block(const bm::word_t *blk, const bm::word_t *arg_blk) BMNOEXCEPT
Internal function computes AND distance.
const unsigned set_word_mask
Definition bmconst.h:72
const unsigned bits_in_block
Definition bmconst.h:113
void for_each_bit_range_no_check(const BV &bv, typename BV::size_type left, typename BV::size_type right, Func &bit_functor)
Implementation of for_each_bit_range without boilerplave checks.
bool is_const_set_operation(set_operation op) BMNOEXCEPT
Returns true if set operation is constant (bitcount)
Definition bmfunc.h:817
Structure keeps all-left/right ON bits masks.
Definition bmconst.h:343
Distance metric descriptor, holds metric code and result.
Definition bmalgo_impl.h:88
distance_metric_descriptor(distance_metric m) BMNOEXCEPT
Definition bmalgo_impl.h:98
distance_metric_descriptor() BMNOEXCEPT
void reset() BMNOEXCEPT
Sets metric result to 0.
static bit_operation_count_func_type bit_operation_count(unsigned i)
Definition bmfunc.h:8372