BitMagic-C++

Processor specific optimizations for AVX2 instructions (internals). More...

Collaboration diagram for AVX2 functions:

Functions

bm::id_t bm::avx2_bit_count (const __m256i *BMRESTRICT block, const __m256i *BMRESTRICT block_end)
 AVX2 Harley-Seal popcount. The algorithm is based on the paper "Faster Population Counts using AVX2 Instructions" by Daniel Lemire, Nathan Kurz and Wojciech Mula (23 Nov 2016).
bm::id_t bm::avx2_bit_block_count (const bm::word_t *const block, bm::id64_t digest)
 Calculate population count based on digest.
bm::id_t bm::avx2_bit_count_and (const __m256i *BMRESTRICT block, const __m256i *BMRESTRICT block_end, const __m256i *BMRESTRICT mask_block)
 AND bit count for two aligned bit-blocks.
bm::id_t bm::avx2_bit_count_xor (const __m256i *BMRESTRICT block, const __m256i *BMRESTRICT block_end, const __m256i *BMRESTRICT mask_block)
 XOR bit count for two aligned bit-blocks.
bm::id_t bm::avx2_bit_count_sub (const __m256i *BMRESTRICT block, const __m256i *BMRESTRICT block_end, const __m256i *BMRESTRICT mask_block)
 AND NOT bit count for two aligned bit-blocks.
void bm::avx2_xor_arr_2_mask (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src, const __m256i *BMRESTRICT src_end, bm::word_t mask)
 XOR array elements to specified mask dst = *src ^ mask.
void bm::avx2_andnot_arr_2_mask (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src, const __m256i *BMRESTRICT src_end, bm::word_t mask)
 Inverts array elements and ANDs them with the specified mask: dst = ~*src & mask.
unsigned bm::avx2_and_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src)
 AND array elements against another array dst &= *src.
bool bm::avx2_and_digest (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src)
 AND block digest stride dst &= *src.
bool bm::avx2_and_digest_2way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2)
 AND block digest stride 2 way dst = *src1 & *src2.
bool bm::avx2_and_or_digest_2way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2)
 AND-OR block digest stride 2 way dst |= *src1 & *src2.
bool bm::avx2_and_digest_5way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2, const __m256i *BMRESTRICT src3, const __m256i *BMRESTRICT src4)
 AND block digest stride.
bool bm::avx2_and_digest_3way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2)
 AND block digest stride.
unsigned bm::avx2_and_arr_unal (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src, const __m256i *BMRESTRICT src_end)
 AND array elements against another array (unaligned) dst &= *src.
bool bm::avx2_or_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src)
 OR array elements against another array dst |= *src.
bool bm::avx2_or_arr_unal (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src, const __m256i *BMRESTRICT src_end)
 OR array elements against another unaligned array dst |= *src.
bool bm::avx2_or_block_2way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2)
 OR 2 arrays and copy to the destination dst = *src1 | *src2.
bool bm::avx2_or_block_3way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2)
 OR array elements against another 2 arrays dst |= *src1 | *src2.
bool bm::avx2_or_block_5way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2, const __m256i *BMRESTRICT src3, const __m256i *BMRESTRICT src4)
 OR array elements against another 4 arrays dst |= *src1 | *src2 | *src3 | *src4.
unsigned bm::avx2_xor_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src)
 XOR block against another dst ^= *src.
unsigned bm::avx2_xor_block_2way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2)
 3 operand XOR dst = *src1 ^ *src2
unsigned bm::avx2_sub_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src)
 AND-NOT (SUB) array elements against another array dst &= ~*src.
bool bm::avx2_sub_digest (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src)
 SUB (AND NOT) block digest stride dst &= ~*src.
bool bm::avx2_sub_digest_2way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2)
 2-operand SUB (AND NOT) block digest stride dst = *src1 & ~*src2
bool bm::avx2_sub_digest_5way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2, const __m256i *BMRESTRICT src3, const __m256i *BMRESTRICT src4)
 SUB block digest stride.
bool bm::avx2_sub_digest_3way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2)
 SUB block digest stride.
BMFORCEINLINE void bm::avx2_set_block (__m256i *BMRESTRICT dst, bm::word_t value)
 AVX2 block memset dst = value.
void bm::avx2_copy_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src)
 AVX2 block copy dst = *src.
void bm::avx2_copy_block_unalign (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src)
 AVX2 block copy (unaligned SRC) dst = *src.
void bm::avx2_stream_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src)
 AVX2 block copy dst = *src.
void bm::avx2_stream_block_unalign (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src)
 AVX2 block copy (unaligned SRC) dst = *src.
void bm::avx2_invert_block (__m256i *BMRESTRICT dst)
 Invert bit-block: dst = ~*dst (equivalent to XOR of *dst with an all-ones mask).
bool bm::avx2_is_all_zero (const __m256i *BMRESTRICT block)
 check if block is all zero bits
bool bm::avx2_is_digest_zero (const __m256i *BMRESTRICT block)
 check if digest stride is all zero bits
void bm::avx2_block_set_digest (__m256i *dst, unsigned value)
 set digest stride to 0xFF.. or 0x0 value
bool bm::avx2_is_all_one (const __m256i *BMRESTRICT block)
 check if block is all one bits
BMFORCEINLINE bool bm::avx2_test_all_one_wave (const void *ptr)
 check if wave of pointers is all 0xFFF
BMFORCEINLINE bool bm::avx2_test_all_zero_wave (const void *ptr)
 check if wave of pointers is all NULL
BMFORCEINLINE bool bm::avx2_test_all_zero_wave2 (const void *ptr0, const void *ptr1)
 check if 2 waves of pointers are all NULL
BMFORCEINLINE bool bm::avx2_test_all_eq_wave2 (const void *ptr0, const void *ptr1)
 check if 2 waves of pointers are all the same (NULL or FULL)
bool bm::avx2_shift_l1 (__m256i *block, bm::word_t *empty_acc, unsigned co1)
 block shift left by 1
bool bm::avx2_shift_r1 (__m256i *block, bm::word_t *empty_acc, unsigned co1)
 block shift right by 1
bool bm::avx2_shift_r1_and (__m256i *BMRESTRICT block, bm::word_t co1, const __m256i *BMRESTRICT mask_block, bm::id64_t *BMRESTRICT digest)
 fused block shift right by 1 plus AND
unsigned bm::avx2_bit_block_calc_change (const __m256i *BMRESTRICT block, unsigned size)
void bm::avx2_bit_block_calc_xor_change (const __m256i *BMRESTRICT block, const __m256i *BMRESTRICT xor_block, unsigned size, unsigned *BMRESTRICT gcount, unsigned *BMRESTRICT bcount)
void bm::avx2_bit_block_calc_change_bc (const __m256i *BMRESTRICT block, unsigned *gcount, unsigned *bcount)
bool bm::avx2_bit_find_first_diff (const __m256i *BMRESTRICT block1, const __m256i *BMRESTRICT block2, unsigned *pos)
 Find first bit which is different between two bit-blocks.
bool bm::avx2_bit_find_first (const __m256i *BMRESTRICT block, unsigned off, unsigned *pos)
 Find first bit set.
int bm::avx2_cmpge_u32 (__m256i vect8, unsigned value)
 Experimental (test) function to do SIMD vector search (lower bound) in sorted, growing array.
int bm::avx2_cmpge_u16 (__m256i vect16, unsigned short value)
 Experimental (test) function to do SIMD vector search in sorted, growing array.
template<bool RET_TEST = false>
unsigned bm::avx2_gap_bfind (const unsigned short *BMRESTRICT buf, unsigned pos, unsigned *BMRESTRICT is_set)
 Hybrid binary search, starts as binary, then switches to scan.
unsigned bm::avx2_gap_test (const unsigned short *BMRESTRICT buf, unsigned pos)
 Hybrid binary search, starts as binary, then switches to scan.
unsigned bm::avx2_lower_bound_scan_u32 (const unsigned *BMRESTRICT arr, unsigned target, unsigned from, unsigned to)
 lower bound (greater or equal) linear scan in ascending order sorted array
unsigned bm::avx2_bit_to_gap (gap_word_t *BMRESTRICT dest, const unsigned *BMRESTRICT block, unsigned dest_len)
 Convert bit block to GAP block.
void bm::avx2_bit_block_xor (bm::word_t *target_block, const bm::word_t *block, const bm::word_t *xor_block, bm::id64_t digest)
 Build partial XOR product of 2 bit-blocks using digest mask.
void bm::avx2_bit_block_xor_2way (bm::word_t *target_block, const bm::word_t *xor_block, bm::id64_t digest) BMNOEXCEPT
 Build partial XOR product of 2 bit-blocks using digest mask.

Detailed Description

Processor specific optimizations for AVX2 instructions (internals).

Function Documentation

◆ avx2_and_arr_unal()

unsigned bm::avx2_and_arr_unal ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src,
const __m256i *BMRESTRICT src_end )
inline

AND array elements against another array (unaligned) dst &= *src.

Returns
0 if destination does not have any bits

Definition at line 777 of file bmavx2.h.

References BMRESTRICT.

Referenced by bm::decoder::get_32_AND().

◆ avx2_and_block()

unsigned bm::avx2_and_block ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src )
inline

AND array elements against another array dst &= *src.

Returns
0 if destination does not have any bits

Definition at line 496 of file bmavx2.h.

References BMRESTRICT, and set_block_size.

◆ avx2_and_digest()

bool bm::avx2_and_digest ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src )
inline

AND block digest stride dst &= *src.

Returns
true if stride is all zero

Definition at line 543 of file bmavx2.h.

References BMRESTRICT.

◆ avx2_and_digest_2way()

bool bm::avx2_and_digest_2way ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src1,
const __m256i *BMRESTRICT src2 )
inline

AND block digest stride 2 way dst = *src1 & *src2.

Returns
true if stride is all zero

Definition at line 573 of file bmavx2.h.

References BMRESTRICT.

◆ avx2_and_digest_3way()

bool bm::avx2_and_digest_3way ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src1,
const __m256i *BMRESTRICT src2 )
inline

AND block digest stride.

Definition at line 727 of file bmavx2.h.

References BMRESTRICT.

◆ avx2_and_digest_5way()

bool bm::avx2_and_digest_5way ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src1,
const __m256i *BMRESTRICT src2,
const __m256i *BMRESTRICT src3,
const __m256i *BMRESTRICT src4 )
inline

AND block digest stride.

Definition at line 659 of file bmavx2.h.

References BMRESTRICT.

◆ avx2_and_or_digest_2way()

bool bm::avx2_and_or_digest_2way ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src1,
const __m256i *BMRESTRICT src2 )
inline

AND-OR block digest stride 2 way dst |= *src1 & *src2.

Returns
true if stride is all zero

Definition at line 604 of file bmavx2.h.

References BMRESTRICT.

◆ avx2_andnot_arr_2_mask()

void bm::avx2_andnot_arr_2_mask ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src,
const __m256i *BMRESTRICT src_end,
bm::word_t mask )
inline

Inverts array elements and ANDs them with the specified mask: dst = ~*src & mask.

Definition at line 472 of file bmavx2.h.

References BMRESTRICT.

◆ avx2_bit_block_calc_change()

unsigned bm::avx2_bit_block_calc_change ( const __m256i *BMRESTRICT block,
unsigned size )
inline

AVX2 calculate number of bit changes from 0 to 1

Definition at line 2083 of file bmavx2.h.

References BM_ALIGN32, BM_ALIGN32ATTR, BM_AVX2_BIT_COUNT, BM_AVX2_POPCNT_PROLOG, and BMRESTRICT.

◆ avx2_bit_block_calc_change_bc()

void bm::avx2_bit_block_calc_change_bc ( const __m256i *BMRESTRICT block,
unsigned * gcount,
unsigned * bcount )
inline

AVX2 calculate number of bit changes from 0 to 1 and bitcount

Definition at line 2251 of file bmavx2.h.

References BM_ALIGN32, BM_ALIGN32ATTR, BM_AVX2_BIT_COUNT, BM_AVX2_POPCNT_PROLOG, BMRESTRICT, and set_block_size.

◆ avx2_bit_block_calc_xor_change()

void bm::avx2_bit_block_calc_xor_change ( const __m256i *BMRESTRICT block,
const __m256i *BMRESTRICT xor_block,
unsigned size,
unsigned *BMRESTRICT gcount,
unsigned *BMRESTRICT bcount )
inline

AVX2 calculate number of bit changes from 0 to 1 from a XOR product

Definition at line 2154 of file bmavx2.h.

References BM_ALIGN32, BM_ALIGN32ATTR, BM_AVX2_BIT_COUNT, BM_AVX2_POPCNT_PROLOG, and BMRESTRICT.

◆ avx2_bit_block_count()

bm::id_t bm::avx2_bit_block_count ( const bm::word_t *const block,
bm::id64_t digest )
inline

Calculate population count based on digest.

Returns
popcnt

Definition at line 232 of file bmavx2.h.

References BM_AVX2_BIT_COUNT, BM_AVX2_POPCNT_PROLOG, bmi_blsi_u64(), bmi_bslr_u64(), BMRESTRICT, and set_block_digest_wave_size.

◆ avx2_bit_block_xor()

void bm::avx2_bit_block_xor ( bm::word_t * target_block,
const bm::word_t * block,
const bm::word_t * xor_block,
bm::id64_t digest )
inline

Build partial XOR product of 2 bit-blocks using digest mask.

Parameters
target_block- target := block ^ xor_block
block- arg1
xor_block- arg2
digest- mask for each block wave to XOR (1) or just copy (0)

Definition at line 3341 of file bmavx2.h.

References block_waves, and set_block_digest_wave_size.

◆ avx2_bit_block_xor_2way()

void bm::avx2_bit_block_xor_2way ( bm::word_t * target_block,
const bm::word_t * xor_block,
bm::id64_t digest )
inline

Build partial XOR product of 2 bit-blocks using digest mask.

Parameters
target_block- target ^= xor_block
xor_block- arg1
digest- mask for each block wave to XOR (1)

Definition at line 3392 of file bmavx2.h.

References bmi_blsi_u64(), bmi_bslr_u64(), BMNOEXCEPT, and set_block_digest_wave_size.

◆ avx2_bit_count()

bm::id_t bm::avx2_bit_count ( const __m256i *BMRESTRICT block,
const __m256i *BMRESTRICT block_end )
inline

AVX2 Harley-Seal popcount. The algorithm is based on the paper "Faster Population Counts using AVX2 Instructions" by Daniel Lemire, Nathan Kurz and Wojciech Mula (23 Nov 2016).

See also
https://arxiv.org/abs/1611.07612

Definition at line 156 of file bmavx2.h.

References BM_AVX2_BIT_COUNT, BM_AVX2_POPCNT_PROLOG, BM_CSA256, and BMRESTRICT.

◆ avx2_bit_count_and()

bm::id_t bm::avx2_bit_count_and ( const __m256i *BMRESTRICT block,
const __m256i *BMRESTRICT block_end,
const __m256i *BMRESTRICT mask_block )
inline

AND bit count for two aligned bit-blocks.

Definition at line 290 of file bmavx2.h.

References BM_AVX2_BIT_COUNT, BM_AVX2_POPCNT_PROLOG, and BMRESTRICT.

◆ avx2_bit_count_sub()

bm::id_t bm::avx2_bit_count_sub ( const __m256i *BMRESTRICT block,
const __m256i *BMRESTRICT block_end,
const __m256i *BMRESTRICT mask_block )
inline

AND NOT bit count for two aligned bit-blocks.

Definition at line 413 of file bmavx2.h.

References BM_AVX2_BIT_COUNT, BM_AVX2_POPCNT_PROLOG, and BMRESTRICT.

◆ avx2_bit_count_xor()

bm::id_t bm::avx2_bit_count_xor ( const __m256i *BMRESTRICT block,
const __m256i *BMRESTRICT block_end,
const __m256i *BMRESTRICT mask_block )
inline

XOR bit count for two aligned bit-blocks.

Definition at line 368 of file bmavx2.h.

References BM_AVX2_BIT_COUNT, BM_AVX2_POPCNT_PROLOG, and BMRESTRICT.

◆ avx2_bit_find_first()

bool bm::avx2_bit_find_first ( const __m256i *BMRESTRICT block,
unsigned off,
unsigned * pos )
inline

Find first bit set.

Definition at line 2394 of file bmavx2.h.

References BM_ALIGN32, BM_ALIGN32ATTR, BM_ASSERT, BMRESTRICT, and set_block_size.

◆ avx2_bit_find_first_diff()

bool bm::avx2_bit_find_first_diff ( const __m256i *BMRESTRICT block1,
const __m256i *BMRESTRICT block2,
unsigned * pos )
inline

Find first bit which is different between two bit-blocks.

Definition at line 2336 of file bmavx2.h.

References BM_ALIGN32, BM_ALIGN32ATTR, BM_ASSERT, BMRESTRICT, and set_block_size.

◆ avx2_bit_to_gap()

unsigned bm::avx2_bit_to_gap ( gap_word_t *BMRESTRICT dest,
const unsigned *BMRESTRICT block,
unsigned dest_len )
inline

Convert bit block to GAP block.

Definition at line 3227 of file bmavx2.h.

References BM_ASSERT, BMRESTRICT, and set_block_size.

◆ avx2_block_set_digest()

void bm::avx2_block_set_digest ( __m256i * dst,
unsigned value )
inline

set digest stride to 0xFF.. or 0x0 value

Definition at line 1752 of file bmavx2.h.

◆ avx2_cmpge_u16()

int bm::avx2_cmpge_u16 ( __m256i vect16,
unsigned short value )
inline

Experimental (test) function to do SIMD vector search in sorted, growing array.

Definition at line 2907 of file bmavx2.h.

◆ avx2_cmpge_u32()

int bm::avx2_cmpge_u32 ( __m256i vect8,
unsigned value )
inline

Experimental (test) function to do SIMD vector search (lower bound) in sorted, growing array.

Definition at line 2875 of file bmavx2.h.

◆ avx2_copy_block()

void bm::avx2_copy_block ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src )
inline

AVX2 block copy dst = *src.

Definition at line 1503 of file bmavx2.h.

References BMRESTRICT, and set_block_size.

◆ avx2_copy_block_unalign()

void bm::avx2_copy_block_unalign ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src )
inline

AVX2 block copy (unaligned SRC) dst = *src.

Definition at line 1545 of file bmavx2.h.

References BMRESTRICT, and set_block_size.

◆ avx2_gap_bfind()

template<bool RET_TEST = false>
unsigned bm::avx2_gap_bfind ( const unsigned short *BMRESTRICT buf,
unsigned pos,
unsigned *BMRESTRICT is_set )

Hybrid binary search, starts as binary, then switches to scan.

NOTE: AVX code uses _mm256_subs_epu16 - saturated subtraction, which yields A-B=0 when A < B (never a negative value).

Parameters
buf- GAP buffer pointer.
pos- index of the element.
is_set- output. GAP value (0 or 1).
Returns
GAP index OR bit-test

Definition at line 2939 of file bmavx2.h.

References BM_ASSERT, BMRESTRICT, and gap_max_bits.

Referenced by avx2_gap_test().

◆ avx2_gap_test()

unsigned bm::avx2_gap_test ( const unsigned short *BMRESTRICT buf,
unsigned pos )
inline

Hybrid binary search, starts as binary, then switches to scan.

Definition at line 3057 of file bmavx2.h.

References avx2_gap_bfind(), and BMRESTRICT.

◆ avx2_invert_block()

void bm::avx2_invert_block ( __m256i *BMRESTRICT dst)
inline

Invert bit-block: dst = ~*dst (equivalent to XOR of *dst with an all-ones mask).

Definition at line 1677 of file bmavx2.h.

References BMRESTRICT, and set_block_size.

◆ avx2_is_all_one()

bool bm::avx2_is_all_one ( const __m256i *BMRESTRICT block)
inline

check if block is all one bits

Returns
true if all bits are 1

Definition at line 1767 of file bmavx2.h.

References BMRESTRICT, and set_block_size.

◆ avx2_is_all_zero()

bool bm::avx2_is_all_zero ( const __m256i *BMRESTRICT block)
inline

check if block is all zero bits

Definition at line 1708 of file bmavx2.h.

References BMRESTRICT, and set_block_size.

◆ avx2_is_digest_zero()

bool bm::avx2_is_digest_zero ( const __m256i *BMRESTRICT block)
inline

check if digest stride is all zero bits

Definition at line 1738 of file bmavx2.h.

References BMRESTRICT.

◆ avx2_lower_bound_scan_u32()

unsigned bm::avx2_lower_bound_scan_u32 ( const unsigned *BMRESTRICT arr,
unsigned target,
unsigned from,
unsigned to )
inline

lower bound (greater or equal) linear scan in ascending order sorted array

Definition at line 3068 of file bmavx2.h.

References BMRESTRICT.

◆ avx2_or_arr_unal()

bool bm::avx2_or_arr_unal ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src,
const __m256i *BMRESTRICT src_end )
inline

OR array elements against another unaligned array dst |= *src.

Returns
true if all bits are 1

Definition at line 888 of file bmavx2.h.

References BMRESTRICT.

Referenced by bm::decoder::get_32_OR().

◆ avx2_or_block()

bool bm::avx2_or_block ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src )
inline

OR array elements against another array dst |= *src.

Returns
true if all bits are 1

Definition at line 835 of file bmavx2.h.

References BMRESTRICT, and set_block_size.

◆ avx2_or_block_2way()

bool bm::avx2_or_block_2way ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src1,
const __m256i *BMRESTRICT src2 )
inline

OR 2 arrays and copy to the destination dst = *src1 | *src2.

Returns
true if all bits are 1

Definition at line 941 of file bmavx2.h.

References BMRESTRICT, and set_block_size.

◆ avx2_or_block_3way()

bool bm::avx2_or_block_3way ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src1,
const __m256i *BMRESTRICT src2 )
inline

OR array elements against another 2 arrays dst |= *src1 | *src2.

Returns
true if all bits are 1

Definition at line 987 of file bmavx2.h.

References BMRESTRICT, and set_block_size.

◆ avx2_or_block_5way()

bool bm::avx2_or_block_5way ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src1,
const __m256i *BMRESTRICT src2,
const __m256i *BMRESTRICT src3,
const __m256i *BMRESTRICT src4 )
inline

OR array elements against another 4 arrays dst |= *src1 | *src2 | *src3 | *src4.

Returns
true if all bits are 1

Definition at line 1039 of file bmavx2.h.

References BMRESTRICT, and set_block_size.

◆ avx2_set_block()

BMFORCEINLINE void bm::avx2_set_block ( __m256i *BMRESTRICT dst,
bm::word_t value )

AVX2 block memset dst = value.

Definition at line 1477 of file bmavx2.h.

References BMRESTRICT, and set_block_size.

◆ avx2_shift_l1()

bool bm::avx2_shift_l1 ( __m256i * block,
bm::word_t * empty_acc,
unsigned co1 )
inline

block shift left by 1

Definition at line 1842 of file bmavx2.h.

References set_block_size.

◆ avx2_shift_r1()

bool bm::avx2_shift_r1 ( __m256i * block,
bm::word_t * empty_acc,
unsigned co1 )
inline

block shift right by 1

Definition at line 1903 of file bmavx2.h.

References set_block_size.

◆ avx2_shift_r1_and()

bool bm::avx2_shift_r1_and ( __m256i *BMRESTRICT block,
bm::word_t co1,
const __m256i *BMRESTRICT mask_block,
bm::id64_t *BMRESTRICT digest )
inline

fused block shift right by 1 plus AND

Definition at line 1959 of file bmavx2.h.

References BM_ASSERT, BMRESTRICT, and set_block_digest_wave_size.

◆ avx2_stream_block()

void bm::avx2_stream_block ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src )
inline

AVX2 block copy dst = *src.

Definition at line 1589 of file bmavx2.h.

References BMRESTRICT, and set_block_size.

◆ avx2_stream_block_unalign()

void bm::avx2_stream_block_unalign ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src )
inline

AVX2 block copy (unaligned SRC) dst = *src.

Definition at line 1631 of file bmavx2.h.

References BMRESTRICT, and set_block_size.

◆ avx2_sub_block()

unsigned bm::avx2_sub_block ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src )
inline

AND-NOT (SUB) array elements against another array dst &= ~*src.

Returns
0 if destination does not have any bits

Definition at line 1204 of file bmavx2.h.

References BMRESTRICT, and set_block_size.

◆ avx2_sub_digest()

bool bm::avx2_sub_digest ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src )
inline

SUB (AND NOT) block digest stride dst &= ~*src.

Returns
true if stride is all zero

Definition at line 1250 of file bmavx2.h.

References BMRESTRICT.

◆ avx2_sub_digest_2way()

bool bm::avx2_sub_digest_2way ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src1,
const __m256i *BMRESTRICT src2 )
inline

2-operand SUB (AND NOT) block digest stride dst = *src1 & ~*src2

Returns
true if stride is all zero

Definition at line 1280 of file bmavx2.h.

References BMRESTRICT.

◆ avx2_sub_digest_3way()

bool bm::avx2_sub_digest_3way ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src1,
const __m256i *BMRESTRICT src2 )
inline

SUB block digest stride.

Definition at line 1392 of file bmavx2.h.

References BMRESTRICT.

◆ avx2_sub_digest_5way()

bool bm::avx2_sub_digest_5way ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src1,
const __m256i *BMRESTRICT src2,
const __m256i *BMRESTRICT src3,
const __m256i *BMRESTRICT src4 )
inline

SUB block digest stride.

Definition at line 1310 of file bmavx2.h.

References BMRESTRICT.

◆ avx2_test_all_eq_wave2()

BMFORCEINLINE bool bm::avx2_test_all_eq_wave2 ( const void * ptr0,
const void * ptr1 )

check if 2 waves of pointers are all the same (NULL or FULL)

Definition at line 1829 of file bmavx2.h.

Referenced by bm::bvector< Alloc >::combine_operation_or().

◆ avx2_test_all_one_wave()

BMFORCEINLINE bool bm::avx2_test_all_one_wave ( const void * ptr)

check if wave of pointers is all 0xFFF

Definition at line 1791 of file bmavx2.h.

◆ avx2_test_all_zero_wave()

BMFORCEINLINE bool bm::avx2_test_all_zero_wave ( const void * ptr)

check if wave of pointers is all NULL

◆ avx2_test_all_zero_wave2()

BMFORCEINLINE bool bm::avx2_test_all_zero_wave2 ( const void * ptr0,
const void * ptr1 )

check if 2 waves of pointers are all NULL

Definition at line 1816 of file bmavx2.h.

Referenced by bm::bvector< Alloc >::combine_operation_xor().

◆ avx2_xor_arr_2_mask()

void bm::avx2_xor_arr_2_mask ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src,
const __m256i *BMRESTRICT src_end,
bm::word_t mask )
inline

XOR array elements to specified mask dst = *src ^ mask.

Definition at line 447 of file bmavx2.h.

References BMRESTRICT.

◆ avx2_xor_block()

unsigned bm::avx2_xor_block ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src )
inline

XOR block against another dst ^= *src.

Returns
0 if destination does not have any bits

Definition at line 1108 of file bmavx2.h.

References BMRESTRICT, and set_block_size.

◆ avx2_xor_block_2way()

unsigned bm::avx2_xor_block_2way ( __m256i *BMRESTRICT dst,
const __m256i *BMRESTRICT src1,
const __m256i *BMRESTRICT src2 )
inline

3 operand XOR dst = *src1 ^ *src2

Returns
0 if destination does not have any bits

Definition at line 1154 of file bmavx2.h.

References BMRESTRICT, and set_block_size.