polars_arrow/bitmap/utils/
mod.rs1#![allow(unsafe_op_in_unsafe_fn)]
2mod chunk_iterator;
4mod chunks_exact_mut;
5mod fmt;
6mod iterator;
7mod slice_iterator;
8mod zip_validity;
9
10pub(crate) use chunk_iterator::merge_reversed;
11pub use chunk_iterator::{BitChunk, BitChunkIterExact, BitChunks, BitChunksExact};
12pub use chunks_exact_mut::BitChunksExactMut;
13pub use fmt::fmt;
14pub use iterator::BitmapIter;
15use polars_utils::slice::load_padded_le_u64;
16pub use slice_iterator::SlicesIterator;
17pub use zip_validity::{ZipValidity, ZipValidityIter};
18
19use crate::bitmap::aligned::AlignedBitmapSlice;
20
21#[inline]
23pub fn is_set(byte: u8, i: usize) -> bool {
24 debug_assert!(i < 8);
25 byte & (1 << i) != 0
26}
27
28#[inline(always)]
30pub fn set_bit_in_byte(byte: u8, i: usize, value: bool) -> u8 {
31 debug_assert!(i < 8);
32 let mask = !(1 << i);
33 let insert = (value as u8) << i;
34 (byte & mask) | insert
35}
36
37#[inline(always)]
42pub unsafe fn get_bit_unchecked(bytes: &[u8], i: usize) -> bool {
43 let byte = *bytes.get_unchecked(i / 8);
44 let bit = (byte >> (i % 8)) & 1;
45 bit != 0
46}
47
48#[inline(always)]
52pub unsafe fn set_bit_unchecked(bytes: &mut [u8], i: usize, value: bool) {
53 let byte = bytes.get_unchecked_mut(i / 8);
54 *byte = set_bit_in_byte(*byte, i % 8, value);
55}
56
57#[inline]
59pub fn bytes_for(bits: usize) -> usize {
60 bits.saturating_add(7) / 8
61}
62
63pub fn count_zeros(slice: &[u8], offset: usize, len: usize) -> usize {
67 if len == 0 {
68 return 0;
69 }
70
71 assert!(8 * slice.len() >= offset + len);
72
73 let first_byte_idx = offset / 8;
75 let offset_in_byte = offset % 8;
76 if offset_in_byte + len <= 64 {
77 let mut word = load_padded_le_u64(&slice[first_byte_idx..]);
78 word >>= offset_in_byte;
79 word <<= 64 - len;
80 return len - word.count_ones() as usize;
81 }
82
83 let aligned = AlignedBitmapSlice::<u64>::new(slice, offset, len);
84 let ones_in_prefix = aligned.prefix().count_ones() as usize;
85 let ones_in_bulk: usize = aligned.bulk_iter().map(|w| w.count_ones() as usize).sum();
86 let ones_in_suffix = aligned.suffix().count_ones() as usize;
87 len - ones_in_prefix - ones_in_bulk - ones_in_suffix
88}
89
90pub fn leading_zeros(slice: &[u8], offset: usize, len: usize) -> usize {
96 if len == 0 {
97 return 0;
98 }
99
100 assert!(8 * slice.len() >= offset + len);
101
102 let aligned = AlignedBitmapSlice::<u64>::new(slice, offset, len);
103 let leading_zeros_in_prefix =
104 (aligned.prefix().trailing_zeros() as usize).min(aligned.prefix_bitlen());
105 if leading_zeros_in_prefix < aligned.prefix_bitlen() {
106 return leading_zeros_in_prefix;
107 }
108 if let Some(full_zero_bulk_words) = aligned.bulk_iter().position(|w| w != 0) {
109 return aligned.prefix_bitlen()
110 + full_zero_bulk_words * 64
111 + aligned.bulk()[full_zero_bulk_words].trailing_zeros() as usize;
112 }
113
114 aligned.prefix_bitlen()
115 + aligned.bulk_bitlen()
116 + (aligned.suffix().trailing_zeros() as usize).min(aligned.suffix_bitlen())
117}
118
119pub fn leading_ones(slice: &[u8], offset: usize, len: usize) -> usize {
125 if len == 0 {
126 return 0;
127 }
128
129 assert!(8 * slice.len() >= offset + len);
130
131 let aligned = AlignedBitmapSlice::<u64>::new(slice, offset, len);
132 let leading_ones_in_prefix = aligned.prefix().trailing_ones() as usize;
133 if leading_ones_in_prefix < aligned.prefix_bitlen() {
134 return leading_ones_in_prefix;
135 }
136 if let Some(full_one_bulk_words) = aligned.bulk_iter().position(|w| w != u64::MAX) {
137 return aligned.prefix_bitlen()
138 + full_one_bulk_words * 64
139 + aligned.bulk()[full_one_bulk_words].trailing_ones() as usize;
140 }
141
142 aligned.prefix_bitlen() + aligned.bulk_bitlen() + aligned.suffix().trailing_ones() as usize
143}
144
145pub fn trailing_zeros(slice: &[u8], offset: usize, len: usize) -> usize {
151 if len == 0 {
152 return 0;
153 }
154
155 assert!(8 * slice.len() >= offset + len);
156
157 let aligned = AlignedBitmapSlice::<u64>::new(slice, offset, len);
158 let trailing_zeros_in_suffix = ((aligned.suffix() << ((64 - aligned.suffix_bitlen()) % 64))
159 .leading_zeros() as usize)
160 .min(aligned.suffix_bitlen());
161 if trailing_zeros_in_suffix < aligned.suffix_bitlen() {
162 return trailing_zeros_in_suffix;
163 }
164 if let Some(full_zero_bulk_words) = aligned.bulk_iter().rev().position(|w| w != 0) {
165 return aligned.suffix_bitlen()
166 + full_zero_bulk_words * 64
167 + aligned.bulk()[aligned.bulk().len() - full_zero_bulk_words - 1].leading_zeros()
168 as usize;
169 }
170
171 let trailing_zeros_in_prefix = ((aligned.prefix() << ((64 - aligned.prefix_bitlen()) % 64))
172 .leading_zeros() as usize)
173 .min(aligned.prefix_bitlen());
174 aligned.suffix_bitlen() + aligned.bulk_bitlen() + trailing_zeros_in_prefix
175}
176
177pub fn trailing_ones(slice: &[u8], offset: usize, len: usize) -> usize {
183 if len == 0 {
184 return 0;
185 }
186
187 assert!(8 * slice.len() >= offset + len);
188
189 let aligned = AlignedBitmapSlice::<u64>::new(slice, offset, len);
190 let trailing_ones_in_suffix =
191 (aligned.suffix() << ((64 - aligned.suffix_bitlen()) % 64)).leading_ones() as usize;
192 if trailing_ones_in_suffix < aligned.suffix_bitlen() {
193 return trailing_ones_in_suffix;
194 }
195 if let Some(full_one_bulk_words) = aligned.bulk_iter().rev().position(|w| w != u64::MAX) {
196 return aligned.suffix_bitlen()
197 + full_one_bulk_words * 64
198 + aligned.bulk()[aligned.bulk().len() - full_one_bulk_words - 1].leading_ones()
199 as usize;
200 }
201
202 let trailing_ones_in_prefix =
203 (aligned.prefix() << ((64 - aligned.prefix_bitlen()) % 64)).leading_ones() as usize;
204 aligned.suffix_bitlen() + aligned.bulk_bitlen() + trailing_ones_in_prefix
205}
206
207#[cfg(test)]
208mod tests {
209 use rand::Rng;
210
211 use super::*;
212 use crate::bitmap::Bitmap;
213
214 #[test]
215 fn leading_trailing() {
216 macro_rules! testcase {
217 ($slice:expr, $offset:expr, $length:expr => lz=$lz:expr,lo=$lo:expr,tz=$tz:expr,to=$to:expr) => {
218 assert_eq!(
219 leading_zeros($slice, $offset, $length),
220 $lz,
221 "leading_zeros"
222 );
223 assert_eq!(leading_ones($slice, $offset, $length), $lo, "leading_ones");
224 assert_eq!(
225 trailing_zeros($slice, $offset, $length),
226 $tz,
227 "trailing_zeros"
228 );
229 assert_eq!(
230 trailing_ones($slice, $offset, $length),
231 $to,
232 "trailing_ones"
233 );
234 };
235 }
236
237 testcase!(&[], 0, 0 => lz=0,lo=0,tz=0,to=0);
238 testcase!(&[0], 0, 1 => lz=1,lo=0,tz=1,to=0);
239 testcase!(&[1], 0, 1 => lz=0,lo=1,tz=0,to=1);
240
241 testcase!(&[0b010], 0, 3 => lz=1,lo=0,tz=1,to=0);
242 testcase!(&[0b101], 0, 3 => lz=0,lo=1,tz=0,to=1);
243 testcase!(&[0b100], 0, 3 => lz=2,lo=0,tz=0,to=1);
244 testcase!(&[0b110], 0, 3 => lz=1,lo=0,tz=0,to=2);
245 testcase!(&[0b001], 0, 3 => lz=0,lo=1,tz=2,to=0);
246 testcase!(&[0b011], 0, 3 => lz=0,lo=2,tz=1,to=0);
247
248 testcase!(&[0b010], 1, 2 => lz=0,lo=1,tz=1,to=0);
249 testcase!(&[0b101], 1, 2 => lz=1,lo=0,tz=0,to=1);
250 testcase!(&[0b100], 1, 2 => lz=1,lo=0,tz=0,to=1);
251 testcase!(&[0b110], 1, 2 => lz=0,lo=2,tz=0,to=2);
252 testcase!(&[0b001], 1, 2 => lz=2,lo=0,tz=2,to=0);
253 testcase!(&[0b011], 1, 2 => lz=0,lo=1,tz=1,to=0);
254 }
255
256 #[ignore = "Fuzz test. Too slow"]
257 #[test]
258 fn leading_trailing_fuzz() {
259 let mut rng = rand::rng();
260
261 const SIZE: usize = 1000;
262 const REPEATS: usize = 10_000;
263
264 let mut v = Vec::<bool>::with_capacity(SIZE);
265
266 for _ in 0..REPEATS {
267 v.clear();
268 let offset = rng.random_range(0..SIZE);
269 let length = rng.random_range(0..SIZE - offset);
270 let extra_padding = rng.random_range(0..64);
271
272 let mut num_remaining = usize::min(SIZE, offset + length + extra_padding);
273 while num_remaining > 0 {
274 let chunk_size = rng.random_range(1..=num_remaining);
275 v.extend(
276 rng.clone()
277 .sample_iter(rand::distr::slice::Choose::new(&[false, true]).unwrap())
278 .take(chunk_size),
279 );
280 num_remaining -= chunk_size;
281 }
282
283 let v_slice = &v[offset..offset + length];
284 let lz = v_slice.iter().take_while(|&v| !*v).count();
285 let lo = v_slice.iter().take_while(|&v| *v).count();
286 let tz = v_slice.iter().rev().take_while(|&v| !*v).count();
287 let to = v_slice.iter().rev().take_while(|&v| *v).count();
288
289 let bm = Bitmap::from_iter(v.iter().copied());
290 let (slice, _, _) = bm.as_slice();
291
292 assert_eq!(leading_zeros(slice, offset, length), lz);
293 assert_eq!(leading_ones(slice, offset, length), lo);
294 assert_eq!(trailing_zeros(slice, offset, length), tz);
295 assert_eq!(trailing_ones(slice, offset, length), to);
296 }
297 }
298}