polars_compute/
propagate_dictionary.rs

1use arrow::array::{Array, BinaryViewArray, PrimitiveArray, Utf8ViewArray};
2use arrow::bitmap::Bitmap;
3use arrow::datatypes::ArrowDataType::UInt32;
4
5/// Propagate the nulls from the dictionary values into the keys and remove those nulls from the
6/// values.
7pub fn propagate_dictionary_value_nulls(
8    keys: &PrimitiveArray<u32>,
9    values: &Utf8ViewArray,
10) -> (PrimitiveArray<u32>, Utf8ViewArray) {
11    let Some(values_validity) = values.validity() else {
12        return (keys.clone(), values.clone().with_validity(None));
13    };
14    if values_validity.unset_bits() == 0 {
15        return (keys.clone(), values.clone().with_validity(None));
16    }
17
18    let num_values = values.len();
19
20    // Create a map from the old indices to indices with nulls filtered out
21    let mut offset = 0;
22    let new_idx_map: Vec<u32> = (0..num_values)
23        .map(|i| {
24            let is_valid = unsafe { values_validity.get_bit_unchecked(i) };
25            offset += usize::from(!is_valid);
26            if is_valid { (i - offset) as u32 } else { 0 }
27        })
28        .collect();
29
30    let keys = match keys.validity() {
31        None => {
32            let values = keys
33                .values()
34                .iter()
35                .map(|&k| unsafe {
36                    // SAFETY: Arrow invariant that all keys are in range of values
37                    *new_idx_map.get_unchecked(k as usize)
38                })
39                .collect();
40            let validity = Bitmap::from_iter(keys.values().iter().map(|&k| unsafe {
41                // SAFETY: Arrow invariant that all keys are in range of values
42                values_validity.get_bit_unchecked(k as usize)
43            }));
44
45            PrimitiveArray::new(UInt32, values, Some(validity))
46        },
47        Some(keys_validity) => {
48            let values = keys
49                .values()
50                .iter()
51                .map(|&k| {
52                    // deal with nulls in keys
53                    let idx = (k as usize).min(num_values);
54                    // SAFETY: Arrow invariant that all keys are in range of values
55                    *unsafe { new_idx_map.get_unchecked(idx) }
56                })
57                .collect();
58            let propagated_validity = Bitmap::from_iter(keys.values().iter().map(|&k| {
59                // deal with nulls in keys
60                let idx = (k as usize).min(num_values);
61                // SAFETY: Arrow invariant that all keys are in range of values
62                unsafe { values_validity.get_bit_unchecked(idx) }
63            }));
64
65            let validity = &propagated_validity & keys_validity;
66            PrimitiveArray::new(UInt32, values, Some(validity))
67        },
68    };
69
70    // Filter only handles binary
71    let values = values.to_binview();
72
73    // Filter out the null values
74    let values = crate::filter::filter_with_bitmap(&values, values_validity);
75    let values = values.as_any().downcast_ref::<BinaryViewArray>().unwrap();
76    let values = unsafe { values.to_utf8view_unchecked() };
77
78    // Explicitly set the values validity to none.
79    assert_eq!(values.null_count(), 0);
80    let values = values.with_validity(None);
81
82    (keys, values)
83}