polars_compute/
trim_lists_to_normalized_offsets.rs

1use arrow::array::{Array, FixedSizeListArray, ListArray, StructArray};
2use arrow::offset::OffsetsBuffer;
3use arrow::types::Offset;
4
5/// Trim all lists of unused start and end elements recursively.
6pub fn trim_lists_to_normalized_offsets(arr: &dyn Array) -> Option<Box<dyn Array>> {
7    let arr = arr.as_any();
8    if let Some(arr) = arr.downcast_ref::<ListArray<i32>>() {
9        return trim_lists_to_normalized_offsets_list(arr).map(|arr| Box::new(arr) as _);
10    }
11    if let Some(arr) = arr.downcast_ref::<ListArray<i64>>() {
12        return trim_lists_to_normalized_offsets_list(arr).map(|arr| Box::new(arr) as _);
13    }
14    if let Some(arr) = arr.downcast_ref::<FixedSizeListArray>() {
15        return trim_lists_to_normalized_offsets_fsl(arr).map(|arr| Box::new(arr) as _);
16    }
17    if let Some(arr) = arr.downcast_ref::<StructArray>() {
18        return trim_lists_to_normalized_offsets_struct(arr).map(|arr| Box::new(arr) as _);
19    }
20
21    None
22}
23
24pub fn trim_lists_to_normalized_offsets_list<O: Offset>(
25    arr: &ListArray<O>,
26) -> Option<ListArray<O>> {
27    let offsets = arr.offsets();
28    let values = arr.values();
29
30    let len = offsets.range().to_usize();
31
32    let (values, offsets) = if values.len() == len {
33        let values = trim_lists_to_normalized_offsets(values.as_ref())?;
34        (values, offsets.clone())
35    } else {
36        let first_idx = *offsets.first();
37        let v = offsets.iter().map(|x| *x - first_idx).collect::<Vec<_>>();
38        let offsets = unsafe { OffsetsBuffer::<O>::new_unchecked(v.into()) };
39        let values = values.sliced(first_idx.to_usize(), len);
40        let values = trim_lists_to_normalized_offsets(values.as_ref()).unwrap_or(values);
41        (values, offsets)
42    };
43
44    assert_eq!(offsets.first().to_usize(), 0);
45    assert_eq!(values.len(), offsets.range().to_usize());
46
47    Some(ListArray::new(
48        arr.dtype().clone(),
49        offsets,
50        values,
51        arr.validity().cloned(),
52    ))
53}
54
55pub fn trim_lists_to_normalized_offsets_fsl(
56    arr: &FixedSizeListArray,
57) -> Option<FixedSizeListArray> {
58    let values = trim_lists_to_normalized_offsets(arr.values().as_ref())?;
59
60    Some(FixedSizeListArray::new(
61        arr.dtype().clone(),
62        arr.len(),
63        values,
64        arr.validity().cloned(),
65    ))
66}
67
68pub fn trim_lists_to_normalized_offsets_struct(arr: &StructArray) -> Option<StructArray> {
69    let mut new_values = Vec::new();
70    for (i, field_array) in arr.values().iter().enumerate() {
71        let Some(field_array) = trim_lists_to_normalized_offsets(field_array.as_ref()) else {
72            // Nothing was changed. Return the original array.
73            continue;
74        };
75
76        new_values.reserve(arr.values().len());
77        new_values.extend(arr.values()[..i].iter().cloned());
78        new_values.push(field_array);
79        break;
80    }
81
82    if new_values.is_empty() {
83        return None;
84    }
85
86    new_values.extend(arr.values()[new_values.len()..].iter().map(|field_array| {
87        trim_lists_to_normalized_offsets(field_array.as_ref())
88            .unwrap_or_else(|| field_array.clone())
89    }));
90
91    Some(StructArray::new(
92        arr.dtype().clone(),
93        arr.len(),
94        new_values,
95        arr.validity().cloned(),
96    ))
97}