polars_compute/
trim_lists_to_normalized_offsets.rs1use arrow::array::{Array, FixedSizeListArray, ListArray, StructArray};
2use arrow::offset::OffsetsBuffer;
3use arrow::types::Offset;
4
5pub fn trim_lists_to_normalized_offsets(arr: &dyn Array) -> Option<Box<dyn Array>> {
7 let arr = arr.as_any();
8 if let Some(arr) = arr.downcast_ref::<ListArray<i32>>() {
9 return trim_lists_to_normalized_offsets_list(arr).map(|arr| Box::new(arr) as _);
10 }
11 if let Some(arr) = arr.downcast_ref::<ListArray<i64>>() {
12 return trim_lists_to_normalized_offsets_list(arr).map(|arr| Box::new(arr) as _);
13 }
14 if let Some(arr) = arr.downcast_ref::<FixedSizeListArray>() {
15 return trim_lists_to_normalized_offsets_fsl(arr).map(|arr| Box::new(arr) as _);
16 }
17 if let Some(arr) = arr.downcast_ref::<StructArray>() {
18 return trim_lists_to_normalized_offsets_struct(arr).map(|arr| Box::new(arr) as _);
19 }
20
21 None
22}
23
24pub fn trim_lists_to_normalized_offsets_list<O: Offset>(
25 arr: &ListArray<O>,
26) -> Option<ListArray<O>> {
27 let offsets = arr.offsets();
28 let values = arr.values();
29
30 let len = offsets.range().to_usize();
31
32 let (values, offsets) = if values.len() == len {
33 let values = trim_lists_to_normalized_offsets(values.as_ref())?;
34 (values, offsets.clone())
35 } else {
36 let first_idx = *offsets.first();
37 let v = offsets.iter().map(|x| *x - first_idx).collect::<Vec<_>>();
38 let offsets = unsafe { OffsetsBuffer::<O>::new_unchecked(v.into()) };
39 let values = values.sliced(first_idx.to_usize(), len);
40 let values = trim_lists_to_normalized_offsets(values.as_ref()).unwrap_or(values);
41 (values, offsets)
42 };
43
44 assert_eq!(offsets.first().to_usize(), 0);
45 assert_eq!(values.len(), offsets.range().to_usize());
46
47 Some(ListArray::new(
48 arr.dtype().clone(),
49 offsets,
50 values,
51 arr.validity().cloned(),
52 ))
53}
54
55pub fn trim_lists_to_normalized_offsets_fsl(
56 arr: &FixedSizeListArray,
57) -> Option<FixedSizeListArray> {
58 let values = trim_lists_to_normalized_offsets(arr.values().as_ref())?;
59
60 Some(FixedSizeListArray::new(
61 arr.dtype().clone(),
62 arr.len(),
63 values,
64 arr.validity().cloned(),
65 ))
66}
67
68pub fn trim_lists_to_normalized_offsets_struct(arr: &StructArray) -> Option<StructArray> {
69 let mut new_values = Vec::new();
70 for (i, field_array) in arr.values().iter().enumerate() {
71 let Some(field_array) = trim_lists_to_normalized_offsets(field_array.as_ref()) else {
72 continue;
74 };
75
76 new_values.reserve(arr.values().len());
77 new_values.extend(arr.values()[..i].iter().cloned());
78 new_values.push(field_array);
79 break;
80 }
81
82 if new_values.is_empty() {
83 return None;
84 }
85
86 new_values.extend(arr.values()[new_values.len()..].iter().map(|field_array| {
87 trim_lists_to_normalized_offsets(field_array.as_ref())
88 .unwrap_or_else(|| field_array.clone())
89 }));
90
91 Some(StructArray::new(
92 arr.dtype().clone(),
93 arr.len(),
94 new_values,
95 arr.validity().cloned(),
96 ))
97}