polars_arrow/legacy/kernels/
sort_partition.rs1use std::fmt::Debug;
2
3use polars_utils::IdxSize;
4use polars_utils::itertools::Itertools;
5use polars_utils::total_ord::TotalEq;
6
7use crate::types::NativeType;
8
9fn find_partition_points<T>(values: &[T], n: usize, descending: bool) -> Vec<usize>
11where
12 T: Debug + NativeType,
13{
14 let len = values.len();
15 if n > len {
16 return find_partition_points(values, len / 2, descending);
17 }
18 if n < 2 {
19 return vec![];
20 }
21 let chunk_size = len / n;
22
23 let mut partition_points = Vec::with_capacity(n + 1);
24
25 let mut start_idx = 0;
26 loop {
27 let end_idx = start_idx + chunk_size;
28 if end_idx >= len {
29 break;
30 }
31 let part = &values[start_idx..end_idx];
34
35 let latest_val = values[end_idx];
36 let idx = if descending {
37 part.partition_point(|v| v.tot_gt(&latest_val))
38 } else {
39 part.partition_point(|v| v.tot_lt(&latest_val))
40 };
41
42 if idx != 0 {
43 partition_points.push(idx + start_idx)
44 }
45
46 start_idx += chunk_size;
47 }
48 partition_points
49}
50
51pub fn create_clean_partitions<T>(values: &[T], n: usize, descending: bool) -> Vec<&[T]>
52where
53 T: Debug + NativeType,
54{
55 let part_idx = find_partition_points(values, n, descending);
56 let mut out = Vec::with_capacity(n + 1);
57
58 let mut start_idx = 0_usize;
59 for end_idx in part_idx {
60 if end_idx != start_idx {
61 out.push(&values[start_idx..end_idx]);
62 start_idx = end_idx;
63 }
64 }
65 let latest = &values[start_idx..];
66 if !latest.is_empty() {
67 out.push(latest)
68 }
69
70 out
71}
72
73pub fn partition_to_groups_amortized_varsize<T, I>(
74 values: I,
75 values_len: IdxSize,
76 first_group_offset: IdxSize,
77 nulls_first: bool,
78 offset: IdxSize,
79 out: &mut Vec<[IdxSize; 2]>,
80) where
81 T: Debug + TotalEq,
82 I: IntoIterator<Item = T>,
83{
84 let mut values = values.into_iter().enumerate_idx();
85 if let Some((i, mut first)) = values.next() {
86 out.clear();
87 if nulls_first && first_group_offset > 0 {
88 out.push([0, first_group_offset])
89 }
90
91 let mut first_idx = if nulls_first { first_group_offset } else { 0 } + offset;
92 let mut start = i;
93
94 for (i, val) in values {
95 if val.tot_ne(&first) {
97 let len = i - start;
98 start = i;
99 out.push([first_idx, len]);
100 first_idx += len;
101 first = val;
102 }
103 }
104 if nulls_first {
106 out.push([first_idx, values_len + first_group_offset - first_idx]);
107 } else {
108 out.push([first_idx, values_len - (first_idx - offset)]);
109 }
110
111 if !nulls_first && first_group_offset > 0 {
112 out.push([values_len + offset, first_group_offset])
113 }
114 }
115}
116
117pub fn partition_to_groups_amortized<T>(
118 values: &[T],
119 first_group_offset: IdxSize,
120 nulls_first: bool,
121 offset: IdxSize,
122 out: &mut Vec<[IdxSize; 2]>,
123) where
124 T: Debug + TotalEq + Sized,
125{
126 if let Some(mut first) = values.first() {
127 out.clear();
128 if nulls_first && first_group_offset > 0 {
129 out.push([0, first_group_offset])
130 }
131
132 let mut first_idx = if nulls_first { first_group_offset } else { 0 } + offset;
133
134 for val in values {
135 if val.tot_ne(first) {
137 let val_ptr = val as *const T;
138 let first_ptr = first as *const T;
139
140 let len = unsafe { val_ptr.offset_from(first_ptr) } as IdxSize;
143 out.push([first_idx, len]);
144 first_idx += len;
145 first = val;
146 }
147 }
148 if nulls_first {
150 out.push([
151 first_idx,
152 values.len() as IdxSize + first_group_offset - first_idx,
153 ]);
154 } else {
155 out.push([first_idx, values.len() as IdxSize - (first_idx - offset)]);
156 }
157
158 if !nulls_first && first_group_offset > 0 {
159 out.push([values.len() as IdxSize + offset, first_group_offset])
160 }
161 }
162}
163
164pub fn partition_to_groups<T>(
169 values: &[T],
170 first_group_offset: IdxSize,
171 nulls_first: bool,
172 offset: IdxSize,
173) -> Vec<[IdxSize; 2]>
174where
175 T: Debug + NativeType,
176{
177 if values.is_empty() {
178 return vec![];
179 }
180 let mut out = Vec::with_capacity(values.len() / 10);
181 partition_to_groups_amortized(values, first_group_offset, nulls_first, offset, &mut out);
182 out
183}
184
185#[cfg(test)]
186mod test {
187 use super::*;
188
189 #[test]
190 fn test_partition_points() {
191 let values = &[1, 3, 3, 3, 3, 5, 5, 5, 9, 9, 10];
192
193 assert_eq!(find_partition_points(values, 4, false), &[1, 5, 8, 10]);
194 assert_eq!(
195 partition_to_groups(values, 0, true, 0),
196 &[[0, 1], [1, 4], [5, 3], [8, 2], [10, 1]]
197 );
198 assert_eq!(
199 partition_to_groups(values, 5, true, 0),
200 &[[0, 5], [5, 1], [6, 4], [10, 3], [13, 2], [15, 1]]
201 );
202 assert_eq!(
203 partition_to_groups(values, 5, false, 0),
204 &[[0, 1], [1, 4], [5, 3], [8, 2], [10, 1], [11, 5]]
205 );
206 }
207}