polars_compute/cast/
primitive_to.rs

1use std::hash::Hash;
2
3use arrow::array::*;
4use arrow::bitmap::{Bitmap, BitmapBuilder};
5use arrow::compute::arity::unary;
6use arrow::datatypes::{ArrowDataType, TimeUnit};
7use arrow::offset::{Offset, Offsets};
8use arrow::types::{NativeType, f16};
9use num_traits::{AsPrimitive, Float, ToPrimitive};
10use polars_error::PolarsResult;
11use polars_utils::pl_str::PlSmallStr;
12use polars_utils::vec::PushUnchecked;
13
14use super::CastOptionsImpl;
15use super::temporal::*;
16
/// Serialization of a primitive number into its textual representation.
pub trait SerPrimitive {
    /// Appends the textual form of `val` to `f` and returns the number of bytes
    /// that were appended.
    fn write(f: &mut Vec<u8>, val: Self) -> usize where Self: Sized;
}
22
23macro_rules! impl_ser_primitive {
24    ($ptype:ident) => {
25        impl SerPrimitive for $ptype {
26            fn write(f: &mut Vec<u8>, val: Self) -> usize
27            where
28                Self: Sized,
29            {
30                let mut buffer = itoa::Buffer::new();
31                let value = buffer.format(val);
32                f.extend_from_slice(value.as_bytes());
33                value.len()
34            }
35        }
36    };
37}
38
39impl_ser_primitive!(i8);
40impl_ser_primitive!(i16);
41impl_ser_primitive!(i32);
42impl_ser_primitive!(i64);
43impl_ser_primitive!(i128);
44impl_ser_primitive!(u8);
45impl_ser_primitive!(u16);
46impl_ser_primitive!(u32);
47impl_ser_primitive!(u64);
48
49impl SerPrimitive for f32 {
50    fn write(f: &mut Vec<u8>, val: Self) -> usize
51    where
52        Self: Sized,
53    {
54        let mut buffer = ryu::Buffer::new();
55        let value = buffer.format(val);
56        f.extend_from_slice(value.as_bytes());
57        value.len()
58    }
59}
60
61impl SerPrimitive for f64 {
62    fn write(f: &mut Vec<u8>, val: Self) -> usize
63    where
64        Self: Sized,
65    {
66        let mut buffer = ryu::Buffer::new();
67        let value = buffer.format(val);
68        f.extend_from_slice(value.as_bytes());
69        value.len()
70    }
71}
72
73fn fallible_unary<I, F, G, O>(
74    array: &PrimitiveArray<I>,
75    op: F,
76    fail: G,
77    dtype: ArrowDataType,
78) -> PrimitiveArray<O>
79where
80    I: NativeType,
81    O: NativeType,
82    F: Fn(I) -> O,
83    G: Fn(I) -> bool,
84{
85    let values = array.values();
86    let mut out = Vec::with_capacity(array.len());
87    let mut i = 0;
88
89    while i < array.len() && !fail(values[i]) {
90        // SAFETY: We allocated enough before.
91        unsafe { out.push_unchecked(op(values[i])) };
92        i += 1;
93    }
94
95    if out.len() == array.len() {
96        return PrimitiveArray::<O>::new(dtype, out.into(), array.validity().cloned());
97    }
98
99    let mut validity = BitmapBuilder::with_capacity(array.len());
100    validity.extend_constant(out.len(), true);
101
102    for &value in &values[out.len()..] {
103        // SAFETY: We allocated enough before.
104        unsafe {
105            out.push_unchecked(op(value));
106            validity.push_unchecked(!fail(value));
107        }
108    }
109
110    debug_assert_eq!(out.len(), array.len());
111    debug_assert_eq!(validity.len(), array.len());
112
113    let validity = validity.freeze();
114    let validity = match array.validity() {
115        None => validity,
116        Some(arr_validity) => arrow::bitmap::and(&validity, arr_validity),
117    };
118
119    PrimitiveArray::<O>::new(dtype, out.into(), Some(validity))
120}
121
122fn primitive_to_values_and_offsets<T: NativeType + SerPrimitive, O: Offset>(
123    from: &PrimitiveArray<T>,
124) -> (Vec<u8>, Offsets<O>) {
125    let mut values: Vec<u8> = Vec::with_capacity(from.len());
126    let mut offsets: Vec<O> = Vec::with_capacity(from.len() + 1);
127    offsets.push(O::default());
128
129    let mut offset: usize = 0;
130
131    unsafe {
132        for &x in from.values().iter() {
133            let len = T::write(&mut values, x);
134
135            offset += len;
136            offsets.push(O::from_as_usize(offset));
137        }
138        values.set_len(offset);
139        values.shrink_to_fit();
140        // SAFETY: offsets _are_ monotonically increasing
141        let offsets = Offsets::new_unchecked(offsets);
142
143        (values, offsets)
144    }
145}
146
147/// Returns a [`BooleanArray`] where every element is different from zero.
148/// Validity is preserved.
149pub fn primitive_to_boolean<T: NativeType>(
150    from: &PrimitiveArray<T>,
151    to_type: ArrowDataType,
152) -> BooleanArray {
153    let iter = from.values().iter().map(|v| *v != T::default());
154    let values = Bitmap::from_trusted_len_iter(iter);
155
156    BooleanArray::new(to_type, values, from.validity().cloned())
157}
158
159pub(super) fn primitive_to_boolean_dyn<T>(
160    from: &dyn Array,
161    to_type: ArrowDataType,
162) -> PolarsResult<Box<dyn Array>>
163where
164    T: NativeType,
165{
166    let from = from.as_any().downcast_ref().unwrap();
167    Ok(Box::new(primitive_to_boolean::<T>(from, to_type)))
168}
169
170/// Returns a [`Utf8Array`] where every element is the utf8 representation of the number.
171pub(super) fn primitive_to_utf8<T: NativeType + SerPrimitive, O: Offset>(
172    from: &PrimitiveArray<T>,
173) -> Utf8Array<O> {
174    let (values, offsets) = primitive_to_values_and_offsets(from);
175    unsafe {
176        Utf8Array::<O>::new_unchecked(
177            Utf8Array::<O>::default_dtype(),
178            offsets.into(),
179            values.into(),
180            from.validity().cloned(),
181        )
182    }
183}
184
185pub(super) fn primitive_to_utf8_dyn<T, O>(from: &dyn Array) -> PolarsResult<Box<dyn Array>>
186where
187    O: Offset,
188    T: NativeType + SerPrimitive,
189{
190    let from = from.as_any().downcast_ref().unwrap();
191    Ok(Box::new(primitive_to_utf8::<T, O>(from)))
192}
193
194pub(super) fn primitive_to_primitive_dyn<I, O>(
195    from: &dyn Array,
196    to_type: &ArrowDataType,
197    options: CastOptionsImpl,
198) -> PolarsResult<Box<dyn Array>>
199where
200    I: NativeType + num_traits::NumCast + num_traits::AsPrimitive<O>,
201    O: NativeType + num_traits::NumCast,
202{
203    let from = from.as_any().downcast_ref::<PrimitiveArray<I>>().unwrap();
204    if options.wrapped {
205        Ok(Box::new(primitive_as_primitive::<I, O>(from, to_type)))
206    } else {
207        Ok(Box::new(primitive_to_primitive::<I, O>(from, to_type)))
208    }
209}
210
211/// Cast [`PrimitiveArray`] to a [`PrimitiveArray`] of another physical type via numeric conversion.
212pub fn primitive_to_primitive<I, O>(
213    from: &PrimitiveArray<I>,
214    to_type: &ArrowDataType,
215) -> PrimitiveArray<O>
216where
217    I: NativeType + num_traits::NumCast,
218    O: NativeType + num_traits::NumCast,
219{
220    let iter = from
221        .iter()
222        .map(|v| v.and_then(|x| num_traits::cast::cast::<I, O>(*x)));
223    PrimitiveArray::<O>::from_trusted_len_iter(iter).to(to_type.clone())
224}
225
226/// Returns a [`PrimitiveArray<i128>`] with the cast values. Values are `None` on overflow
227pub fn integer_to_decimal<T: NativeType + AsPrimitive<i128>>(
228    from: &PrimitiveArray<T>,
229    to_precision: usize,
230    to_scale: usize,
231) -> PrimitiveArray<i128> {
232    let multiplier = 10_i128.pow(to_scale as u32);
233
234    let min_for_precision = 9_i128
235        .saturating_pow(1 + to_precision as u32)
236        .saturating_neg();
237    let max_for_precision = 9_i128.saturating_pow(1 + to_precision as u32);
238
239    let values = from.iter().map(|x| {
240        x.and_then(|x| {
241            x.as_().checked_mul(multiplier).and_then(|x| {
242                if x > max_for_precision || x < min_for_precision {
243                    None
244                } else {
245                    Some(x)
246                }
247            })
248        })
249    });
250
251    PrimitiveArray::<i128>::from_trusted_len_iter(values)
252        .to(ArrowDataType::Decimal(to_precision, to_scale))
253}
254
255pub(super) fn integer_to_decimal_dyn<T>(
256    from: &dyn Array,
257    precision: usize,
258    scale: usize,
259) -> PolarsResult<Box<dyn Array>>
260where
261    T: NativeType + AsPrimitive<i128>,
262{
263    let from = from.as_any().downcast_ref().unwrap();
264    Ok(Box::new(integer_to_decimal::<T>(from, precision, scale)))
265}
266
267/// Returns a [`PrimitiveArray<i128>`] with the cast values. Values are `None` on overflow
268pub fn float_to_decimal<T>(
269    from: &PrimitiveArray<T>,
270    to_precision: usize,
271    to_scale: usize,
272) -> PrimitiveArray<i128>
273where
274    T: NativeType + Float + ToPrimitive,
275    f64: AsPrimitive<T>,
276{
277    // 1.2 => 12
278    let multiplier: T = (10_f64).powi(to_scale as i32).as_();
279
280    let min_for_precision = 9_i128
281        .saturating_pow(1 + to_precision as u32)
282        .saturating_neg();
283    let max_for_precision = 9_i128.saturating_pow(1 + to_precision as u32);
284
285    let values = from.iter().map(|x| {
286        x.and_then(|x| {
287            let x = (*x * multiplier).to_i128()?;
288            if x > max_for_precision || x < min_for_precision {
289                None
290            } else {
291                Some(x)
292            }
293        })
294    });
295
296    PrimitiveArray::<i128>::from_trusted_len_iter(values)
297        .to(ArrowDataType::Decimal(to_precision, to_scale))
298}
299
300pub(super) fn float_to_decimal_dyn<T>(
301    from: &dyn Array,
302    precision: usize,
303    scale: usize,
304) -> PolarsResult<Box<dyn Array>>
305where
306    T: NativeType + Float + ToPrimitive,
307    f64: AsPrimitive<T>,
308{
309    let from = from.as_any().downcast_ref().unwrap();
310    Ok(Box::new(float_to_decimal::<T>(from, precision, scale)))
311}
312
313/// Cast [`PrimitiveArray`] as a [`PrimitiveArray`]
314/// Same as `number as to_number_type` in rust
315pub fn primitive_as_primitive<I, O>(
316    from: &PrimitiveArray<I>,
317    to_type: &ArrowDataType,
318) -> PrimitiveArray<O>
319where
320    I: NativeType + num_traits::AsPrimitive<O>,
321    O: NativeType,
322{
323    unary(from, num_traits::AsPrimitive::<O>::as_, to_type.clone())
324}
325
326/// Cast [`PrimitiveArray`] to a [`PrimitiveArray`] of the same physical type.
327/// This is O(1).
328pub fn primitive_to_same_primitive<T>(
329    from: &PrimitiveArray<T>,
330    to_type: &ArrowDataType,
331) -> PrimitiveArray<T>
332where
333    T: NativeType,
334{
335    PrimitiveArray::<T>::new(
336        to_type.clone(),
337        from.values().clone(),
338        from.validity().cloned(),
339    )
340}
341
342/// Cast [`PrimitiveArray`] to a [`PrimitiveArray`] of the same physical type.
343/// This is O(1).
344pub(super) fn primitive_to_same_primitive_dyn<T>(
345    from: &dyn Array,
346    to_type: &ArrowDataType,
347) -> PolarsResult<Box<dyn Array>>
348where
349    T: NativeType,
350{
351    let from = from.as_any().downcast_ref().unwrap();
352    Ok(Box::new(primitive_to_same_primitive::<T>(from, to_type)))
353}
354
355pub(super) fn primitive_to_dictionary_dyn<T: NativeType + Eq + Hash, K: DictionaryKey>(
356    from: &dyn Array,
357) -> PolarsResult<Box<dyn Array>> {
358    let from = from.as_any().downcast_ref().unwrap();
359    primitive_to_dictionary::<T, K>(from).map(|x| Box::new(x) as Box<dyn Array>)
360}
361
362/// Cast [`PrimitiveArray`] to [`DictionaryArray`]. Also known as packing.
363/// # Errors
364/// This function errors if the maximum key is smaller than the number of distinct elements
365/// in the array.
366pub fn primitive_to_dictionary<T: NativeType + Eq + Hash, K: DictionaryKey>(
367    from: &PrimitiveArray<T>,
368) -> PolarsResult<DictionaryArray<K>> {
369    let iter = from.iter().map(|x| x.copied());
370    let mut array = MutableDictionaryArray::<K, _>::try_empty(MutablePrimitiveArray::<T>::from(
371        from.dtype().clone(),
372    ))?;
373    array.reserve(from.len());
374    array.try_extend(iter)?;
375
376    Ok(array.into())
377}
378
379/// # Safety
380///
381/// `dtype` should be valid for primitive.
382pub unsafe fn primitive_map_is_valid<T: NativeType>(
383    from: &PrimitiveArray<T>,
384    f: impl Fn(T) -> bool,
385    dtype: ArrowDataType,
386) -> PrimitiveArray<T> {
387    let values = from.values().clone();
388
389    let validity: Bitmap = values.iter().map(|&v| f(v)).collect();
390
391    let validity = if validity.unset_bits() > 0 {
392        let new_validity = match from.validity() {
393            None => validity,
394            Some(v) => v & &validity,
395        };
396
397        Some(new_validity)
398    } else {
399        from.validity().cloned()
400    };
401
402    // SAFETY:
403    // - Validity did not change length
404    // - dtype should be valid
405    unsafe { PrimitiveArray::new_unchecked(dtype, values, validity) }
406}
407
408/// Conversion of `Int32` to `Time32(TimeUnit::Second)`
409pub fn int32_to_time32s(from: &PrimitiveArray<i32>) -> PrimitiveArray<i32> {
410    // SAFETY: Time32(TimeUnit::Second) is valid for Int32
411    unsafe {
412        primitive_map_is_valid(
413            from,
414            |v| (0..SECONDS_IN_DAY as i32).contains(&v),
415            ArrowDataType::Time32(TimeUnit::Second),
416        )
417    }
418}
419
420/// Conversion of `Int32` to `Time32(TimeUnit::Millisecond)`
421pub fn int32_to_time32ms(from: &PrimitiveArray<i32>) -> PrimitiveArray<i32> {
422    // SAFETY: Time32(TimeUnit::Millisecond) is valid for Int32
423    unsafe {
424        primitive_map_is_valid(
425            from,
426            |v| (0..MILLISECONDS_IN_DAY as i32).contains(&v),
427            ArrowDataType::Time32(TimeUnit::Millisecond),
428        )
429    }
430}
431
432/// Conversion of `Int64` to `Time32(TimeUnit::Microsecond)`
433pub fn int64_to_time64us(from: &PrimitiveArray<i64>) -> PrimitiveArray<i64> {
434    // SAFETY: Time64(TimeUnit::Microsecond) is valid for Int64
435    unsafe {
436        primitive_map_is_valid(
437            from,
438            |v| (0..MICROSECONDS_IN_DAY).contains(&v),
439            ArrowDataType::Time32(TimeUnit::Microsecond),
440        )
441    }
442}
443
444/// Conversion of `Int64` to `Time32(TimeUnit::Nanosecond)`
445pub fn int64_to_time64ns(from: &PrimitiveArray<i64>) -> PrimitiveArray<i64> {
446    // SAFETY: Time64(TimeUnit::Nanosecond) is valid for Int64
447    unsafe {
448        primitive_map_is_valid(
449            from,
450            |v| (0..NANOSECONDS_IN_DAY).contains(&v),
451            ArrowDataType::Time64(TimeUnit::Nanosecond),
452        )
453    }
454}
455
456/// Conversion of dates
457pub fn date32_to_date64(from: &PrimitiveArray<i32>) -> PrimitiveArray<i64> {
458    unary(
459        from,
460        |x| x as i64 * MILLISECONDS_IN_DAY,
461        ArrowDataType::Date64,
462    )
463}
464
465/// Conversion of dates
466pub fn date64_to_date32(from: &PrimitiveArray<i64>) -> PrimitiveArray<i32> {
467    unary(
468        from,
469        |x| (x / MILLISECONDS_IN_DAY) as i32,
470        ArrowDataType::Date32,
471    )
472}
473
474/// Conversion of times
475pub fn time32s_to_time32ms(from: &PrimitiveArray<i32>) -> PrimitiveArray<i32> {
476    fallible_unary(
477        from,
478        |x| x.wrapping_mul(1000),
479        |x| x.checked_mul(1000).is_none(),
480        ArrowDataType::Time32(TimeUnit::Millisecond),
481    )
482}
483
484/// Conversion of times
485pub fn time32ms_to_time32s(from: &PrimitiveArray<i32>) -> PrimitiveArray<i32> {
486    unary(from, |x| x / 1000, ArrowDataType::Time32(TimeUnit::Second))
487}
488
489/// Conversion of times
490pub fn time64us_to_time64ns(from: &PrimitiveArray<i64>) -> PrimitiveArray<i64> {
491    fallible_unary(
492        from,
493        |x| x.wrapping_mul(1000),
494        |x| x.checked_mul(1000).is_none(),
495        ArrowDataType::Time64(TimeUnit::Nanosecond),
496    )
497}
498
499/// Conversion of times
500pub fn time64ns_to_time64us(from: &PrimitiveArray<i64>) -> PrimitiveArray<i64> {
501    unary(
502        from,
503        |x| x / 1000,
504        ArrowDataType::Time64(TimeUnit::Microsecond),
505    )
506}
507
508/// Conversion of timestamp
509pub fn timestamp_to_date64(from: &PrimitiveArray<i64>, from_unit: TimeUnit) -> PrimitiveArray<i64> {
510    let from_size = time_unit_multiple(from_unit);
511    let to_size = MILLISECONDS;
512    let to_type = ArrowDataType::Date64;
513
514    // Scale time_array by (to_size / from_size) using a
515    // single integer operation, but need to avoid integer
516    // math rounding down to zero
517
518    match to_size.cmp(&from_size) {
519        std::cmp::Ordering::Less => unary(from, |x| (x / (from_size / to_size)), to_type),
520        std::cmp::Ordering::Equal => primitive_to_same_primitive(from, &to_type),
521        std::cmp::Ordering::Greater => fallible_unary(
522            from,
523            |x| x.wrapping_mul(to_size / from_size),
524            |x| x.checked_mul(to_size / from_size).is_none(),
525            to_type,
526        ),
527    }
528}
529
530/// Conversion of timestamp
531pub fn timestamp_to_date32(from: &PrimitiveArray<i64>, from_unit: TimeUnit) -> PrimitiveArray<i32> {
532    let from_size = time_unit_multiple(from_unit) * SECONDS_IN_DAY;
533    unary(from, |x| (x / from_size) as i32, ArrowDataType::Date32)
534}
535
536/// Conversion of time
537pub fn time32_to_time64(
538    from: &PrimitiveArray<i32>,
539    from_unit: TimeUnit,
540    to_unit: TimeUnit,
541) -> PrimitiveArray<i64> {
542    let from_size = time_unit_multiple(from_unit);
543    let to_size = time_unit_multiple(to_unit);
544    let divisor = to_size / from_size;
545    fallible_unary(
546        from,
547        |x| (x as i64).wrapping_mul(divisor),
548        |x| (x as i64).checked_mul(divisor).is_none(),
549        ArrowDataType::Time64(to_unit),
550    )
551}
552
553/// Conversion of time
554pub fn time64_to_time32(
555    from: &PrimitiveArray<i64>,
556    from_unit: TimeUnit,
557    to_unit: TimeUnit,
558) -> PrimitiveArray<i32> {
559    let from_size = time_unit_multiple(from_unit);
560    let to_size = time_unit_multiple(to_unit);
561    let divisor = from_size / to_size;
562    unary(
563        from,
564        |x| (x / divisor) as i32,
565        ArrowDataType::Time32(to_unit),
566    )
567}
568
569/// Conversion of timestamp
570pub fn timestamp_to_timestamp(
571    from: &PrimitiveArray<i64>,
572    from_unit: TimeUnit,
573    to_unit: TimeUnit,
574    tz: &Option<PlSmallStr>,
575) -> PrimitiveArray<i64> {
576    let from_size = time_unit_multiple(from_unit);
577    let to_size = time_unit_multiple(to_unit);
578    let to_type = ArrowDataType::Timestamp(to_unit, tz.clone());
579    // we either divide or multiply, depending on size of each unit
580    if from_size >= to_size {
581        unary(from, |x| (x / (from_size / to_size)), to_type)
582    } else {
583        fallible_unary(
584            from,
585            |x| x.wrapping_mul(to_size / from_size),
586            |x| x.checked_mul(to_size / from_size).is_none(),
587            to_type,
588        )
589    }
590}
591
592/// Casts f16 into f32
593pub fn f16_to_f32(from: &PrimitiveArray<f16>) -> PrimitiveArray<f32> {
594    unary(from, |x| x.to_f32(), ArrowDataType::Float32)
595}
596
597/// Returns a [`Utf8Array`] where every element is the utf8 representation of the number.
598pub(super) fn primitive_to_binview<T: NativeType + SerPrimitive>(
599    from: &PrimitiveArray<T>,
600) -> BinaryViewArray {
601    let mut mutable = MutableBinaryViewArray::with_capacity(from.len());
602
603    let mut scratch = vec![];
604    for &x in from.values().iter() {
605        unsafe { scratch.set_len(0) };
606        T::write(&mut scratch, x);
607        mutable.push_value_ignore_validity(&scratch)
608    }
609
610    mutable.freeze().with_validity(from.validity().cloned())
611}
612
613pub(super) fn primitive_to_binview_dyn<T>(from: &dyn Array) -> BinaryViewArray
614where
615    T: NativeType + SerPrimitive,
616{
617    let from = from.as_any().downcast_ref().unwrap();
618    primitive_to_binview::<T>(from)
619}