polars_compute/cast/
binview_to.rs

1use arrow::array::*;
2#[cfg(feature = "dtype-decimal")]
3use arrow::compute::decimal::deserialize_decimal;
4use arrow::datatypes::{ArrowDataType, TimeUnit};
5use arrow::offset::Offset;
6use arrow::types::NativeType;
7use chrono::Datelike;
8use num_traits::FromBytes;
9use polars_error::PolarsResult;
10
11use super::CastOptionsImpl;
12use super::binary_to::Parse;
13use super::temporal::EPOCH_DAYS_FROM_CE;
14
/// Timestamp format string handed to `super::temporal::utf8view_to_naive_timestamp`
/// by [`utf8view_to_naive_timestamp`] in this module.
// NOTE(review): the specifiers look like chrono `strftime` syntax for RFC 3339
// (date, `T`, time, optional fraction, `+hh:mm` offset) — confirm against the parser.
pub(super) const RFC3339: &str = "%Y-%m-%dT%H:%M:%S%.f%:z";
16
17/// Cast [`BinaryViewArray`] to [`DictionaryArray`], also known as packing.
18/// # Errors
19/// This function errors if the maximum key is smaller than the number of distinct elements
20/// in the array.
21pub(super) fn binview_to_dictionary<K: DictionaryKey>(
22    from: &BinaryViewArray,
23) -> PolarsResult<DictionaryArray<K>> {
24    let mut array = MutableDictionaryArray::<K, MutableBinaryViewArray<[u8]>>::new();
25    array.reserve(from.len());
26    array.try_extend(from.iter())?;
27
28    Ok(array.into())
29}
30
31pub(super) fn utf8view_to_dictionary<K: DictionaryKey>(
32    from: &Utf8ViewArray,
33) -> PolarsResult<DictionaryArray<K>> {
34    let mut array = MutableDictionaryArray::<K, MutableBinaryViewArray<str>>::new();
35    array.reserve(from.len());
36    array.try_extend(from.iter())?;
37
38    Ok(array.into())
39}
40
41pub(super) fn view_to_binary<O: Offset>(array: &BinaryViewArray) -> BinaryArray<O> {
42    let len: usize = Array::len(array);
43    let mut mutable = MutableBinaryValuesArray::<O>::with_capacities(len, array.total_bytes_len());
44    for slice in array.values_iter() {
45        mutable.push(slice)
46    }
47    let out: BinaryArray<O> = mutable.into();
48    out.with_validity(array.validity().cloned())
49}
50
51pub fn utf8view_to_utf8<O: Offset>(array: &Utf8ViewArray) -> Utf8Array<O> {
52    let array = array.to_binview();
53    let out = view_to_binary::<O>(&array);
54
55    let dtype = Utf8Array::<O>::default_dtype();
56    unsafe {
57        Utf8Array::new_unchecked(
58            dtype,
59            out.offsets().clone(),
60            out.values().clone(),
61            out.validity().cloned(),
62        )
63    }
64}
65
66/// Parses a [`Utf8ViewArray`] with text representations of numbers into a
67/// [`PrimitiveArray`], making any unparsable value a Null.
68pub(super) fn utf8view_to_primitive<T>(
69    from: &Utf8ViewArray,
70    to: &ArrowDataType,
71) -> PrimitiveArray<T>
72where
73    T: NativeType + Parse,
74{
75    let iter = from
76        .iter()
77        .map(|x| x.and_then::<T, _>(|x| T::parse(x.as_bytes())));
78
79    PrimitiveArray::<T>::from_trusted_len_iter(iter).to(to.clone())
80}
81
82/// Parses a `&dyn` [`Array`] of UTF-8 encoded string representations of numbers
83/// into a [`PrimitiveArray`], making any unparsable value a Null.
84pub(super) fn utf8view_to_primitive_dyn<T>(
85    from: &dyn Array,
86    to: &ArrowDataType,
87    options: CastOptionsImpl,
88) -> PolarsResult<Box<dyn Array>>
89where
90    T: NativeType + Parse,
91{
92    let from = from.as_any().downcast_ref().unwrap();
93    if options.partial {
94        unimplemented!()
95    } else {
96        Ok(Box::new(utf8view_to_primitive::<T>(from, to)))
97    }
98}
99
/// Deserializes the entries of a [`BinaryViewArray`] into a decimal
/// [`PrimitiveArray<i128>`].
///
/// Null entries stay null, and entries for which `deserialize_decimal` returns
/// `None` become null. The resulting data type is
/// `ArrowDataType::Decimal(precision, scale)`, with the precision falling back
/// to 38 when none is given.
#[cfg(feature = "dtype-decimal")]
pub fn binview_to_decimal(
    array: &BinaryViewArray,
    precision: Option<usize>,
    scale: usize,
) -> PrimitiveArray<i128> {
    // `deserialize_decimal` takes `u8` precision/scale; the casts narrow the
    // caller-supplied `usize` values accordingly.
    let precision = precision.map(|p| p as u8);
    let decoded = array
        .iter()
        .map(|entry| entry.and_then(|bytes| deserialize_decimal(bytes, precision, scale as u8)));
    let dtype = ArrowDataType::Decimal(precision.unwrap_or(38).into(), scale);

    PrimitiveArray::<i128>::from_trusted_len_iter(decoded).to(dtype)
}
117
118pub(super) fn utf8view_to_naive_timestamp_dyn(
119    from: &dyn Array,
120    time_unit: TimeUnit,
121) -> PolarsResult<Box<dyn Array>> {
122    let from = from.as_any().downcast_ref().unwrap();
123    Ok(Box::new(utf8view_to_naive_timestamp(from, time_unit)))
124}
125
126/// [`super::temporal::utf8view_to_timestamp`] applied for RFC3339 formatting
127pub fn utf8view_to_naive_timestamp(
128    from: &Utf8ViewArray,
129    time_unit: TimeUnit,
130) -> PrimitiveArray<i64> {
131    super::temporal::utf8view_to_naive_timestamp(from, RFC3339, time_unit)
132}
133
134pub(super) fn utf8view_to_date32(from: &Utf8ViewArray) -> PrimitiveArray<i32> {
135    let iter = from.iter().map(|x| {
136        x.and_then(|x| {
137            x.parse::<chrono::NaiveDate>()
138                .ok()
139                .map(|x| x.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
140        })
141    });
142    PrimitiveArray::<i32>::from_trusted_len_iter(iter).to(ArrowDataType::Date32)
143}
144
145pub(super) fn utf8view_to_date32_dyn(from: &dyn Array) -> PolarsResult<Box<dyn Array>> {
146    let from = from.as_any().downcast_ref().unwrap();
147    Ok(Box::new(utf8view_to_date32(from)))
148}
149
150/// Casts a [`BinaryViewArray`] containing binary-encoded numbers to a
151/// [`PrimitiveArray`], making any uncastable value a Null.
152pub(super) fn binview_to_primitive<T>(
153    from: &BinaryViewArray,
154    to: &ArrowDataType,
155    is_little_endian: bool,
156) -> PrimitiveArray<T>
157where
158    T: FromBytes + NativeType,
159    for<'a> &'a <T as FromBytes>::Bytes: TryFrom<&'a [u8]>,
160{
161    let iter = from.iter().map(|x| {
162        x.and_then::<T, _>(|x| {
163            if is_little_endian {
164                Some(<T as FromBytes>::from_le_bytes(x.try_into().ok()?))
165            } else {
166                Some(<T as FromBytes>::from_be_bytes(x.try_into().ok()?))
167            }
168        })
169    });
170
171    PrimitiveArray::<T>::from_trusted_len_iter(iter).to(to.clone())
172}
173
174/// Casts a `&dyn` [`Array`] containing binary-encoded numbers to a
175/// [`PrimitiveArray`], making any uncastable value a Null.
176/// # Panics
177/// Panics if `Array` is not a `BinaryViewArray`
178pub fn binview_to_primitive_dyn<T>(
179    from: &dyn Array,
180    to: &ArrowDataType,
181    is_little_endian: bool,
182) -> PolarsResult<Box<dyn Array>>
183where
184    T: FromBytes + NativeType,
185    for<'a> &'a <T as FromBytes>::Bytes: TryFrom<&'a [u8]>,
186{
187    let from = from.as_any().downcast_ref().unwrap();
188    Ok(Box::new(binview_to_primitive::<T>(
189        from,
190        to,
191        is_little_endian,
192    )))
193}