polars_compute/cast/
mod.rs

1//! Defines different casting operators such as [`cast`] or [`primitive_to_binary`].
2
3mod binary_to;
4mod binview_to;
5mod boolean_to;
6mod decimal_to;
7mod dictionary_to;
8mod primitive_to;
9mod utf8_to;
10
11use arrow::bitmap::MutableBitmap;
12pub use binary_to::*;
13#[cfg(feature = "dtype-decimal")]
14pub use binview_to::binview_to_decimal;
15use binview_to::utf8view_to_primitive_dyn;
16pub use binview_to::utf8view_to_utf8;
17pub use boolean_to::*;
18pub use decimal_to::*;
19pub mod temporal;
20use arrow::array::*;
21use arrow::datatypes::*;
22use arrow::match_integer_type;
23use arrow::offset::{Offset, Offsets};
24pub use binview_to::binview_to_primitive_dyn;
25use binview_to::{
26    binview_to_dictionary, utf8view_to_date32_dyn, utf8view_to_dictionary,
27    utf8view_to_naive_timestamp_dyn, view_to_binary,
28};
29use dictionary_to::*;
30use polars_error::{PolarsResult, polars_bail, polars_ensure, polars_err};
31use polars_utils::IdxSize;
32pub use primitive_to::*;
33use temporal::utf8view_to_timestamp;
34pub use utf8_to::*;
35
/// Options defining how cast kernels behave.
#[derive(Clone, Copy, Debug, Default)]
pub struct CastOptionsImpl {
    /// Defaults to `false`.
    ///
    /// Whether an overflowing cast should be converted to `None` (default), or be wrapped
    /// (i.e. `256i16 as u8 = 0`, vectorized).
    /// Setting this to `true` is 5-6x faster for numeric types.
    pub wrapped: bool,
    /// Defaults to `false`.
    ///
    /// Whether to cast to an integer on a best-effort basis.
    pub partial: bool,
}
47
48impl CastOptionsImpl {
49    pub fn unchecked() -> Self {
50        Self {
51            wrapped: true,
52            partial: false,
53        }
54    }
55}
56
57impl CastOptionsImpl {
58    fn with_wrapped(&self, v: bool) -> Self {
59        let mut option = *self;
60        option.wrapped = v;
61        option
62    }
63}
64
/// Downcasts a dynamically typed array and applies a kernel to it, boxing the result.
///
/// Usage: `primitive_dyn!(from, kernel)` or `primitive_dyn!(from, kernel, arg, ...)`.
///
/// * `from` must expose `as_any()`; the concrete type to downcast to is inferred from
///   the first parameter of `kernel`.
/// * Any further arguments are forwarded to `kernel` unchanged, after the downcast array.
///
/// Expands to `Ok(Box::new(kernel(downcast, args...)))`.
///
/// # Panics
///
/// The expansion panics (via `unwrap`) when `from` is not of the type `kernel` expects.
macro_rules! primitive_dyn {
    // A single variadic arm covers every arity: zero or more trailing arguments.
    ($from:expr, $expr:tt $(, $arg:expr)*) => {{
        let from = $from.as_any().downcast_ref().unwrap();
        Ok(Box::new($expr(from $(, $arg)*)))
    }};
}
83
84fn cast_struct(
85    array: &StructArray,
86    to_type: &ArrowDataType,
87    options: CastOptionsImpl,
88) -> PolarsResult<StructArray> {
89    let values = array.values();
90    let fields = StructArray::get_fields(to_type);
91    let new_values = values
92        .iter()
93        .zip(fields)
94        .map(|(arr, field)| cast(arr.as_ref(), field.dtype(), options))
95        .collect::<PolarsResult<Vec<_>>>()?;
96
97    Ok(StructArray::new(
98        to_type.clone(),
99        array.len(),
100        new_values,
101        array.validity().cloned(),
102    ))
103}
104
105fn cast_list<O: Offset>(
106    array: &ListArray<O>,
107    to_type: &ArrowDataType,
108    options: CastOptionsImpl,
109) -> PolarsResult<ListArray<O>> {
110    let values = array.values();
111    let new_values = cast(
112        values.as_ref(),
113        ListArray::<O>::get_child_type(to_type),
114        options,
115    )?;
116
117    Ok(ListArray::<O>::new(
118        to_type.clone(),
119        array.offsets().clone(),
120        new_values,
121        array.validity().cloned(),
122    ))
123}
124
125fn cast_list_to_large_list(array: &ListArray<i32>, to_type: &ArrowDataType) -> ListArray<i64> {
126    let offsets = array.offsets().into();
127
128    ListArray::<i64>::new(
129        to_type.clone(),
130        offsets,
131        array.values().clone(),
132        array.validity().cloned(),
133    )
134}
135
136fn cast_large_to_list(array: &ListArray<i64>, to_type: &ArrowDataType) -> ListArray<i32> {
137    let offsets = array.offsets().try_into().expect("Convertme to error");
138
139    ListArray::<i32>::new(
140        to_type.clone(),
141        offsets,
142        array.values().clone(),
143        array.validity().cloned(),
144    )
145}
146
147fn cast_fixed_size_list_to_list<O: Offset>(
148    fixed: &FixedSizeListArray,
149    to_type: &ArrowDataType,
150    options: CastOptionsImpl,
151) -> PolarsResult<ListArray<O>> {
152    let new_values = cast(
153        fixed.values().as_ref(),
154        ListArray::<O>::get_child_type(to_type),
155        options,
156    )?;
157
158    let offsets = (0..=fixed.len())
159        .map(|ix| O::from_as_usize(ix * fixed.size()))
160        .collect::<Vec<_>>();
161    // SAFETY: offsets _are_ monotonically increasing
162    let offsets = unsafe { Offsets::new_unchecked(offsets) };
163
164    Ok(ListArray::<O>::new(
165        to_type.clone(),
166        offsets.into(),
167        new_values,
168        fixed.validity().cloned(),
169    ))
170}
171
172pub(super) fn cast_list_to_fixed_size_list<O: Offset>(
173    list: &ListArray<O>,
174    inner: &Field,
175    size: usize,
176    options: CastOptionsImpl,
177) -> PolarsResult<FixedSizeListArray> {
178    let null_cnt = list.null_count();
179    let new_values = if null_cnt == 0 {
180        let start_offset = list.offsets().first().to_usize();
181        let offsets = list.offsets().buffer();
182
183        let mut is_valid = true;
184        for (i, offset) in offsets.iter().enumerate() {
185            is_valid &= offset.to_usize() == start_offset + i * size;
186        }
187
188        polars_ensure!(is_valid, ComputeError: "not all elements have the specified width {size}");
189
190        let sliced_values = list
191            .values()
192            .sliced(start_offset, list.offsets().range().to_usize());
193        cast(sliced_values.as_ref(), inner.dtype(), options)?
194    } else {
195        let offsets = list.offsets().as_slice();
196        // Check the lengths of each list are equal to the fixed size.
197        // SAFETY: we know the index is in bound.
198        let mut expected_offset = unsafe { *offsets.get_unchecked(0) } + O::from_as_usize(size);
199        for i in 1..=list.len() {
200            // SAFETY: we know the index is in bound.
201            let current_offset = unsafe { *offsets.get_unchecked(i) };
202            if list.is_null(i - 1) {
203                expected_offset = current_offset + O::from_as_usize(size);
204            } else {
205                polars_ensure!(current_offset == expected_offset, ComputeError:
206            "not all elements have the specified width {size}");
207                expected_offset += O::from_as_usize(size);
208            }
209        }
210
211        // Build take indices for the values. This is used to fill in the null slots.
212        let mut indices =
213            MutablePrimitiveArray::<IdxSize>::with_capacity(list.values().len() + null_cnt * size);
214        for i in 0..list.len() {
215            if list.is_null(i) {
216                indices.extend_constant(size, None)
217            } else {
218                // SAFETY: we know the index is in bound.
219                let current_offset = unsafe { *offsets.get_unchecked(i) };
220                for j in 0..size {
221                    indices.push(Some(
222                        (current_offset + O::from_as_usize(j)).to_usize() as IdxSize
223                    ));
224                }
225            }
226        }
227        let take_values =
228            unsafe { crate::gather::take_unchecked(list.values().as_ref(), &indices.freeze()) };
229
230        cast(take_values.as_ref(), inner.dtype(), options)?
231    };
232
233    FixedSizeListArray::try_new(
234        ArrowDataType::FixedSizeList(Box::new(inner.clone()), size),
235        list.len(),
236        new_values,
237        list.validity().cloned(),
238    )
239    .map_err(|_| polars_err!(ComputeError: "not all elements have the specified width {size}"))
240}
241
/// Converts a list array whose values are `u8` into a [`BinaryViewArray`], one binary
/// value per list.
///
/// A list slot becomes null in the output when the list itself is null or when any
/// of its values is null.
///
/// The values buffer is cloned and, whenever a view's end offset would exceed
/// `MAX_BUF_SIZE` relative to the start of the current buffer, split at the start of
/// that view so that view offsets stay representable.
///
/// # Errors
///
/// Returns `InvalidOperation` when a single non-null list is longer than `MAX_BUF_SIZE`.
///
/// # Panics
///
/// Panics if the values array of `list` is not a `PrimitiveArray<u8>`.
fn cast_list_uint8_to_binary<O: Offset>(list: &ListArray<O>) -> PolarsResult<BinaryViewArray> {
    let mut views = Vec::with_capacity(list.len());
    // Starts with every slot valid; slots are cleared as nulls are discovered.
    let mut result_validity = MutableBitmap::from_len_set(list.len());

    let u8array: &PrimitiveArray<u8> = list.values().as_any().downcast_ref().unwrap();
    let slice = u8array.values().as_slice();
    let mut cloned_buffers = vec![u8array.values().clone()];
    // Index of the buffer (the last entry of `cloned_buffers`) that new views point into.
    let mut buf_index = 0;
    // Combined length of all buffers that precede the current one.
    let mut previous_buf_lengths = 0;
    let validity = list.validity();
    let internal_validity = list.values().validity();
    let offsets = list.offsets();

    let mut all_views_inline = true;

    // In a View for BinaryViewArray, both length and offset are u32.
    #[cfg(not(test))]
    const MAX_BUF_SIZE: usize = u32::MAX as usize;

    // This allows us to test some invariants without using 4GB of RAM; see mod
    // tests below.
    #[cfg(test)]
    const MAX_BUF_SIZE: usize = 15;

    for index in 0..list.len() {
        // Check if there's a null instead of a list:
        if let Some(validity) = validity {
            // SAFETY: We are generating indexes limited to < list.len().
            debug_assert!(index < validity.len());
            if unsafe { !validity.get_bit_unchecked(index) } {
                debug_assert!(index < result_validity.len());
                unsafe {
                    result_validity.set_unchecked(index, false);
                }
                views.push(View::default());
                continue;
            }
        }

        // SAFETY: We are generating indexes limited to < list.len().
        debug_assert!(index < offsets.len());
        let (start, end) = unsafe { offsets.start_end_unchecked(index) };
        let length = end - start;
        // The length check runs before the inner-null check, so an over-long list is
        // an error even if it would have ended up null.
        polars_ensure!(
            length <= MAX_BUF_SIZE,
            InvalidOperation: format!("when casting to BinaryView, list lengths must be <= {MAX_BUF_SIZE}")
        );

        // Check if the list contains nulls:
        if let Some(internal_validity) = internal_validity {
            if internal_validity.null_count_range(start, length) > 0 {
                debug_assert!(index < result_validity.len());
                unsafe {
                    result_validity.set_unchecked(index, false);
                }
                views.push(View::default());
                continue;
            }
        }

        if end - previous_buf_lengths > MAX_BUF_SIZE {
            // View offsets must fit in u32 (or smaller value when running Rust
            // tests), and we've determined the end of the next view will be
            // past that.
            buf_index += 1;
            // Split the current buffer at the start of this view: the head stays in
            // place and the tail becomes the new current buffer.
            let (previous, next) = cloned_buffers
                .last()
                .unwrap()
                .split_at(start - previous_buf_lengths);
            debug_assert!(previous.len() <= MAX_BUF_SIZE);
            previous_buf_lengths += previous.len();
            *(cloned_buffers.last_mut().unwrap()) = previous;
            cloned_buffers.push(next);
        }
        // The view offset is relative to the start of the current buffer.
        let view = View::new_from_bytes(
            &slice[start..end],
            buf_index,
            (start - previous_buf_lengths) as u32,
        );
        if !view.is_inline() {
            all_views_inline = false;
        }
        // Debug-only check that the view resolves to the bytes it was built from.
        debug_assert_eq!(
            unsafe { view.get_slice_unchecked(&cloned_buffers) },
            &slice[start..end]
        );
        views.push(view);
    }

    // Optimization: don't actually need buffers if Views are all inline.
    if all_views_inline {
        cloned_buffers.clear();
    }

    let result_buffers = cloned_buffers.into_boxed_slice().into();
    let result = if cfg!(debug_assertions) {
        // A safer wrapper around new_unchecked_unknown_md; it shouldn't ever
        // fail in practice.
        BinaryViewArrayGeneric::try_new(
            ArrowDataType::BinaryView,
            views.into(),
            result_buffers,
            result_validity.into(),
        )?
    } else {
        unsafe {
            BinaryViewArrayGeneric::new_unchecked_unknown_md(
                ArrowDataType::BinaryView,
                views.into(),
                result_buffers,
                result_validity.into(),
                // We could compute this ourselves, but we want to make this code
                // match debug_assertions path as much as possible.
                None,
            )
        }
    };

    Ok(result)
}
362
363pub fn cast_default(array: &dyn Array, to_type: &ArrowDataType) -> PolarsResult<Box<dyn Array>> {
364    cast(array, to_type, Default::default())
365}
366
367pub fn cast_unchecked(array: &dyn Array, to_type: &ArrowDataType) -> PolarsResult<Box<dyn Array>> {
368    cast(array, to_type, CastOptionsImpl::unchecked())
369}
370
371/// Cast `array` to the provided data type and return a new [`Array`] with
372/// type `to_type`, if possible.
373///
374/// Behavior:
375/// * PrimitiveArray to PrimitiveArray: overflowing cast will be None
376/// * Boolean to Utf8: `true` => '1', `false` => `0`
377/// * Utf8 to numeric: strings that can't be parsed to numbers return null, float strings
378///   in integer casts return null
379/// * Numeric to boolean: 0 returns `false`, any other value returns `true`
380/// * List to List: the underlying data type is cast
381/// * Fixed Size List to List: the underlying data type is cast
382/// * List to Fixed Size List: the offsets are checked for valid order, then the
383///   underlying type is cast.
384/// * List of UInt8 to Binary: the list of integers becomes binary data, nulls in the list means it becomes a null
385/// * Struct to Struct: the underlying fields are cast.
386/// * PrimitiveArray to List: a list array with 1 value per slot is created
387/// * Date32 and Date64: precision lost when going to higher interval
388/// * Time32 and Time64: precision lost when going to higher interval
389/// * Timestamp and Date{32|64}: precision lost when going to higher interval
390/// * Temporal to/from backing primitive: zero-copy with data type change
391///
392/// Unsupported Casts
393/// * non-`StructArray` to `StructArray` or `StructArray` to non-`StructArray`
394/// * List to primitive (other than UInt8)
395/// * Utf8 to boolean
396/// * Interval and duration
397pub fn cast(
398    array: &dyn Array,
399    to_type: &ArrowDataType,
400    options: CastOptionsImpl,
401) -> PolarsResult<Box<dyn Array>> {
402    use ArrowDataType::*;
403    let from_type = array.dtype();
404
405    // clone array if types are the same
406    if from_type == to_type {
407        return Ok(clone(array));
408    }
409
410    let as_options = options.with_wrapped(true);
411    match (from_type, to_type) {
412        (Null, _) | (_, Null) => Ok(new_null_array(to_type.clone(), array.len())),
413        (Struct(from_fd), Struct(to_fd)) => {
414            polars_ensure!(from_fd.len() == to_fd.len(), InvalidOperation: "Cannot cast struct with different number of fields.");
415            cast_struct(array.as_any().downcast_ref().unwrap(), to_type, options).map(|x| x.boxed())
416        },
417        (Struct(_), _) | (_, Struct(_)) => polars_bail!(InvalidOperation:
418            "Cannot cast from struct to other types"
419        ),
420        (Dictionary(index_type, ..), _) => match_integer_type!(index_type, |$T| {
421            dictionary_cast_dyn::<$T>(array, to_type, options)
422        }),
423        (_, Dictionary(index_type, value_type, _)) => match_integer_type!(index_type, |$T| {
424            cast_to_dictionary::<$T>(array, value_type, options)
425        }),
426        // not supported by polars
427        // (List(_), FixedSizeList(inner, size)) => cast_list_to_fixed_size_list::<i32>(
428        //     array.as_any().downcast_ref().unwrap(),
429        //     inner.as_ref(),
430        //     *size,
431        //     options,
432        // )
433        // .map(|x| x.boxed()),
434        (LargeList(_), FixedSizeList(inner, size)) => cast_list_to_fixed_size_list::<i64>(
435            array.as_any().downcast_ref().unwrap(),
436            inner.as_ref(),
437            *size,
438            options,
439        )
440        .map(|x| x.boxed()),
441        (FixedSizeList(_, _), List(_)) => cast_fixed_size_list_to_list::<i32>(
442            array.as_any().downcast_ref().unwrap(),
443            to_type,
444            options,
445        )
446        .map(|x| x.boxed()),
447        (FixedSizeList(_, _), LargeList(_)) => cast_fixed_size_list_to_list::<i64>(
448            array.as_any().downcast_ref().unwrap(),
449            to_type,
450            options,
451        )
452        .map(|x| x.boxed()),
453        (List(field), BinaryView) if matches!(field.dtype(), UInt8) => {
454            cast_list_uint8_to_binary::<i32>(array.as_any().downcast_ref().unwrap())
455                .map(|arr| arr.boxed())
456        },
457        (LargeList(field), BinaryView) if matches!(field.dtype(), UInt8) => {
458            cast_list_uint8_to_binary::<i64>(array.as_any().downcast_ref().unwrap())
459                .map(|arr| arr.boxed())
460        },
461        (BinaryView, _) => match to_type {
462            Utf8View => array
463                .as_any()
464                .downcast_ref::<BinaryViewArray>()
465                .unwrap()
466                .to_utf8view()
467                .map(|arr| arr.boxed()),
468            LargeBinary => Ok(binview_to::view_to_binary::<i64>(
469                array.as_any().downcast_ref().unwrap(),
470            )
471            .boxed()),
472            LargeList(inner) if matches!(inner.dtype, ArrowDataType::UInt8) => {
473                let bin_array = view_to_binary::<i64>(array.as_any().downcast_ref().unwrap());
474                Ok(binary_to_list(&bin_array, to_type.clone()).boxed())
475            },
476            _ => polars_bail!(InvalidOperation:
477                "casting from {from_type:?} to {to_type:?} not supported",
478            ),
479        },
480        (LargeList(_), LargeList(_)) => {
481            cast_list::<i64>(array.as_any().downcast_ref().unwrap(), to_type, options)
482                .map(|x| x.boxed())
483        },
484        (List(lhs), LargeList(rhs)) if lhs == rhs => {
485            Ok(cast_list_to_large_list(array.as_any().downcast_ref().unwrap(), to_type).boxed())
486        },
487        (LargeList(lhs), List(rhs)) if lhs == rhs => {
488            Ok(cast_large_to_list(array.as_any().downcast_ref().unwrap(), to_type).boxed())
489        },
490
491        (_, List(to)) => {
492            // cast primitive to list's primitive
493            let values = cast(array, &to.dtype, options)?;
494            // create offsets, where if array.len() = 2, we have [0,1,2]
495            let offsets = (0..=array.len() as i32).collect::<Vec<_>>();
496            // SAFETY: offsets _are_ monotonically increasing
497            let offsets = unsafe { Offsets::new_unchecked(offsets) };
498
499            let list_array = ListArray::<i32>::new(to_type.clone(), offsets.into(), values, None);
500
501            Ok(Box::new(list_array))
502        },
503
504        (_, LargeList(to)) if from_type != &LargeBinary => {
505            // cast primitive to list's primitive
506            let values = cast(array, &to.dtype, options)?;
507            // create offsets, where if array.len() = 2, we have [0,1,2]
508            let offsets = (0..=array.len() as i64).collect::<Vec<_>>();
509            // SAFETY: offsets _are_ monotonically increasing
510            let offsets = unsafe { Offsets::new_unchecked(offsets) };
511
512            let list_array = ListArray::<i64>::new(
513                to_type.clone(),
514                offsets.into(),
515                values,
516                array.validity().cloned(),
517            );
518
519            Ok(Box::new(list_array))
520        },
521
522        (Utf8View, _) => {
523            let arr = array.as_any().downcast_ref::<Utf8ViewArray>().unwrap();
524
525            match to_type {
526                BinaryView => Ok(arr.to_binview().boxed()),
527                LargeUtf8 => Ok(binview_to::utf8view_to_utf8::<i64>(arr).boxed()),
528                UInt8 => utf8view_to_primitive_dyn::<u8>(arr, to_type, options),
529                UInt16 => utf8view_to_primitive_dyn::<u16>(arr, to_type, options),
530                UInt32 => utf8view_to_primitive_dyn::<u32>(arr, to_type, options),
531                UInt64 => utf8view_to_primitive_dyn::<u64>(arr, to_type, options),
532                Int8 => utf8view_to_primitive_dyn::<i8>(arr, to_type, options),
533                Int16 => utf8view_to_primitive_dyn::<i16>(arr, to_type, options),
534                Int32 => utf8view_to_primitive_dyn::<i32>(arr, to_type, options),
535                Int64 => utf8view_to_primitive_dyn::<i64>(arr, to_type, options),
536                #[cfg(feature = "dtype-i128")]
537                Int128 => utf8view_to_primitive_dyn::<i128>(arr, to_type, options),
538                Float32 => utf8view_to_primitive_dyn::<f32>(arr, to_type, options),
539                Float64 => utf8view_to_primitive_dyn::<f64>(arr, to_type, options),
540                Timestamp(time_unit, None) => {
541                    utf8view_to_naive_timestamp_dyn(array, time_unit.to_owned())
542                },
543                Timestamp(time_unit, Some(time_zone)) => utf8view_to_timestamp(
544                    array.as_any().downcast_ref().unwrap(),
545                    RFC3339,
546                    time_zone.clone(),
547                    time_unit.to_owned(),
548                )
549                .map(|arr| arr.boxed()),
550                Date32 => utf8view_to_date32_dyn(array),
551                #[cfg(feature = "dtype-decimal")]
552                Decimal(precision, scale) => {
553                    Ok(binview_to_decimal(&arr.to_binview(), Some(*precision), *scale).to_boxed())
554                },
555                _ => polars_bail!(InvalidOperation:
556                    "casting from {from_type:?} to {to_type:?} not supported",
557                ),
558            }
559        },
560
561        (_, Boolean) => match from_type {
562            UInt8 => primitive_to_boolean_dyn::<u8>(array, to_type.clone()),
563            UInt16 => primitive_to_boolean_dyn::<u16>(array, to_type.clone()),
564            UInt32 => primitive_to_boolean_dyn::<u32>(array, to_type.clone()),
565            UInt64 => primitive_to_boolean_dyn::<u64>(array, to_type.clone()),
566            Int8 => primitive_to_boolean_dyn::<i8>(array, to_type.clone()),
567            Int16 => primitive_to_boolean_dyn::<i16>(array, to_type.clone()),
568            Int32 => primitive_to_boolean_dyn::<i32>(array, to_type.clone()),
569            Int64 => primitive_to_boolean_dyn::<i64>(array, to_type.clone()),
570            #[cfg(feature = "dtype-i128")]
571            Int128 => primitive_to_boolean_dyn::<i128>(array, to_type.clone()),
572            Float32 => primitive_to_boolean_dyn::<f32>(array, to_type.clone()),
573            Float64 => primitive_to_boolean_dyn::<f64>(array, to_type.clone()),
574            Decimal(_, _) => primitive_to_boolean_dyn::<i128>(array, to_type.clone()),
575            _ => polars_bail!(InvalidOperation:
576                "casting from {from_type:?} to {to_type:?} not supported",
577            ),
578        },
579        (Boolean, _) => match to_type {
580            UInt8 => boolean_to_primitive_dyn::<u8>(array),
581            UInt16 => boolean_to_primitive_dyn::<u16>(array),
582            UInt32 => boolean_to_primitive_dyn::<u32>(array),
583            UInt64 => boolean_to_primitive_dyn::<u64>(array),
584            Int8 => boolean_to_primitive_dyn::<i8>(array),
585            Int16 => boolean_to_primitive_dyn::<i16>(array),
586            Int32 => boolean_to_primitive_dyn::<i32>(array),
587            Int64 => boolean_to_primitive_dyn::<i64>(array),
588            #[cfg(feature = "dtype-i128")]
589            Int128 => boolean_to_primitive_dyn::<i128>(array),
590            Float32 => boolean_to_primitive_dyn::<f32>(array),
591            Float64 => boolean_to_primitive_dyn::<f64>(array),
592            Utf8View => boolean_to_utf8view_dyn(array),
593            BinaryView => boolean_to_binaryview_dyn(array),
594            _ => polars_bail!(InvalidOperation:
595                "casting from {from_type:?} to {to_type:?} not supported",
596            ),
597        },
598        (_, BinaryView) => from_to_binview(array, from_type, to_type).map(|arr| arr.boxed()),
599        (_, Utf8View) => match from_type {
600            LargeUtf8 => Ok(utf8_to_utf8view(
601                array.as_any().downcast_ref::<Utf8Array<i64>>().unwrap(),
602            )
603            .boxed()),
604            Utf8 => Ok(
605                utf8_to_utf8view(array.as_any().downcast_ref::<Utf8Array<i32>>().unwrap()).boxed(),
606            ),
607            #[cfg(feature = "dtype-decimal")]
608            Decimal(_, _) => Ok(decimal_to_utf8view_dyn(array).boxed()),
609            _ => from_to_binview(array, from_type, to_type)
610                .map(|arr| unsafe { arr.to_utf8view_unchecked() }.boxed()),
611        },
612        (Utf8, _) => match to_type {
613            LargeUtf8 => Ok(Box::new(utf8_to_large_utf8(
614                array.as_any().downcast_ref().unwrap(),
615            ))),
616            _ => polars_bail!(InvalidOperation:
617                "casting from {from_type:?} to {to_type:?} not supported",
618            ),
619        },
620        (LargeUtf8, _) => match to_type {
621            LargeBinary => Ok(utf8_to_binary::<i64>(
622                array.as_any().downcast_ref().unwrap(),
623                to_type.clone(),
624            )
625            .boxed()),
626            _ => polars_bail!(InvalidOperation:
627                "casting from {from_type:?} to {to_type:?} not supported",
628            ),
629        },
630        (_, LargeUtf8) => match from_type {
631            UInt8 => primitive_to_utf8_dyn::<u8, i64>(array),
632            LargeBinary => {
633                binary_to_utf8::<i64>(array.as_any().downcast_ref().unwrap(), to_type.clone())
634                    .map(|x| x.boxed())
635            },
636            _ => polars_bail!(InvalidOperation:
637                "casting from {from_type:?} to {to_type:?} not supported",
638            ),
639        },
640
641        (Binary, _) => match to_type {
642            LargeBinary => Ok(Box::new(binary_to_large_binary(
643                array.as_any().downcast_ref().unwrap(),
644                to_type.clone(),
645            ))),
646            _ => polars_bail!(InvalidOperation:
647                "casting from {from_type:?} to {to_type:?} not supported",
648            ),
649        },
650
651        (LargeBinary, _) => match to_type {
652            UInt8 => binary_to_primitive_dyn::<i64, u8>(array, to_type, options),
653            UInt16 => binary_to_primitive_dyn::<i64, u16>(array, to_type, options),
654            UInt32 => binary_to_primitive_dyn::<i64, u32>(array, to_type, options),
655            UInt64 => binary_to_primitive_dyn::<i64, u64>(array, to_type, options),
656            Int8 => binary_to_primitive_dyn::<i64, i8>(array, to_type, options),
657            Int16 => binary_to_primitive_dyn::<i64, i16>(array, to_type, options),
658            Int32 => binary_to_primitive_dyn::<i64, i32>(array, to_type, options),
659            Int64 => binary_to_primitive_dyn::<i64, i64>(array, to_type, options),
660            #[cfg(feature = "dtype-i128")]
661            Int128 => binary_to_primitive_dyn::<i64, i128>(array, to_type, options),
662            Float32 => binary_to_primitive_dyn::<i64, f32>(array, to_type, options),
663            Float64 => binary_to_primitive_dyn::<i64, f64>(array, to_type, options),
664            Binary => {
665                binary_large_to_binary(array.as_any().downcast_ref().unwrap(), to_type.clone())
666                    .map(|x| x.boxed())
667            },
668            LargeUtf8 => {
669                binary_to_utf8::<i64>(array.as_any().downcast_ref().unwrap(), to_type.clone())
670                    .map(|x| x.boxed())
671            },
672            _ => polars_bail!(InvalidOperation:
673                "casting from {from_type:?} to {to_type:?} not supported",
674            ),
675        },
676        (FixedSizeBinary(_), _) => match to_type {
677            Binary => Ok(fixed_size_binary_binary::<i32>(
678                array.as_any().downcast_ref().unwrap(),
679                to_type.clone(),
680            )
681            .boxed()),
682            LargeBinary => Ok(fixed_size_binary_binary::<i64>(
683                array.as_any().downcast_ref().unwrap(),
684                to_type.clone(),
685            )
686            .boxed()),
687            _ => polars_bail!(InvalidOperation:
688                "casting from {from_type:?} to {to_type:?} not supported",
689            ),
690        },
691        // start numeric casts
692        (UInt8, UInt16) => primitive_to_primitive_dyn::<u8, u16>(array, to_type, as_options),
693        (UInt8, UInt32) => primitive_to_primitive_dyn::<u8, u32>(array, to_type, as_options),
694        (UInt8, UInt64) => primitive_to_primitive_dyn::<u8, u64>(array, to_type, as_options),
695        (UInt8, Int8) => primitive_to_primitive_dyn::<u8, i8>(array, to_type, options),
696        (UInt8, Int16) => primitive_to_primitive_dyn::<u8, i16>(array, to_type, options),
697        (UInt8, Int32) => primitive_to_primitive_dyn::<u8, i32>(array, to_type, options),
698        (UInt8, Int64) => primitive_to_primitive_dyn::<u8, i64>(array, to_type, options),
699        #[cfg(feature = "dtype-i128")]
700        (UInt8, Int128) => primitive_to_primitive_dyn::<u8, i128>(array, to_type, options),
701        (UInt8, Float32) => primitive_to_primitive_dyn::<u8, f32>(array, to_type, as_options),
702        (UInt8, Float64) => primitive_to_primitive_dyn::<u8, f64>(array, to_type, as_options),
703        (UInt8, Decimal(p, s)) => integer_to_decimal_dyn::<u8>(array, *p, *s),
704
705        (UInt16, UInt8) => primitive_to_primitive_dyn::<u16, u8>(array, to_type, options),
706        (UInt16, UInt32) => primitive_to_primitive_dyn::<u16, u32>(array, to_type, as_options),
707        (UInt16, UInt64) => primitive_to_primitive_dyn::<u16, u64>(array, to_type, as_options),
708        (UInt16, Int8) => primitive_to_primitive_dyn::<u16, i8>(array, to_type, options),
709        (UInt16, Int16) => primitive_to_primitive_dyn::<u16, i16>(array, to_type, options),
710        (UInt16, Int32) => primitive_to_primitive_dyn::<u16, i32>(array, to_type, options),
711        (UInt16, Int64) => primitive_to_primitive_dyn::<u16, i64>(array, to_type, options),
712        #[cfg(feature = "dtype-i128")]
713        (UInt16, Int128) => primitive_to_primitive_dyn::<u16, i128>(array, to_type, options),
714        (UInt16, Float32) => primitive_to_primitive_dyn::<u16, f32>(array, to_type, as_options),
715        (UInt16, Float64) => primitive_to_primitive_dyn::<u16, f64>(array, to_type, as_options),
716        (UInt16, Decimal(p, s)) => integer_to_decimal_dyn::<u16>(array, *p, *s),
717
718        (UInt32, UInt8) => primitive_to_primitive_dyn::<u32, u8>(array, to_type, options),
719        (UInt32, UInt16) => primitive_to_primitive_dyn::<u32, u16>(array, to_type, options),
720        (UInt32, UInt64) => primitive_to_primitive_dyn::<u32, u64>(array, to_type, as_options),
721        (UInt32, Int8) => primitive_to_primitive_dyn::<u32, i8>(array, to_type, options),
722        (UInt32, Int16) => primitive_to_primitive_dyn::<u32, i16>(array, to_type, options),
723        (UInt32, Int32) => primitive_to_primitive_dyn::<u32, i32>(array, to_type, options),
724        (UInt32, Int64) => primitive_to_primitive_dyn::<u32, i64>(array, to_type, options),
725        #[cfg(feature = "dtype-i128")]
726        (UInt32, Int128) => primitive_to_primitive_dyn::<u32, i128>(array, to_type, options),
727        (UInt32, Float32) => primitive_to_primitive_dyn::<u32, f32>(array, to_type, as_options),
728        (UInt32, Float64) => primitive_to_primitive_dyn::<u32, f64>(array, to_type, as_options),
729        (UInt32, Decimal(p, s)) => integer_to_decimal_dyn::<u32>(array, *p, *s),
730
731        (UInt64, UInt8) => primitive_to_primitive_dyn::<u64, u8>(array, to_type, options),
732        (UInt64, UInt16) => primitive_to_primitive_dyn::<u64, u16>(array, to_type, options),
733        (UInt64, UInt32) => primitive_to_primitive_dyn::<u64, u32>(array, to_type, options),
734        (UInt64, Int8) => primitive_to_primitive_dyn::<u64, i8>(array, to_type, options),
735        (UInt64, Int16) => primitive_to_primitive_dyn::<u64, i16>(array, to_type, options),
736        (UInt64, Int32) => primitive_to_primitive_dyn::<u64, i32>(array, to_type, options),
737        (UInt64, Int64) => primitive_to_primitive_dyn::<u64, i64>(array, to_type, options),
738        #[cfg(feature = "dtype-i128")]
739        (UInt64, Int128) => primitive_to_primitive_dyn::<u64, i128>(array, to_type, options),
740        (UInt64, Float32) => primitive_to_primitive_dyn::<u64, f32>(array, to_type, as_options),
741        (UInt64, Float64) => primitive_to_primitive_dyn::<u64, f64>(array, to_type, as_options),
742        (UInt64, Decimal(p, s)) => integer_to_decimal_dyn::<u64>(array, *p, *s),
743
744        (Int8, UInt8) => primitive_to_primitive_dyn::<i8, u8>(array, to_type, options),
745        (Int8, UInt16) => primitive_to_primitive_dyn::<i8, u16>(array, to_type, options),
746        (Int8, UInt32) => primitive_to_primitive_dyn::<i8, u32>(array, to_type, options),
747        (Int8, UInt64) => primitive_to_primitive_dyn::<i8, u64>(array, to_type, options),
748        (Int8, Int16) => primitive_to_primitive_dyn::<i8, i16>(array, to_type, as_options),
749        (Int8, Int32) => primitive_to_primitive_dyn::<i8, i32>(array, to_type, as_options),
750        (Int8, Int64) => primitive_to_primitive_dyn::<i8, i64>(array, to_type, as_options),
751        #[cfg(feature = "dtype-i128")]
752        (Int8, Int128) => primitive_to_primitive_dyn::<i8, i128>(array, to_type, as_options),
753        (Int8, Float32) => primitive_to_primitive_dyn::<i8, f32>(array, to_type, as_options),
754        (Int8, Float64) => primitive_to_primitive_dyn::<i8, f64>(array, to_type, as_options),
755        (Int8, Decimal(p, s)) => integer_to_decimal_dyn::<i8>(array, *p, *s),
756
757        (Int16, UInt8) => primitive_to_primitive_dyn::<i16, u8>(array, to_type, options),
758        (Int16, UInt16) => primitive_to_primitive_dyn::<i16, u16>(array, to_type, options),
759        (Int16, UInt32) => primitive_to_primitive_dyn::<i16, u32>(array, to_type, options),
760        (Int16, UInt64) => primitive_to_primitive_dyn::<i16, u64>(array, to_type, options),
761        (Int16, Int8) => primitive_to_primitive_dyn::<i16, i8>(array, to_type, options),
762        (Int16, Int32) => primitive_to_primitive_dyn::<i16, i32>(array, to_type, as_options),
763        (Int16, Int64) => primitive_to_primitive_dyn::<i16, i64>(array, to_type, as_options),
764        #[cfg(feature = "dtype-i128")]
765        (Int16, Int128) => primitive_to_primitive_dyn::<i16, i128>(array, to_type, as_options),
766        (Int16, Float32) => primitive_to_primitive_dyn::<i16, f32>(array, to_type, as_options),
767        (Int16, Float64) => primitive_to_primitive_dyn::<i16, f64>(array, to_type, as_options),
768        (Int16, Decimal(p, s)) => integer_to_decimal_dyn::<i16>(array, *p, *s),
769
770        (Int32, UInt8) => primitive_to_primitive_dyn::<i32, u8>(array, to_type, options),
771        (Int32, UInt16) => primitive_to_primitive_dyn::<i32, u16>(array, to_type, options),
772        (Int32, UInt32) => primitive_to_primitive_dyn::<i32, u32>(array, to_type, options),
773        (Int32, UInt64) => primitive_to_primitive_dyn::<i32, u64>(array, to_type, options),
774        (Int32, Int8) => primitive_to_primitive_dyn::<i32, i8>(array, to_type, options),
775        (Int32, Int16) => primitive_to_primitive_dyn::<i32, i16>(array, to_type, options),
776        (Int32, Int64) => primitive_to_primitive_dyn::<i32, i64>(array, to_type, as_options),
777        #[cfg(feature = "dtype-i128")]
778        (Int32, Int128) => primitive_to_primitive_dyn::<i32, i128>(array, to_type, as_options),
779        (Int32, Float32) => primitive_to_primitive_dyn::<i32, f32>(array, to_type, as_options),
780        (Int32, Float64) => primitive_to_primitive_dyn::<i32, f64>(array, to_type, as_options),
781        (Int32, Decimal(p, s)) => integer_to_decimal_dyn::<i32>(array, *p, *s),
782
783        (Int64, UInt8) => primitive_to_primitive_dyn::<i64, u8>(array, to_type, options),
784        (Int64, UInt16) => primitive_to_primitive_dyn::<i64, u16>(array, to_type, options),
785        (Int64, UInt32) => primitive_to_primitive_dyn::<i64, u32>(array, to_type, options),
786        (Int64, UInt64) => primitive_to_primitive_dyn::<i64, u64>(array, to_type, options),
787        (Int64, Int8) => primitive_to_primitive_dyn::<i64, i8>(array, to_type, options),
788        (Int64, Int16) => primitive_to_primitive_dyn::<i64, i16>(array, to_type, options),
789        (Int64, Int32) => primitive_to_primitive_dyn::<i64, i32>(array, to_type, options),
790        #[cfg(feature = "dtype-i128")]
791        (Int64, Int128) => primitive_to_primitive_dyn::<i64, i128>(array, to_type, options),
792        (Int64, Float32) => primitive_to_primitive_dyn::<i64, f32>(array, to_type, options),
793        (Int64, Float64) => primitive_to_primitive_dyn::<i64, f64>(array, to_type, as_options),
794        (Int64, Decimal(p, s)) => integer_to_decimal_dyn::<i64>(array, *p, *s),
795
796        #[cfg(feature = "dtype-i128")]
797        (Int128, UInt8) => primitive_to_primitive_dyn::<i128, u8>(array, to_type, options),
798        #[cfg(feature = "dtype-i128")]
799        (Int128, UInt16) => primitive_to_primitive_dyn::<i128, u16>(array, to_type, options),
800        #[cfg(feature = "dtype-i128")]
801        (Int128, UInt32) => primitive_to_primitive_dyn::<i128, u32>(array, to_type, options),
802        #[cfg(feature = "dtype-i128")]
803        (Int128, UInt64) => primitive_to_primitive_dyn::<i128, u64>(array, to_type, options),
804        #[cfg(feature = "dtype-i128")]
805        (Int128, Int8) => primitive_to_primitive_dyn::<i128, i8>(array, to_type, options),
806        #[cfg(feature = "dtype-i128")]
807        (Int128, Int16) => primitive_to_primitive_dyn::<i128, i16>(array, to_type, options),
808        #[cfg(feature = "dtype-i128")]
809        (Int128, Int32) => primitive_to_primitive_dyn::<i128, i32>(array, to_type, options),
810        #[cfg(feature = "dtype-i128")]
811        (Int128, Int64) => primitive_to_primitive_dyn::<i128, i64>(array, to_type, options),
812        #[cfg(feature = "dtype-i128")]
813        (Int128, Float32) => primitive_to_primitive_dyn::<i128, f32>(array, to_type, options),
814        #[cfg(feature = "dtype-i128")]
815        (Int128, Float64) => primitive_to_primitive_dyn::<i128, f64>(array, to_type, as_options),
816        #[cfg(feature = "dtype-i128")]
817        (Int128, Decimal(p, s)) => integer_to_decimal_dyn::<i128>(array, *p, *s),
818
819        (Float16, Float32) => {
820            let from = array.as_any().downcast_ref().unwrap();
821            Ok(f16_to_f32(from).boxed())
822        },
823
824        (Float32, UInt8) => primitive_to_primitive_dyn::<f32, u8>(array, to_type, options),
825        (Float32, UInt16) => primitive_to_primitive_dyn::<f32, u16>(array, to_type, options),
826        (Float32, UInt32) => primitive_to_primitive_dyn::<f32, u32>(array, to_type, options),
827        (Float32, UInt64) => primitive_to_primitive_dyn::<f32, u64>(array, to_type, options),
828        (Float32, Int8) => primitive_to_primitive_dyn::<f32, i8>(array, to_type, options),
829        (Float32, Int16) => primitive_to_primitive_dyn::<f32, i16>(array, to_type, options),
830        (Float32, Int32) => primitive_to_primitive_dyn::<f32, i32>(array, to_type, options),
831        (Float32, Int64) => primitive_to_primitive_dyn::<f32, i64>(array, to_type, options),
832        (Float32, Int128) => primitive_to_primitive_dyn::<f32, i128>(array, to_type, options),
833        (Float32, Float64) => primitive_to_primitive_dyn::<f32, f64>(array, to_type, as_options),
834        (Float32, Decimal(p, s)) => float_to_decimal_dyn::<f32>(array, *p, *s),
835
836        (Float64, UInt8) => primitive_to_primitive_dyn::<f64, u8>(array, to_type, options),
837        (Float64, UInt16) => primitive_to_primitive_dyn::<f64, u16>(array, to_type, options),
838        (Float64, UInt32) => primitive_to_primitive_dyn::<f64, u32>(array, to_type, options),
839        (Float64, UInt64) => primitive_to_primitive_dyn::<f64, u64>(array, to_type, options),
840        (Float64, Int8) => primitive_to_primitive_dyn::<f64, i8>(array, to_type, options),
841        (Float64, Int16) => primitive_to_primitive_dyn::<f64, i16>(array, to_type, options),
842        (Float64, Int32) => primitive_to_primitive_dyn::<f64, i32>(array, to_type, options),
843        (Float64, Int64) => primitive_to_primitive_dyn::<f64, i64>(array, to_type, options),
844        (Float64, Int128) => primitive_to_primitive_dyn::<f64, i128>(array, to_type, options),
845        (Float64, Float32) => primitive_to_primitive_dyn::<f64, f32>(array, to_type, options),
846        (Float64, Decimal(p, s)) => float_to_decimal_dyn::<f64>(array, *p, *s),
847
848        (Decimal(_, _), UInt8) => decimal_to_integer_dyn::<u8>(array),
849        (Decimal(_, _), UInt16) => decimal_to_integer_dyn::<u16>(array),
850        (Decimal(_, _), UInt32) => decimal_to_integer_dyn::<u32>(array),
851        (Decimal(_, _), UInt64) => decimal_to_integer_dyn::<u64>(array),
852        (Decimal(_, _), Int8) => decimal_to_integer_dyn::<i8>(array),
853        (Decimal(_, _), Int16) => decimal_to_integer_dyn::<i16>(array),
854        (Decimal(_, _), Int32) => decimal_to_integer_dyn::<i32>(array),
855        (Decimal(_, _), Int64) => decimal_to_integer_dyn::<i64>(array),
856        (Decimal(_, _), Int128) => decimal_to_integer_dyn::<i128>(array),
857        (Decimal(_, _), Float32) => decimal_to_float_dyn::<f32>(array),
858        (Decimal(_, _), Float64) => decimal_to_float_dyn::<f64>(array),
859        (Decimal(_, _), Decimal(to_p, to_s)) => decimal_to_decimal_dyn(array, *to_p, *to_s),
860        // end numeric casts
861
862        // temporal casts
863        (Int32, Date32) => primitive_to_same_primitive_dyn::<i32>(array, to_type),
864        (Int32, Time32(TimeUnit::Second)) => primitive_dyn!(array, int32_to_time32s),
865        (Int32, Time32(TimeUnit::Millisecond)) => primitive_dyn!(array, int32_to_time32ms),
866        // No support for microsecond/nanosecond with i32
867        (Date32, Int32) => primitive_to_same_primitive_dyn::<i32>(array, to_type),
868        (Date32, Int64) => primitive_to_primitive_dyn::<i32, i64>(array, to_type, options),
869        (Time32(_), Int32) => primitive_to_same_primitive_dyn::<i32>(array, to_type),
870        (Int64, Date64) => primitive_to_same_primitive_dyn::<i64>(array, to_type),
871        // No support for second/milliseconds with i64
872        (Int64, Time64(TimeUnit::Microsecond)) => primitive_dyn!(array, int64_to_time64us),
873        (Int64, Time64(TimeUnit::Nanosecond)) => primitive_dyn!(array, int64_to_time64ns),
874
875        (Date64, Int32) => primitive_to_primitive_dyn::<i64, i32>(array, to_type, options),
876        (Date64, Int64) => primitive_to_same_primitive_dyn::<i64>(array, to_type),
877        (Time64(_), Int64) => primitive_to_same_primitive_dyn::<i64>(array, to_type),
878        (Date32, Date64) => primitive_dyn!(array, date32_to_date64),
879        (Date64, Date32) => primitive_dyn!(array, date64_to_date32),
880        (Time32(TimeUnit::Second), Time32(TimeUnit::Millisecond)) => {
881            primitive_dyn!(array, time32s_to_time32ms)
882        },
883        (Time32(TimeUnit::Millisecond), Time32(TimeUnit::Second)) => {
884            primitive_dyn!(array, time32ms_to_time32s)
885        },
886        (Time32(from_unit), Time64(to_unit)) => {
887            primitive_dyn!(array, time32_to_time64, *from_unit, *to_unit)
888        },
889        (Time64(TimeUnit::Microsecond), Time64(TimeUnit::Nanosecond)) => {
890            primitive_dyn!(array, time64us_to_time64ns)
891        },
892        (Time64(TimeUnit::Nanosecond), Time64(TimeUnit::Microsecond)) => {
893            primitive_dyn!(array, time64ns_to_time64us)
894        },
895        (Time64(from_unit), Time32(to_unit)) => {
896            primitive_dyn!(array, time64_to_time32, *from_unit, *to_unit)
897        },
898        (Timestamp(_, _), Int64) => primitive_to_same_primitive_dyn::<i64>(array, to_type),
899        (Int64, Timestamp(_, _)) => primitive_to_same_primitive_dyn::<i64>(array, to_type),
900        (Timestamp(from_unit, _), Timestamp(to_unit, tz)) => {
901            primitive_dyn!(array, timestamp_to_timestamp, *from_unit, *to_unit, tz)
902        },
903        (Timestamp(from_unit, _), Date32) => primitive_dyn!(array, timestamp_to_date32, *from_unit),
904        (Timestamp(from_unit, _), Date64) => primitive_dyn!(array, timestamp_to_date64, *from_unit),
905
906        (Int64, Duration(_)) => primitive_to_same_primitive_dyn::<i64>(array, to_type),
907        (Duration(_), Int64) => primitive_to_same_primitive_dyn::<i64>(array, to_type),
908
909        // Not supported by Polars.
910        // (Interval(IntervalUnit::DayTime), Interval(IntervalUnit::MonthDayNano)) => {
911        //     primitive_dyn!(array, days_ms_to_months_days_ns)
912        // },
913        // (Interval(IntervalUnit::YearMonth), Interval(IntervalUnit::MonthDayNano)) => {
914        //     primitive_dyn!(array, months_to_months_days_ns)
915        // },
916        _ => polars_bail!(InvalidOperation:
917            "casting from {from_type:?} to {to_type:?} not supported",
918        ),
919    }
920}
921
922/// Attempts to encode an array into an `ArrayDictionary` with index
923/// type K and value (dictionary) type value_type
924///
925/// K is the key type
926fn cast_to_dictionary<K: DictionaryKey>(
927    array: &dyn Array,
928    dict_value_type: &ArrowDataType,
929    options: CastOptionsImpl,
930) -> PolarsResult<Box<dyn Array>> {
931    let array = cast(array, dict_value_type, options)?;
932    let array = array.as_ref();
933    match *dict_value_type {
934        ArrowDataType::Int8 => primitive_to_dictionary_dyn::<i8, K>(array),
935        ArrowDataType::Int16 => primitive_to_dictionary_dyn::<i16, K>(array),
936        ArrowDataType::Int32 => primitive_to_dictionary_dyn::<i32, K>(array),
937        ArrowDataType::Int64 => primitive_to_dictionary_dyn::<i64, K>(array),
938        ArrowDataType::UInt8 => primitive_to_dictionary_dyn::<u8, K>(array),
939        ArrowDataType::UInt16 => primitive_to_dictionary_dyn::<u16, K>(array),
940        ArrowDataType::UInt32 => primitive_to_dictionary_dyn::<u32, K>(array),
941        ArrowDataType::UInt64 => primitive_to_dictionary_dyn::<u64, K>(array),
942        ArrowDataType::BinaryView => {
943            binview_to_dictionary::<K>(array.as_any().downcast_ref().unwrap())
944                .map(|arr| arr.boxed())
945        },
946        ArrowDataType::Utf8View => {
947            utf8view_to_dictionary::<K>(array.as_any().downcast_ref().unwrap())
948                .map(|arr| arr.boxed())
949        },
950        ArrowDataType::LargeUtf8 => utf8_to_dictionary_dyn::<i64, K>(array),
951        ArrowDataType::LargeBinary => binary_to_dictionary_dyn::<i64, K>(array),
952        ArrowDataType::Time64(_) => primitive_to_dictionary_dyn::<i64, K>(array),
953        ArrowDataType::Timestamp(_, _) => primitive_to_dictionary_dyn::<i64, K>(array),
954        ArrowDataType::Date32 => primitive_to_dictionary_dyn::<i32, K>(array),
955        _ => polars_bail!(ComputeError:
956            "unsupported output type for dictionary packing: {dict_value_type:?}"
957        ),
958    }
959}
960
961fn from_to_binview(
962    array: &dyn Array,
963    from_type: &ArrowDataType,
964    to_type: &ArrowDataType,
965) -> PolarsResult<BinaryViewArray> {
966    use ArrowDataType::*;
967    let binview = match from_type {
968        UInt8 => primitive_to_binview_dyn::<u8>(array),
969        UInt16 => primitive_to_binview_dyn::<u16>(array),
970        UInt32 => primitive_to_binview_dyn::<u32>(array),
971        UInt64 => primitive_to_binview_dyn::<u64>(array),
972        Int8 => primitive_to_binview_dyn::<i8>(array),
973        Int16 => primitive_to_binview_dyn::<i16>(array),
974        Int32 => primitive_to_binview_dyn::<i32>(array),
975        Int64 => primitive_to_binview_dyn::<i64>(array),
976        Int128 => primitive_to_binview_dyn::<i128>(array),
977        Float32 => primitive_to_binview_dyn::<f32>(array),
978        Float64 => primitive_to_binview_dyn::<f64>(array),
979        Binary => binary_to_binview::<i32>(array.as_any().downcast_ref().unwrap()),
980        FixedSizeBinary(_) => fixed_size_binary_to_binview(array.as_any().downcast_ref().unwrap()),
981        LargeBinary => binary_to_binview::<i64>(array.as_any().downcast_ref().unwrap()),
982        _ => polars_bail!(InvalidOperation:
983            "casting from {from_type:?} to {to_type:?} not supported",
984        ),
985    };
986    Ok(binview)
987}
988
#[cfg(test)]
mod tests {
    use arrow::offset::OffsetsBuffer;
    use polars_error::PolarsError;

    use super::*;

    /// `List<UInt8>` data type with an unnamed, nullable inner field.
    fn list_u8_dtype() -> ArrowDataType {
        ArrowDataType::List(Box::new(Field::new("".into(), ArrowDataType::UInt8, true)))
    }

    /// Casts `list` to `BinaryView` using the default cast options.
    fn to_binview(list: &dyn Array) -> PolarsResult<Box<dyn Array>> {
        cast(list, &ArrowDataType::BinaryView, CastOptionsImpl::default())
    }

    /// Copies every value of `arr` into an owned byte vector.
    fn owned_values(arr: &BinaryViewArray) -> Vec<Vec<u8>> {
        arr.values_iter()
            .map(|bytes| bytes.to_vec())
            .collect::<Vec<Vec<u8>>>()
    }

    /// Under `cfg(test)` the list-to-binview cast works with a very small
    /// maximum buffer size (15 bytes, judging by the buffer lengths and the
    /// error message asserted in this module), so the values have to be
    /// spread over several data buffers.
    #[test]
    fn cast_list_uint8_to_binary_across_buffer_max_size() {
        let values = PrimitiveArray::from_slice((0u8..20).collect::<Vec<_>>()).boxed();
        // SAFETY: the offsets start at 0 and are non-decreasing.
        let offsets = unsafe { OffsetsBuffer::new_unchecked(vec![0, 13, 18, 20].into()) };
        let list_u8 = ListArray::try_new(list_u8_dtype(), offsets, values, None).unwrap();

        let binary = to_binview(&list_u8).unwrap();
        let binary_array: &BinaryViewArray = binary.as_ref().as_any().downcast_ref().unwrap();

        // The three sub-lists cover 0..13, 13..18 and 18..20.
        let expected: Vec<Vec<u8>> = vec![
            (0u8..13).collect(),
            (13u8..18).collect(),
            (18u8..20).collect(),
        ];
        assert_eq!(owned_values(binary_array), expected);

        // max offset of 15 so we need to split:
        let buffer_lens = binary_array
            .data_buffers()
            .iter()
            .map(|buf| buf.len())
            .collect::<Vec<_>>();
        assert_eq!(buffer_lens, vec![13, 7]);
    }

    /// Arrow spec requires views to fit in a single buffer. When cfg(test),
    /// buffers generated by the cast hold at most 15 bytes, so a list of
    /// length 16 should cause an error.
    #[test]
    fn cast_list_uint8_to_binary_errors_too_large_list() {
        let values = PrimitiveArray::from_slice(vec![0u8; 16]);
        let list_u8 = ListArray::new(
            list_u8_dtype(),
            OffsetsBuffer::one_with_length(16),
            values.boxed(),
            None,
        );

        let err = to_binview(&list_u8).unwrap_err();
        let is_expected_error = matches!(
            err,
            PolarsError::InvalidOperation(msg)
                if msg.as_ref() == "when casting to BinaryView, list lengths must be <= 15"
        );
        assert!(is_expected_error);
    }

    /// When every view is at most 12 bytes long, the result carries no data
    /// buffers because all views are stored inline.
    #[test]
    fn cast_list_uint8_to_binary_drops_small_buffers() {
        let values = PrimitiveArray::from_slice(vec![10u8; 12]);
        let list_u8 = ListArray::new(
            list_u8_dtype(),
            OffsetsBuffer::one_with_length(12),
            values.boxed(),
            None,
        );

        let binary = to_binview(&list_u8).unwrap();
        let binary_array: &BinaryViewArray = binary.as_ref().as_any().downcast_ref().unwrap();

        assert!(binary_array.data_buffers().is_empty());
        assert_eq!(owned_values(binary_array), vec![vec![10u8; 12]]);
    }
}