polars_arrow/datatypes/
field.rs

1use std::sync::Arc;
2
3use polars_utils::pl_str::PlSmallStr;
4#[cfg(feature = "serde")]
5use serde::{Deserialize, Serialize};
6
7use super::{ArrowDataType, Metadata};
8
// Metadata keys that mark a field as an enum (see `Field::is_enum`).
// Both keys use the same encoding, but because older versions of Polars
// were unable to read non-u32-key arrow dictionaries while `_PL_ENUM_VALUES`
// is set, a second key was introduced.
/// Legacy metadata key for enum values.
pub static DTYPE_ENUM_VALUES_LEGACY: &str = "_PL_ENUM_VALUES";
/// Current metadata key for enum values.
pub static DTYPE_ENUM_VALUES_NEW: &str = "_PL_ENUM_VALUES2";

// Metadata keys that mark a field as a categorical (see `Field::is_categorical`).
// Unlike the enum keys, these two have different encodings.
/// Legacy metadata key for categoricals.
pub static DTYPE_CATEGORICAL_LEGACY: &str = "_PL_CATEGORICAL";
/// Current metadata key for categoricals.
pub static DTYPE_CATEGORICAL_NEW: &str = "_PL_CATEGORICAL2";
18
19/// Represents Arrow's metadata of a "column".
20///
21/// A [`Field`] is the closest representation of the traditional "column": a logical type
22/// ([`ArrowDataType`]) with a name and nullability.
23/// A Field has optional [`Metadata`] that can be used to annotate the field with custom metadata.
24///
25/// Almost all IO in this crate uses [`Field`] to represent logical information about the data
26/// to be serialized.
27#[derive(Debug, Clone, Eq, PartialEq, Hash, Default)]
28#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
29#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
30pub struct Field {
31    /// Its name
32    pub name: PlSmallStr,
33    /// Its logical [`ArrowDataType`]
34    pub dtype: ArrowDataType,
35    /// Its nullability
36    pub is_nullable: bool,
37    /// Additional custom (opaque) metadata.
38    pub metadata: Option<Arc<Metadata>>,
39}
40
41/// Support for `ArrowSchema::from_iter([field, ..])`
42impl From<Field> for (PlSmallStr, Field) {
43    fn from(value: Field) -> Self {
44        (value.name.clone(), value)
45    }
46}
47
48impl Field {
49    /// Creates a new [`Field`].
50    pub fn new(name: PlSmallStr, dtype: ArrowDataType, is_nullable: bool) -> Self {
51        Field {
52            name,
53            dtype,
54            is_nullable,
55            metadata: Default::default(),
56        }
57    }
58
59    /// Creates a new [`Field`] with metadata.
60    #[inline]
61    pub fn with_metadata(self, metadata: Metadata) -> Self {
62        if metadata.is_empty() {
63            return self;
64        }
65        Self {
66            name: self.name,
67            dtype: self.dtype,
68            is_nullable: self.is_nullable,
69            metadata: Some(Arc::new(metadata)),
70        }
71    }
72
73    /// Returns the [`Field`]'s [`ArrowDataType`].
74    #[inline]
75    pub fn dtype(&self) -> &ArrowDataType {
76        &self.dtype
77    }
78
79    pub fn is_enum(&self) -> bool {
80        if let Some(md) = &self.metadata {
81            md.get(DTYPE_ENUM_VALUES_LEGACY).is_some() || md.get(DTYPE_ENUM_VALUES_NEW).is_some()
82        } else {
83            false
84        }
85    }
86
87    pub fn is_categorical(&self) -> bool {
88        if let Some(md) = &self.metadata {
89            md.get(DTYPE_CATEGORICAL_LEGACY).is_some() || md.get(DTYPE_CATEGORICAL_NEW).is_some()
90        } else {
91            false
92        }
93    }
94
95    pub fn map_dtype(mut self, f: impl FnOnce(ArrowDataType) -> ArrowDataType) -> Self {
96        self.dtype = f(self.dtype);
97        self
98    }
99
100    pub fn map_dtype_mut(&mut self, f: impl FnOnce(&mut ArrowDataType)) {
101        f(&mut self.dtype);
102    }
103
104    pub fn with_dtype(&self, dtype: ArrowDataType) -> Self {
105        let mut field = self.clone();
106        field.dtype = dtype;
107        field
108    }
109}