polars_core/series/
comparison.rs

1//! Comparison operations on Series.
2
3use polars_error::feature_gated;
4
5use crate::prelude::*;
6use crate::series::arithmetic::coerce_lhs_rhs;
7use crate::series::nulls::replace_non_null;
8
/// Expands to the body of an equality-style `Series` vs `Series` comparison.
///
/// * `$self`, `$rhs` - the two operands.
/// * `$method` - the comparison method invoked on the typed arrays
///   (the impls in this file pass `equal`, `equal_missing`, `not_equal`
///   and `not_equal_missing`).
///
/// The expansion uses `?` and `return`, so it has to be used as the body of a
/// function whose return type is a `PolarsResult`.
///
/// Steps performed by the expansion:
/// 1. `validate_types` rejects dtype pairs that cannot be compared.
/// 2. The lengths must be equal, or one side must have length 1 (broadcast).
/// 3. With the `dtype-categorical` feature, categorical/enum operands are
///    handled by dedicated arms that return early.
/// 4. Otherwise both sides are coerced via `coerce_lhs_rhs`, converted to their
///    physical representation and dispatched on the left dtype.
/// 5. The result is renamed after the (coerced) left operand.
macro_rules! impl_eq_compare {
    ($self:expr, $rhs:expr, $method:ident) => {{
        use DataType::*;
        let (lhs, rhs) = ($self, $rhs);
        validate_types(lhs.dtype(), rhs.dtype())?;

        polars_ensure!(
            lhs.len() == rhs.len() ||

            // Broadcast
            lhs.len() == 1 ||
            rhs.len() == 1,
            ShapeMismatch: "could not compare between two series of different length ({} != {})",
            lhs.len(),
            rhs.len()
        );

        // Categorical / enum operands never reach the generic path below: each
        // arm returns directly, skipping the coercion and the final rename.
        #[cfg(feature = "dtype-categorical")]
        match (lhs.dtype(), rhs.dtype()) {
            (Categorical(lcats, _), Categorical(rcats, _)) => {
                ensure_same_categories(lcats, rcats)?;
                return with_match_categorical_physical_type!(lcats.physical(), |$C| {
                    lhs.cat::<$C>().unwrap().$method(rhs.cat::<$C>().unwrap())
                })
            },
            (Enum(lfcats, _), Enum(rfcats, _)) => {
                ensure_same_frozen_categories(lfcats, rfcats)?;
                return with_match_categorical_physical_type!(lfcats.physical(), |$C| {
                    lhs.cat::<$C>().unwrap().$method(rhs.cat::<$C>().unwrap())
                })
            },
            (Categorical(_, _) | Enum(_, _), String) => {
                return with_match_categorical_physical_type!(lhs.dtype().cat_physical().unwrap(), |$C| {
                    Ok(lhs.cat::<$C>().unwrap().$method(rhs.str().unwrap()))
                })
            },
            (String, Categorical(_, _) | Enum(_, _)) => {
                // The operands are swapped here: the categorical side is the
                // receiver and the string side is the argument.
                return with_match_categorical_physical_type!(rhs.dtype().cat_physical().unwrap(), |$C| {
                    Ok(rhs.cat::<$C>().unwrap().$method(lhs.str().unwrap()))
                })
            },
            _ => (),
        };

        // Bring both sides to a common dtype; the underlying coercion error is
        // replaced by a message naming both operands.
        let (lhs, rhs) = coerce_lhs_rhs(lhs, rhs)
            .map_err(|_| polars_err!(
                    SchemaMismatch: "could not evaluate comparison between series '{}' of dtype: {} and series '{}' of dtype: {}",
                    lhs.name(), lhs.dtype(), rhs.name(), rhs.dtype()
            ))?;
        let lhs = lhs.to_physical_repr();
        let rhs = rhs.to_physical_repr();
        // Dispatch on the left dtype only; the right side is unwrapped as the
        // same type.
        let mut out = match lhs.dtype() {
            Null => lhs.null().unwrap().$method(rhs.null().unwrap()),
            Boolean => lhs.bool().unwrap().$method(rhs.bool().unwrap()),
            String => lhs.str().unwrap().$method(rhs.str().unwrap()),
            Binary => lhs.binary().unwrap().$method(rhs.binary().unwrap()),
            UInt8 => feature_gated!("dtype-u8", lhs.u8().unwrap().$method(rhs.u8().unwrap())),
            UInt16 => feature_gated!("dtype-u16", lhs.u16().unwrap().$method(rhs.u16().unwrap())),
            UInt32 => lhs.u32().unwrap().$method(rhs.u32().unwrap()),
            UInt64 => lhs.u64().unwrap().$method(rhs.u64().unwrap()),
            Int8 => feature_gated!("dtype-i8", lhs.i8().unwrap().$method(rhs.i8().unwrap())),
            Int16 => feature_gated!("dtype-i16", lhs.i16().unwrap().$method(rhs.i16().unwrap())),
            Int32 => lhs.i32().unwrap().$method(rhs.i32().unwrap()),
            Int64 => lhs.i64().unwrap().$method(rhs.i64().unwrap()),
            Int128 => feature_gated!("dtype-i128", lhs.i128().unwrap().$method(rhs.i128().unwrap())),
            Float32 => lhs.f32().unwrap().$method(rhs.f32().unwrap()),
            Float64 => lhs.f64().unwrap().$method(rhs.f64().unwrap()),
            List(_) => lhs.list().unwrap().$method(rhs.list().unwrap()),
            #[cfg(feature = "dtype-array")]
            Array(_, _) => lhs.array().unwrap().$method(rhs.array().unwrap()),
            #[cfg(feature = "dtype-struct")]
            Struct(_) => lhs.struct_().unwrap().$method(rhs.struct_().unwrap()),

            dt => polars_bail!(InvalidOperation: "could not apply comparison on series of dtype '{}'; operand names: '{}', '{}'", dt, lhs.name(), rhs.name()),
        };
        out.rename(lhs.name().clone());
        PolarsResult::Ok(out)
    }};
}
88
/// Bails with an `InvalidOperation` error stating that the ordering comparison
/// `$op` cannot be performed between the two given series.
///
/// Both operands must expose `name()` and `dtype()`; `$op` is the operator
/// symbol as a string literal.
macro_rules! bail_invalid_ineq {
    ($left:expr, $right:expr, $operator:literal) => {
        polars_bail!(
            InvalidOperation: "cannot perform '{}' comparison between series '{}' of dtype: {} and series '{}' of dtype: {}",
            $operator,
            $left.name(),
            $left.dtype(),
            $right.name(),
            $right.dtype(),
        )
    };
}
99
/// Expands to the body of an ordering `Series` vs `Series` comparison.
///
/// * `$self`, `$rhs` - the two operands.
/// * `$method` - the comparison method invoked on the typed arrays
///   (`gt`, `gt_eq`, `lt` or `lt_eq` in this file).
/// * `$op` - the operator symbol, used only in error messages.
/// * `$rev_method` - the mirrored comparison, used when the operands have to
///   be swapped (string on the left, categorical/enum on the right).
///
/// The expansion uses `?` and `return`, so it has to be used as the body of a
/// function whose return type is a `PolarsResult`.
///
/// Steps performed by the expansion:
/// 1. `validate_types` rejects dtype pairs that cannot be compared.
/// 2. The lengths must be equal, or one side must have length 1 (broadcast).
/// 3. With the `dtype-categorical` feature, categorical/enum operands are
///    handled by dedicated arms that return early.
/// 4. Otherwise both sides are coerced via `coerce_lhs_rhs`, converted to their
///    physical representation and dispatched on the left dtype. List, array
///    and struct dtypes bail with an `InvalidOperation` error.
/// 5. The result is renamed after the (coerced) left operand.
macro_rules! impl_ineq_compare {
    ($self:expr, $rhs:expr, $method:ident, $op:literal, $rev_method:ident) => {{
        use DataType::*;
        let (lhs, rhs) = ($self, $rhs);
        validate_types(lhs.dtype(), rhs.dtype())?;

        polars_ensure!(
            lhs.len() == rhs.len() ||

            // Broadcast
            lhs.len() == 1 ||
            rhs.len() == 1,
            ShapeMismatch:
                "could not perform '{}' comparison between series '{}' of length: {} and series '{}' of length: {}, because they have different lengths",
            $op,
            lhs.name(), lhs.len(),
            rhs.name(), rhs.len()
        );

        // Categorical / enum operands never reach the generic path below: each
        // arm returns directly, skipping the coercion and the final rename.
        #[cfg(feature = "dtype-categorical")]
        match (lhs.dtype(), rhs.dtype()) {
            (Categorical(lcats, _), Categorical(rcats, _)) => {
                ensure_same_categories(lcats, rcats)?;
                return with_match_categorical_physical_type!(lcats.physical(), |$C| {
                    lhs.cat::<$C>().unwrap().$method(rhs.cat::<$C>().unwrap())
                })
            },
            (Enum(lfcats, _), Enum(rfcats, _)) => {
                ensure_same_frozen_categories(lfcats, rfcats)?;
                return with_match_categorical_physical_type!(lfcats.physical(), |$C| {
                    lhs.cat::<$C>().unwrap().$method(rhs.cat::<$C>().unwrap())
                })
            },
            (Categorical(_, _) | Enum(_, _), String) => {
                return with_match_categorical_physical_type!(lhs.dtype().cat_physical().unwrap(), |$C| {
                    lhs.cat::<$C>().unwrap().$method(rhs.str().unwrap())
                })
            },
            (String, Categorical(_, _) | Enum(_, _)) => {
                return with_match_categorical_physical_type!(rhs.dtype().cat_physical().unwrap(), |$C| {
                    // We use the reverse method as string <-> enum comparisons are only implemented one-way.
                    rhs.cat::<$C>().unwrap().$rev_method(lhs.str().unwrap())
                })
            },
            _ => (),
        };

        // Bring both sides to a common dtype; the underlying coercion error is
        // replaced by a message naming the operator and both operands.
        let (lhs, rhs) = coerce_lhs_rhs(lhs, rhs).map_err(|_|
            polars_err!(
                SchemaMismatch: "could not evaluate '{}' comparison between series '{}' of dtype: {} and series '{}' of dtype: {}",
                $op,
                lhs.name(), lhs.dtype(),
                rhs.name(), rhs.dtype()
            )
        )?;
        let lhs = lhs.to_physical_repr();
        let rhs = rhs.to_physical_repr();
        // Dispatch on the left dtype only; the right side is unwrapped as the
        // same type.
        let mut out = match lhs.dtype() {
            Null => lhs.null().unwrap().$method(rhs.null().unwrap()),
            Boolean => lhs.bool().unwrap().$method(rhs.bool().unwrap()),
            String => lhs.str().unwrap().$method(rhs.str().unwrap()),
            Binary => lhs.binary().unwrap().$method(rhs.binary().unwrap()),
            UInt8 => feature_gated!("dtype-u8", lhs.u8().unwrap().$method(rhs.u8().unwrap())),
            UInt16 => feature_gated!("dtype-u16", lhs.u16().unwrap().$method(rhs.u16().unwrap())),
            UInt32 => lhs.u32().unwrap().$method(rhs.u32().unwrap()),
            UInt64 => lhs.u64().unwrap().$method(rhs.u64().unwrap()),
            Int8 => feature_gated!("dtype-i8", lhs.i8().unwrap().$method(rhs.i8().unwrap())),
            Int16 => feature_gated!("dtype-i16", lhs.i16().unwrap().$method(rhs.i16().unwrap())),
            Int32 => lhs.i32().unwrap().$method(rhs.i32().unwrap()),
            Int64 => lhs.i64().unwrap().$method(rhs.i64().unwrap()),
            Int128 => feature_gated!("dtype-i128", lhs.i128().unwrap().$method(rhs.i128().unwrap())),
            Float32 => lhs.f32().unwrap().$method(rhs.f32().unwrap()),
            Float64 => lhs.f64().unwrap().$method(rhs.f64().unwrap()),
            // Nested dtypes are rejected for ordering comparisons.
            List(_) => bail_invalid_ineq!(lhs, rhs, $op),
            #[cfg(feature = "dtype-array")]
            Array(_, _) => bail_invalid_ineq!(lhs, rhs, $op),
            #[cfg(feature = "dtype-struct")]
            Struct(_) => bail_invalid_ineq!(lhs, rhs, $op),

            dt => polars_bail!(InvalidOperation: "could not apply comparison on series of dtype '{}'; operand names: '{}', '{}'", dt, lhs.name(), rhs.name()),
        };
        out.rename(lhs.name().clone());
        PolarsResult::Ok(out)
    }};
}
185
186fn validate_types(left: &DataType, right: &DataType) -> PolarsResult<()> {
187    use DataType::*;
188
189    match (left, right) {
190        (String, dt) | (dt, String) if dt.is_primitive_numeric() => {
191            polars_bail!(ComputeError: "cannot compare string with numeric type ({})", dt)
192        },
193        #[cfg(feature = "dtype-categorical")]
194        (Categorical(_, _) | Enum(_, _), dt) | (dt, Categorical(_, _) | Enum(_, _))
195            if !(dt.is_categorical() | dt.is_string() | dt.is_enum()) =>
196        {
197            polars_bail!(ComputeError: "cannot compare categorical with {}", dt);
198        },
199        _ => (),
200    };
201    Ok(())
202}
203
204impl ChunkCompareEq<&Series> for Series {
205    type Item = PolarsResult<BooleanChunked>;
206
207    /// Create a boolean mask by checking for equality.
208    fn equal(&self, rhs: &Series) -> Self::Item {
209        impl_eq_compare!(self, rhs, equal)
210    }
211
212    /// Create a boolean mask by checking for equality.
213    fn equal_missing(&self, rhs: &Series) -> Self::Item {
214        impl_eq_compare!(self, rhs, equal_missing)
215    }
216
217    /// Create a boolean mask by checking for inequality.
218    fn not_equal(&self, rhs: &Series) -> Self::Item {
219        impl_eq_compare!(self, rhs, not_equal)
220    }
221
222    /// Create a boolean mask by checking for inequality.
223    fn not_equal_missing(&self, rhs: &Series) -> Self::Item {
224        impl_eq_compare!(self, rhs, not_equal_missing)
225    }
226}
227
228impl ChunkCompareIneq<&Series> for Series {
229    type Item = PolarsResult<BooleanChunked>;
230
231    /// Create a boolean mask by checking if self > rhs.
232    fn gt(&self, rhs: &Series) -> Self::Item {
233        impl_ineq_compare!(self, rhs, gt, ">", lt)
234    }
235
236    /// Create a boolean mask by checking if self >= rhs.
237    fn gt_eq(&self, rhs: &Series) -> Self::Item {
238        impl_ineq_compare!(self, rhs, gt_eq, ">=", lt_eq)
239    }
240
241    /// Create a boolean mask by checking if self < rhs.
242    fn lt(&self, rhs: &Series) -> Self::Item {
243        impl_ineq_compare!(self, rhs, lt, "<", gt)
244    }
245
246    /// Create a boolean mask by checking if self <= rhs.
247    fn lt_eq(&self, rhs: &Series) -> Self::Item {
248        impl_ineq_compare!(self, rhs, lt_eq, "<=", gt_eq)
249    }
250}
251
252impl<Rhs> ChunkCompareEq<Rhs> for Series
253where
254    Rhs: NumericNative,
255{
256    type Item = PolarsResult<BooleanChunked>;
257
258    fn equal(&self, rhs: Rhs) -> Self::Item {
259        validate_types(self.dtype(), &DataType::Int8)?;
260        let s = self.to_physical_repr();
261        Ok(apply_method_physical_numeric!(&s, equal, rhs))
262    }
263
264    fn equal_missing(&self, rhs: Rhs) -> Self::Item {
265        validate_types(self.dtype(), &DataType::Int8)?;
266        let s = self.to_physical_repr();
267        Ok(apply_method_physical_numeric!(&s, equal_missing, rhs))
268    }
269
270    fn not_equal(&self, rhs: Rhs) -> Self::Item {
271        validate_types(self.dtype(), &DataType::Int8)?;
272        let s = self.to_physical_repr();
273        Ok(apply_method_physical_numeric!(&s, not_equal, rhs))
274    }
275
276    fn not_equal_missing(&self, rhs: Rhs) -> Self::Item {
277        validate_types(self.dtype(), &DataType::Int8)?;
278        let s = self.to_physical_repr();
279        Ok(apply_method_physical_numeric!(&s, not_equal_missing, rhs))
280    }
281}
282
283impl<Rhs> ChunkCompareIneq<Rhs> for Series
284where
285    Rhs: NumericNative,
286{
287    type Item = PolarsResult<BooleanChunked>;
288
289    fn gt(&self, rhs: Rhs) -> Self::Item {
290        validate_types(self.dtype(), &DataType::Int8)?;
291        let s = self.to_physical_repr();
292        Ok(apply_method_physical_numeric!(&s, gt, rhs))
293    }
294
295    fn gt_eq(&self, rhs: Rhs) -> Self::Item {
296        validate_types(self.dtype(), &DataType::Int8)?;
297        let s = self.to_physical_repr();
298        Ok(apply_method_physical_numeric!(&s, gt_eq, rhs))
299    }
300
301    fn lt(&self, rhs: Rhs) -> Self::Item {
302        validate_types(self.dtype(), &DataType::Int8)?;
303        let s = self.to_physical_repr();
304        Ok(apply_method_physical_numeric!(&s, lt, rhs))
305    }
306
307    fn lt_eq(&self, rhs: Rhs) -> Self::Item {
308        validate_types(self.dtype(), &DataType::Int8)?;
309        let s = self.to_physical_repr();
310        Ok(apply_method_physical_numeric!(&s, lt_eq, rhs))
311    }
312}
313
314impl ChunkCompareEq<&str> for Series {
315    type Item = PolarsResult<BooleanChunked>;
316
317    fn equal(&self, rhs: &str) -> PolarsResult<BooleanChunked> {
318        validate_types(self.dtype(), &DataType::String)?;
319        match self.dtype() {
320            DataType::String => Ok(self.str().unwrap().equal(rhs)),
321            #[cfg(feature = "dtype-categorical")]
322            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
323                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
324                    self.cat::<$C>().unwrap().equal(rhs)
325                }),
326            ),
327            _ => Ok(BooleanChunked::full(self.name().clone(), false, self.len())),
328        }
329    }
330
331    fn equal_missing(&self, rhs: &str) -> Self::Item {
332        validate_types(self.dtype(), &DataType::String)?;
333        match self.dtype() {
334            DataType::String => Ok(self.str().unwrap().equal_missing(rhs)),
335            #[cfg(feature = "dtype-categorical")]
336            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
337                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
338                    self.cat::<$C>().unwrap().equal_missing(rhs)
339                }),
340            ),
341            _ => Ok(replace_non_null(
342                self.name().clone(),
343                self.0.chunks(),
344                false,
345            )),
346        }
347    }
348
349    fn not_equal(&self, rhs: &str) -> PolarsResult<BooleanChunked> {
350        validate_types(self.dtype(), &DataType::String)?;
351        match self.dtype() {
352            DataType::String => Ok(self.str().unwrap().not_equal(rhs)),
353            #[cfg(feature = "dtype-categorical")]
354            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
355                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
356                    self.cat::<$C>().unwrap().not_equal(rhs)
357                }),
358            ),
359            _ => Ok(BooleanChunked::full(self.name().clone(), true, self.len())),
360        }
361    }
362
363    fn not_equal_missing(&self, rhs: &str) -> Self::Item {
364        validate_types(self.dtype(), &DataType::String)?;
365        match self.dtype() {
366            DataType::String => Ok(self.str().unwrap().not_equal_missing(rhs)),
367            #[cfg(feature = "dtype-categorical")]
368            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
369                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
370                    self.cat::<$C>().unwrap().not_equal_missing(rhs)
371                }),
372            ),
373            _ => Ok(replace_non_null(self.name().clone(), self.0.chunks(), true)),
374        }
375    }
376}
377
378impl ChunkCompareIneq<&str> for Series {
379    type Item = PolarsResult<BooleanChunked>;
380
381    fn gt(&self, rhs: &str) -> Self::Item {
382        validate_types(self.dtype(), &DataType::String)?;
383        match self.dtype() {
384            DataType::String => Ok(self.str().unwrap().gt(rhs)),
385            #[cfg(feature = "dtype-categorical")]
386            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
387                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
388                    self.cat::<$C>().unwrap().gt(rhs)
389                }),
390            ),
391            _ => polars_bail!(
392                ComputeError: "cannot compare str value to series of type {}", self.dtype(),
393            ),
394        }
395    }
396
397    fn gt_eq(&self, rhs: &str) -> Self::Item {
398        validate_types(self.dtype(), &DataType::String)?;
399        match self.dtype() {
400            DataType::String => Ok(self.str().unwrap().gt_eq(rhs)),
401            #[cfg(feature = "dtype-categorical")]
402            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
403                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
404                    self.cat::<$C>().unwrap().gt_eq(rhs)
405                }),
406            ),
407            _ => polars_bail!(
408                ComputeError: "cannot compare str value to series of type {}", self.dtype(),
409            ),
410        }
411    }
412
413    fn lt(&self, rhs: &str) -> Self::Item {
414        validate_types(self.dtype(), &DataType::String)?;
415        match self.dtype() {
416            DataType::String => Ok(self.str().unwrap().lt(rhs)),
417            #[cfg(feature = "dtype-categorical")]
418            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
419                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
420                    self.cat::<$C>().unwrap().lt(rhs)
421                }),
422            ),
423            _ => polars_bail!(
424                ComputeError: "cannot compare str value to series of type {}", self.dtype(),
425            ),
426        }
427    }
428
429    fn lt_eq(&self, rhs: &str) -> Self::Item {
430        validate_types(self.dtype(), &DataType::String)?;
431        match self.dtype() {
432            DataType::String => Ok(self.str().unwrap().lt_eq(rhs)),
433            #[cfg(feature = "dtype-categorical")]
434            DataType::Categorical(_, _) | DataType::Enum(_, _) => Ok(
435                with_match_categorical_physical_type!(self.dtype().cat_physical().unwrap(), |$C| {
436                    self.cat::<$C>().unwrap().lt_eq(rhs)
437                }),
438            ),
439            _ => polars_bail!(
440                ComputeError: "cannot compare str value to series of type {}", self.dtype(),
441            ),
442        }
443    }
444}