iceberg/expr/
term.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Term definition.
19
20use std::fmt::{Display, Formatter};
21
22use fnv::FnvHashSet;
23use serde::{Deserialize, Serialize};
24
25use crate::expr::accessor::{StructAccessor, StructAccessorRef};
26use crate::expr::{
27    BinaryExpression, Bind, Predicate, PredicateOperator, SetExpression, UnaryExpression,
28};
29use crate::spec::{Datum, NestedField, NestedFieldRef, SchemaRef};
30use crate::{Error, ErrorKind};
31
/// Unbound term before binding to a schema.
///
/// An alias for [`Reference`].
pub type Term = Reference;
34
35/// A named reference in an unbound expression.
36/// For example, `a` in `a > 10`.
37#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
38pub struct Reference {
39    name: String,
40}
41
42impl Reference {
43    /// Create a new unbound reference.
44    pub fn new(name: impl Into<String>) -> Self {
45        Self { name: name.into() }
46    }
47
48    /// Return the name of this reference.
49    pub fn name(&self) -> &str {
50        &self.name
51    }
52}
53
54impl Reference {
55    /// Creates an less than expression. For example, `a < 10`.
56    ///
57    /// # Example
58    ///
59    /// ```rust
60    /// use iceberg::expr::Reference;
61    /// use iceberg::spec::Datum;
62    /// let expr = Reference::new("a").less_than(Datum::long(10));
63    ///
64    /// assert_eq!(&format!("{expr}"), "a < 10");
65    /// ```
66    pub fn less_than(self, datum: Datum) -> Predicate {
67        Predicate::Binary(BinaryExpression::new(
68            PredicateOperator::LessThan,
69            self,
70            datum,
71        ))
72    }
73
74    /// Creates an less than or equal to expression. For example, `a <= 10`.
75    ///
76    /// # Example
77    ///
78    /// ```rust
79    /// use iceberg::expr::Reference;
80    /// use iceberg::spec::Datum;
81    /// let expr = Reference::new("a").less_than_or_equal_to(Datum::long(10));
82    ///
83    /// assert_eq!(&format!("{expr}"), "a <= 10");
84    /// ```
85    pub fn less_than_or_equal_to(self, datum: Datum) -> Predicate {
86        Predicate::Binary(BinaryExpression::new(
87            PredicateOperator::LessThanOrEq,
88            self,
89            datum,
90        ))
91    }
92
93    /// Creates an greater than expression. For example, `a > 10`.
94    ///
95    /// # Example
96    ///
97    /// ```rust
98    /// use iceberg::expr::Reference;
99    /// use iceberg::spec::Datum;
100    /// let expr = Reference::new("a").greater_than(Datum::long(10));
101    ///
102    /// assert_eq!(&format!("{expr}"), "a > 10");
103    /// ```
104    pub fn greater_than(self, datum: Datum) -> Predicate {
105        Predicate::Binary(BinaryExpression::new(
106            PredicateOperator::GreaterThan,
107            self,
108            datum,
109        ))
110    }
111
112    /// Creates a greater-than-or-equal-to than expression. For example, `a >= 10`.
113    ///
114    /// # Example
115    ///
116    /// ```rust
117    /// use iceberg::expr::Reference;
118    /// use iceberg::spec::Datum;
119    /// let expr = Reference::new("a").greater_than_or_equal_to(Datum::long(10));
120    ///
121    /// assert_eq!(&format!("{expr}"), "a >= 10");
122    /// ```
123    pub fn greater_than_or_equal_to(self, datum: Datum) -> Predicate {
124        Predicate::Binary(BinaryExpression::new(
125            PredicateOperator::GreaterThanOrEq,
126            self,
127            datum,
128        ))
129    }
130
131    /// Creates an equal-to expression. For example, `a = 10`.
132    ///
133    /// # Example
134    ///
135    /// ```rust
136    /// use iceberg::expr::Reference;
137    /// use iceberg::spec::Datum;
138    /// let expr = Reference::new("a").equal_to(Datum::long(10));
139    ///
140    /// assert_eq!(&format!("{expr}"), "a = 10");
141    /// ```
142    pub fn equal_to(self, datum: Datum) -> Predicate {
143        Predicate::Binary(BinaryExpression::new(PredicateOperator::Eq, self, datum))
144    }
145
146    /// Creates a not equal-to expression. For example, `a!= 10`.
147    ///
148    /// # Example
149    ///
150    /// ```rust
151    /// use iceberg::expr::Reference;
152    /// use iceberg::spec::Datum;
153    /// let expr = Reference::new("a").not_equal_to(Datum::long(10));
154    ///
155    /// assert_eq!(&format!("{expr}"), "a != 10");
156    /// ```
157    pub fn not_equal_to(self, datum: Datum) -> Predicate {
158        Predicate::Binary(BinaryExpression::new(PredicateOperator::NotEq, self, datum))
159    }
160
161    /// Creates a start-with expression. For example, `a STARTS WITH "foo"`.
162    ///
163    /// # Example
164    ///
165    /// ```rust
166    /// use iceberg::expr::Reference;
167    /// use iceberg::spec::Datum;
168    /// let expr = Reference::new("a").starts_with(Datum::string("foo"));
169    ///
170    /// assert_eq!(&format!("{expr}"), r#"a STARTS WITH "foo""#);
171    /// ```
172    pub fn starts_with(self, datum: Datum) -> Predicate {
173        Predicate::Binary(BinaryExpression::new(
174            PredicateOperator::StartsWith,
175            self,
176            datum,
177        ))
178    }
179
180    /// Creates a not start-with expression. For example, `a NOT STARTS WITH 'foo'`.
181    ///
182    /// # Example
183    ///
184    /// ```rust
185    /// use iceberg::expr::Reference;
186    /// use iceberg::spec::Datum;
187    ///
188    /// let expr = Reference::new("a").not_starts_with(Datum::string("foo"));
189    ///
190    /// assert_eq!(&format!("{expr}"), r#"a NOT STARTS WITH "foo""#);
191    /// ```
192    pub fn not_starts_with(self, datum: Datum) -> Predicate {
193        Predicate::Binary(BinaryExpression::new(
194            PredicateOperator::NotStartsWith,
195            self,
196            datum,
197        ))
198    }
199
200    /// Creates an is-nan expression. For example, `a IS NAN`.
201    ///
202    /// # Example
203    ///
204    /// ```rust
205    /// use iceberg::expr::Reference;
206    /// use iceberg::spec::Datum;
207    /// let expr = Reference::new("a").is_nan();
208    ///
209    /// assert_eq!(&format!("{expr}"), "a IS NAN");
210    /// ```
211    pub fn is_nan(self) -> Predicate {
212        Predicate::Unary(UnaryExpression::new(PredicateOperator::IsNan, self))
213    }
214
215    /// Creates an is-not-nan expression. For example, `a IS NOT NAN`.
216    ///
217    /// # Example
218    ///
219    /// ```rust
220    /// use iceberg::expr::Reference;
221    /// use iceberg::spec::Datum;
222    /// let expr = Reference::new("a").is_not_nan();
223    ///
224    /// assert_eq!(&format!("{expr}"), "a IS NOT NAN");
225    /// ```
226    pub fn is_not_nan(self) -> Predicate {
227        Predicate::Unary(UnaryExpression::new(PredicateOperator::NotNan, self))
228    }
229
230    /// Creates an is-null expression. For example, `a IS NULL`.
231    ///
232    /// # Example
233    ///
234    /// ```rust
235    /// use iceberg::expr::Reference;
236    /// use iceberg::spec::Datum;
237    /// let expr = Reference::new("a").is_null();
238    ///
239    /// assert_eq!(&format!("{expr}"), "a IS NULL");
240    /// ```
241    pub fn is_null(self) -> Predicate {
242        Predicate::Unary(UnaryExpression::new(PredicateOperator::IsNull, self))
243    }
244
245    /// Creates an is-not-null expression. For example, `a IS NOT NULL`.
246    ///
247    /// # Example
248    ///
249    /// ```rust
250    /// use iceberg::expr::Reference;
251    /// use iceberg::spec::Datum;
252    /// let expr = Reference::new("a").is_not_null();
253    ///
254    /// assert_eq!(&format!("{expr}"), "a IS NOT NULL");
255    /// ```
256    pub fn is_not_null(self) -> Predicate {
257        Predicate::Unary(UnaryExpression::new(PredicateOperator::NotNull, self))
258    }
259
260    /// Creates an is-in expression. For example, `a IN (5, 6)`.
261    ///
262    /// # Example
263    ///
264    /// ```rust
265    /// use fnv::FnvHashSet;
266    /// use iceberg::expr::Reference;
267    /// use iceberg::spec::Datum;
268    /// let expr = Reference::new("a").is_in([Datum::long(5), Datum::long(6)]);
269    ///
270    /// let as_string = format!("{expr}");
271    /// assert!(&as_string == "a IN (5, 6)" || &as_string == "a IN (6, 5)");
272    /// ```
273    pub fn is_in(self, literals: impl IntoIterator<Item = Datum>) -> Predicate {
274        Predicate::Set(SetExpression::new(
275            PredicateOperator::In,
276            self,
277            FnvHashSet::from_iter(literals),
278        ))
279    }
280
281    /// Creates an is-not-in expression. For example, `a NOT IN (5, 6)`.
282    ///
283    /// # Example
284    ///
285    /// ```rust
286    /// use fnv::FnvHashSet;
287    /// use iceberg::expr::Reference;
288    /// use iceberg::spec::Datum;
289    /// let expr = Reference::new("a").is_not_in([Datum::long(5), Datum::long(6)]);
290    ///
291    /// let as_string = format!("{expr}");
292    /// assert!(&as_string == "a NOT IN (5, 6)" || &as_string == "a NOT IN (6, 5)");
293    /// ```
294    pub fn is_not_in(self, literals: impl IntoIterator<Item = Datum>) -> Predicate {
295        Predicate::Set(SetExpression::new(
296            PredicateOperator::NotIn,
297            self,
298            FnvHashSet::from_iter(literals),
299        ))
300    }
301}
302
303impl Display for Reference {
304    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
305        write!(f, "{}", self.name)
306    }
307}
308
309impl Bind for Reference {
310    type Bound = BoundReference;
311
312    fn bind(&self, schema: SchemaRef, case_sensitive: bool) -> crate::Result<Self::Bound> {
313        let field = if case_sensitive {
314            schema.field_by_name(&self.name)
315        } else {
316            schema.field_by_name_case_insensitive(&self.name)
317        };
318
319        let field = field.ok_or_else(|| {
320            Error::new(
321                ErrorKind::DataInvalid,
322                format!("Field {} not found in schema", self.name),
323            )
324        })?;
325
326        let accessor = schema.accessor_by_field_id(field.id).ok_or_else(|| {
327            Error::new(
328                ErrorKind::DataInvalid,
329                format!("Accessor for Field {} not found", self.name),
330            )
331        })?;
332
333        Ok(BoundReference::new(
334            self.name.clone(),
335            field.clone(),
336            accessor.clone(),
337        ))
338    }
339}
340
/// A named reference in a bound expression after binding to a schema.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct BoundReference {
    // This may be different from the [`name`] field in [`NestedField`] since this contains the
    // full path. For example, if the field is `a.b.c`, then `field.name` is `c`, but
    // `column_name` is `a.b.c`.
    column_name: String,
    // Schema field this reference points at.
    field: NestedFieldRef,
    // Accessor paired with `field`; `Reference::bind` obtains it from the schema by field id.
    accessor: StructAccessorRef,
}
350
351impl BoundReference {
352    /// Creates a new bound reference.
353    pub fn new(
354        name: impl Into<String>,
355        field: NestedFieldRef,
356        accessor: StructAccessorRef,
357    ) -> Self {
358        Self {
359            column_name: name.into(),
360            field,
361            accessor,
362        }
363    }
364
365    /// Return the field of this reference.
366    pub fn field(&self) -> &NestedField {
367        &self.field
368    }
369
370    /// Get this BoundReference's Accessor
371    pub fn accessor(&self) -> &StructAccessor {
372        &self.accessor
373    }
374}
375
376impl Display for BoundReference {
377    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
378        write!(f, "{}", self.column_name)
379    }
380}
381
/// Bound term after binding to a schema.
///
/// An alias for [`BoundReference`].
pub type BoundTerm = BoundReference;
384
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use crate::expr::accessor::StructAccessor;
    use crate::expr::{Bind, BoundReference, Reference};
    use crate::spec::{NestedField, PrimitiveType, Schema, SchemaRef, Type};

    /// Three-column schema (`foo`, `bar`, `baz`) shared by every test below.
    fn table_schema_simple() -> SchemaRef {
        let schema = Schema::builder()
            .with_schema_id(1)
            .with_identifier_field_ids(vec![2])
            .with_fields(vec![
                NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(),
                NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
                NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
            ])
            .build()
            .unwrap();

        Arc::new(schema)
    }

    /// Bound reference expected for the `bar` column when it was referred to
    /// as `column_name`.
    fn expected_bar_reference(column_name: &str) -> BoundReference {
        BoundReference::new(
            column_name,
            NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
            Arc::new(StructAccessor::new(1, PrimitiveType::Int)),
        )
    }

    #[test]
    fn test_bind_reference() {
        let bound = Reference::new("bar")
            .bind(table_schema_simple(), true)
            .unwrap();

        assert_eq!(expected_bar_reference("bar"), bound);
    }

    #[test]
    fn test_bind_reference_case_insensitive() {
        let bound = Reference::new("BAR")
            .bind(table_schema_simple(), false)
            .unwrap();

        // The bound reference keeps the spelling used by the caller.
        assert_eq!(expected_bar_reference("BAR"), bound);
    }

    #[test]
    fn test_bind_reference_failure() {
        let outcome = Reference::new("bar_not_eix").bind(table_schema_simple(), true);

        assert!(outcome.is_err());
    }

    #[test]
    fn test_bind_reference_case_insensitive_failure() {
        let outcome = Reference::new("bar_non_exist").bind(table_schema_simple(), false);

        assert!(outcome.is_err());
    }
}