iceberg/expr/term.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Term definition.
19
20use std::fmt::{Display, Formatter};
21
22use fnv::FnvHashSet;
23use serde::{Deserialize, Serialize};
24
25use crate::expr::accessor::{StructAccessor, StructAccessorRef};
26use crate::expr::{
27 BinaryExpression, Bind, Predicate, PredicateOperator, SetExpression, UnaryExpression,
28};
29use crate::spec::{Datum, NestedField, NestedFieldRef, SchemaRef};
30use crate::{Error, ErrorKind};
31
32/// Unbound term before binding to a schema.
33pub type Term = Reference;
34
35/// A named reference in an unbound expression.
36/// For example, `a` in `a > 10`.
37#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
38pub struct Reference {
39 name: String,
40}
41
42impl Reference {
43 /// Create a new unbound reference.
44 pub fn new(name: impl Into<String>) -> Self {
45 Self { name: name.into() }
46 }
47
48 /// Return the name of this reference.
49 pub fn name(&self) -> &str {
50 &self.name
51 }
52}
53
54impl Reference {
55 /// Creates an less than expression. For example, `a < 10`.
56 ///
57 /// # Example
58 ///
59 /// ```rust
60 /// use iceberg::expr::Reference;
61 /// use iceberg::spec::Datum;
62 /// let expr = Reference::new("a").less_than(Datum::long(10));
63 ///
64 /// assert_eq!(&format!("{expr}"), "a < 10");
65 /// ```
66 pub fn less_than(self, datum: Datum) -> Predicate {
67 Predicate::Binary(BinaryExpression::new(
68 PredicateOperator::LessThan,
69 self,
70 datum,
71 ))
72 }
73
74 /// Creates an less than or equal to expression. For example, `a <= 10`.
75 ///
76 /// # Example
77 ///
78 /// ```rust
79 /// use iceberg::expr::Reference;
80 /// use iceberg::spec::Datum;
81 /// let expr = Reference::new("a").less_than_or_equal_to(Datum::long(10));
82 ///
83 /// assert_eq!(&format!("{expr}"), "a <= 10");
84 /// ```
85 pub fn less_than_or_equal_to(self, datum: Datum) -> Predicate {
86 Predicate::Binary(BinaryExpression::new(
87 PredicateOperator::LessThanOrEq,
88 self,
89 datum,
90 ))
91 }
92
93 /// Creates an greater than expression. For example, `a > 10`.
94 ///
95 /// # Example
96 ///
97 /// ```rust
98 /// use iceberg::expr::Reference;
99 /// use iceberg::spec::Datum;
100 /// let expr = Reference::new("a").greater_than(Datum::long(10));
101 ///
102 /// assert_eq!(&format!("{expr}"), "a > 10");
103 /// ```
104 pub fn greater_than(self, datum: Datum) -> Predicate {
105 Predicate::Binary(BinaryExpression::new(
106 PredicateOperator::GreaterThan,
107 self,
108 datum,
109 ))
110 }
111
112 /// Creates a greater-than-or-equal-to than expression. For example, `a >= 10`.
113 ///
114 /// # Example
115 ///
116 /// ```rust
117 /// use iceberg::expr::Reference;
118 /// use iceberg::spec::Datum;
119 /// let expr = Reference::new("a").greater_than_or_equal_to(Datum::long(10));
120 ///
121 /// assert_eq!(&format!("{expr}"), "a >= 10");
122 /// ```
123 pub fn greater_than_or_equal_to(self, datum: Datum) -> Predicate {
124 Predicate::Binary(BinaryExpression::new(
125 PredicateOperator::GreaterThanOrEq,
126 self,
127 datum,
128 ))
129 }
130
131 /// Creates an equal-to expression. For example, `a = 10`.
132 ///
133 /// # Example
134 ///
135 /// ```rust
136 /// use iceberg::expr::Reference;
137 /// use iceberg::spec::Datum;
138 /// let expr = Reference::new("a").equal_to(Datum::long(10));
139 ///
140 /// assert_eq!(&format!("{expr}"), "a = 10");
141 /// ```
142 pub fn equal_to(self, datum: Datum) -> Predicate {
143 Predicate::Binary(BinaryExpression::new(PredicateOperator::Eq, self, datum))
144 }
145
146 /// Creates a not equal-to expression. For example, `a!= 10`.
147 ///
148 /// # Example
149 ///
150 /// ```rust
151 /// use iceberg::expr::Reference;
152 /// use iceberg::spec::Datum;
153 /// let expr = Reference::new("a").not_equal_to(Datum::long(10));
154 ///
155 /// assert_eq!(&format!("{expr}"), "a != 10");
156 /// ```
157 pub fn not_equal_to(self, datum: Datum) -> Predicate {
158 Predicate::Binary(BinaryExpression::new(PredicateOperator::NotEq, self, datum))
159 }
160
161 /// Creates a start-with expression. For example, `a STARTS WITH "foo"`.
162 ///
163 /// # Example
164 ///
165 /// ```rust
166 /// use iceberg::expr::Reference;
167 /// use iceberg::spec::Datum;
168 /// let expr = Reference::new("a").starts_with(Datum::string("foo"));
169 ///
170 /// assert_eq!(&format!("{expr}"), r#"a STARTS WITH "foo""#);
171 /// ```
172 pub fn starts_with(self, datum: Datum) -> Predicate {
173 Predicate::Binary(BinaryExpression::new(
174 PredicateOperator::StartsWith,
175 self,
176 datum,
177 ))
178 }
179
180 /// Creates a not start-with expression. For example, `a NOT STARTS WITH 'foo'`.
181 ///
182 /// # Example
183 ///
184 /// ```rust
185 /// use iceberg::expr::Reference;
186 /// use iceberg::spec::Datum;
187 ///
188 /// let expr = Reference::new("a").not_starts_with(Datum::string("foo"));
189 ///
190 /// assert_eq!(&format!("{expr}"), r#"a NOT STARTS WITH "foo""#);
191 /// ```
192 pub fn not_starts_with(self, datum: Datum) -> Predicate {
193 Predicate::Binary(BinaryExpression::new(
194 PredicateOperator::NotStartsWith,
195 self,
196 datum,
197 ))
198 }
199
200 /// Creates an is-nan expression. For example, `a IS NAN`.
201 ///
202 /// # Example
203 ///
204 /// ```rust
205 /// use iceberg::expr::Reference;
206 /// use iceberg::spec::Datum;
207 /// let expr = Reference::new("a").is_nan();
208 ///
209 /// assert_eq!(&format!("{expr}"), "a IS NAN");
210 /// ```
211 pub fn is_nan(self) -> Predicate {
212 Predicate::Unary(UnaryExpression::new(PredicateOperator::IsNan, self))
213 }
214
215 /// Creates an is-not-nan expression. For example, `a IS NOT NAN`.
216 ///
217 /// # Example
218 ///
219 /// ```rust
220 /// use iceberg::expr::Reference;
221 /// use iceberg::spec::Datum;
222 /// let expr = Reference::new("a").is_not_nan();
223 ///
224 /// assert_eq!(&format!("{expr}"), "a IS NOT NAN");
225 /// ```
226 pub fn is_not_nan(self) -> Predicate {
227 Predicate::Unary(UnaryExpression::new(PredicateOperator::NotNan, self))
228 }
229
230 /// Creates an is-null expression. For example, `a IS NULL`.
231 ///
232 /// # Example
233 ///
234 /// ```rust
235 /// use iceberg::expr::Reference;
236 /// use iceberg::spec::Datum;
237 /// let expr = Reference::new("a").is_null();
238 ///
239 /// assert_eq!(&format!("{expr}"), "a IS NULL");
240 /// ```
241 pub fn is_null(self) -> Predicate {
242 Predicate::Unary(UnaryExpression::new(PredicateOperator::IsNull, self))
243 }
244
245 /// Creates an is-not-null expression. For example, `a IS NOT NULL`.
246 ///
247 /// # Example
248 ///
249 /// ```rust
250 /// use iceberg::expr::Reference;
251 /// use iceberg::spec::Datum;
252 /// let expr = Reference::new("a").is_not_null();
253 ///
254 /// assert_eq!(&format!("{expr}"), "a IS NOT NULL");
255 /// ```
256 pub fn is_not_null(self) -> Predicate {
257 Predicate::Unary(UnaryExpression::new(PredicateOperator::NotNull, self))
258 }
259
260 /// Creates an is-in expression. For example, `a IN (5, 6)`.
261 ///
262 /// # Example
263 ///
264 /// ```rust
265 /// use fnv::FnvHashSet;
266 /// use iceberg::expr::Reference;
267 /// use iceberg::spec::Datum;
268 /// let expr = Reference::new("a").is_in([Datum::long(5), Datum::long(6)]);
269 ///
270 /// let as_string = format!("{expr}");
271 /// assert!(&as_string == "a IN (5, 6)" || &as_string == "a IN (6, 5)");
272 /// ```
273 pub fn is_in(self, literals: impl IntoIterator<Item = Datum>) -> Predicate {
274 Predicate::Set(SetExpression::new(
275 PredicateOperator::In,
276 self,
277 FnvHashSet::from_iter(literals),
278 ))
279 }
280
281 /// Creates an is-not-in expression. For example, `a NOT IN (5, 6)`.
282 ///
283 /// # Example
284 ///
285 /// ```rust
286 /// use fnv::FnvHashSet;
287 /// use iceberg::expr::Reference;
288 /// use iceberg::spec::Datum;
289 /// let expr = Reference::new("a").is_not_in([Datum::long(5), Datum::long(6)]);
290 ///
291 /// let as_string = format!("{expr}");
292 /// assert!(&as_string == "a NOT IN (5, 6)" || &as_string == "a NOT IN (6, 5)");
293 /// ```
294 pub fn is_not_in(self, literals: impl IntoIterator<Item = Datum>) -> Predicate {
295 Predicate::Set(SetExpression::new(
296 PredicateOperator::NotIn,
297 self,
298 FnvHashSet::from_iter(literals),
299 ))
300 }
301}
302
303impl Display for Reference {
304 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
305 write!(f, "{}", self.name)
306 }
307}
308
309impl Bind for Reference {
310 type Bound = BoundReference;
311
312 fn bind(&self, schema: SchemaRef, case_sensitive: bool) -> crate::Result<Self::Bound> {
313 let field = if case_sensitive {
314 schema.field_by_name(&self.name)
315 } else {
316 schema.field_by_name_case_insensitive(&self.name)
317 };
318
319 let field = field.ok_or_else(|| {
320 Error::new(
321 ErrorKind::DataInvalid,
322 format!("Field {} not found in schema", self.name),
323 )
324 })?;
325
326 let accessor = schema.accessor_by_field_id(field.id).ok_or_else(|| {
327 Error::new(
328 ErrorKind::DataInvalid,
329 format!("Accessor for Field {} not found", self.name),
330 )
331 })?;
332
333 Ok(BoundReference::new(
334 self.name.clone(),
335 field.clone(),
336 accessor.clone(),
337 ))
338 }
339}
340
341/// A named reference in a bound expression after binding to a schema.
342#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
343pub struct BoundReference {
344 // This maybe different from [`name`] filed in [`NestedField`] since this contains full path.
345 // For example, if the field is `a.b.c`, then `field.name` is `c`, but `original_name` is `a.b.c`.
346 column_name: String,
347 field: NestedFieldRef,
348 accessor: StructAccessorRef,
349}
350
351impl BoundReference {
352 /// Creates a new bound reference.
353 pub fn new(
354 name: impl Into<String>,
355 field: NestedFieldRef,
356 accessor: StructAccessorRef,
357 ) -> Self {
358 Self {
359 column_name: name.into(),
360 field,
361 accessor,
362 }
363 }
364
365 /// Return the field of this reference.
366 pub fn field(&self) -> &NestedField {
367 &self.field
368 }
369
370 /// Get this BoundReference's Accessor
371 pub fn accessor(&self) -> &StructAccessor {
372 &self.accessor
373 }
374}
375
376impl Display for BoundReference {
377 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
378 write!(f, "{}", self.column_name)
379 }
380}
381
382/// Bound term after binding to a schema.
383pub type BoundTerm = BoundReference;
384
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use crate::expr::accessor::StructAccessor;
    use crate::expr::{Bind, BoundReference, Reference};
    use crate::spec::{NestedField, PrimitiveType, Schema, SchemaRef, Type};

    /// Schema with three primitive columns (`foo`, `bar`, `baz`) shared by
    /// every binding test below.
    fn table_schema_simple() -> SchemaRef {
        let schema = Schema::builder()
            .with_schema_id(1)
            .with_identifier_field_ids(vec![2])
            .with_fields(vec![
                NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(),
                NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
                NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
            ])
            .build()
            .unwrap();

        Arc::new(schema)
    }

    /// Bound reference expected when the `bar` column is bound under
    /// `column_name`.
    fn expected_bar(column_name: &str) -> BoundReference {
        let field = NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int));
        let accessor = StructAccessor::new(1, PrimitiveType::Int);

        BoundReference::new(column_name, field.into(), Arc::new(accessor))
    }

    #[test]
    fn test_bind_reference() {
        let bound = Reference::new("bar")
            .bind(table_schema_simple(), true)
            .unwrap();

        assert_eq!(expected_bar("bar"), bound);
    }

    #[test]
    fn test_bind_reference_case_insensitive() {
        let bound = Reference::new("BAR")
            .bind(table_schema_simple(), false)
            .unwrap();

        // The bound reference keeps the caller's spelling of the column name.
        assert_eq!(expected_bar("BAR"), bound);
    }

    #[test]
    fn test_bind_reference_failure() {
        let outcome = Reference::new("bar_not_eix").bind(table_schema_simple(), true);

        assert!(outcome.is_err());
    }

    #[test]
    fn test_bind_reference_case_insensitive_failure() {
        let outcome = Reference::new("bar_non_exist").bind(table_schema_simple(), false);

        assert!(outcome.is_err());
    }
}
452}