iceberg/transform/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Transform function used to compute partition values.
19
20use std::fmt::Debug;
21
22use arrow_array::ArrayRef;
23
24use crate::spec::{Datum, Transform};
25use crate::{Error, ErrorKind, Result};
26
27mod bucket;
28mod identity;
29mod temporal;
30mod truncate;
31mod void;
32
33/// TransformFunction is a trait that defines the interface for all transform functions.
34pub trait TransformFunction: Send + Sync + Debug {
35    /// transform will take an input array and transform it into a new array.
36    /// The implementation of this function will need to check and downcast the input to specific
37    /// type.
38    fn transform(&self, input: ArrayRef) -> Result<ArrayRef>;
39    /// transform_literal will take an input literal and transform it into a new literal.
40    fn transform_literal(&self, input: &Datum) -> Result<Option<Datum>>;
41    /// A thin wrapper around `transform_literal`
42    /// to return an error even when it's `None`.
43    fn transform_literal_result(&self, input: &Datum) -> Result<Datum> {
44        self.transform_literal(input)?.ok_or_else(|| {
45            Error::new(
46                ErrorKind::Unexpected,
47                format!("Returns 'None' for literal {input}"),
48            )
49        })
50    }
51}
52
/// BoxedTransformFunction is a boxed trait object of TransformFunction.
///
/// This is the type returned by [`create_transform_function`], letting callers hold any
/// transform implementation behind a single owned, dynamically dispatched handle.
pub type BoxedTransformFunction = Box<dyn TransformFunction>;
55
56/// create_transform_function creates a boxed trait object of TransformFunction from a Transform.
57pub fn create_transform_function(transform: &Transform) -> Result<BoxedTransformFunction> {
58    match transform {
59        Transform::Identity => Ok(Box::new(identity::Identity {})),
60        Transform::Void => Ok(Box::new(void::Void {})),
61        Transform::Year => Ok(Box::new(temporal::Year {})),
62        Transform::Month => Ok(Box::new(temporal::Month {})),
63        Transform::Day => Ok(Box::new(temporal::Day {})),
64        Transform::Hour => Ok(Box::new(temporal::Hour {})),
65        Transform::Bucket(mod_n) => Ok(Box::new(bucket::Bucket::new(*mod_n))),
66        Transform::Truncate(width) => Ok(Box::new(truncate::Truncate::new(*width))),
67        Transform::Unknown => Err(crate::error::Error::new(
68            crate::ErrorKind::FeatureUnsupported,
69            "Transform Unknown is not implemented",
70        )),
71    }
72}
73
#[cfg(test)]
mod test {
    use std::collections::HashSet;
    use std::sync::Arc;

    use crate::Result;
    use crate::expr::accessor::StructAccessor;
    use crate::expr::{
        BinaryExpression, BoundPredicate, BoundReference, PredicateOperator, SetExpression,
    };
    use crate::spec::{Datum, NestedField, NestedFieldRef, PrimitiveType, Transform, Type};

    /// A utility struct, test fixture
    /// used for testing the projection on `Transform`
    pub(crate) struct TestProjectionFixture {
        /// Transform whose `project` method is exercised.
        transform: Transform,
        /// Name handed to `project` and used for the bound reference.
        name: String,
        /// Field the bound reference points at.
        field: NestedFieldRef,
    }

    impl TestProjectionFixture {
        /// Creates a fixture for `transform`, referring to `field` under `name`.
        pub(crate) fn new(
            transform: Transform,
            name: impl Into<String>,
            field: NestedField,
        ) -> Self {
            TestProjectionFixture {
                transform,
                name: name.into(),
                field: Arc::new(field),
            }
        }

        /// Builds the bound reference shared by every predicate this fixture creates.
        ///
        /// NOTE(review): the accessor is always position `1` with a boolean type, whatever
        /// the fixture's field is; presumably the projection code never consults it —
        /// confirm before relying on the accessor in new tests.
        fn bound_reference(&self) -> BoundReference {
            BoundReference::new(
                self.name.clone(),
                self.field.clone(),
                Arc::new(StructAccessor::new(1, PrimitiveType::Boolean)),
            )
        }

        /// Builds a bound binary predicate `<reference> <op> <literal>` on the fixture's field.
        pub(crate) fn binary_predicate(
            &self,
            op: PredicateOperator,
            literal: Datum,
        ) -> BoundPredicate {
            BoundPredicate::Binary(BinaryExpression::new(
                op,
                self.bound_reference(),
                literal,
            ))
        }

        /// Builds a bound set predicate `<reference> <op> {literals}` on the fixture's field.
        ///
        /// The literals are collected into a `HashSet`, so duplicates collapse.
        pub(crate) fn set_predicate(
            &self,
            op: PredicateOperator,
            literals: Vec<Datum>,
        ) -> BoundPredicate {
            BoundPredicate::Set(SetExpression::new(
                op,
                self.bound_reference(),
                HashSet::from_iter(literals),
            ))
        }

        /// Projects `predicate` through the fixture's transform and checks the outcome.
        ///
        /// `expected` is the display form of the projected predicate, or `None` when no
        /// projection is expected. Both sides are compared as `Option<&str>`, so a failure
        /// prints the actual and the expected projection side by side.
        ///
        /// # Errors
        ///
        /// Propagates any error returned by `Transform::project`.
        ///
        /// # Panics
        ///
        /// Panics when the projection does not match `expected`.
        #[track_caller]
        pub(crate) fn assert_projection(
            &self,
            predicate: &BoundPredicate,
            expected: Option<&str>,
        ) -> Result<()> {
            let projected = self
                .transform
                .project(&self.name, predicate)?
                .map(|projection| projection.to_string());
            assert_eq!(projected.as_deref(), expected);
            Ok(())
        }
    }

    /// A utility struct, test fixture
    /// used for testing the transform on `Transform`
    pub(crate) struct TestTransformFixture {
        /// Expected `Display` output of the transform.
        pub display: String,
        /// Expected JSON serialization; it must also deserialize back to the transform.
        pub json: String,
        /// Expected result of `Transform::dedup_name`.
        pub dedup_name: String,
        /// Expected result of `Transform::preserves_order`.
        pub preserves_order: bool,
        /// Pairs of another transform and the expected result of
        /// `Transform::satisfies_order_of` against it.
        pub satisfies_order_of: Vec<(Transform, bool)>,
        /// Pairs of an input type and the expected result type; `None` means
        /// `Transform::result_type` is expected to fail for that input.
        pub trans_types: Vec<(Type, Option<Type>)>,
    }

    impl TestTransformFixture {
        /// Checks every expectation recorded in the fixture against `trans`.
        ///
        /// # Panics
        ///
        /// Panics on the first expectation that does not hold.
        #[track_caller]
        pub(crate) fn assert_transform(&self, trans: Transform) {
            assert_eq!(self.display, trans.to_string());
            assert_eq!(self.json, serde_json::to_string(&trans).unwrap());
            assert_eq!(trans, serde_json::from_str(self.json.as_str()).unwrap());
            assert_eq!(self.dedup_name, trans.dedup_name());
            assert_eq!(self.preserves_order, trans.preserves_order());

            // Plain `for` loops (not closures) keep `#[track_caller]` effective for the
            // assertions below.
            for (other_trans, satisfies_order_of) in &self.satisfies_order_of {
                assert_eq!(
                    satisfies_order_of,
                    &trans.satisfies_order_of(other_trans),
                    "Failed to check satisfies order {trans}, {other_trans}, {satisfies_order_of}"
                );
            }

            for (i, (input_type, result_type)) in self.trans_types.iter().enumerate() {
                // An error from `result_type` is treated as "no result type".
                let actual = trans.result_type(input_type).ok();
                assert_eq!(
                    result_type, &actual,
                    "type mismatch at index {i}, input: {input_type}, expected: {result_type:?}, actual: {actual:?}"
                );
            }
        }
    }
}