iceberg/spec/values/
datum.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Typed literals with validation
19
20use std::cmp::Ordering;
21use std::fmt::{Display, Formatter};
22use std::str::FromStr;
23
24use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc};
25use num_bigint::BigInt;
26use ordered_float::{Float, OrderedFloat};
27use rust_decimal::Decimal;
28use rust_decimal::prelude::ToPrimitive;
29use serde::de::{self, MapAccess};
30use serde::ser::SerializeStruct;
31use serde::{Deserialize, Serialize};
32use serde_bytes::ByteBuf;
33
34use super::literal::Literal;
35use super::primitive::PrimitiveLiteral;
36use super::serde::_serde::RawLiteral;
37use super::temporal::{date, time, timestamp, timestamptz};
38use crate::error::Result;
39use crate::spec::MAX_DECIMAL_PRECISION;
40use crate::spec::datatypes::{PrimitiveType, Type};
41use crate::{Error, ErrorKind, ensure_data_valid};
42
/// Largest value accepted for the [`PrimitiveType::Time`] type, in microseconds since midnight:
/// 23 hours 59 minutes 59 seconds 999999 microseconds.
pub(crate) const MAX_TIME_VALUE: i64 = 24 * 60 * 60 * 1_000_000 - 1;

/// Largest value representable by a 32-bit signed integer.
pub(crate) const INT_MAX: i32 = i32::MAX;
/// Smallest value representable by a 32-bit signed integer.
pub(crate) const INT_MIN: i32 = i32::MIN;
/// Largest value representable by a 64-bit signed integer.
pub(crate) const LONG_MAX: i64 = i64::MAX;
/// Smallest value representable by a 64-bit signed integer.
pub(crate) const LONG_MIN: i64 = i64::MIN;
50
51/// Literal associated with its type. The value and type pair is checked when construction, so the type and value is
52/// guaranteed to be correct when used.
53///
54/// By default, we decouple the type and value of a literal, so we can use avoid the cost of storing extra type info
55/// for each literal. But associate type with literal can be useful in some cases, for example, in unbound expression.
56#[derive(Clone, Debug, PartialEq, Hash, Eq)]
57pub struct Datum {
58    r#type: PrimitiveType,
59    literal: PrimitiveLiteral,
60}
61
62impl Serialize for Datum {
63    fn serialize<S: serde::Serializer>(
64        &self,
65        serializer: S,
66    ) -> std::result::Result<S::Ok, S::Error> {
67        let mut struct_ser = serializer
68            .serialize_struct("Datum", 2)
69            .map_err(serde::ser::Error::custom)?;
70        struct_ser
71            .serialize_field("type", &self.r#type)
72            .map_err(serde::ser::Error::custom)?;
73        struct_ser
74            .serialize_field(
75                "literal",
76                &RawLiteral::try_from(
77                    Literal::Primitive(self.literal.clone()),
78                    &Type::Primitive(self.r#type.clone()),
79                )
80                .map_err(serde::ser::Error::custom)?,
81            )
82            .map_err(serde::ser::Error::custom)?;
83        struct_ser.end()
84    }
85}
86
87impl<'de> Deserialize<'de> for Datum {
88    fn deserialize<D: serde::Deserializer<'de>>(
89        deserializer: D,
90    ) -> std::result::Result<Self, D::Error> {
91        #[derive(Deserialize)]
92        #[serde(field_identifier, rename_all = "lowercase")]
93        enum Field {
94            Type,
95            Literal,
96        }
97
98        struct DatumVisitor;
99
100        impl<'de> serde::de::Visitor<'de> for DatumVisitor {
101            type Value = Datum;
102
103            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
104                formatter.write_str("struct Datum")
105            }
106
107            fn visit_seq<A>(self, mut seq: A) -> std::result::Result<Self::Value, A::Error>
108            where A: serde::de::SeqAccess<'de> {
109                let r#type = seq
110                    .next_element::<PrimitiveType>()?
111                    .ok_or_else(|| serde::de::Error::invalid_length(0, &self))?;
112                let value = seq
113                    .next_element::<RawLiteral>()?
114                    .ok_or_else(|| serde::de::Error::invalid_length(1, &self))?;
115                let Literal::Primitive(primitive) = value
116                    .try_into(&Type::Primitive(r#type.clone()))
117                    .map_err(serde::de::Error::custom)?
118                    .ok_or_else(|| serde::de::Error::custom("None value"))?
119                else {
120                    return Err(serde::de::Error::custom("Invalid value"));
121                };
122
123                Ok(Datum::new(r#type, primitive))
124            }
125
126            fn visit_map<V>(self, mut map: V) -> std::result::Result<Datum, V::Error>
127            where V: MapAccess<'de> {
128                let mut raw_primitive: Option<RawLiteral> = None;
129                let mut r#type: Option<PrimitiveType> = None;
130                while let Some(key) = map.next_key()? {
131                    match key {
132                        Field::Type => {
133                            if r#type.is_some() {
134                                return Err(de::Error::duplicate_field("type"));
135                            }
136                            r#type = Some(map.next_value()?);
137                        }
138                        Field::Literal => {
139                            if raw_primitive.is_some() {
140                                return Err(de::Error::duplicate_field("literal"));
141                            }
142                            raw_primitive = Some(map.next_value()?);
143                        }
144                    }
145                }
146                let Some(r#type) = r#type else {
147                    return Err(serde::de::Error::missing_field("type"));
148                };
149                let Some(raw_primitive) = raw_primitive else {
150                    return Err(serde::de::Error::missing_field("literal"));
151                };
152                let Literal::Primitive(primitive) = raw_primitive
153                    .try_into(&Type::Primitive(r#type.clone()))
154                    .map_err(serde::de::Error::custom)?
155                    .ok_or_else(|| serde::de::Error::custom("None value"))?
156                else {
157                    return Err(serde::de::Error::custom("Invalid value"));
158                };
159                Ok(Datum::new(r#type, primitive))
160            }
161        }
162        const FIELDS: &[&str] = &["type", "literal"];
163        deserializer.deserialize_struct("Datum", FIELDS, DatumVisitor)
164    }
165}
166
167// Compare following iceberg float ordering rules:
168//  -NaN < -Infinity < -value < -0 < 0 < value < Infinity < NaN
169fn iceberg_float_cmp_f32(a: OrderedFloat<f32>, b: OrderedFloat<f32>) -> Option<Ordering> {
170    Some(a.total_cmp(&b))
171}
172
173fn iceberg_float_cmp_f64(a: OrderedFloat<f64>, b: OrderedFloat<f64>) -> Option<Ordering> {
174    Some(a.total_cmp(&b))
175}
176
177impl PartialOrd for Datum {
178    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
179        match (&self.literal, &other.literal, &self.r#type, &other.r#type) {
180            // generate the arm with same type and same literal
181            (
182                PrimitiveLiteral::Boolean(val),
183                PrimitiveLiteral::Boolean(other_val),
184                PrimitiveType::Boolean,
185                PrimitiveType::Boolean,
186            ) => val.partial_cmp(other_val),
187            (
188                PrimitiveLiteral::Int(val),
189                PrimitiveLiteral::Int(other_val),
190                PrimitiveType::Int,
191                PrimitiveType::Int,
192            ) => val.partial_cmp(other_val),
193            (
194                PrimitiveLiteral::Long(val),
195                PrimitiveLiteral::Long(other_val),
196                PrimitiveType::Long,
197                PrimitiveType::Long,
198            ) => val.partial_cmp(other_val),
199            (
200                PrimitiveLiteral::Float(val),
201                PrimitiveLiteral::Float(other_val),
202                PrimitiveType::Float,
203                PrimitiveType::Float,
204            ) => iceberg_float_cmp_f32(*val, *other_val),
205            (
206                PrimitiveLiteral::Double(val),
207                PrimitiveLiteral::Double(other_val),
208                PrimitiveType::Double,
209                PrimitiveType::Double,
210            ) => iceberg_float_cmp_f64(*val, *other_val),
211            (
212                PrimitiveLiteral::Int(val),
213                PrimitiveLiteral::Int(other_val),
214                PrimitiveType::Date,
215                PrimitiveType::Date,
216            ) => val.partial_cmp(other_val),
217            (
218                PrimitiveLiteral::Long(val),
219                PrimitiveLiteral::Long(other_val),
220                PrimitiveType::Time,
221                PrimitiveType::Time,
222            ) => val.partial_cmp(other_val),
223            (
224                PrimitiveLiteral::Long(val),
225                PrimitiveLiteral::Long(other_val),
226                PrimitiveType::Timestamp,
227                PrimitiveType::Timestamp,
228            ) => val.partial_cmp(other_val),
229            (
230                PrimitiveLiteral::Long(val),
231                PrimitiveLiteral::Long(other_val),
232                PrimitiveType::Timestamptz,
233                PrimitiveType::Timestamptz,
234            ) => val.partial_cmp(other_val),
235            (
236                PrimitiveLiteral::String(val),
237                PrimitiveLiteral::String(other_val),
238                PrimitiveType::String,
239                PrimitiveType::String,
240            ) => val.partial_cmp(other_val),
241            (
242                PrimitiveLiteral::UInt128(val),
243                PrimitiveLiteral::UInt128(other_val),
244                PrimitiveType::Uuid,
245                PrimitiveType::Uuid,
246            ) => uuid::Uuid::from_u128(*val).partial_cmp(&uuid::Uuid::from_u128(*other_val)),
247            (
248                PrimitiveLiteral::Binary(val),
249                PrimitiveLiteral::Binary(other_val),
250                PrimitiveType::Fixed(_),
251                PrimitiveType::Fixed(_),
252            ) => val.partial_cmp(other_val),
253            (
254                PrimitiveLiteral::Binary(val),
255                PrimitiveLiteral::Binary(other_val),
256                PrimitiveType::Binary,
257                PrimitiveType::Binary,
258            ) => val.partial_cmp(other_val),
259            (
260                PrimitiveLiteral::Int128(val),
261                PrimitiveLiteral::Int128(other_val),
262                PrimitiveType::Decimal {
263                    precision: _,
264                    scale,
265                },
266                PrimitiveType::Decimal {
267                    precision: _,
268                    scale: other_scale,
269                },
270            ) => {
271                let val = Decimal::from_i128_with_scale(*val, *scale);
272                let other_val = Decimal::from_i128_with_scale(*other_val, *other_scale);
273                val.partial_cmp(&other_val)
274            }
275            _ => None,
276        }
277    }
278}
279
280impl Display for Datum {
281    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
282        match (&self.r#type, &self.literal) {
283            (_, PrimitiveLiteral::Boolean(val)) => write!(f, "{val}"),
284            (PrimitiveType::Int, PrimitiveLiteral::Int(val)) => write!(f, "{val}"),
285            (PrimitiveType::Long, PrimitiveLiteral::Long(val)) => write!(f, "{val}"),
286            (_, PrimitiveLiteral::Float(val)) => write!(f, "{val}"),
287            (_, PrimitiveLiteral::Double(val)) => write!(f, "{val}"),
288            (PrimitiveType::Date, PrimitiveLiteral::Int(val)) => {
289                write!(f, "{}", date::days_to_date(*val))
290            }
291            (PrimitiveType::Time, PrimitiveLiteral::Long(val)) => {
292                write!(f, "{}", time::microseconds_to_time(*val))
293            }
294            (PrimitiveType::Timestamp, PrimitiveLiteral::Long(val)) => {
295                write!(f, "{}", timestamp::microseconds_to_datetime(*val))
296            }
297            (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(val)) => {
298                write!(f, "{}", timestamptz::microseconds_to_datetimetz(*val))
299            }
300            (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(val)) => {
301                write!(f, "{}", timestamp::nanoseconds_to_datetime(*val))
302            }
303            (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(val)) => {
304                write!(f, "{}", timestamptz::nanoseconds_to_datetimetz(*val))
305            }
306            (_, PrimitiveLiteral::String(val)) => write!(f, r#""{val}""#),
307            (PrimitiveType::Uuid, PrimitiveLiteral::UInt128(val)) => {
308                write!(f, "{}", uuid::Uuid::from_u128(*val))
309            }
310            (_, PrimitiveLiteral::Binary(val)) => display_bytes(val, f),
311            (
312                PrimitiveType::Decimal {
313                    precision: _,
314                    scale,
315                },
316                PrimitiveLiteral::Int128(val),
317            ) => {
318                write!(f, "{}", Decimal::from_i128_with_scale(*val, *scale))
319            }
320            (_, _) => {
321                unreachable!()
322            }
323        }
324    }
325}
326
/// Writes `bytes` to `f` as upper-case hexadecimal, two digits per byte and no separators.
///
/// Each byte is formatted straight into the formatter, so no temporary `String` is allocated.
/// An empty slice writes nothing.
fn display_bytes(bytes: &[u8], f: &mut Formatter<'_>) -> std::fmt::Result {
    bytes.iter().try_for_each(|b| write!(f, "{b:02X}"))
}
334
335impl From<Datum> for Literal {
336    fn from(value: Datum) -> Self {
337        Literal::Primitive(value.literal)
338    }
339}
340
341impl From<Datum> for PrimitiveLiteral {
342    fn from(value: Datum) -> Self {
343        value.literal
344    }
345}
346
347impl Datum {
348    /// Creates a `Datum` from a `PrimitiveType` and a `PrimitiveLiteral`
349    pub(crate) fn new(r#type: PrimitiveType, literal: PrimitiveLiteral) -> Self {
350        Datum { r#type, literal }
351    }
352
353    /// Create iceberg value from bytes.
354    ///
355    /// See [this spec](https://iceberg.apache.org/spec/#binary-single-value-serialization) for reference.
356    pub fn try_from_bytes(bytes: &[u8], data_type: PrimitiveType) -> Result<Self> {
357        let literal = match data_type {
358            PrimitiveType::Boolean => {
359                if bytes.len() == 1 && bytes[0] == 0u8 {
360                    PrimitiveLiteral::Boolean(false)
361                } else {
362                    PrimitiveLiteral::Boolean(true)
363                }
364            }
365            PrimitiveType::Int => PrimitiveLiteral::Int(i32::from_le_bytes(bytes.try_into()?)),
366            PrimitiveType::Long => {
367                if bytes.len() == 4 {
368                    // In the case of an evolved field
369                    PrimitiveLiteral::Long(i32::from_le_bytes(bytes.try_into()?) as i64)
370                } else {
371                    PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?))
372                }
373            }
374            PrimitiveType::Float => {
375                PrimitiveLiteral::Float(OrderedFloat(f32::from_le_bytes(bytes.try_into()?)))
376            }
377            PrimitiveType::Double => {
378                if bytes.len() == 4 {
379                    // In the case of an evolved field
380                    PrimitiveLiteral::Double(OrderedFloat(
381                        f32::from_le_bytes(bytes.try_into()?) as f64
382                    ))
383                } else {
384                    PrimitiveLiteral::Double(OrderedFloat(f64::from_le_bytes(bytes.try_into()?)))
385                }
386            }
387            PrimitiveType::Date => PrimitiveLiteral::Int(i32::from_le_bytes(bytes.try_into()?)),
388            PrimitiveType::Time => PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?)),
389            PrimitiveType::Timestamp => {
390                PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?))
391            }
392            PrimitiveType::Timestamptz => {
393                PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?))
394            }
395            PrimitiveType::TimestampNs => {
396                PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?))
397            }
398            PrimitiveType::TimestamptzNs => {
399                PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?))
400            }
401            PrimitiveType::String => {
402                PrimitiveLiteral::String(std::str::from_utf8(bytes)?.to_string())
403            }
404            PrimitiveType::Uuid => {
405                PrimitiveLiteral::UInt128(u128::from_be_bytes(bytes.try_into()?))
406            }
407            PrimitiveType::Fixed(_) => PrimitiveLiteral::Binary(Vec::from(bytes)),
408            PrimitiveType::Binary => PrimitiveLiteral::Binary(Vec::from(bytes)),
409            PrimitiveType::Decimal { .. } => {
410                let unscaled_value = BigInt::from_signed_bytes_be(bytes);
411                PrimitiveLiteral::Int128(unscaled_value.to_i128().ok_or_else(|| {
412                    Error::new(
413                        ErrorKind::DataInvalid,
414                        format!("Can't convert bytes to i128: {bytes:?}"),
415                    )
416                })?)
417            }
418        };
419        Ok(Datum::new(data_type, literal))
420    }
421
422    /// Convert the value to bytes
423    ///
424    /// See [this spec](https://iceberg.apache.org/spec/#binary-single-value-serialization) for reference.
425    pub fn to_bytes(&self) -> Result<ByteBuf> {
426        let buf = match &self.literal {
427            PrimitiveLiteral::Boolean(val) => {
428                if *val {
429                    ByteBuf::from([1u8])
430                } else {
431                    ByteBuf::from([0u8])
432                }
433            }
434            PrimitiveLiteral::Int(val) => ByteBuf::from(val.to_le_bytes()),
435            PrimitiveLiteral::Long(val) => ByteBuf::from(val.to_le_bytes()),
436            PrimitiveLiteral::Float(val) => ByteBuf::from(val.to_le_bytes()),
437            PrimitiveLiteral::Double(val) => ByteBuf::from(val.to_le_bytes()),
438            PrimitiveLiteral::String(val) => ByteBuf::from(val.as_bytes()),
439            PrimitiveLiteral::UInt128(val) => ByteBuf::from(val.to_be_bytes()),
440            PrimitiveLiteral::Binary(val) => ByteBuf::from(val.as_slice()),
441            PrimitiveLiteral::Int128(val) => {
442                let PrimitiveType::Decimal { precision, .. } = self.r#type else {
443                    return Err(Error::new(
444                        ErrorKind::DataInvalid,
445                        format!(
446                            "PrimitiveLiteral Int128 must be PrimitiveType Decimal but got {}",
447                            &self.r#type
448                        ),
449                    ));
450                };
451
452                // It's required by iceberg spec that we must keep the minimum
453                // number of bytes for the value
454                let Ok(required_bytes) = Type::decimal_required_bytes(precision) else {
455                    return Err(Error::new(
456                        ErrorKind::DataInvalid,
457                        format!(
458                            "PrimitiveType Decimal must has valid precision but got {precision}"
459                        ),
460                    ));
461                };
462
463                // The primitive literal is unscaled value.
464                let unscaled_value = BigInt::from(*val);
465                // Convert into two's-complement byte representation of the BigInt
466                // in big-endian byte order.
467                let mut bytes = unscaled_value.to_signed_bytes_be();
468                // Truncate with required bytes to make sure.
469                bytes.truncate(required_bytes as usize);
470
471                ByteBuf::from(bytes)
472            }
473            PrimitiveLiteral::AboveMax | PrimitiveLiteral::BelowMin => {
474                return Err(Error::new(
475                    ErrorKind::DataInvalid,
476                    "Cannot convert AboveMax or BelowMin to bytes".to_string(),
477                ));
478            }
479        };
480
481        Ok(buf)
482    }
483
484    /// Creates a boolean value.
485    ///
486    /// Example:
487    /// ```rust
488    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
489    /// let t = Datum::bool(true);
490    ///
491    /// assert_eq!(format!("{}", t), "true".to_string());
492    /// assert_eq!(
493    ///     Literal::from(t),
494    ///     Literal::Primitive(PrimitiveLiteral::Boolean(true))
495    /// );
496    /// ```
497    pub fn bool<T: Into<bool>>(t: T) -> Self {
498        Self {
499            r#type: PrimitiveType::Boolean,
500            literal: PrimitiveLiteral::Boolean(t.into()),
501        }
502    }
503
504    /// Creates a boolean value from string.
505    /// See [Parse bool from str](https://doc.rust-lang.org/stable/std/primitive.bool.html#impl-FromStr-for-bool) for reference.
506    ///
507    /// Example:
508    /// ```rust
509    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
510    /// let t = Datum::bool_from_str("false").unwrap();
511    ///
512    /// assert_eq!(&format!("{}", t), "false");
513    /// assert_eq!(
514    ///     Literal::Primitive(PrimitiveLiteral::Boolean(false)),
515    ///     t.into()
516    /// );
517    /// ```
518    pub fn bool_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
519        let v = s.as_ref().parse::<bool>().map_err(|e| {
520            Error::new(ErrorKind::DataInvalid, "Can't parse string to bool.").with_source(e)
521        })?;
522        Ok(Self::bool(v))
523    }
524
525    /// Creates an 32bit integer.
526    ///
527    /// Example:
528    /// ```rust
529    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
530    /// let t = Datum::int(23i8);
531    ///
532    /// assert_eq!(&format!("{}", t), "23");
533    /// assert_eq!(Literal::Primitive(PrimitiveLiteral::Int(23)), t.into());
534    /// ```
535    pub fn int<T: Into<i32>>(t: T) -> Self {
536        Self {
537            r#type: PrimitiveType::Int,
538            literal: PrimitiveLiteral::Int(t.into()),
539        }
540    }
541
542    /// Creates an 64bit integer.
543    ///
544    /// Example:
545    /// ```rust
546    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
547    /// let t = Datum::long(24i8);
548    ///
549    /// assert_eq!(&format!("{t}"), "24");
550    /// assert_eq!(Literal::Primitive(PrimitiveLiteral::Long(24)), t.into());
551    /// ```
552    pub fn long<T: Into<i64>>(t: T) -> Self {
553        Self {
554            r#type: PrimitiveType::Long,
555            literal: PrimitiveLiteral::Long(t.into()),
556        }
557    }
558
559    /// Creates an 32bit floating point number.
560    ///
561    /// Example:
562    /// ```rust
563    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
564    /// use ordered_float::OrderedFloat;
565    /// let t = Datum::float(32.1f32);
566    ///
567    /// assert_eq!(&format!("{t}"), "32.1");
568    /// assert_eq!(
569    ///     Literal::Primitive(PrimitiveLiteral::Float(OrderedFloat(32.1))),
570    ///     t.into()
571    /// );
572    /// ```
573    pub fn float<T: Into<f32>>(t: T) -> Self {
574        Self {
575            r#type: PrimitiveType::Float,
576            literal: PrimitiveLiteral::Float(OrderedFloat(t.into())),
577        }
578    }
579
580    /// Creates an 64bit floating point number.
581    ///
582    /// Example:
583    /// ```rust
584    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
585    /// use ordered_float::OrderedFloat;
586    /// let t = Datum::double(32.1f64);
587    ///
588    /// assert_eq!(&format!("{t}"), "32.1");
589    /// assert_eq!(
590    ///     Literal::Primitive(PrimitiveLiteral::Double(OrderedFloat(32.1))),
591    ///     t.into()
592    /// );
593    /// ```
594    pub fn double<T: Into<f64>>(t: T) -> Self {
595        Self {
596            r#type: PrimitiveType::Double,
597            literal: PrimitiveLiteral::Double(OrderedFloat(t.into())),
598        }
599    }
600
601    /// Creates date literal from number of days from unix epoch directly.
602    ///
603    /// Example:
604    /// ```rust
605    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
606    /// // 2 days after 1970-01-01
607    /// let t = Datum::date(2);
608    ///
609    /// assert_eq!(&format!("{t}"), "1970-01-03");
610    /// assert_eq!(Literal::Primitive(PrimitiveLiteral::Int(2)), t.into());
611    /// ```
612    pub fn date(days: i32) -> Self {
613        Self {
614            r#type: PrimitiveType::Date,
615            literal: PrimitiveLiteral::Int(days),
616        }
617    }
618
619    /// Creates date literal in `%Y-%m-%d` format, assume in utc timezone.
620    ///
621    /// See [`NaiveDate::from_str`].
622    ///
623    /// Example
624    /// ```rust
625    /// use iceberg::spec::{Datum, Literal};
626    /// let t = Datum::date_from_str("1970-01-05").unwrap();
627    ///
628    /// assert_eq!(&format!("{t}"), "1970-01-05");
629    /// assert_eq!(Literal::date(4), t.into());
630    /// ```
631    pub fn date_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
632        let t = s.as_ref().parse::<NaiveDate>().map_err(|e| {
633            Error::new(
634                ErrorKind::DataInvalid,
635                format!("Can't parse date from string: {}", s.as_ref()),
636            )
637            .with_source(e)
638        })?;
639
640        Ok(Self::date(date::date_from_naive_date(t)))
641    }
642
643    /// Create date literal from calendar date (year, month and day).
644    ///
645    /// See [`NaiveDate::from_ymd_opt`].
646    ///
647    /// Example:
648    ///
649    ///```rust
650    /// use iceberg::spec::{Datum, Literal};
651    /// let t = Datum::date_from_ymd(1970, 1, 5).unwrap();
652    ///
653    /// assert_eq!(&format!("{t}"), "1970-01-05");
654    /// assert_eq!(Literal::date(4), t.into());
655    /// ```
656    pub fn date_from_ymd(year: i32, month: u32, day: u32) -> Result<Self> {
657        let t = NaiveDate::from_ymd_opt(year, month, day).ok_or_else(|| {
658            Error::new(
659                ErrorKind::DataInvalid,
660                format!("Can't create date from year: {year}, month: {month}, day: {day}"),
661            )
662        })?;
663
664        Ok(Self::date(date::date_from_naive_date(t)))
665    }
666
667    /// Creates time literal in microseconds directly.
668    ///
669    /// It will return error when it's negative or too large to fit in 24 hours.
670    ///
671    /// Example:
672    ///
673    /// ```rust
674    /// use iceberg::spec::{Datum, Literal};
675    /// let micro_secs = {
676    ///     1 * 3600 * 1_000_000 + // 1 hour
677    ///     2 * 60 * 1_000_000 +   // 2 minutes
678    ///     1 * 1_000_000 + // 1 second
679    ///     888999 // microseconds
680    /// };
681    ///
682    /// let t = Datum::time_micros(micro_secs).unwrap();
683    ///
684    /// assert_eq!(&format!("{t}"), "01:02:01.888999");
685    /// assert_eq!(Literal::time(micro_secs), t.into());
686    ///
687    /// let negative_value = -100;
688    /// assert!(Datum::time_micros(negative_value).is_err());
689    ///
690    /// let too_large_value = 36 * 60 * 60 * 1_000_000; // Too large to fit in 24 hours.
691    /// assert!(Datum::time_micros(too_large_value).is_err());
692    /// ```
693    pub fn time_micros(value: i64) -> Result<Self> {
694        ensure_data_valid!(
695            (0..=MAX_TIME_VALUE).contains(&value),
696            "Invalid value for Time type: {}",
697            value
698        );
699
700        Ok(Self {
701            r#type: PrimitiveType::Time,
702            literal: PrimitiveLiteral::Long(value),
703        })
704    }
705
706    /// Creates time literal from [`chrono::NaiveTime`].
707    fn time_from_naive_time(t: NaiveTime) -> Self {
708        let duration = t - date::unix_epoch().time();
709        // It's safe to unwrap here since less than 24 hours will never overflow.
710        let micro_secs = duration.num_microseconds().unwrap();
711
712        Self {
713            r#type: PrimitiveType::Time,
714            literal: PrimitiveLiteral::Long(micro_secs),
715        }
716    }
717
718    /// Creates time literal in microseconds in `%H:%M:%S:.f` format.
719    ///
720    /// See [`NaiveTime::from_str`] for details.
721    ///
722    /// Example:
723    /// ```rust
724    /// use iceberg::spec::{Datum, Literal};
725    /// let t = Datum::time_from_str("01:02:01.888999777").unwrap();
726    ///
727    /// assert_eq!(&format!("{t}"), "01:02:01.888999");
728    /// ```
729    pub fn time_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
730        let t = s.as_ref().parse::<NaiveTime>().map_err(|e| {
731            Error::new(
732                ErrorKind::DataInvalid,
733                format!("Can't parse time from string: {}", s.as_ref()),
734            )
735            .with_source(e)
736        })?;
737
738        Ok(Self::time_from_naive_time(t))
739    }
740
741    /// Creates time literal from hour, minute, second, and microseconds.
742    ///
743    /// See [`NaiveTime::from_hms_micro_opt`].
744    ///
745    /// Example:
746    /// ```rust
747    /// use iceberg::spec::{Datum, Literal};
748    /// let t = Datum::time_from_hms_micro(22, 15, 33, 111).unwrap();
749    ///
750    /// assert_eq!(&format!("{t}"), "22:15:33.000111");
751    /// ```
752    pub fn time_from_hms_micro(hour: u32, min: u32, sec: u32, micro: u32) -> Result<Self> {
753        let t = NaiveTime::from_hms_micro_opt(hour, min, sec, micro)
754            .ok_or_else(|| Error::new(
755                ErrorKind::DataInvalid,
756                format!("Can't create time from hour: {hour}, min: {min}, second: {sec}, microsecond: {micro}"),
757            ))?;
758        Ok(Self::time_from_naive_time(t))
759    }
760
761    /// Creates a timestamp from unix epoch in microseconds.
762    ///
763    /// Example:
764    ///
765    /// ```rust
766    /// use iceberg::spec::Datum;
767    /// let t = Datum::timestamp_micros(1000);
768    ///
769    /// assert_eq!(&format!("{t}"), "1970-01-01 00:00:00.001");
770    /// ```
771    pub fn timestamp_micros(value: i64) -> Self {
772        Self {
773            r#type: PrimitiveType::Timestamp,
774            literal: PrimitiveLiteral::Long(value),
775        }
776    }
777
778    /// Creates a timestamp from unix epoch in nanoseconds.
779    ///
780    /// Example:
781    ///
782    /// ```rust
783    /// use iceberg::spec::Datum;
784    /// let t = Datum::timestamp_nanos(1000);
785    ///
786    /// assert_eq!(&format!("{t}"), "1970-01-01 00:00:00.000001");
787    /// ```
788    pub fn timestamp_nanos(value: i64) -> Self {
789        Self {
790            r#type: PrimitiveType::TimestampNs,
791            literal: PrimitiveLiteral::Long(value),
792        }
793    }
794
795    /// Creates a timestamp from [`DateTime`].
796    ///
797    /// Example:
798    ///
799    /// ```rust
800    /// use chrono::{NaiveDate, NaiveDateTime, TimeZone, Utc};
801    /// use iceberg::spec::Datum;
802    /// let t = Datum::timestamp_from_datetime(
803    ///     NaiveDate::from_ymd_opt(1992, 3, 1)
804    ///         .unwrap()
805    ///         .and_hms_micro_opt(1, 2, 3, 88)
806    ///         .unwrap(),
807    /// );
808    ///
809    /// assert_eq!(&format!("{t}"), "1992-03-01 01:02:03.000088");
810    /// ```
811    pub fn timestamp_from_datetime(dt: NaiveDateTime) -> Self {
812        Self::timestamp_micros(dt.and_utc().timestamp_micros())
813    }
814
815    /// Parse a timestamp in [`%Y-%m-%dT%H:%M:%S%.f`] format.
816    ///
817    /// See [`NaiveDateTime::from_str`].
818    ///
819    /// Example:
820    ///
821    /// ```rust
822    /// use chrono::{DateTime, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime};
823    /// use iceberg::spec::{Datum, Literal};
824    /// let t = Datum::timestamp_from_str("1992-03-01T01:02:03.000088").unwrap();
825    ///
826    /// assert_eq!(&format!("{t}"), "1992-03-01 01:02:03.000088");
827    /// ```
828    pub fn timestamp_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
829        let dt = s.as_ref().parse::<NaiveDateTime>().map_err(|e| {
830            Error::new(ErrorKind::DataInvalid, "Can't parse timestamp.").with_source(e)
831        })?;
832
833        Ok(Self::timestamp_from_datetime(dt))
834    }
835
836    /// Creates a timestamp with timezone from unix epoch in microseconds.
837    ///
838    /// Example:
839    ///
840    /// ```rust
841    /// use iceberg::spec::Datum;
842    /// let t = Datum::timestamptz_micros(1000);
843    ///
844    /// assert_eq!(&format!("{t}"), "1970-01-01 00:00:00.001 UTC");
845    /// ```
846    pub fn timestamptz_micros(value: i64) -> Self {
847        Self {
848            r#type: PrimitiveType::Timestamptz,
849            literal: PrimitiveLiteral::Long(value),
850        }
851    }
852
853    /// Creates a timestamp with timezone from unix epoch in nanoseconds.
854    ///
855    /// Example:
856    ///
857    /// ```rust
858    /// use iceberg::spec::Datum;
859    /// let t = Datum::timestamptz_nanos(1000);
860    ///
861    /// assert_eq!(&format!("{t}"), "1970-01-01 00:00:00.000001 UTC");
862    /// ```
863    pub fn timestamptz_nanos(value: i64) -> Self {
864        Self {
865            r#type: PrimitiveType::TimestamptzNs,
866            literal: PrimitiveLiteral::Long(value),
867        }
868    }
869
870    /// Creates a timestamp with timezone from [`DateTime`].
871    /// Example:
872    ///
873    /// ```rust
874    /// use chrono::{TimeZone, Utc};
875    /// use iceberg::spec::Datum;
876    /// let t = Datum::timestamptz_from_datetime(Utc.timestamp_opt(1000, 0).unwrap());
877    ///
878    /// assert_eq!(&format!("{t}"), "1970-01-01 00:16:40 UTC");
879    /// ```
880    pub fn timestamptz_from_datetime<T: TimeZone>(dt: DateTime<T>) -> Self {
881        Self::timestamptz_micros(dt.with_timezone(&Utc).timestamp_micros())
882    }
883
884    /// Parse timestamp with timezone in RFC3339 format.
885    ///
886    /// See [`DateTime::from_str`].
887    ///
888    /// Example:
889    ///
890    /// ```rust
891    /// use chrono::{DateTime, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime};
892    /// use iceberg::spec::{Datum, Literal};
893    /// let t = Datum::timestamptz_from_str("1992-03-01T01:02:03.000088+08:00").unwrap();
894    ///
895    /// assert_eq!(&format!("{t}"), "1992-02-29 17:02:03.000088 UTC");
896    /// ```
897    pub fn timestamptz_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
898        let dt = DateTime::<Utc>::from_str(s.as_ref()).map_err(|e| {
899            Error::new(ErrorKind::DataInvalid, "Can't parse datetime.").with_source(e)
900        })?;
901
902        Ok(Self::timestamptz_from_datetime(dt))
903    }
904
905    /// Creates a string literal.
906    ///
907    /// Example:
908    ///
909    /// ```rust
910    /// use iceberg::spec::Datum;
911    /// let t = Datum::string("ss");
912    ///
913    /// assert_eq!(&format!("{t}"), r#""ss""#);
914    /// ```
915    pub fn string<S: ToString>(s: S) -> Self {
916        Self {
917            r#type: PrimitiveType::String,
918            literal: PrimitiveLiteral::String(s.to_string()),
919        }
920    }
921
922    /// Creates uuid literal.
923    ///
924    /// Example:
925    ///
926    /// ```rust
927    /// use iceberg::spec::Datum;
928    /// use uuid::uuid;
929    /// let t = Datum::uuid(uuid!("a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8"));
930    ///
931    /// assert_eq!(&format!("{t}"), "a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8");
932    /// ```
933    pub fn uuid(uuid: uuid::Uuid) -> Self {
934        Self {
935            r#type: PrimitiveType::Uuid,
936            literal: PrimitiveLiteral::UInt128(uuid.as_u128()),
937        }
938    }
939
940    /// Creates uuid from str. See [`uuid::Uuid::parse_str`].
941    ///
942    /// Example:
943    ///
944    /// ```rust
945    /// use iceberg::spec::Datum;
946    /// let t = Datum::uuid_from_str("a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8").unwrap();
947    ///
948    /// assert_eq!(&format!("{t}"), "a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8");
949    /// ```
950    pub fn uuid_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
951        let uuid = uuid::Uuid::parse_str(s.as_ref()).map_err(|e| {
952            Error::new(
953                ErrorKind::DataInvalid,
954                format!("Can't parse uuid from string: {}", s.as_ref()),
955            )
956            .with_source(e)
957        })?;
958        Ok(Self::uuid(uuid))
959    }
960
961    /// Creates a fixed literal from bytes.
962    ///
963    /// Example:
964    ///
965    /// ```rust
966    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
967    /// let t = Datum::fixed(vec![1u8, 2u8]);
968    ///
969    /// assert_eq!(&format!("{t}"), "0102");
970    /// ```
971    pub fn fixed<I: IntoIterator<Item = u8>>(input: I) -> Self {
972        let value: Vec<u8> = input.into_iter().collect();
973        Self {
974            r#type: PrimitiveType::Fixed(value.len() as u64),
975            literal: PrimitiveLiteral::Binary(value),
976        }
977    }
978
979    /// Creates a binary literal from bytes.
980    ///
981    /// Example:
982    ///
983    /// ```rust
984    /// use iceberg::spec::Datum;
985    /// let t = Datum::binary(vec![1u8, 100u8]);
986    ///
987    /// assert_eq!(&format!("{t}"), "0164");
988    /// ```
989    pub fn binary<I: IntoIterator<Item = u8>>(input: I) -> Self {
990        Self {
991            r#type: PrimitiveType::Binary,
992            literal: PrimitiveLiteral::Binary(input.into_iter().collect()),
993        }
994    }
995
996    /// Creates decimal literal from string. See [`Decimal::from_str_exact`].
997    ///
998    /// Example:
999    ///
1000    /// ```rust
1001    /// use iceberg::spec::Datum;
1002    /// use itertools::assert_equal;
1003    /// use rust_decimal::Decimal;
1004    /// let t = Datum::decimal_from_str("123.45").unwrap();
1005    ///
1006    /// assert_eq!(&format!("{t}"), "123.45");
1007    /// ```
1008    pub fn decimal_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
1009        let decimal = Decimal::from_str_exact(s.as_ref()).map_err(|e| {
1010            Error::new(ErrorKind::DataInvalid, "Can't parse decimal.").with_source(e)
1011        })?;
1012
1013        Self::decimal(decimal)
1014    }
1015
1016    /// Try to create a decimal literal from [`Decimal`].
1017    ///
1018    /// Example:
1019    ///
1020    /// ```rust
1021    /// use iceberg::spec::Datum;
1022    /// use rust_decimal::Decimal;
1023    ///
1024    /// let t = Datum::decimal(Decimal::new(123, 2)).unwrap();
1025    ///
1026    /// assert_eq!(&format!("{t}"), "1.23");
1027    /// ```
1028    pub fn decimal(value: impl Into<Decimal>) -> Result<Self> {
1029        let decimal = value.into();
1030        let scale = decimal.scale();
1031
1032        let r#type = Type::decimal(MAX_DECIMAL_PRECISION, scale)?;
1033        if let Type::Primitive(p) = r#type {
1034            Ok(Self {
1035                r#type: p,
1036                literal: PrimitiveLiteral::Int128(decimal.mantissa()),
1037            })
1038        } else {
1039            unreachable!("Decimal type must be primitive.")
1040        }
1041    }
1042
1043    /// Try to create a decimal literal from [`Decimal`] with precision.
1044    ///
1045    /// Example:
1046    ///
1047    /// ```rust
1048    /// use iceberg::spec::Datum;
1049    /// use rust_decimal::Decimal;
1050    ///
1051    /// let t = Datum::decimal_with_precision(Decimal::new(123, 2), 30).unwrap();
1052    ///
1053    /// assert_eq!(&format!("{t}"), "1.23");
1054    /// ```
1055    pub fn decimal_with_precision(value: impl Into<Decimal>, precision: u32) -> Result<Self> {
1056        let decimal = value.into();
1057        let scale = decimal.scale();
1058
1059        let available_bytes = Type::decimal_required_bytes(precision)? as usize;
1060        let unscaled_value = BigInt::from(decimal.mantissa());
1061        let actual_bytes = unscaled_value.to_signed_bytes_be();
1062        if actual_bytes.len() > available_bytes {
1063            return Err(Error::new(
1064                ErrorKind::DataInvalid,
1065                format!("Decimal value {decimal} is too large for precision {precision}"),
1066            ));
1067        }
1068
1069        let r#type = Type::decimal(precision, scale)?;
1070        if let Type::Primitive(p) = r#type {
1071            Ok(Self {
1072                r#type: p,
1073                literal: PrimitiveLiteral::Int128(decimal.mantissa()),
1074            })
1075        } else {
1076            unreachable!("Decimal type must be primitive.")
1077        }
1078    }
1079
1080    fn i64_to_i32<T: Into<i64> + PartialOrd<i64>>(val: T) -> Datum {
1081        if val > INT_MAX as i64 {
1082            Datum::new(PrimitiveType::Int, PrimitiveLiteral::AboveMax)
1083        } else if val < INT_MIN as i64 {
1084            Datum::new(PrimitiveType::Int, PrimitiveLiteral::BelowMin)
1085        } else {
1086            Datum::int(val.into() as i32)
1087        }
1088    }
1089
1090    fn i128_to_i32<T: Into<i128> + PartialOrd<i128>>(val: T) -> Datum {
1091        if val > INT_MAX as i128 {
1092            Datum::new(PrimitiveType::Int, PrimitiveLiteral::AboveMax)
1093        } else if val < INT_MIN as i128 {
1094            Datum::new(PrimitiveType::Int, PrimitiveLiteral::BelowMin)
1095        } else {
1096            Datum::int(val.into() as i32)
1097        }
1098    }
1099
1100    fn i128_to_i64<T: Into<i128> + PartialOrd<i128>>(val: T) -> Datum {
1101        if val > LONG_MAX as i128 {
1102            Datum::new(PrimitiveType::Long, PrimitiveLiteral::AboveMax)
1103        } else if val < LONG_MIN as i128 {
1104            Datum::new(PrimitiveType::Long, PrimitiveLiteral::BelowMin)
1105        } else {
1106            Datum::long(val.into() as i64)
1107        }
1108    }
1109
1110    fn string_to_i128<S: AsRef<str>>(s: S) -> Result<i128> {
1111        s.as_ref().parse::<i128>().map_err(|e| {
1112            Error::new(ErrorKind::DataInvalid, "Can't parse string to i128.").with_source(e)
1113        })
1114    }
1115
1116    /// Convert the datum to `target_type`.
1117    pub fn to(self, target_type: &Type) -> Result<Datum> {
1118        match target_type {
1119            Type::Primitive(target_primitive_type) => {
1120                match (&self.literal, &self.r#type, target_primitive_type) {
1121                    (PrimitiveLiteral::Int(val), _, PrimitiveType::Int) => Ok(Datum::int(*val)),
1122                    (PrimitiveLiteral::Int(val), _, PrimitiveType::Date) => Ok(Datum::date(*val)),
1123                    (PrimitiveLiteral::Int(val), _, PrimitiveType::Long) => Ok(Datum::long(*val)),
1124                    (PrimitiveLiteral::Long(val), _, PrimitiveType::Int) => {
1125                        Ok(Datum::i64_to_i32(*val))
1126                    }
1127                    (PrimitiveLiteral::Long(val), _, PrimitiveType::Timestamp) => {
1128                        Ok(Datum::timestamp_micros(*val))
1129                    }
1130                    (PrimitiveLiteral::Long(val), _, PrimitiveType::Timestamptz) => {
1131                        Ok(Datum::timestamptz_micros(*val))
1132                    }
1133                    // Let's wait with nano's until this clears up: https://github.com/apache/iceberg/pull/11775
1134                    (PrimitiveLiteral::Int128(val), _, PrimitiveType::Long) => {
1135                        Ok(Datum::i128_to_i64(*val))
1136                    }
1137
1138                    (PrimitiveLiteral::String(val), _, PrimitiveType::Boolean) => {
1139                        Datum::bool_from_str(val)
1140                    }
1141                    (PrimitiveLiteral::String(val), _, PrimitiveType::Int) => {
1142                        Datum::string_to_i128(val).map(Datum::i128_to_i32)
1143                    }
1144                    (PrimitiveLiteral::String(val), _, PrimitiveType::Long) => {
1145                        Datum::string_to_i128(val).map(Datum::i128_to_i64)
1146                    }
1147                    (PrimitiveLiteral::String(val), _, PrimitiveType::Timestamp) => {
1148                        Datum::timestamp_from_str(val)
1149                    }
1150                    (PrimitiveLiteral::String(val), _, PrimitiveType::Timestamptz) => {
1151                        Datum::timestamptz_from_str(val)
1152                    }
1153
1154                    // TODO: implement more type conversions
1155                    (_, self_type, target_type) if self_type == target_type => Ok(self),
1156                    _ => Err(Error::new(
1157                        ErrorKind::DataInvalid,
1158                        format!(
1159                            "Can't convert datum from {} type to {} type.",
1160                            self.r#type, target_primitive_type
1161                        ),
1162                    )),
1163                }
1164            }
1165            _ => Err(Error::new(
1166                ErrorKind::DataInvalid,
1167                format!(
1168                    "Can't convert datum from {} type to {} type.",
1169                    self.r#type, target_type
1170                ),
1171            )),
1172        }
1173    }
1174
1175    /// Get the primitive literal from datum.
1176    pub fn literal(&self) -> &PrimitiveLiteral {
1177        &self.literal
1178    }
1179
1180    /// Get the primitive type from datum.
1181    pub fn data_type(&self) -> &PrimitiveType {
1182        &self.r#type
1183    }
1184
1185    /// Returns true if the Literal represents a primitive type
1186    /// that can be a NaN, and that it's value is NaN
1187    pub fn is_nan(&self) -> bool {
1188        match self.literal {
1189            PrimitiveLiteral::Double(val) => val.is_nan(),
1190            PrimitiveLiteral::Float(val) => val.is_nan(),
1191            _ => false,
1192        }
1193    }
1194
1195    /// Returns a human-readable string representation of this literal.
1196    ///
1197    /// For string literals, this returns the raw string value without quotes.
1198    /// For all other literals, it falls back to [`to_string()`].
1199    pub fn to_human_string(&self) -> String {
1200        match self.literal() {
1201            PrimitiveLiteral::String(s) => s.to_string(),
1202            _ => self.to_string(),
1203        }
1204    }
1205}