iceberg/spec/values/
datum.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Typed literals with validation
19
20use std::cmp::Ordering;
21use std::fmt::{Display, Formatter};
22use std::str::FromStr;
23
24use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc};
25use num_bigint::BigInt;
26use ordered_float::{Float, OrderedFloat};
27use rust_decimal::Decimal;
28use rust_decimal::prelude::ToPrimitive;
29use serde::de::{self, MapAccess};
30use serde::ser::SerializeStruct;
31use serde::{Deserialize, Serialize};
32use serde_bytes::ByteBuf;
33
34use super::literal::Literal;
35use super::primitive::PrimitiveLiteral;
36use super::serde::_serde::RawLiteral;
37use super::temporal::{date, time, timestamp, timestamptz};
38use crate::error::Result;
39use crate::spec::MAX_DECIMAL_PRECISION;
40use crate::spec::datatypes::{PrimitiveType, Type};
41use crate::{Error, ErrorKind, ensure_data_valid};
42
/// Largest valid value for the [`PrimitiveType::Time`] type in microseconds, i.e. 23 hours 59 minutes 59 seconds
/// 999999 microseconds (one microsecond short of a full day).
pub(crate) const MAX_TIME_VALUE: i64 = 24 * 3_600 * 1_000_000 - 1;

/// Largest value representable by the 32-bit `int` type.
pub(crate) const INT_MAX: i32 = i32::MAX;
/// Smallest value representable by the 32-bit `int` type.
pub(crate) const INT_MIN: i32 = i32::MIN;
/// Largest value representable by the 64-bit `long` type.
pub(crate) const LONG_MAX: i64 = i64::MAX;
/// Smallest value representable by the 64-bit `long` type.
pub(crate) const LONG_MIN: i64 = i64::MIN;
50
/// A literal associated with its type. The value and type pair is checked at construction, so the type and value
/// are guaranteed to be consistent when used.
///
/// By default, the type and the value of a literal are decoupled, which avoids the cost of storing extra type
/// information for each literal. Associating the type with the literal is still useful in some cases, for example
/// in unbound expressions.
#[derive(Clone, Debug, PartialEq, Hash, Eq)]
pub struct Datum {
    /// Primitive type that determines how `literal` is interpreted.
    r#type: PrimitiveType,
    /// The raw value, stored without its own type information.
    literal: PrimitiveLiteral,
}
61
62impl Serialize for Datum {
63    fn serialize<S: serde::Serializer>(
64        &self,
65        serializer: S,
66    ) -> std::result::Result<S::Ok, S::Error> {
67        let mut struct_ser = serializer
68            .serialize_struct("Datum", 2)
69            .map_err(serde::ser::Error::custom)?;
70        struct_ser
71            .serialize_field("type", &self.r#type)
72            .map_err(serde::ser::Error::custom)?;
73        struct_ser
74            .serialize_field(
75                "literal",
76                &RawLiteral::try_from(
77                    Literal::Primitive(self.literal.clone()),
78                    &Type::Primitive(self.r#type.clone()),
79                )
80                .map_err(serde::ser::Error::custom)?,
81            )
82            .map_err(serde::ser::Error::custom)?;
83        struct_ser.end()
84    }
85}
86
87impl<'de> Deserialize<'de> for Datum {
88    fn deserialize<D: serde::Deserializer<'de>>(
89        deserializer: D,
90    ) -> std::result::Result<Self, D::Error> {
91        #[derive(Deserialize)]
92        #[serde(field_identifier, rename_all = "lowercase")]
93        enum Field {
94            Type,
95            Literal,
96        }
97
98        struct DatumVisitor;
99
100        impl<'de> serde::de::Visitor<'de> for DatumVisitor {
101            type Value = Datum;
102
103            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
104                formatter.write_str("struct Datum")
105            }
106
107            fn visit_seq<A>(self, mut seq: A) -> std::result::Result<Self::Value, A::Error>
108            where A: serde::de::SeqAccess<'de> {
109                let r#type = seq
110                    .next_element::<PrimitiveType>()?
111                    .ok_or_else(|| serde::de::Error::invalid_length(0, &self))?;
112                let value = seq
113                    .next_element::<RawLiteral>()?
114                    .ok_or_else(|| serde::de::Error::invalid_length(1, &self))?;
115                let Literal::Primitive(primitive) = value
116                    .try_into(&Type::Primitive(r#type.clone()))
117                    .map_err(serde::de::Error::custom)?
118                    .ok_or_else(|| serde::de::Error::custom("None value"))?
119                else {
120                    return Err(serde::de::Error::custom("Invalid value"));
121                };
122
123                Ok(Datum::new(r#type, primitive))
124            }
125
126            fn visit_map<V>(self, mut map: V) -> std::result::Result<Datum, V::Error>
127            where V: MapAccess<'de> {
128                let mut raw_primitive: Option<RawLiteral> = None;
129                let mut r#type: Option<PrimitiveType> = None;
130                while let Some(key) = map.next_key()? {
131                    match key {
132                        Field::Type => {
133                            if r#type.is_some() {
134                                return Err(de::Error::duplicate_field("type"));
135                            }
136                            r#type = Some(map.next_value()?);
137                        }
138                        Field::Literal => {
139                            if raw_primitive.is_some() {
140                                return Err(de::Error::duplicate_field("literal"));
141                            }
142                            raw_primitive = Some(map.next_value()?);
143                        }
144                    }
145                }
146                let Some(r#type) = r#type else {
147                    return Err(serde::de::Error::missing_field("type"));
148                };
149                let Some(raw_primitive) = raw_primitive else {
150                    return Err(serde::de::Error::missing_field("literal"));
151                };
152                let Literal::Primitive(primitive) = raw_primitive
153                    .try_into(&Type::Primitive(r#type.clone()))
154                    .map_err(serde::de::Error::custom)?
155                    .ok_or_else(|| serde::de::Error::custom("None value"))?
156                else {
157                    return Err(serde::de::Error::custom("Invalid value"));
158                };
159                Ok(Datum::new(r#type, primitive))
160            }
161        }
162        const FIELDS: &[&str] = &["type", "literal"];
163        deserializer.deserialize_struct("Datum", FIELDS, DatumVisitor)
164    }
165}
166
167// Compare following iceberg float ordering rules:
168//  -NaN < -Infinity < -value < -0 < 0 < value < Infinity < NaN
169fn iceberg_float_cmp<T: Float>(a: T, b: T) -> Option<Ordering> {
170    if a.is_nan() && b.is_nan() {
171        return match (a.is_sign_negative(), b.is_sign_negative()) {
172            (true, false) => Some(Ordering::Less),
173            (false, true) => Some(Ordering::Greater),
174            _ => Some(Ordering::Equal),
175        };
176    }
177
178    if a.is_nan() {
179        return Some(if a.is_sign_negative() {
180            Ordering::Less
181        } else {
182            Ordering::Greater
183        });
184    }
185
186    if b.is_nan() {
187        return Some(if b.is_sign_negative() {
188            Ordering::Greater
189        } else {
190            Ordering::Less
191        });
192    }
193
194    a.partial_cmp(&b)
195}
196
197impl PartialOrd for Datum {
198    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
199        match (&self.literal, &other.literal, &self.r#type, &other.r#type) {
200            // generate the arm with same type and same literal
201            (
202                PrimitiveLiteral::Boolean(val),
203                PrimitiveLiteral::Boolean(other_val),
204                PrimitiveType::Boolean,
205                PrimitiveType::Boolean,
206            ) => val.partial_cmp(other_val),
207            (
208                PrimitiveLiteral::Int(val),
209                PrimitiveLiteral::Int(other_val),
210                PrimitiveType::Int,
211                PrimitiveType::Int,
212            ) => val.partial_cmp(other_val),
213            (
214                PrimitiveLiteral::Long(val),
215                PrimitiveLiteral::Long(other_val),
216                PrimitiveType::Long,
217                PrimitiveType::Long,
218            ) => val.partial_cmp(other_val),
219            (
220                PrimitiveLiteral::Float(val),
221                PrimitiveLiteral::Float(other_val),
222                PrimitiveType::Float,
223                PrimitiveType::Float,
224            ) => iceberg_float_cmp(*val, *other_val),
225            (
226                PrimitiveLiteral::Double(val),
227                PrimitiveLiteral::Double(other_val),
228                PrimitiveType::Double,
229                PrimitiveType::Double,
230            ) => iceberg_float_cmp(*val, *other_val),
231            (
232                PrimitiveLiteral::Int(val),
233                PrimitiveLiteral::Int(other_val),
234                PrimitiveType::Date,
235                PrimitiveType::Date,
236            ) => val.partial_cmp(other_val),
237            (
238                PrimitiveLiteral::Long(val),
239                PrimitiveLiteral::Long(other_val),
240                PrimitiveType::Time,
241                PrimitiveType::Time,
242            ) => val.partial_cmp(other_val),
243            (
244                PrimitiveLiteral::Long(val),
245                PrimitiveLiteral::Long(other_val),
246                PrimitiveType::Timestamp,
247                PrimitiveType::Timestamp,
248            ) => val.partial_cmp(other_val),
249            (
250                PrimitiveLiteral::Long(val),
251                PrimitiveLiteral::Long(other_val),
252                PrimitiveType::Timestamptz,
253                PrimitiveType::Timestamptz,
254            ) => val.partial_cmp(other_val),
255            (
256                PrimitiveLiteral::String(val),
257                PrimitiveLiteral::String(other_val),
258                PrimitiveType::String,
259                PrimitiveType::String,
260            ) => val.partial_cmp(other_val),
261            (
262                PrimitiveLiteral::UInt128(val),
263                PrimitiveLiteral::UInt128(other_val),
264                PrimitiveType::Uuid,
265                PrimitiveType::Uuid,
266            ) => uuid::Uuid::from_u128(*val).partial_cmp(&uuid::Uuid::from_u128(*other_val)),
267            (
268                PrimitiveLiteral::Binary(val),
269                PrimitiveLiteral::Binary(other_val),
270                PrimitiveType::Fixed(_),
271                PrimitiveType::Fixed(_),
272            ) => val.partial_cmp(other_val),
273            (
274                PrimitiveLiteral::Binary(val),
275                PrimitiveLiteral::Binary(other_val),
276                PrimitiveType::Binary,
277                PrimitiveType::Binary,
278            ) => val.partial_cmp(other_val),
279            (
280                PrimitiveLiteral::Int128(val),
281                PrimitiveLiteral::Int128(other_val),
282                PrimitiveType::Decimal {
283                    precision: _,
284                    scale,
285                },
286                PrimitiveType::Decimal {
287                    precision: _,
288                    scale: other_scale,
289                },
290            ) => {
291                let val = Decimal::from_i128_with_scale(*val, *scale);
292                let other_val = Decimal::from_i128_with_scale(*other_val, *other_scale);
293                val.partial_cmp(&other_val)
294            }
295            _ => None,
296        }
297    }
298}
299
300impl Display for Datum {
301    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
302        match (&self.r#type, &self.literal) {
303            (_, PrimitiveLiteral::Boolean(val)) => write!(f, "{val}"),
304            (PrimitiveType::Int, PrimitiveLiteral::Int(val)) => write!(f, "{val}"),
305            (PrimitiveType::Long, PrimitiveLiteral::Long(val)) => write!(f, "{val}"),
306            (_, PrimitiveLiteral::Float(val)) => write!(f, "{val}"),
307            (_, PrimitiveLiteral::Double(val)) => write!(f, "{val}"),
308            (PrimitiveType::Date, PrimitiveLiteral::Int(val)) => {
309                write!(f, "{}", date::days_to_date(*val))
310            }
311            (PrimitiveType::Time, PrimitiveLiteral::Long(val)) => {
312                write!(f, "{}", time::microseconds_to_time(*val))
313            }
314            (PrimitiveType::Timestamp, PrimitiveLiteral::Long(val)) => {
315                write!(f, "{}", timestamp::microseconds_to_datetime(*val))
316            }
317            (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(val)) => {
318                write!(f, "{}", timestamptz::microseconds_to_datetimetz(*val))
319            }
320            (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(val)) => {
321                write!(f, "{}", timestamp::nanoseconds_to_datetime(*val))
322            }
323            (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(val)) => {
324                write!(f, "{}", timestamptz::nanoseconds_to_datetimetz(*val))
325            }
326            (_, PrimitiveLiteral::String(val)) => write!(f, r#""{val}""#),
327            (PrimitiveType::Uuid, PrimitiveLiteral::UInt128(val)) => {
328                write!(f, "{}", uuid::Uuid::from_u128(*val))
329            }
330            (_, PrimitiveLiteral::Binary(val)) => display_bytes(val, f),
331            (
332                PrimitiveType::Decimal {
333                    precision: _,
334                    scale,
335                },
336                PrimitiveLiteral::Int128(val),
337            ) => {
338                write!(f, "{}", Decimal::from_i128_with_scale(*val, *scale))
339            }
340            (_, _) => {
341                unreachable!()
342            }
343        }
344    }
345}
346
/// Writes `bytes` to `f` as uppercase hexadecimal, two digits per byte, without any prefix or separator
/// (for example `[0x00, 0xAB]` is written as `00AB`).
///
/// Each byte is formatted straight into the formatter, so no intermediate `String` is allocated. An empty slice
/// writes nothing. The first formatter error, if any, is returned.
fn display_bytes(bytes: &[u8], f: &mut Formatter<'_>) -> std::fmt::Result {
    bytes.iter().try_for_each(|byte| write!(f, "{byte:02X}"))
}
354
355impl From<Datum> for Literal {
356    fn from(value: Datum) -> Self {
357        Literal::Primitive(value.literal)
358    }
359}
360
361impl From<Datum> for PrimitiveLiteral {
362    fn from(value: Datum) -> Self {
363        value.literal
364    }
365}
366
367impl Datum {
368    /// Creates a `Datum` from a `PrimitiveType` and a `PrimitiveLiteral`
369    pub(crate) fn new(r#type: PrimitiveType, literal: PrimitiveLiteral) -> Self {
370        Datum { r#type, literal }
371    }
372
373    /// Create iceberg value from bytes.
374    ///
375    /// See [this spec](https://iceberg.apache.org/spec/#binary-single-value-serialization) for reference.
376    pub fn try_from_bytes(bytes: &[u8], data_type: PrimitiveType) -> Result<Self> {
377        let literal = match data_type {
378            PrimitiveType::Boolean => {
379                if bytes.len() == 1 && bytes[0] == 0u8 {
380                    PrimitiveLiteral::Boolean(false)
381                } else {
382                    PrimitiveLiteral::Boolean(true)
383                }
384            }
385            PrimitiveType::Int => PrimitiveLiteral::Int(i32::from_le_bytes(bytes.try_into()?)),
386            PrimitiveType::Long => {
387                if bytes.len() == 4 {
388                    // In the case of an evolved field
389                    PrimitiveLiteral::Long(i32::from_le_bytes(bytes.try_into()?) as i64)
390                } else {
391                    PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?))
392                }
393            }
394            PrimitiveType::Float => {
395                PrimitiveLiteral::Float(OrderedFloat(f32::from_le_bytes(bytes.try_into()?)))
396            }
397            PrimitiveType::Double => {
398                if bytes.len() == 4 {
399                    // In the case of an evolved field
400                    PrimitiveLiteral::Double(OrderedFloat(
401                        f32::from_le_bytes(bytes.try_into()?) as f64
402                    ))
403                } else {
404                    PrimitiveLiteral::Double(OrderedFloat(f64::from_le_bytes(bytes.try_into()?)))
405                }
406            }
407            PrimitiveType::Date => PrimitiveLiteral::Int(i32::from_le_bytes(bytes.try_into()?)),
408            PrimitiveType::Time => PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?)),
409            PrimitiveType::Timestamp => {
410                PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?))
411            }
412            PrimitiveType::Timestamptz => {
413                PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?))
414            }
415            PrimitiveType::TimestampNs => {
416                PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?))
417            }
418            PrimitiveType::TimestamptzNs => {
419                PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?))
420            }
421            PrimitiveType::String => {
422                PrimitiveLiteral::String(std::str::from_utf8(bytes)?.to_string())
423            }
424            PrimitiveType::Uuid => {
425                PrimitiveLiteral::UInt128(u128::from_be_bytes(bytes.try_into()?))
426            }
427            PrimitiveType::Fixed(_) => PrimitiveLiteral::Binary(Vec::from(bytes)),
428            PrimitiveType::Binary => PrimitiveLiteral::Binary(Vec::from(bytes)),
429            PrimitiveType::Decimal { .. } => {
430                let unscaled_value = BigInt::from_signed_bytes_be(bytes);
431                PrimitiveLiteral::Int128(unscaled_value.to_i128().ok_or_else(|| {
432                    Error::new(
433                        ErrorKind::DataInvalid,
434                        format!("Can't convert bytes to i128: {bytes:?}"),
435                    )
436                })?)
437            }
438        };
439        Ok(Datum::new(data_type, literal))
440    }
441
442    /// Convert the value to bytes
443    ///
444    /// See [this spec](https://iceberg.apache.org/spec/#binary-single-value-serialization) for reference.
445    pub fn to_bytes(&self) -> Result<ByteBuf> {
446        let buf = match &self.literal {
447            PrimitiveLiteral::Boolean(val) => {
448                if *val {
449                    ByteBuf::from([1u8])
450                } else {
451                    ByteBuf::from([0u8])
452                }
453            }
454            PrimitiveLiteral::Int(val) => ByteBuf::from(val.to_le_bytes()),
455            PrimitiveLiteral::Long(val) => ByteBuf::from(val.to_le_bytes()),
456            PrimitiveLiteral::Float(val) => ByteBuf::from(val.to_le_bytes()),
457            PrimitiveLiteral::Double(val) => ByteBuf::from(val.to_le_bytes()),
458            PrimitiveLiteral::String(val) => ByteBuf::from(val.as_bytes()),
459            PrimitiveLiteral::UInt128(val) => ByteBuf::from(val.to_be_bytes()),
460            PrimitiveLiteral::Binary(val) => ByteBuf::from(val.as_slice()),
461            PrimitiveLiteral::Int128(val) => {
462                let PrimitiveType::Decimal { precision, .. } = self.r#type else {
463                    return Err(Error::new(
464                        ErrorKind::DataInvalid,
465                        format!(
466                            "PrimitiveLiteral Int128 must be PrimitiveType Decimal but got {}",
467                            &self.r#type
468                        ),
469                    ));
470                };
471
472                // It's required by iceberg spec that we must keep the minimum
473                // number of bytes for the value
474                let Ok(required_bytes) = Type::decimal_required_bytes(precision) else {
475                    return Err(Error::new(
476                        ErrorKind::DataInvalid,
477                        format!(
478                            "PrimitiveType Decimal must has valid precision but got {precision}"
479                        ),
480                    ));
481                };
482
483                // The primitive literal is unscaled value.
484                let unscaled_value = BigInt::from(*val);
485                // Convert into two's-complement byte representation of the BigInt
486                // in big-endian byte order.
487                let mut bytes = unscaled_value.to_signed_bytes_be();
488                // Truncate with required bytes to make sure.
489                bytes.truncate(required_bytes as usize);
490
491                ByteBuf::from(bytes)
492            }
493            PrimitiveLiteral::AboveMax | PrimitiveLiteral::BelowMin => {
494                return Err(Error::new(
495                    ErrorKind::DataInvalid,
496                    "Cannot convert AboveMax or BelowMin to bytes".to_string(),
497                ));
498            }
499        };
500
501        Ok(buf)
502    }
503
504    /// Creates a boolean value.
505    ///
506    /// Example:
507    /// ```rust
508    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
509    /// let t = Datum::bool(true);
510    ///
511    /// assert_eq!(format!("{}", t), "true".to_string());
512    /// assert_eq!(
513    ///     Literal::from(t),
514    ///     Literal::Primitive(PrimitiveLiteral::Boolean(true))
515    /// );
516    /// ```
517    pub fn bool<T: Into<bool>>(t: T) -> Self {
518        Self {
519            r#type: PrimitiveType::Boolean,
520            literal: PrimitiveLiteral::Boolean(t.into()),
521        }
522    }
523
524    /// Creates a boolean value from string.
525    /// See [Parse bool from str](https://doc.rust-lang.org/stable/std/primitive.bool.html#impl-FromStr-for-bool) for reference.
526    ///
527    /// Example:
528    /// ```rust
529    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
530    /// let t = Datum::bool_from_str("false").unwrap();
531    ///
532    /// assert_eq!(&format!("{}", t), "false");
533    /// assert_eq!(
534    ///     Literal::Primitive(PrimitiveLiteral::Boolean(false)),
535    ///     t.into()
536    /// );
537    /// ```
538    pub fn bool_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
539        let v = s.as_ref().parse::<bool>().map_err(|e| {
540            Error::new(ErrorKind::DataInvalid, "Can't parse string to bool.").with_source(e)
541        })?;
542        Ok(Self::bool(v))
543    }
544
545    /// Creates an 32bit integer.
546    ///
547    /// Example:
548    /// ```rust
549    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
550    /// let t = Datum::int(23i8);
551    ///
552    /// assert_eq!(&format!("{}", t), "23");
553    /// assert_eq!(Literal::Primitive(PrimitiveLiteral::Int(23)), t.into());
554    /// ```
555    pub fn int<T: Into<i32>>(t: T) -> Self {
556        Self {
557            r#type: PrimitiveType::Int,
558            literal: PrimitiveLiteral::Int(t.into()),
559        }
560    }
561
562    /// Creates an 64bit integer.
563    ///
564    /// Example:
565    /// ```rust
566    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
567    /// let t = Datum::long(24i8);
568    ///
569    /// assert_eq!(&format!("{t}"), "24");
570    /// assert_eq!(Literal::Primitive(PrimitiveLiteral::Long(24)), t.into());
571    /// ```
572    pub fn long<T: Into<i64>>(t: T) -> Self {
573        Self {
574            r#type: PrimitiveType::Long,
575            literal: PrimitiveLiteral::Long(t.into()),
576        }
577    }
578
579    /// Creates an 32bit floating point number.
580    ///
581    /// Example:
582    /// ```rust
583    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
584    /// use ordered_float::OrderedFloat;
585    /// let t = Datum::float(32.1f32);
586    ///
587    /// assert_eq!(&format!("{t}"), "32.1");
588    /// assert_eq!(
589    ///     Literal::Primitive(PrimitiveLiteral::Float(OrderedFloat(32.1))),
590    ///     t.into()
591    /// );
592    /// ```
593    pub fn float<T: Into<f32>>(t: T) -> Self {
594        Self {
595            r#type: PrimitiveType::Float,
596            literal: PrimitiveLiteral::Float(OrderedFloat(t.into())),
597        }
598    }
599
600    /// Creates an 64bit floating point number.
601    ///
602    /// Example:
603    /// ```rust
604    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
605    /// use ordered_float::OrderedFloat;
606    /// let t = Datum::double(32.1f64);
607    ///
608    /// assert_eq!(&format!("{t}"), "32.1");
609    /// assert_eq!(
610    ///     Literal::Primitive(PrimitiveLiteral::Double(OrderedFloat(32.1))),
611    ///     t.into()
612    /// );
613    /// ```
614    pub fn double<T: Into<f64>>(t: T) -> Self {
615        Self {
616            r#type: PrimitiveType::Double,
617            literal: PrimitiveLiteral::Double(OrderedFloat(t.into())),
618        }
619    }
620
621    /// Creates date literal from number of days from unix epoch directly.
622    ///
623    /// Example:
624    /// ```rust
625    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
626    /// // 2 days after 1970-01-01
627    /// let t = Datum::date(2);
628    ///
629    /// assert_eq!(&format!("{t}"), "1970-01-03");
630    /// assert_eq!(Literal::Primitive(PrimitiveLiteral::Int(2)), t.into());
631    /// ```
632    pub fn date(days: i32) -> Self {
633        Self {
634            r#type: PrimitiveType::Date,
635            literal: PrimitiveLiteral::Int(days),
636        }
637    }
638
639    /// Creates date literal in `%Y-%m-%d` format, assume in utc timezone.
640    ///
641    /// See [`NaiveDate::from_str`].
642    ///
643    /// Example
644    /// ```rust
645    /// use iceberg::spec::{Datum, Literal};
646    /// let t = Datum::date_from_str("1970-01-05").unwrap();
647    ///
648    /// assert_eq!(&format!("{t}"), "1970-01-05");
649    /// assert_eq!(Literal::date(4), t.into());
650    /// ```
651    pub fn date_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
652        let t = s.as_ref().parse::<NaiveDate>().map_err(|e| {
653            Error::new(
654                ErrorKind::DataInvalid,
655                format!("Can't parse date from string: {}", s.as_ref()),
656            )
657            .with_source(e)
658        })?;
659
660        Ok(Self::date(date::date_from_naive_date(t)))
661    }
662
663    /// Create date literal from calendar date (year, month and day).
664    ///
665    /// See [`NaiveDate::from_ymd_opt`].
666    ///
667    /// Example:
668    ///
669    ///```rust
670    /// use iceberg::spec::{Datum, Literal};
671    /// let t = Datum::date_from_ymd(1970, 1, 5).unwrap();
672    ///
673    /// assert_eq!(&format!("{t}"), "1970-01-05");
674    /// assert_eq!(Literal::date(4), t.into());
675    /// ```
676    pub fn date_from_ymd(year: i32, month: u32, day: u32) -> Result<Self> {
677        let t = NaiveDate::from_ymd_opt(year, month, day).ok_or_else(|| {
678            Error::new(
679                ErrorKind::DataInvalid,
680                format!("Can't create date from year: {year}, month: {month}, day: {day}"),
681            )
682        })?;
683
684        Ok(Self::date(date::date_from_naive_date(t)))
685    }
686
687    /// Creates time literal in microseconds directly.
688    ///
689    /// It will return error when it's negative or too large to fit in 24 hours.
690    ///
691    /// Example:
692    ///
693    /// ```rust
694    /// use iceberg::spec::{Datum, Literal};
695    /// let micro_secs = {
696    ///     1 * 3600 * 1_000_000 + // 1 hour
697    ///     2 * 60 * 1_000_000 +   // 2 minutes
698    ///     1 * 1_000_000 + // 1 second
699    ///     888999 // microseconds
700    /// };
701    ///
702    /// let t = Datum::time_micros(micro_secs).unwrap();
703    ///
704    /// assert_eq!(&format!("{t}"), "01:02:01.888999");
705    /// assert_eq!(Literal::time(micro_secs), t.into());
706    ///
707    /// let negative_value = -100;
708    /// assert!(Datum::time_micros(negative_value).is_err());
709    ///
710    /// let too_large_value = 36 * 60 * 60 * 1_000_000; // Too large to fit in 24 hours.
711    /// assert!(Datum::time_micros(too_large_value).is_err());
712    /// ```
713    pub fn time_micros(value: i64) -> Result<Self> {
714        ensure_data_valid!(
715            (0..=MAX_TIME_VALUE).contains(&value),
716            "Invalid value for Time type: {}",
717            value
718        );
719
720        Ok(Self {
721            r#type: PrimitiveType::Time,
722            literal: PrimitiveLiteral::Long(value),
723        })
724    }
725
726    /// Creates time literal from [`chrono::NaiveTime`].
727    fn time_from_naive_time(t: NaiveTime) -> Self {
728        let duration = t - date::unix_epoch().time();
729        // It's safe to unwrap here since less than 24 hours will never overflow.
730        let micro_secs = duration.num_microseconds().unwrap();
731
732        Self {
733            r#type: PrimitiveType::Time,
734            literal: PrimitiveLiteral::Long(micro_secs),
735        }
736    }
737
738    /// Creates time literal in microseconds in `%H:%M:%S:.f` format.
739    ///
740    /// See [`NaiveTime::from_str`] for details.
741    ///
742    /// Example:
743    /// ```rust
744    /// use iceberg::spec::{Datum, Literal};
745    /// let t = Datum::time_from_str("01:02:01.888999777").unwrap();
746    ///
747    /// assert_eq!(&format!("{t}"), "01:02:01.888999");
748    /// ```
749    pub fn time_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
750        let t = s.as_ref().parse::<NaiveTime>().map_err(|e| {
751            Error::new(
752                ErrorKind::DataInvalid,
753                format!("Can't parse time from string: {}", s.as_ref()),
754            )
755            .with_source(e)
756        })?;
757
758        Ok(Self::time_from_naive_time(t))
759    }
760
761    /// Creates time literal from hour, minute, second, and microseconds.
762    ///
763    /// See [`NaiveTime::from_hms_micro_opt`].
764    ///
765    /// Example:
766    /// ```rust
767    /// use iceberg::spec::{Datum, Literal};
768    /// let t = Datum::time_from_hms_micro(22, 15, 33, 111).unwrap();
769    ///
770    /// assert_eq!(&format!("{t}"), "22:15:33.000111");
771    /// ```
772    pub fn time_from_hms_micro(hour: u32, min: u32, sec: u32, micro: u32) -> Result<Self> {
773        let t = NaiveTime::from_hms_micro_opt(hour, min, sec, micro)
774            .ok_or_else(|| Error::new(
775                ErrorKind::DataInvalid,
776                format!("Can't create time from hour: {hour}, min: {min}, second: {sec}, microsecond: {micro}"),
777            ))?;
778        Ok(Self::time_from_naive_time(t))
779    }
780
781    /// Creates a timestamp from unix epoch in microseconds.
782    ///
783    /// Example:
784    ///
785    /// ```rust
786    /// use iceberg::spec::Datum;
787    /// let t = Datum::timestamp_micros(1000);
788    ///
789    /// assert_eq!(&format!("{t}"), "1970-01-01 00:00:00.001");
790    /// ```
791    pub fn timestamp_micros(value: i64) -> Self {
792        Self {
793            r#type: PrimitiveType::Timestamp,
794            literal: PrimitiveLiteral::Long(value),
795        }
796    }
797
798    /// Creates a timestamp from unix epoch in nanoseconds.
799    ///
800    /// Example:
801    ///
802    /// ```rust
803    /// use iceberg::spec::Datum;
804    /// let t = Datum::timestamp_nanos(1000);
805    ///
806    /// assert_eq!(&format!("{t}"), "1970-01-01 00:00:00.000001");
807    /// ```
808    pub fn timestamp_nanos(value: i64) -> Self {
809        Self {
810            r#type: PrimitiveType::TimestampNs,
811            literal: PrimitiveLiteral::Long(value),
812        }
813    }
814
815    /// Creates a timestamp from [`DateTime`].
816    ///
817    /// Example:
818    ///
819    /// ```rust
820    /// use chrono::{NaiveDate, NaiveDateTime, TimeZone, Utc};
821    /// use iceberg::spec::Datum;
822    /// let t = Datum::timestamp_from_datetime(
823    ///     NaiveDate::from_ymd_opt(1992, 3, 1)
824    ///         .unwrap()
825    ///         .and_hms_micro_opt(1, 2, 3, 88)
826    ///         .unwrap(),
827    /// );
828    ///
829    /// assert_eq!(&format!("{t}"), "1992-03-01 01:02:03.000088");
830    /// ```
831    pub fn timestamp_from_datetime(dt: NaiveDateTime) -> Self {
832        Self::timestamp_micros(dt.and_utc().timestamp_micros())
833    }
834
835    /// Parse a timestamp in [`%Y-%m-%dT%H:%M:%S%.f`] format.
836    ///
837    /// See [`NaiveDateTime::from_str`].
838    ///
839    /// Example:
840    ///
841    /// ```rust
842    /// use chrono::{DateTime, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime};
843    /// use iceberg::spec::{Datum, Literal};
844    /// let t = Datum::timestamp_from_str("1992-03-01T01:02:03.000088").unwrap();
845    ///
846    /// assert_eq!(&format!("{t}"), "1992-03-01 01:02:03.000088");
847    /// ```
848    pub fn timestamp_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
849        let dt = s.as_ref().parse::<NaiveDateTime>().map_err(|e| {
850            Error::new(ErrorKind::DataInvalid, "Can't parse timestamp.").with_source(e)
851        })?;
852
853        Ok(Self::timestamp_from_datetime(dt))
854    }
855
856    /// Creates a timestamp with timezone from unix epoch in microseconds.
857    ///
858    /// Example:
859    ///
860    /// ```rust
861    /// use iceberg::spec::Datum;
862    /// let t = Datum::timestamptz_micros(1000);
863    ///
864    /// assert_eq!(&format!("{t}"), "1970-01-01 00:00:00.001 UTC");
865    /// ```
866    pub fn timestamptz_micros(value: i64) -> Self {
867        Self {
868            r#type: PrimitiveType::Timestamptz,
869            literal: PrimitiveLiteral::Long(value),
870        }
871    }
872
873    /// Creates a timestamp with timezone from unix epoch in nanoseconds.
874    ///
875    /// Example:
876    ///
877    /// ```rust
878    /// use iceberg::spec::Datum;
879    /// let t = Datum::timestamptz_nanos(1000);
880    ///
881    /// assert_eq!(&format!("{t}"), "1970-01-01 00:00:00.000001 UTC");
882    /// ```
883    pub fn timestamptz_nanos(value: i64) -> Self {
884        Self {
885            r#type: PrimitiveType::TimestamptzNs,
886            literal: PrimitiveLiteral::Long(value),
887        }
888    }
889
890    /// Creates a timestamp with timezone from [`DateTime`].
891    /// Example:
892    ///
893    /// ```rust
894    /// use chrono::{TimeZone, Utc};
895    /// use iceberg::spec::Datum;
896    /// let t = Datum::timestamptz_from_datetime(Utc.timestamp_opt(1000, 0).unwrap());
897    ///
898    /// assert_eq!(&format!("{t}"), "1970-01-01 00:16:40 UTC");
899    /// ```
900    pub fn timestamptz_from_datetime<T: TimeZone>(dt: DateTime<T>) -> Self {
901        Self::timestamptz_micros(dt.with_timezone(&Utc).timestamp_micros())
902    }
903
904    /// Parse timestamp with timezone in RFC3339 format.
905    ///
906    /// See [`DateTime::from_str`].
907    ///
908    /// Example:
909    ///
910    /// ```rust
911    /// use chrono::{DateTime, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime};
912    /// use iceberg::spec::{Datum, Literal};
913    /// let t = Datum::timestamptz_from_str("1992-03-01T01:02:03.000088+08:00").unwrap();
914    ///
915    /// assert_eq!(&format!("{t}"), "1992-02-29 17:02:03.000088 UTC");
916    /// ```
917    pub fn timestamptz_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
918        let dt = DateTime::<Utc>::from_str(s.as_ref()).map_err(|e| {
919            Error::new(ErrorKind::DataInvalid, "Can't parse datetime.").with_source(e)
920        })?;
921
922        Ok(Self::timestamptz_from_datetime(dt))
923    }
924
925    /// Creates a string literal.
926    ///
927    /// Example:
928    ///
929    /// ```rust
930    /// use iceberg::spec::Datum;
931    /// let t = Datum::string("ss");
932    ///
933    /// assert_eq!(&format!("{t}"), r#""ss""#);
934    /// ```
935    pub fn string<S: ToString>(s: S) -> Self {
936        Self {
937            r#type: PrimitiveType::String,
938            literal: PrimitiveLiteral::String(s.to_string()),
939        }
940    }
941
942    /// Creates uuid literal.
943    ///
944    /// Example:
945    ///
946    /// ```rust
947    /// use iceberg::spec::Datum;
948    /// use uuid::uuid;
949    /// let t = Datum::uuid(uuid!("a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8"));
950    ///
951    /// assert_eq!(&format!("{t}"), "a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8");
952    /// ```
953    pub fn uuid(uuid: uuid::Uuid) -> Self {
954        Self {
955            r#type: PrimitiveType::Uuid,
956            literal: PrimitiveLiteral::UInt128(uuid.as_u128()),
957        }
958    }
959
960    /// Creates uuid from str. See [`uuid::Uuid::parse_str`].
961    ///
962    /// Example:
963    ///
964    /// ```rust
965    /// use iceberg::spec::Datum;
966    /// let t = Datum::uuid_from_str("a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8").unwrap();
967    ///
968    /// assert_eq!(&format!("{t}"), "a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8");
969    /// ```
970    pub fn uuid_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
971        let uuid = uuid::Uuid::parse_str(s.as_ref()).map_err(|e| {
972            Error::new(
973                ErrorKind::DataInvalid,
974                format!("Can't parse uuid from string: {}", s.as_ref()),
975            )
976            .with_source(e)
977        })?;
978        Ok(Self::uuid(uuid))
979    }
980
981    /// Creates a fixed literal from bytes.
982    ///
983    /// Example:
984    ///
985    /// ```rust
986    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
987    /// let t = Datum::fixed(vec![1u8, 2u8]);
988    ///
989    /// assert_eq!(&format!("{t}"), "0102");
990    /// ```
991    pub fn fixed<I: IntoIterator<Item = u8>>(input: I) -> Self {
992        let value: Vec<u8> = input.into_iter().collect();
993        Self {
994            r#type: PrimitiveType::Fixed(value.len() as u64),
995            literal: PrimitiveLiteral::Binary(value),
996        }
997    }
998
999    /// Creates a binary literal from bytes.
1000    ///
1001    /// Example:
1002    ///
1003    /// ```rust
1004    /// use iceberg::spec::Datum;
1005    /// let t = Datum::binary(vec![1u8, 100u8]);
1006    ///
1007    /// assert_eq!(&format!("{t}"), "0164");
1008    /// ```
1009    pub fn binary<I: IntoIterator<Item = u8>>(input: I) -> Self {
1010        Self {
1011            r#type: PrimitiveType::Binary,
1012            literal: PrimitiveLiteral::Binary(input.into_iter().collect()),
1013        }
1014    }
1015
1016    /// Creates decimal literal from string. See [`Decimal::from_str_exact`].
1017    ///
1018    /// Example:
1019    ///
1020    /// ```rust
1021    /// use iceberg::spec::Datum;
1022    /// use itertools::assert_equal;
1023    /// use rust_decimal::Decimal;
1024    /// let t = Datum::decimal_from_str("123.45").unwrap();
1025    ///
1026    /// assert_eq!(&format!("{t}"), "123.45");
1027    /// ```
1028    pub fn decimal_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
1029        let decimal = Decimal::from_str_exact(s.as_ref()).map_err(|e| {
1030            Error::new(ErrorKind::DataInvalid, "Can't parse decimal.").with_source(e)
1031        })?;
1032
1033        Self::decimal(decimal)
1034    }
1035
1036    /// Try to create a decimal literal from [`Decimal`].
1037    ///
1038    /// Example:
1039    ///
1040    /// ```rust
1041    /// use iceberg::spec::Datum;
1042    /// use rust_decimal::Decimal;
1043    ///
1044    /// let t = Datum::decimal(Decimal::new(123, 2)).unwrap();
1045    ///
1046    /// assert_eq!(&format!("{t}"), "1.23");
1047    /// ```
1048    pub fn decimal(value: impl Into<Decimal>) -> Result<Self> {
1049        let decimal = value.into();
1050        let scale = decimal.scale();
1051
1052        let r#type = Type::decimal(MAX_DECIMAL_PRECISION, scale)?;
1053        if let Type::Primitive(p) = r#type {
1054            Ok(Self {
1055                r#type: p,
1056                literal: PrimitiveLiteral::Int128(decimal.mantissa()),
1057            })
1058        } else {
1059            unreachable!("Decimal type must be primitive.")
1060        }
1061    }
1062
1063    /// Try to create a decimal literal from [`Decimal`] with precision.
1064    ///
1065    /// Example:
1066    ///
1067    /// ```rust
1068    /// use iceberg::spec::Datum;
1069    /// use rust_decimal::Decimal;
1070    ///
1071    /// let t = Datum::decimal_with_precision(Decimal::new(123, 2), 30).unwrap();
1072    ///
1073    /// assert_eq!(&format!("{t}"), "1.23");
1074    /// ```
1075    pub fn decimal_with_precision(value: impl Into<Decimal>, precision: u32) -> Result<Self> {
1076        let decimal = value.into();
1077        let scale = decimal.scale();
1078
1079        let available_bytes = Type::decimal_required_bytes(precision)? as usize;
1080        let unscaled_value = BigInt::from(decimal.mantissa());
1081        let actual_bytes = unscaled_value.to_signed_bytes_be();
1082        if actual_bytes.len() > available_bytes {
1083            return Err(Error::new(
1084                ErrorKind::DataInvalid,
1085                format!("Decimal value {decimal} is too large for precision {precision}"),
1086            ));
1087        }
1088
1089        let r#type = Type::decimal(precision, scale)?;
1090        if let Type::Primitive(p) = r#type {
1091            Ok(Self {
1092                r#type: p,
1093                literal: PrimitiveLiteral::Int128(decimal.mantissa()),
1094            })
1095        } else {
1096            unreachable!("Decimal type must be primitive.")
1097        }
1098    }
1099
1100    fn i64_to_i32<T: Into<i64> + PartialOrd<i64>>(val: T) -> Datum {
1101        if val > INT_MAX as i64 {
1102            Datum::new(PrimitiveType::Int, PrimitiveLiteral::AboveMax)
1103        } else if val < INT_MIN as i64 {
1104            Datum::new(PrimitiveType::Int, PrimitiveLiteral::BelowMin)
1105        } else {
1106            Datum::int(val.into() as i32)
1107        }
1108    }
1109
1110    fn i128_to_i32<T: Into<i128> + PartialOrd<i128>>(val: T) -> Datum {
1111        if val > INT_MAX as i128 {
1112            Datum::new(PrimitiveType::Int, PrimitiveLiteral::AboveMax)
1113        } else if val < INT_MIN as i128 {
1114            Datum::new(PrimitiveType::Int, PrimitiveLiteral::BelowMin)
1115        } else {
1116            Datum::int(val.into() as i32)
1117        }
1118    }
1119
1120    fn i128_to_i64<T: Into<i128> + PartialOrd<i128>>(val: T) -> Datum {
1121        if val > LONG_MAX as i128 {
1122            Datum::new(PrimitiveType::Long, PrimitiveLiteral::AboveMax)
1123        } else if val < LONG_MIN as i128 {
1124            Datum::new(PrimitiveType::Long, PrimitiveLiteral::BelowMin)
1125        } else {
1126            Datum::long(val.into() as i64)
1127        }
1128    }
1129
1130    fn string_to_i128<S: AsRef<str>>(s: S) -> Result<i128> {
1131        s.as_ref().parse::<i128>().map_err(|e| {
1132            Error::new(ErrorKind::DataInvalid, "Can't parse string to i128.").with_source(e)
1133        })
1134    }
1135
1136    /// Convert the datum to `target_type`.
1137    pub fn to(self, target_type: &Type) -> Result<Datum> {
1138        match target_type {
1139            Type::Primitive(target_primitive_type) => {
1140                match (&self.literal, &self.r#type, target_primitive_type) {
1141                    (PrimitiveLiteral::Int(val), _, PrimitiveType::Int) => Ok(Datum::int(*val)),
1142                    (PrimitiveLiteral::Int(val), _, PrimitiveType::Date) => Ok(Datum::date(*val)),
1143                    (PrimitiveLiteral::Int(val), _, PrimitiveType::Long) => Ok(Datum::long(*val)),
1144                    (PrimitiveLiteral::Long(val), _, PrimitiveType::Int) => {
1145                        Ok(Datum::i64_to_i32(*val))
1146                    }
1147                    (PrimitiveLiteral::Long(val), _, PrimitiveType::Timestamp) => {
1148                        Ok(Datum::timestamp_micros(*val))
1149                    }
1150                    (PrimitiveLiteral::Long(val), _, PrimitiveType::Timestamptz) => {
1151                        Ok(Datum::timestamptz_micros(*val))
1152                    }
1153                    // Let's wait with nano's until this clears up: https://github.com/apache/iceberg/pull/11775
1154                    (PrimitiveLiteral::Int128(val), _, PrimitiveType::Long) => {
1155                        Ok(Datum::i128_to_i64(*val))
1156                    }
1157
1158                    (PrimitiveLiteral::String(val), _, PrimitiveType::Boolean) => {
1159                        Datum::bool_from_str(val)
1160                    }
1161                    (PrimitiveLiteral::String(val), _, PrimitiveType::Int) => {
1162                        Datum::string_to_i128(val).map(Datum::i128_to_i32)
1163                    }
1164                    (PrimitiveLiteral::String(val), _, PrimitiveType::Long) => {
1165                        Datum::string_to_i128(val).map(Datum::i128_to_i64)
1166                    }
1167                    (PrimitiveLiteral::String(val), _, PrimitiveType::Timestamp) => {
1168                        Datum::timestamp_from_str(val)
1169                    }
1170                    (PrimitiveLiteral::String(val), _, PrimitiveType::Timestamptz) => {
1171                        Datum::timestamptz_from_str(val)
1172                    }
1173
1174                    // TODO: implement more type conversions
1175                    (_, self_type, target_type) if self_type == target_type => Ok(self),
1176                    _ => Err(Error::new(
1177                        ErrorKind::DataInvalid,
1178                        format!(
1179                            "Can't convert datum from {} type to {} type.",
1180                            self.r#type, target_primitive_type
1181                        ),
1182                    )),
1183                }
1184            }
1185            _ => Err(Error::new(
1186                ErrorKind::DataInvalid,
1187                format!(
1188                    "Can't convert datum from {} type to {} type.",
1189                    self.r#type, target_type
1190                ),
1191            )),
1192        }
1193    }
1194
1195    /// Get the primitive literal from datum.
1196    pub fn literal(&self) -> &PrimitiveLiteral {
1197        &self.literal
1198    }
1199
1200    /// Get the primitive type from datum.
1201    pub fn data_type(&self) -> &PrimitiveType {
1202        &self.r#type
1203    }
1204
1205    /// Returns true if the Literal represents a primitive type
1206    /// that can be a NaN, and that it's value is NaN
1207    pub fn is_nan(&self) -> bool {
1208        match self.literal {
1209            PrimitiveLiteral::Double(val) => val.is_nan(),
1210            PrimitiveLiteral::Float(val) => val.is_nan(),
1211            _ => false,
1212        }
1213    }
1214
1215    /// Returns a human-readable string representation of this literal.
1216    ///
1217    /// For string literals, this returns the raw string value without quotes.
1218    /// For all other literals, it falls back to [`to_string()`].
1219    pub fn to_human_string(&self) -> String {
1220        match self.literal() {
1221            PrimitiveLiteral::String(s) => s.to_string(),
1222            _ => self.to_string(),
1223        }
1224    }
1225}