iceberg/spec/values/
datum.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Typed literals with validation
19
20use std::cmp::Ordering;
21use std::fmt::{Display, Formatter};
22use std::str::FromStr;
23
24use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc};
25use ordered_float::{Float, OrderedFloat};
26use serde::de::{self, MapAccess};
27use serde::ser::SerializeStruct;
28use serde::{Deserialize, Serialize};
29use serde_bytes::ByteBuf;
30
31use super::decimal_utils::{
32    Decimal, decimal_from_i128_with_scale, decimal_from_str_exact, decimal_mantissa, decimal_scale,
33    i128_from_be_bytes, i128_to_be_bytes_min,
34};
35use super::literal::Literal;
36use super::primitive::PrimitiveLiteral;
37use super::serde::_serde::RawLiteral;
38use super::temporal::{date, time, timestamp, timestamptz};
39use crate::error::Result;
40use crate::spec::MAX_DECIMAL_PRECISION;
41use crate::spec::datatypes::{PrimitiveType, Type};
42use crate::{Error, ErrorKind, ensure_data_valid};
43
/// Largest valid value for the [`PrimitiveType::Time`] type, in microseconds:
/// 23:59:59.999999, i.e. one microsecond short of 24 hours.
pub(crate) const MAX_TIME_VALUE: i64 = 86_400 * 1_000_000i64 - 1;

/// Largest value of a 32-bit signed integer.
pub(crate) const INT_MAX: i32 = i32::MAX;
/// Smallest value of a 32-bit signed integer.
pub(crate) const INT_MIN: i32 = i32::MIN;
/// Largest value of a 64-bit signed integer.
pub(crate) const LONG_MAX: i64 = i64::MAX;
/// Smallest value of a 64-bit signed integer.
pub(crate) const LONG_MIN: i64 = i64::MIN;
51
52/// Literal associated with its type. The value and type pair is checked when construction, so the type and value is
53/// guaranteed to be correct when used.
54///
55/// By default, we decouple the type and value of a literal, so we can use avoid the cost of storing extra type info
56/// for each literal. But associate type with literal can be useful in some cases, for example, in unbound expression.
57#[derive(Clone, Debug, PartialEq, Hash, Eq)]
58pub struct Datum {
59    r#type: PrimitiveType,
60    literal: PrimitiveLiteral,
61}
62
63impl Serialize for Datum {
64    fn serialize<S: serde::Serializer>(
65        &self,
66        serializer: S,
67    ) -> std::result::Result<S::Ok, S::Error> {
68        let mut struct_ser = serializer
69            .serialize_struct("Datum", 2)
70            .map_err(serde::ser::Error::custom)?;
71        struct_ser
72            .serialize_field("type", &self.r#type)
73            .map_err(serde::ser::Error::custom)?;
74        struct_ser
75            .serialize_field(
76                "literal",
77                &RawLiteral::try_from(
78                    Literal::Primitive(self.literal.clone()),
79                    &Type::Primitive(self.r#type.clone()),
80                )
81                .map_err(serde::ser::Error::custom)?,
82            )
83            .map_err(serde::ser::Error::custom)?;
84        struct_ser.end()
85    }
86}
87
88impl<'de> Deserialize<'de> for Datum {
89    fn deserialize<D: serde::Deserializer<'de>>(
90        deserializer: D,
91    ) -> std::result::Result<Self, D::Error> {
92        #[derive(Deserialize)]
93        #[serde(field_identifier, rename_all = "lowercase")]
94        enum Field {
95            Type,
96            Literal,
97        }
98
99        struct DatumVisitor;
100
101        impl<'de> serde::de::Visitor<'de> for DatumVisitor {
102            type Value = Datum;
103
104            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
105                formatter.write_str("struct Datum")
106            }
107
108            fn visit_seq<A>(self, mut seq: A) -> std::result::Result<Self::Value, A::Error>
109            where A: serde::de::SeqAccess<'de> {
110                let r#type = seq
111                    .next_element::<PrimitiveType>()?
112                    .ok_or_else(|| serde::de::Error::invalid_length(0, &self))?;
113                let value = seq
114                    .next_element::<RawLiteral>()?
115                    .ok_or_else(|| serde::de::Error::invalid_length(1, &self))?;
116                let Literal::Primitive(primitive) = value
117                    .try_into(&Type::Primitive(r#type.clone()))
118                    .map_err(serde::de::Error::custom)?
119                    .ok_or_else(|| serde::de::Error::custom("None value"))?
120                else {
121                    return Err(serde::de::Error::custom("Invalid value"));
122                };
123
124                Ok(Datum::new(r#type, primitive))
125            }
126
127            fn visit_map<V>(self, mut map: V) -> std::result::Result<Datum, V::Error>
128            where V: MapAccess<'de> {
129                let mut raw_primitive: Option<RawLiteral> = None;
130                let mut r#type: Option<PrimitiveType> = None;
131                while let Some(key) = map.next_key()? {
132                    match key {
133                        Field::Type => {
134                            if r#type.is_some() {
135                                return Err(de::Error::duplicate_field("type"));
136                            }
137                            r#type = Some(map.next_value()?);
138                        }
139                        Field::Literal => {
140                            if raw_primitive.is_some() {
141                                return Err(de::Error::duplicate_field("literal"));
142                            }
143                            raw_primitive = Some(map.next_value()?);
144                        }
145                    }
146                }
147                let Some(r#type) = r#type else {
148                    return Err(serde::de::Error::missing_field("type"));
149                };
150                let Some(raw_primitive) = raw_primitive else {
151                    return Err(serde::de::Error::missing_field("literal"));
152                };
153                let Literal::Primitive(primitive) = raw_primitive
154                    .try_into(&Type::Primitive(r#type.clone()))
155                    .map_err(serde::de::Error::custom)?
156                    .ok_or_else(|| serde::de::Error::custom("None value"))?
157                else {
158                    return Err(serde::de::Error::custom("Invalid value"));
159                };
160                Ok(Datum::new(r#type, primitive))
161            }
162        }
163        const FIELDS: &[&str] = &["type", "literal"];
164        deserializer.deserialize_struct("Datum", FIELDS, DatumVisitor)
165    }
166}
167
168// Compare following iceberg float ordering rules:
169//  -NaN < -Infinity < -value < -0 < 0 < value < Infinity < NaN
170fn iceberg_float_cmp_f32(a: OrderedFloat<f32>, b: OrderedFloat<f32>) -> Option<Ordering> {
171    Some(a.total_cmp(&b))
172}
173
174fn iceberg_float_cmp_f64(a: OrderedFloat<f64>, b: OrderedFloat<f64>) -> Option<Ordering> {
175    Some(a.total_cmp(&b))
176}
177
178impl PartialOrd for Datum {
179    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
180        match (&self.literal, &other.literal, &self.r#type, &other.r#type) {
181            // generate the arm with same type and same literal
182            (
183                PrimitiveLiteral::Boolean(val),
184                PrimitiveLiteral::Boolean(other_val),
185                PrimitiveType::Boolean,
186                PrimitiveType::Boolean,
187            ) => val.partial_cmp(other_val),
188            (
189                PrimitiveLiteral::Int(val),
190                PrimitiveLiteral::Int(other_val),
191                PrimitiveType::Int,
192                PrimitiveType::Int,
193            ) => val.partial_cmp(other_val),
194            (
195                PrimitiveLiteral::Long(val),
196                PrimitiveLiteral::Long(other_val),
197                PrimitiveType::Long,
198                PrimitiveType::Long,
199            ) => val.partial_cmp(other_val),
200            (
201                PrimitiveLiteral::Float(val),
202                PrimitiveLiteral::Float(other_val),
203                PrimitiveType::Float,
204                PrimitiveType::Float,
205            ) => iceberg_float_cmp_f32(*val, *other_val),
206            (
207                PrimitiveLiteral::Double(val),
208                PrimitiveLiteral::Double(other_val),
209                PrimitiveType::Double,
210                PrimitiveType::Double,
211            ) => iceberg_float_cmp_f64(*val, *other_val),
212            (
213                PrimitiveLiteral::Int(val),
214                PrimitiveLiteral::Int(other_val),
215                PrimitiveType::Date,
216                PrimitiveType::Date,
217            ) => val.partial_cmp(other_val),
218            (
219                PrimitiveLiteral::Long(val),
220                PrimitiveLiteral::Long(other_val),
221                PrimitiveType::Time,
222                PrimitiveType::Time,
223            ) => val.partial_cmp(other_val),
224            (
225                PrimitiveLiteral::Long(val),
226                PrimitiveLiteral::Long(other_val),
227                PrimitiveType::Timestamp,
228                PrimitiveType::Timestamp,
229            ) => val.partial_cmp(other_val),
230            (
231                PrimitiveLiteral::Long(val),
232                PrimitiveLiteral::Long(other_val),
233                PrimitiveType::Timestamptz,
234                PrimitiveType::Timestamptz,
235            ) => val.partial_cmp(other_val),
236            (
237                PrimitiveLiteral::Long(val),
238                PrimitiveLiteral::Long(other_val),
239                PrimitiveType::TimestampNs,
240                PrimitiveType::TimestampNs,
241            ) => val.partial_cmp(other_val),
242            (
243                PrimitiveLiteral::Long(val),
244                PrimitiveLiteral::Long(other_val),
245                PrimitiveType::TimestamptzNs,
246                PrimitiveType::TimestamptzNs,
247            ) => val.partial_cmp(other_val),
248            (
249                PrimitiveLiteral::String(val),
250                PrimitiveLiteral::String(other_val),
251                PrimitiveType::String,
252                PrimitiveType::String,
253            ) => val.partial_cmp(other_val),
254            (
255                PrimitiveLiteral::UInt128(val),
256                PrimitiveLiteral::UInt128(other_val),
257                PrimitiveType::Uuid,
258                PrimitiveType::Uuid,
259            ) => uuid::Uuid::from_u128(*val).partial_cmp(&uuid::Uuid::from_u128(*other_val)),
260            (
261                PrimitiveLiteral::Binary(val),
262                PrimitiveLiteral::Binary(other_val),
263                PrimitiveType::Fixed(_),
264                PrimitiveType::Fixed(_),
265            ) => val.partial_cmp(other_val),
266            (
267                PrimitiveLiteral::Binary(val),
268                PrimitiveLiteral::Binary(other_val),
269                PrimitiveType::Binary,
270                PrimitiveType::Binary,
271            ) => val.partial_cmp(other_val),
272            (
273                PrimitiveLiteral::Int128(val),
274                PrimitiveLiteral::Int128(other_val),
275                PrimitiveType::Decimal {
276                    precision: _,
277                    scale,
278                },
279                PrimitiveType::Decimal {
280                    precision: _,
281                    scale: other_scale,
282                },
283            ) => {
284                let val = decimal_from_i128_with_scale(*val, *scale);
285                let other_val = decimal_from_i128_with_scale(*other_val, *other_scale);
286                val.partial_cmp(&other_val)
287            }
288            _ => None,
289        }
290    }
291}
292
293impl Display for Datum {
294    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
295        match (&self.r#type, &self.literal) {
296            (_, PrimitiveLiteral::Boolean(val)) => write!(f, "{val}"),
297            (PrimitiveType::Int, PrimitiveLiteral::Int(val)) => write!(f, "{val}"),
298            (PrimitiveType::Long, PrimitiveLiteral::Long(val)) => write!(f, "{val}"),
299            (_, PrimitiveLiteral::Float(val)) => write!(f, "{val}"),
300            (_, PrimitiveLiteral::Double(val)) => write!(f, "{val}"),
301            (PrimitiveType::Date, PrimitiveLiteral::Int(val)) => {
302                write!(f, "{}", date::days_to_date(*val))
303            }
304            (PrimitiveType::Time, PrimitiveLiteral::Long(val)) => {
305                write!(f, "{}", time::microseconds_to_time(*val))
306            }
307            (PrimitiveType::Timestamp, PrimitiveLiteral::Long(val)) => {
308                write!(f, "{}", timestamp::microseconds_to_datetime(*val))
309            }
310            (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(val)) => {
311                write!(f, "{}", timestamptz::microseconds_to_datetimetz(*val))
312            }
313            (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(val)) => {
314                write!(f, "{}", timestamp::nanoseconds_to_datetime(*val))
315            }
316            (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(val)) => {
317                write!(f, "{}", timestamptz::nanoseconds_to_datetimetz(*val))
318            }
319            (_, PrimitiveLiteral::String(val)) => write!(f, r#""{val}""#),
320            (PrimitiveType::Uuid, PrimitiveLiteral::UInt128(val)) => {
321                write!(f, "{}", uuid::Uuid::from_u128(*val))
322            }
323            (_, PrimitiveLiteral::Binary(val)) => display_bytes(val, f),
324            (
325                PrimitiveType::Decimal {
326                    precision: _,
327                    scale,
328                },
329                PrimitiveLiteral::Int128(val),
330            ) => {
331                write!(f, "{}", decimal_from_i128_with_scale(*val, *scale))
332            }
333            (_, _) => {
334                unreachable!()
335            }
336        }
337    }
338}
339
/// Writes `bytes` to `f` as upper-case hexadecimal, two digits per byte and no separators.
///
/// An empty slice produces no output. Any error reported by the formatter is returned.
fn display_bytes(bytes: &[u8], f: &mut Formatter<'_>) -> std::fmt::Result {
    // Write each byte straight into the formatter: this avoids the temporary `String` that
    // `format!` would allocate per byte as well as an intermediate buffer for the whole output.
    bytes.iter().try_for_each(|byte| write!(f, "{byte:02X}"))
}
347
348impl From<Datum> for Literal {
349    fn from(value: Datum) -> Self {
350        Literal::Primitive(value.literal)
351    }
352}
353
354impl From<Datum> for PrimitiveLiteral {
355    fn from(value: Datum) -> Self {
356        value.literal
357    }
358}
359
360impl Datum {
361    /// Creates a `Datum` from a `PrimitiveType` and a `PrimitiveLiteral`
362    pub(crate) fn new(r#type: PrimitiveType, literal: PrimitiveLiteral) -> Self {
363        Datum { r#type, literal }
364    }
365
366    /// Create iceberg value from bytes.
367    ///
368    /// See [this spec](https://iceberg.apache.org/spec/#binary-single-value-serialization) for reference.
369    pub fn try_from_bytes(bytes: &[u8], data_type: PrimitiveType) -> Result<Self> {
370        let literal = match data_type {
371            PrimitiveType::Boolean => {
372                if bytes.len() == 1 && bytes[0] == 0u8 {
373                    PrimitiveLiteral::Boolean(false)
374                } else {
375                    PrimitiveLiteral::Boolean(true)
376                }
377            }
378            PrimitiveType::Int => PrimitiveLiteral::Int(i32::from_le_bytes(bytes.try_into()?)),
379            PrimitiveType::Long => {
380                if bytes.len() == 4 {
381                    // In the case of an evolved field
382                    PrimitiveLiteral::Long(i32::from_le_bytes(bytes.try_into()?) as i64)
383                } else {
384                    PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?))
385                }
386            }
387            PrimitiveType::Float => {
388                PrimitiveLiteral::Float(OrderedFloat(f32::from_le_bytes(bytes.try_into()?)))
389            }
390            PrimitiveType::Double => {
391                if bytes.len() == 4 {
392                    // In the case of an evolved field
393                    PrimitiveLiteral::Double(OrderedFloat(
394                        f32::from_le_bytes(bytes.try_into()?) as f64
395                    ))
396                } else {
397                    PrimitiveLiteral::Double(OrderedFloat(f64::from_le_bytes(bytes.try_into()?)))
398                }
399            }
400            PrimitiveType::Date => PrimitiveLiteral::Int(i32::from_le_bytes(bytes.try_into()?)),
401            PrimitiveType::Time => PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?)),
402            PrimitiveType::Timestamp => {
403                PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?))
404            }
405            PrimitiveType::Timestamptz => {
406                PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?))
407            }
408            PrimitiveType::TimestampNs => {
409                PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?))
410            }
411            PrimitiveType::TimestamptzNs => {
412                PrimitiveLiteral::Long(i64::from_le_bytes(bytes.try_into()?))
413            }
414            PrimitiveType::String => {
415                PrimitiveLiteral::String(std::str::from_utf8(bytes)?.to_string())
416            }
417            PrimitiveType::Uuid => {
418                PrimitiveLiteral::UInt128(u128::from_be_bytes(bytes.try_into()?))
419            }
420            PrimitiveType::Fixed(_) => PrimitiveLiteral::Binary(Vec::from(bytes)),
421            PrimitiveType::Binary => PrimitiveLiteral::Binary(Vec::from(bytes)),
422            PrimitiveType::Decimal { .. } => {
423                PrimitiveLiteral::Int128(i128_from_be_bytes(bytes).ok_or_else(|| {
424                    Error::new(
425                        ErrorKind::DataInvalid,
426                        format!("Can't convert bytes to i128: {bytes:?}"),
427                    )
428                })?)
429            }
430        };
431        Ok(Datum::new(data_type, literal))
432    }
433
434    /// Convert the value to bytes
435    ///
436    /// See [this spec](https://iceberg.apache.org/spec/#binary-single-value-serialization) for reference.
437    pub fn to_bytes(&self) -> Result<ByteBuf> {
438        let buf = match &self.literal {
439            PrimitiveLiteral::Boolean(val) => {
440                if *val {
441                    ByteBuf::from([1u8])
442                } else {
443                    ByteBuf::from([0u8])
444                }
445            }
446            PrimitiveLiteral::Int(val) => ByteBuf::from(val.to_le_bytes()),
447            PrimitiveLiteral::Long(val) => ByteBuf::from(val.to_le_bytes()),
448            PrimitiveLiteral::Float(val) => ByteBuf::from(val.to_le_bytes()),
449            PrimitiveLiteral::Double(val) => ByteBuf::from(val.to_le_bytes()),
450            PrimitiveLiteral::String(val) => ByteBuf::from(val.as_bytes()),
451            PrimitiveLiteral::UInt128(val) => ByteBuf::from(val.to_be_bytes()),
452            PrimitiveLiteral::Binary(val) => ByteBuf::from(val.as_slice()),
453            PrimitiveLiteral::Int128(val) => {
454                let PrimitiveType::Decimal { precision, .. } = self.r#type else {
455                    return Err(Error::new(
456                        ErrorKind::DataInvalid,
457                        format!(
458                            "PrimitiveLiteral Int128 must be PrimitiveType Decimal but got {}",
459                            &self.r#type
460                        ),
461                    ));
462                };
463
464                // It's required by iceberg spec that we must keep the minimum
465                // number of bytes for the value
466                let Ok(required_bytes) = Type::decimal_required_bytes(precision) else {
467                    return Err(Error::new(
468                        ErrorKind::DataInvalid,
469                        format!(
470                            "PrimitiveType Decimal must has valid precision but got {precision}"
471                        ),
472                    ));
473                };
474
475                // The primitive literal is unscaled value.
476                // Convert into two's-complement byte representation in big-endian byte order.
477                let mut bytes = i128_to_be_bytes_min(*val);
478                // Truncate with required bytes to make sure.
479                bytes.truncate(required_bytes as usize);
480
481                ByteBuf::from(bytes)
482            }
483            PrimitiveLiteral::AboveMax | PrimitiveLiteral::BelowMin => {
484                return Err(Error::new(
485                    ErrorKind::DataInvalid,
486                    "Cannot convert AboveMax or BelowMin to bytes".to_string(),
487                ));
488            }
489        };
490
491        Ok(buf)
492    }
493
494    /// Creates a boolean value.
495    ///
496    /// Example:
497    /// ```rust
498    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
499    /// let t = Datum::bool(true);
500    ///
501    /// assert_eq!(format!("{}", t), "true".to_string());
502    /// assert_eq!(
503    ///     Literal::from(t),
504    ///     Literal::Primitive(PrimitiveLiteral::Boolean(true))
505    /// );
506    /// ```
507    pub fn bool<T: Into<bool>>(t: T) -> Self {
508        Self {
509            r#type: PrimitiveType::Boolean,
510            literal: PrimitiveLiteral::Boolean(t.into()),
511        }
512    }
513
514    /// Creates a boolean value from string.
515    /// See [Parse bool from str](https://doc.rust-lang.org/stable/std/primitive.bool.html#impl-FromStr-for-bool) for reference.
516    ///
517    /// Example:
518    /// ```rust
519    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
520    /// let t = Datum::bool_from_str("false").unwrap();
521    ///
522    /// assert_eq!(&format!("{}", t), "false");
523    /// assert_eq!(
524    ///     Literal::Primitive(PrimitiveLiteral::Boolean(false)),
525    ///     t.into()
526    /// );
527    /// ```
528    pub fn bool_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
529        let v = s.as_ref().parse::<bool>().map_err(|e| {
530            Error::new(ErrorKind::DataInvalid, "Can't parse string to bool.").with_source(e)
531        })?;
532        Ok(Self::bool(v))
533    }
534
535    /// Creates an 32bit integer.
536    ///
537    /// Example:
538    /// ```rust
539    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
540    /// let t = Datum::int(23i8);
541    ///
542    /// assert_eq!(&format!("{}", t), "23");
543    /// assert_eq!(Literal::Primitive(PrimitiveLiteral::Int(23)), t.into());
544    /// ```
545    pub fn int<T: Into<i32>>(t: T) -> Self {
546        Self {
547            r#type: PrimitiveType::Int,
548            literal: PrimitiveLiteral::Int(t.into()),
549        }
550    }
551
552    /// Creates an 64bit integer.
553    ///
554    /// Example:
555    /// ```rust
556    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
557    /// let t = Datum::long(24i8);
558    ///
559    /// assert_eq!(&format!("{t}"), "24");
560    /// assert_eq!(Literal::Primitive(PrimitiveLiteral::Long(24)), t.into());
561    /// ```
562    pub fn long<T: Into<i64>>(t: T) -> Self {
563        Self {
564            r#type: PrimitiveType::Long,
565            literal: PrimitiveLiteral::Long(t.into()),
566        }
567    }
568
569    /// Creates an 32bit floating point number.
570    ///
571    /// Example:
572    /// ```rust
573    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
574    /// use ordered_float::OrderedFloat;
575    /// let t = Datum::float(32.1f32);
576    ///
577    /// assert_eq!(&format!("{t}"), "32.1");
578    /// assert_eq!(
579    ///     Literal::Primitive(PrimitiveLiteral::Float(OrderedFloat(32.1))),
580    ///     t.into()
581    /// );
582    /// ```
583    pub fn float<T: Into<f32>>(t: T) -> Self {
584        Self {
585            r#type: PrimitiveType::Float,
586            literal: PrimitiveLiteral::Float(OrderedFloat(t.into())),
587        }
588    }
589
590    /// Creates an 64bit floating point number.
591    ///
592    /// Example:
593    /// ```rust
594    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
595    /// use ordered_float::OrderedFloat;
596    /// let t = Datum::double(32.1f64);
597    ///
598    /// assert_eq!(&format!("{t}"), "32.1");
599    /// assert_eq!(
600    ///     Literal::Primitive(PrimitiveLiteral::Double(OrderedFloat(32.1))),
601    ///     t.into()
602    /// );
603    /// ```
604    pub fn double<T: Into<f64>>(t: T) -> Self {
605        Self {
606            r#type: PrimitiveType::Double,
607            literal: PrimitiveLiteral::Double(OrderedFloat(t.into())),
608        }
609    }
610
611    /// Creates date literal from number of days from unix epoch directly.
612    ///
613    /// Example:
614    /// ```rust
615    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
616    /// // 2 days after 1970-01-01
617    /// let t = Datum::date(2);
618    ///
619    /// assert_eq!(&format!("{t}"), "1970-01-03");
620    /// assert_eq!(Literal::Primitive(PrimitiveLiteral::Int(2)), t.into());
621    /// ```
622    pub fn date(days: i32) -> Self {
623        Self {
624            r#type: PrimitiveType::Date,
625            literal: PrimitiveLiteral::Int(days),
626        }
627    }
628
629    /// Creates date literal in `%Y-%m-%d` format, assume in utc timezone.
630    ///
631    /// See [`NaiveDate::from_str`].
632    ///
633    /// Example
634    /// ```rust
635    /// use iceberg::spec::{Datum, Literal};
636    /// let t = Datum::date_from_str("1970-01-05").unwrap();
637    ///
638    /// assert_eq!(&format!("{t}"), "1970-01-05");
639    /// assert_eq!(Literal::date(4), t.into());
640    /// ```
641    pub fn date_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
642        let t = s.as_ref().parse::<NaiveDate>().map_err(|e| {
643            Error::new(
644                ErrorKind::DataInvalid,
645                format!("Can't parse date from string: {}", s.as_ref()),
646            )
647            .with_source(e)
648        })?;
649
650        Ok(Self::date(date::date_from_naive_date(t)))
651    }
652
653    /// Create date literal from calendar date (year, month and day).
654    ///
655    /// See [`NaiveDate::from_ymd_opt`].
656    ///
657    /// Example:
658    ///
659    ///```rust
660    /// use iceberg::spec::{Datum, Literal};
661    /// let t = Datum::date_from_ymd(1970, 1, 5).unwrap();
662    ///
663    /// assert_eq!(&format!("{t}"), "1970-01-05");
664    /// assert_eq!(Literal::date(4), t.into());
665    /// ```
666    pub fn date_from_ymd(year: i32, month: u32, day: u32) -> Result<Self> {
667        let t = NaiveDate::from_ymd_opt(year, month, day).ok_or_else(|| {
668            Error::new(
669                ErrorKind::DataInvalid,
670                format!("Can't create date from year: {year}, month: {month}, day: {day}"),
671            )
672        })?;
673
674        Ok(Self::date(date::date_from_naive_date(t)))
675    }
676
677    /// Creates time literal in microseconds directly.
678    ///
679    /// It will return error when it's negative or too large to fit in 24 hours.
680    ///
681    /// Example:
682    ///
683    /// ```rust
684    /// use iceberg::spec::{Datum, Literal};
685    /// let micro_secs = {
686    ///     1 * 3600 * 1_000_000 + // 1 hour
687    ///     2 * 60 * 1_000_000 +   // 2 minutes
688    ///     1 * 1_000_000 + // 1 second
689    ///     888999 // microseconds
690    /// };
691    ///
692    /// let t = Datum::time_micros(micro_secs).unwrap();
693    ///
694    /// assert_eq!(&format!("{t}"), "01:02:01.888999");
695    /// assert_eq!(Literal::time(micro_secs), t.into());
696    ///
697    /// let negative_value = -100;
698    /// assert!(Datum::time_micros(negative_value).is_err());
699    ///
700    /// let too_large_value = 36 * 60 * 60 * 1_000_000; // Too large to fit in 24 hours.
701    /// assert!(Datum::time_micros(too_large_value).is_err());
702    /// ```
703    pub fn time_micros(value: i64) -> Result<Self> {
704        ensure_data_valid!(
705            (0..=MAX_TIME_VALUE).contains(&value),
706            "Invalid value for Time type: {}",
707            value
708        );
709
710        Ok(Self {
711            r#type: PrimitiveType::Time,
712            literal: PrimitiveLiteral::Long(value),
713        })
714    }
715
716    /// Creates time literal from [`chrono::NaiveTime`].
717    fn time_from_naive_time(t: NaiveTime) -> Self {
718        let duration = t - date::unix_epoch().time();
719        // It's safe to unwrap here since less than 24 hours will never overflow.
720        let micro_secs = duration.num_microseconds().unwrap();
721
722        Self {
723            r#type: PrimitiveType::Time,
724            literal: PrimitiveLiteral::Long(micro_secs),
725        }
726    }
727
728    /// Creates time literal in microseconds in `%H:%M:%S:.f` format.
729    ///
730    /// See [`NaiveTime::from_str`] for details.
731    ///
732    /// Example:
733    /// ```rust
734    /// use iceberg::spec::{Datum, Literal};
735    /// let t = Datum::time_from_str("01:02:01.888999777").unwrap();
736    ///
737    /// assert_eq!(&format!("{t}"), "01:02:01.888999");
738    /// ```
739    pub fn time_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
740        let t = s.as_ref().parse::<NaiveTime>().map_err(|e| {
741            Error::new(
742                ErrorKind::DataInvalid,
743                format!("Can't parse time from string: {}", s.as_ref()),
744            )
745            .with_source(e)
746        })?;
747
748        Ok(Self::time_from_naive_time(t))
749    }
750
751    /// Creates time literal from hour, minute, second, and microseconds.
752    ///
753    /// See [`NaiveTime::from_hms_micro_opt`].
754    ///
755    /// Example:
756    /// ```rust
757    /// use iceberg::spec::{Datum, Literal};
758    /// let t = Datum::time_from_hms_micro(22, 15, 33, 111).unwrap();
759    ///
760    /// assert_eq!(&format!("{t}"), "22:15:33.000111");
761    /// ```
762    pub fn time_from_hms_micro(hour: u32, min: u32, sec: u32, micro: u32) -> Result<Self> {
763        let t = NaiveTime::from_hms_micro_opt(hour, min, sec, micro)
764            .ok_or_else(|| Error::new(
765                ErrorKind::DataInvalid,
766                format!("Can't create time from hour: {hour}, min: {min}, second: {sec}, microsecond: {micro}"),
767            ))?;
768        Ok(Self::time_from_naive_time(t))
769    }
770
771    /// Creates a timestamp from unix epoch in microseconds.
772    ///
773    /// Example:
774    ///
775    /// ```rust
776    /// use iceberg::spec::Datum;
777    /// let t = Datum::timestamp_micros(1000);
778    ///
779    /// assert_eq!(&format!("{t}"), "1970-01-01 00:00:00.001");
780    /// ```
781    pub fn timestamp_micros(value: i64) -> Self {
782        Self {
783            r#type: PrimitiveType::Timestamp,
784            literal: PrimitiveLiteral::Long(value),
785        }
786    }
787
788    /// Creates a timestamp from unix epoch in nanoseconds.
789    ///
790    /// Example:
791    ///
792    /// ```rust
793    /// use iceberg::spec::Datum;
794    /// let t = Datum::timestamp_nanos(1000);
795    ///
796    /// assert_eq!(&format!("{t}"), "1970-01-01 00:00:00.000001");
797    /// ```
798    pub fn timestamp_nanos(value: i64) -> Self {
799        Self {
800            r#type: PrimitiveType::TimestampNs,
801            literal: PrimitiveLiteral::Long(value),
802        }
803    }
804
805    /// Creates a timestamp from [`DateTime`].
806    ///
807    /// Example:
808    ///
809    /// ```rust
810    /// use chrono::{NaiveDate, NaiveDateTime, TimeZone, Utc};
811    /// use iceberg::spec::Datum;
812    /// let t = Datum::timestamp_from_datetime(
813    ///     NaiveDate::from_ymd_opt(1992, 3, 1)
814    ///         .unwrap()
815    ///         .and_hms_micro_opt(1, 2, 3, 88)
816    ///         .unwrap(),
817    /// );
818    ///
819    /// assert_eq!(&format!("{t}"), "1992-03-01 01:02:03.000088");
820    /// ```
821    pub fn timestamp_from_datetime(dt: NaiveDateTime) -> Self {
822        Self::timestamp_micros(dt.and_utc().timestamp_micros())
823    }
824
825    /// Parse a timestamp in [`%Y-%m-%dT%H:%M:%S%.f`] format.
826    ///
827    /// See [`NaiveDateTime::from_str`].
828    ///
829    /// Example:
830    ///
831    /// ```rust
832    /// use chrono::{DateTime, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime};
833    /// use iceberg::spec::{Datum, Literal};
834    /// let t = Datum::timestamp_from_str("1992-03-01T01:02:03.000088").unwrap();
835    ///
836    /// assert_eq!(&format!("{t}"), "1992-03-01 01:02:03.000088");
837    /// ```
838    pub fn timestamp_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
839        let dt = s.as_ref().parse::<NaiveDateTime>().map_err(|e| {
840            Error::new(ErrorKind::DataInvalid, "Can't parse timestamp.").with_source(e)
841        })?;
842
843        Ok(Self::timestamp_from_datetime(dt))
844    }
845
846    /// Creates a timestamp with timezone from unix epoch in microseconds.
847    ///
848    /// Example:
849    ///
850    /// ```rust
851    /// use iceberg::spec::Datum;
852    /// let t = Datum::timestamptz_micros(1000);
853    ///
854    /// assert_eq!(&format!("{t}"), "1970-01-01 00:00:00.001 UTC");
855    /// ```
856    pub fn timestamptz_micros(value: i64) -> Self {
857        Self {
858            r#type: PrimitiveType::Timestamptz,
859            literal: PrimitiveLiteral::Long(value),
860        }
861    }
862
863    /// Creates a timestamp with timezone from unix epoch in nanoseconds.
864    ///
865    /// Example:
866    ///
867    /// ```rust
868    /// use iceberg::spec::Datum;
869    /// let t = Datum::timestamptz_nanos(1000);
870    ///
871    /// assert_eq!(&format!("{t}"), "1970-01-01 00:00:00.000001 UTC");
872    /// ```
873    pub fn timestamptz_nanos(value: i64) -> Self {
874        Self {
875            r#type: PrimitiveType::TimestamptzNs,
876            literal: PrimitiveLiteral::Long(value),
877        }
878    }
879
880    /// Creates a timestamp with timezone from [`DateTime`].
881    /// Example:
882    ///
883    /// ```rust
884    /// use chrono::{TimeZone, Utc};
885    /// use iceberg::spec::Datum;
886    /// let t = Datum::timestamptz_from_datetime(Utc.timestamp_opt(1000, 0).unwrap());
887    ///
888    /// assert_eq!(&format!("{t}"), "1970-01-01 00:16:40 UTC");
889    /// ```
890    pub fn timestamptz_from_datetime<T: TimeZone>(dt: DateTime<T>) -> Self {
891        Self::timestamptz_micros(dt.with_timezone(&Utc).timestamp_micros())
892    }
893
894    /// Parse timestamp with timezone in RFC3339 format.
895    ///
896    /// See [`DateTime::from_str`].
897    ///
898    /// Example:
899    ///
900    /// ```rust
901    /// use chrono::{DateTime, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime};
902    /// use iceberg::spec::{Datum, Literal};
903    /// let t = Datum::timestamptz_from_str("1992-03-01T01:02:03.000088+08:00").unwrap();
904    ///
905    /// assert_eq!(&format!("{t}"), "1992-02-29 17:02:03.000088 UTC");
906    /// ```
907    pub fn timestamptz_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
908        let dt = DateTime::<Utc>::from_str(s.as_ref()).map_err(|e| {
909            Error::new(ErrorKind::DataInvalid, "Can't parse datetime.").with_source(e)
910        })?;
911
912        Ok(Self::timestamptz_from_datetime(dt))
913    }
914
915    /// Creates a string literal.
916    ///
917    /// Example:
918    ///
919    /// ```rust
920    /// use iceberg::spec::Datum;
921    /// let t = Datum::string("ss");
922    ///
923    /// assert_eq!(&format!("{t}"), r#""ss""#);
924    /// ```
925    pub fn string<S: ToString>(s: S) -> Self {
926        Self {
927            r#type: PrimitiveType::String,
928            literal: PrimitiveLiteral::String(s.to_string()),
929        }
930    }
931
932    /// Creates uuid literal.
933    ///
934    /// Example:
935    ///
936    /// ```rust
937    /// use iceberg::spec::Datum;
938    /// use uuid::uuid;
939    /// let t = Datum::uuid(uuid!("a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8"));
940    ///
941    /// assert_eq!(&format!("{t}"), "a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8");
942    /// ```
943    pub fn uuid(uuid: uuid::Uuid) -> Self {
944        Self {
945            r#type: PrimitiveType::Uuid,
946            literal: PrimitiveLiteral::UInt128(uuid.as_u128()),
947        }
948    }
949
950    /// Creates uuid from str. See [`uuid::Uuid::parse_str`].
951    ///
952    /// Example:
953    ///
954    /// ```rust
955    /// use iceberg::spec::Datum;
956    /// let t = Datum::uuid_from_str("a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8").unwrap();
957    ///
958    /// assert_eq!(&format!("{t}"), "a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8");
959    /// ```
960    pub fn uuid_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
961        let uuid = uuid::Uuid::parse_str(s.as_ref()).map_err(|e| {
962            Error::new(
963                ErrorKind::DataInvalid,
964                format!("Can't parse uuid from string: {}", s.as_ref()),
965            )
966            .with_source(e)
967        })?;
968        Ok(Self::uuid(uuid))
969    }
970
971    /// Creates a fixed literal from bytes.
972    ///
973    /// Example:
974    ///
975    /// ```rust
976    /// use iceberg::spec::{Datum, Literal, PrimitiveLiteral};
977    /// let t = Datum::fixed(vec![1u8, 2u8]);
978    ///
979    /// assert_eq!(&format!("{t}"), "0102");
980    /// ```
981    pub fn fixed<I: IntoIterator<Item = u8>>(input: I) -> Self {
982        let value: Vec<u8> = input.into_iter().collect();
983        Self {
984            r#type: PrimitiveType::Fixed(value.len() as u64),
985            literal: PrimitiveLiteral::Binary(value),
986        }
987    }
988
989    /// Creates a binary literal from bytes.
990    ///
991    /// Example:
992    ///
993    /// ```rust
994    /// use iceberg::spec::Datum;
995    /// let t = Datum::binary(vec![1u8, 100u8]);
996    ///
997    /// assert_eq!(&format!("{t}"), "0164");
998    /// ```
999    pub fn binary<I: IntoIterator<Item = u8>>(input: I) -> Self {
1000        Self {
1001            r#type: PrimitiveType::Binary,
1002            literal: PrimitiveLiteral::Binary(input.into_iter().collect()),
1003        }
1004    }
1005
1006    /// Creates decimal literal from string.
1007    ///
1008    /// Example:
1009    ///
1010    /// ```rust
1011    /// use iceberg::spec::Datum;
1012    /// let t = Datum::decimal_from_str("123.45").unwrap();
1013    ///
1014    /// assert_eq!(&format!("{t}"), "123.45");
1015    /// ```
1016    pub fn decimal_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
1017        let decimal = decimal_from_str_exact(s.as_ref())?;
1018
1019        Self::decimal(decimal)
1020    }
1021
1022    /// Try to create a decimal literal from [`Decimal`].
1023    ///
1024    /// Example:
1025    ///
1026    /// ```rust
1027    /// use iceberg::spec::Datum;
1028    ///
1029    /// let t = Datum::decimal_from_str("1.23").unwrap();
1030    ///
1031    /// assert_eq!(&format!("{t}"), "1.23");
1032    /// ```
1033    pub fn decimal(value: Decimal) -> Result<Self> {
1034        let scale = decimal_scale(&value);
1035
1036        let r#type = Type::decimal(MAX_DECIMAL_PRECISION, scale)?;
1037        if let Type::Primitive(p) = r#type {
1038            Ok(Self {
1039                r#type: p,
1040                literal: PrimitiveLiteral::Int128(decimal_mantissa(&value)),
1041            })
1042        } else {
1043            unreachable!("Decimal type must be primitive.")
1044        }
1045    }
1046
1047    /// Try to create a decimal literal from [`Decimal`] with precision.
1048    ///
1049    /// This method allows specifying a custom precision for the decimal type,
1050    /// which is useful when you need to control the storage requirements.
1051    /// Use [`Datum::decimal`] if you want to use the maximum precision (38).
1052    pub fn decimal_with_precision(value: Decimal, precision: u32) -> Result<Self> {
1053        let scale = decimal_scale(&value);
1054        let mantissa = decimal_mantissa(&value);
1055
1056        let available_bytes = Type::decimal_required_bytes(precision)? as usize;
1057        let actual_bytes = i128_to_be_bytes_min(mantissa);
1058        if actual_bytes.len() > available_bytes {
1059            return Err(Error::new(
1060                ErrorKind::DataInvalid,
1061                format!("Decimal value {value} is too large for precision {precision}"),
1062            ));
1063        }
1064
1065        let r#type = Type::decimal(precision, scale)?;
1066        if let Type::Primitive(p) = r#type {
1067            Ok(Self {
1068                r#type: p,
1069                literal: PrimitiveLiteral::Int128(mantissa),
1070            })
1071        } else {
1072            unreachable!("Decimal type must be primitive.")
1073        }
1074    }
1075
1076    fn i64_to_i32<T: Into<i64> + PartialOrd<i64>>(val: T) -> Datum {
1077        if val > INT_MAX as i64 {
1078            Datum::new(PrimitiveType::Int, PrimitiveLiteral::AboveMax)
1079        } else if val < INT_MIN as i64 {
1080            Datum::new(PrimitiveType::Int, PrimitiveLiteral::BelowMin)
1081        } else {
1082            Datum::int(val.into() as i32)
1083        }
1084    }
1085
1086    fn i128_to_i32<T: Into<i128> + PartialOrd<i128>>(val: T) -> Datum {
1087        if val > INT_MAX as i128 {
1088            Datum::new(PrimitiveType::Int, PrimitiveLiteral::AboveMax)
1089        } else if val < INT_MIN as i128 {
1090            Datum::new(PrimitiveType::Int, PrimitiveLiteral::BelowMin)
1091        } else {
1092            Datum::int(val.into() as i32)
1093        }
1094    }
1095
1096    fn i128_to_i64<T: Into<i128> + PartialOrd<i128>>(val: T) -> Datum {
1097        if val > LONG_MAX as i128 {
1098            Datum::new(PrimitiveType::Long, PrimitiveLiteral::AboveMax)
1099        } else if val < LONG_MIN as i128 {
1100            Datum::new(PrimitiveType::Long, PrimitiveLiteral::BelowMin)
1101        } else {
1102            Datum::long(val.into() as i64)
1103        }
1104    }
1105
1106    fn string_to_i128<S: AsRef<str>>(s: S) -> Result<i128> {
1107        s.as_ref().parse::<i128>().map_err(|e| {
1108            Error::new(ErrorKind::DataInvalid, "Can't parse string to i128.").with_source(e)
1109        })
1110    }
1111
1112    /// Convert the datum to `target_type`.
1113    pub fn to(self, target_type: &Type) -> Result<Datum> {
1114        match target_type {
1115            Type::Primitive(target_primitive_type) => {
1116                match (&self.literal, &self.r#type, target_primitive_type) {
1117                    (PrimitiveLiteral::Int(val), _, PrimitiveType::Int) => Ok(Datum::int(*val)),
1118                    (PrimitiveLiteral::Int(val), _, PrimitiveType::Date) => Ok(Datum::date(*val)),
1119                    (PrimitiveLiteral::Int(val), _, PrimitiveType::Long) => Ok(Datum::long(*val)),
1120                    (PrimitiveLiteral::Long(val), _, PrimitiveType::Int) => {
1121                        Ok(Datum::i64_to_i32(*val))
1122                    }
1123                    (PrimitiveLiteral::Long(val), _, PrimitiveType::Timestamp) => {
1124                        Ok(Datum::timestamp_micros(*val))
1125                    }
1126                    (PrimitiveLiteral::Long(val), _, PrimitiveType::Timestamptz) => {
1127                        Ok(Datum::timestamptz_micros(*val))
1128                    }
1129                    // Let's wait with nano's until this clears up: https://github.com/apache/iceberg/pull/11775
1130                    (PrimitiveLiteral::Int128(val), _, PrimitiveType::Long) => {
1131                        Ok(Datum::i128_to_i64(*val))
1132                    }
1133
1134                    (PrimitiveLiteral::String(val), _, PrimitiveType::Boolean) => {
1135                        Datum::bool_from_str(val)
1136                    }
1137                    (PrimitiveLiteral::String(val), _, PrimitiveType::Int) => {
1138                        Datum::string_to_i128(val).map(Datum::i128_to_i32)
1139                    }
1140                    (PrimitiveLiteral::String(val), _, PrimitiveType::Long) => {
1141                        Datum::string_to_i128(val).map(Datum::i128_to_i64)
1142                    }
1143                    (PrimitiveLiteral::String(val), _, PrimitiveType::Timestamp) => {
1144                        Datum::timestamp_from_str(val)
1145                    }
1146                    (PrimitiveLiteral::String(val), _, PrimitiveType::Timestamptz) => {
1147                        Datum::timestamptz_from_str(val)
1148                    }
1149
1150                    // TODO: implement more type conversions
1151                    (_, self_type, target_type) if self_type == target_type => Ok(self),
1152                    _ => Err(Error::new(
1153                        ErrorKind::DataInvalid,
1154                        format!(
1155                            "Can't convert datum from {} type to {} type.",
1156                            self.r#type, target_primitive_type
1157                        ),
1158                    )),
1159                }
1160            }
1161            _ => Err(Error::new(
1162                ErrorKind::DataInvalid,
1163                format!(
1164                    "Can't convert datum from {} type to {} type.",
1165                    self.r#type, target_type
1166                ),
1167            )),
1168        }
1169    }
1170
1171    /// Get the primitive literal from datum.
1172    pub fn literal(&self) -> &PrimitiveLiteral {
1173        &self.literal
1174    }
1175
1176    /// Get the primitive type from datum.
1177    pub fn data_type(&self) -> &PrimitiveType {
1178        &self.r#type
1179    }
1180
1181    /// Returns true if the Literal represents a primitive type
1182    /// that can be a NaN, and that it's value is NaN
1183    pub fn is_nan(&self) -> bool {
1184        match self.literal {
1185            PrimitiveLiteral::Double(val) => val.is_nan(),
1186            PrimitiveLiteral::Float(val) => val.is_nan(),
1187            _ => false,
1188        }
1189    }
1190
1191    /// Returns a human-readable string representation of this literal.
1192    ///
1193    /// For string literals, this returns the raw string value without quotes.
1194    /// For all other literals, it falls back to [`to_string()`].
1195    pub fn to_human_string(&self) -> String {
1196        match self.literal() {
1197            PrimitiveLiteral::String(s) => s.to_string(),
1198            _ => self.to_string(),
1199        }
1200    }
1201}