iceberg/spec/
datatypes.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18/*!
19 * Data Types
20 */
21use std::collections::HashMap;
22use std::convert::identity;
23use std::fmt;
24use std::ops::Index;
25use std::sync::{Arc, OnceLock};
26
27use ::serde::de::{MapAccess, Visitor};
28use serde::de::{Error, IntoDeserializer};
29use serde::{Deserialize, Deserializer, Serialize, Serializer, de};
30use serde_json::Value as JsonValue;
31
32use super::values::Literal;
33use crate::ensure_data_valid;
34use crate::error::Result;
35use crate::spec::PrimitiveLiteral;
36use crate::spec::datatypes::_decimal::{MAX_PRECISION, REQUIRED_LENGTH};
37
/// Field name for list type.
pub const LIST_FIELD_NAME: &str = "element";
/// Field name for map type's key.
pub const MAP_KEY_FIELD_NAME: &str = "key";
/// Field name for map type's value.
pub const MAP_VALUE_FIELD_NAME: &str = "value";

/// Largest decimal byte width accepted by `Type::decimal_max_precision`.
pub(crate) const MAX_DECIMAL_BYTES: u32 = 24;
/// Largest decimal precision accepted by `Type::decimal` and `Type::decimal_required_bytes`.
pub(crate) const MAX_DECIMAL_PRECISION: u32 = 38;
47
48mod _decimal {
49    use once_cell::sync::Lazy;
50
51    use crate::spec::{MAX_DECIMAL_BYTES, MAX_DECIMAL_PRECISION};
52
53    // Max precision of bytes, starts from 1
54    pub(super) static MAX_PRECISION: Lazy<[u32; MAX_DECIMAL_BYTES as usize]> = Lazy::new(|| {
55        let mut ret: [u32; 24] = [0; 24];
56        for (i, prec) in ret.iter_mut().enumerate() {
57            *prec = 2f64.powi((8 * (i + 1) - 1) as i32).log10().floor() as u32;
58        }
59
60        ret
61    });
62
63    //  Required bytes of precision, starts from 1
64    pub(super) static REQUIRED_LENGTH: Lazy<[u32; MAX_DECIMAL_PRECISION as usize]> =
65        Lazy::new(|| {
66            let mut ret: [u32; MAX_DECIMAL_PRECISION as usize] =
67                [0; MAX_DECIMAL_PRECISION as usize];
68
69            for (i, required_len) in ret.iter_mut().enumerate() {
70                for j in 0..MAX_PRECISION.len() {
71                    if MAX_PRECISION[j] >= ((i + 1) as u32) {
72                        *required_len = (j + 1) as u32;
73                        break;
74                    }
75                }
76            }
77
78            ret
79        });
80}
81
/// All data types are either primitives or nested types, which are maps, lists, or structs.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Type {
    /// A primitive (non-nested) type, see [`PrimitiveType`].
    Primitive(PrimitiveType),
    /// A struct type, see [`StructType`].
    Struct(StructType),
    /// A list type, see [`ListType`].
    List(ListType),
    /// A map type, see [`MapType`].
    Map(MapType),
}
94
95impl fmt::Display for Type {
96    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
97        match self {
98            Type::Primitive(primitive) => write!(f, "{primitive}"),
99            Type::Struct(s) => write!(f, "{s}"),
100            Type::List(_) => write!(f, "list"),
101            Type::Map(_) => write!(f, "map"),
102        }
103    }
104}
105
106impl Type {
107    /// Whether the type is primitive type.
108    #[inline(always)]
109    pub fn is_primitive(&self) -> bool {
110        matches!(self, Type::Primitive(_))
111    }
112
113    /// Whether the type is struct type.
114    #[inline(always)]
115    pub fn is_struct(&self) -> bool {
116        matches!(self, Type::Struct(_))
117    }
118
119    /// Whether the type is nested type.
120    #[inline(always)]
121    pub fn is_nested(&self) -> bool {
122        matches!(self, Type::Struct(_) | Type::List(_) | Type::Map(_))
123    }
124
125    /// Convert Type to reference of PrimitiveType
126    pub fn as_primitive_type(&self) -> Option<&PrimitiveType> {
127        if let Type::Primitive(primitive_type) = self {
128            Some(primitive_type)
129        } else {
130            None
131        }
132    }
133
134    /// Convert Type to StructType
135    pub fn to_struct_type(self) -> Option<StructType> {
136        if let Type::Struct(struct_type) = self {
137            Some(struct_type)
138        } else {
139            None
140        }
141    }
142
143    /// Return max precision for decimal given [`num_bytes`] bytes.
144    #[inline(always)]
145    pub fn decimal_max_precision(num_bytes: u32) -> Result<u32> {
146        ensure_data_valid!(
147            num_bytes > 0 && num_bytes <= MAX_DECIMAL_BYTES,
148            "Decimal length larger than {MAX_DECIMAL_BYTES} is not supported: {num_bytes}",
149        );
150        Ok(MAX_PRECISION[num_bytes as usize - 1])
151    }
152
153    /// Returns minimum bytes required for decimal with [`precision`].
154    #[inline(always)]
155    pub fn decimal_required_bytes(precision: u32) -> Result<u32> {
156        ensure_data_valid!(
157            precision > 0 && precision <= MAX_DECIMAL_PRECISION,
158            "Decimals with precision larger than {MAX_DECIMAL_PRECISION} are not supported: {precision}",
159        );
160        Ok(REQUIRED_LENGTH[precision as usize - 1])
161    }
162
163    /// Creates  decimal type.
164    #[inline(always)]
165    pub fn decimal(precision: u32, scale: u32) -> Result<Self> {
166        ensure_data_valid!(
167            precision > 0 && precision <= MAX_DECIMAL_PRECISION,
168            "Decimals with precision larger than {MAX_DECIMAL_PRECISION} are not supported: {precision}",
169        );
170        Ok(Type::Primitive(PrimitiveType::Decimal { precision, scale }))
171    }
172
173    /// Check if it's float or double type.
174    #[inline(always)]
175    pub fn is_floating_type(&self) -> bool {
176        matches!(
177            self,
178            Type::Primitive(PrimitiveType::Float) | Type::Primitive(PrimitiveType::Double)
179        )
180    }
181}
182
183impl From<PrimitiveType> for Type {
184    fn from(value: PrimitiveType) -> Self {
185        Self::Primitive(value)
186    }
187}
188
189impl From<StructType> for Type {
190    fn from(value: StructType) -> Self {
191        Type::Struct(value)
192    }
193}
194
195impl From<ListType> for Type {
196    fn from(value: ListType) -> Self {
197        Type::List(value)
198    }
199}
200
201impl From<MapType> for Type {
202    fn from(value: MapType) -> Self {
203        Type::Map(value)
204    }
205}
206
/// Primitive data types
///
/// The serde derive is declared with `remote = "Self"`; the hand-written `Serialize` and
/// `Deserialize` impls in this file handle `Decimal` and `Fixed` themselves and delegate
/// every other variant to the derived code.
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone, Hash)]
#[serde(rename_all = "lowercase", remote = "Self")]
pub enum PrimitiveType {
    /// True or False
    Boolean,
    /// 32-bit signed integer
    Int,
    /// 64-bit signed integer
    Long,
    /// 32-bit IEEE 754 floating point.
    Float,
    /// 64-bit IEEE 754 floating point.
    Double,
    /// Fixed point decimal, serialized as `decimal(precision,scale)`.
    Decimal {
        /// Precision, must be 38 or less
        precision: u32,
        /// Scale
        scale: u32,
    },
    /// Calendar date without timezone or time.
    Date,
    /// Time of day in microsecond precision, without date or timezone.
    Time,
    /// Timestamp in microsecond precision, without timezone
    Timestamp,
    /// Timestamp in microsecond precision, with timezone
    Timestamptz,
    /// Timestamp in nanosecond precision, without timezone
    #[serde(rename = "timestamp_ns")]
    TimestampNs,
    /// Timestamp in nanosecond precision with timezone
    #[serde(rename = "timestamptz_ns")]
    TimestamptzNs,
    /// Arbitrary-length character sequences encoded in utf-8
    String,
    /// Universally Unique Identifiers, should use 16-byte fixed
    Uuid,
    /// Fixed length byte array of the given length, serialized as `fixed[length]`.
    Fixed(u64),
    /// Arbitrary-length byte array.
    Binary,
}
251
252impl PrimitiveType {
253    /// Check whether literal is compatible with the type.
254    pub fn compatible(&self, literal: &PrimitiveLiteral) -> bool {
255        matches!(
256            (self, literal),
257            (PrimitiveType::Boolean, PrimitiveLiteral::Boolean(_))
258                | (PrimitiveType::Int, PrimitiveLiteral::Int(_))
259                | (PrimitiveType::Long, PrimitiveLiteral::Long(_))
260                | (PrimitiveType::Float, PrimitiveLiteral::Float(_))
261                | (PrimitiveType::Double, PrimitiveLiteral::Double(_))
262                | (PrimitiveType::Decimal { .. }, PrimitiveLiteral::Int128(_))
263                | (PrimitiveType::Date, PrimitiveLiteral::Int(_))
264                | (PrimitiveType::Time, PrimitiveLiteral::Long(_))
265                | (PrimitiveType::Timestamp, PrimitiveLiteral::Long(_))
266                | (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(_))
267                | (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(_))
268                | (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(_))
269                | (PrimitiveType::String, PrimitiveLiteral::String(_))
270                | (PrimitiveType::Uuid, PrimitiveLiteral::UInt128(_))
271                | (PrimitiveType::Fixed(_), PrimitiveLiteral::Binary(_))
272                | (PrimitiveType::Binary, PrimitiveLiteral::Binary(_))
273        )
274    }
275}
276
277impl Serialize for Type {
278    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
279    where S: Serializer {
280        let type_serde = _serde::SerdeType::from(self);
281        type_serde.serialize(serializer)
282    }
283}
284
285impl<'de> Deserialize<'de> for Type {
286    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
287    where D: Deserializer<'de> {
288        let type_serde = _serde::SerdeType::deserialize(deserializer)?;
289        Ok(Type::from(type_serde))
290    }
291}
292
293impl<'de> Deserialize<'de> for PrimitiveType {
294    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
295    where D: Deserializer<'de> {
296        let s = String::deserialize(deserializer)?;
297        if s.starts_with("decimal") {
298            deserialize_decimal(s.into_deserializer())
299        } else if s.starts_with("fixed") {
300            deserialize_fixed(s.into_deserializer())
301        } else {
302            PrimitiveType::deserialize(s.into_deserializer())
303        }
304    }
305}
306
307impl Serialize for PrimitiveType {
308    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
309    where S: Serializer {
310        match self {
311            PrimitiveType::Decimal { precision, scale } => {
312                serialize_decimal(precision, scale, serializer)
313            }
314            PrimitiveType::Fixed(l) => serialize_fixed(l, serializer),
315            _ => PrimitiveType::serialize(self, serializer),
316        }
317    }
318}
319
320fn deserialize_decimal<'de, D>(deserializer: D) -> std::result::Result<PrimitiveType, D::Error>
321where D: Deserializer<'de> {
322    let s = String::deserialize(deserializer)?;
323    let (precision, scale) = s
324        .trim_start_matches(r"decimal(")
325        .trim_end_matches(')')
326        .split_once(',')
327        .ok_or_else(|| D::Error::custom("Decimal requires precision and scale: {s}"))?;
328
329    Ok(PrimitiveType::Decimal {
330        precision: precision.trim().parse().map_err(D::Error::custom)?,
331        scale: scale.trim().parse().map_err(D::Error::custom)?,
332    })
333}
334
335fn serialize_decimal<S>(
336    precision: &u32,
337    scale: &u32,
338    serializer: S,
339) -> std::result::Result<S::Ok, S::Error>
340where
341    S: Serializer,
342{
343    serializer.serialize_str(&format!("decimal({precision},{scale})"))
344}
345
346fn deserialize_fixed<'de, D>(deserializer: D) -> std::result::Result<PrimitiveType, D::Error>
347where D: Deserializer<'de> {
348    let fixed = String::deserialize(deserializer)?
349        .trim_start_matches(r"fixed[")
350        .trim_end_matches(']')
351        .to_owned();
352
353    fixed
354        .parse()
355        .map(PrimitiveType::Fixed)
356        .map_err(D::Error::custom)
357}
358
359fn serialize_fixed<S>(value: &u64, serializer: S) -> std::result::Result<S::Ok, S::Error>
360where S: Serializer {
361    serializer.serialize_str(&format!("fixed[{value}]"))
362}
363
364impl fmt::Display for PrimitiveType {
365    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
366        match self {
367            PrimitiveType::Boolean => write!(f, "boolean"),
368            PrimitiveType::Int => write!(f, "int"),
369            PrimitiveType::Long => write!(f, "long"),
370            PrimitiveType::Float => write!(f, "float"),
371            PrimitiveType::Double => write!(f, "double"),
372            PrimitiveType::Decimal { precision, scale } => {
373                write!(f, "decimal({precision},{scale})")
374            }
375            PrimitiveType::Date => write!(f, "date"),
376            PrimitiveType::Time => write!(f, "time"),
377            PrimitiveType::Timestamp => write!(f, "timestamp"),
378            PrimitiveType::Timestamptz => write!(f, "timestamptz"),
379            PrimitiveType::TimestampNs => write!(f, "timestamp_ns"),
380            PrimitiveType::TimestamptzNs => write!(f, "timestamptz_ns"),
381            PrimitiveType::String => write!(f, "string"),
382            PrimitiveType::Uuid => write!(f, "uuid"),
383            PrimitiveType::Fixed(size) => write!(f, "fixed({size})"),
384            PrimitiveType::Binary => write!(f, "binary"),
385        }
386    }
387}
388
/// DataType for a specific struct
///
/// Serialized as an object tagged with `"type": "struct"`. The two lookup tables are
/// lazily-built caches: they are skipped during serialization and ignored by the
/// hand-written `PartialEq` impl, which compares `fields` only.
#[derive(Debug, Serialize, Clone, Default)]
#[serde(rename = "struct", tag = "type")]
pub struct StructType {
    /// Struct fields
    fields: Vec<NestedFieldRef>,
    /// Lookup for index by field id
    #[serde(skip_serializing)]
    id_lookup: OnceLock<HashMap<i32, usize>>,
    /// Lookup for index by field name
    #[serde(skip_serializing)]
    name_lookup: OnceLock<HashMap<String, usize>>,
}
401
402impl<'de> Deserialize<'de> for StructType {
403    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
404    where D: Deserializer<'de> {
405        #[derive(Deserialize)]
406        #[serde(field_identifier, rename_all = "lowercase")]
407        enum Field {
408            Type,
409            Fields,
410        }
411
412        struct StructTypeVisitor;
413
414        impl<'de> Visitor<'de> for StructTypeVisitor {
415            type Value = StructType;
416
417            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
418                formatter.write_str("struct")
419            }
420
421            fn visit_map<V>(self, mut map: V) -> std::result::Result<StructType, V::Error>
422            where V: MapAccess<'de> {
423                let mut fields = None;
424                while let Some(key) = map.next_key()? {
425                    match key {
426                        Field::Type => {
427                            let type_val: String = map.next_value()?;
428                            if type_val != "struct" {
429                                return Err(serde::de::Error::custom(format!(
430                                    "expected type 'struct', got '{}'",
431                                    type_val
432                                )));
433                            }
434                        }
435                        Field::Fields => {
436                            if fields.is_some() {
437                                return Err(serde::de::Error::duplicate_field("fields"));
438                            }
439                            fields = Some(map.next_value()?);
440                        }
441                    }
442                }
443                let fields: Vec<NestedFieldRef> =
444                    fields.ok_or_else(|| de::Error::missing_field("fields"))?;
445
446                Ok(StructType::new(fields))
447            }
448        }
449
450        const FIELDS: &[&str] = &["type", "fields"];
451        deserializer.deserialize_struct("struct", FIELDS, StructTypeVisitor)
452    }
453}
454
455impl StructType {
456    /// Creates a struct type with the given fields.
457    pub fn new(fields: Vec<NestedFieldRef>) -> Self {
458        Self {
459            fields,
460            id_lookup: OnceLock::new(),
461            name_lookup: OnceLock::new(),
462        }
463    }
464
465    /// Get struct field with certain id
466    pub fn field_by_id(&self, id: i32) -> Option<&NestedFieldRef> {
467        self.field_id_to_index(id).map(|idx| &self.fields[idx])
468    }
469
470    fn field_id_to_index(&self, field_id: i32) -> Option<usize> {
471        self.id_lookup
472            .get_or_init(|| {
473                HashMap::from_iter(self.fields.iter().enumerate().map(|(i, x)| (x.id, i)))
474            })
475            .get(&field_id)
476            .copied()
477    }
478
479    /// Get struct field with certain field name
480    pub fn field_by_name(&self, name: &str) -> Option<&NestedFieldRef> {
481        self.field_name_to_index(name).map(|idx| &self.fields[idx])
482    }
483
484    fn field_name_to_index(&self, name: &str) -> Option<usize> {
485        self.name_lookup
486            .get_or_init(|| {
487                HashMap::from_iter(
488                    self.fields
489                        .iter()
490                        .enumerate()
491                        .map(|(i, x)| (x.name.clone(), i)),
492                )
493            })
494            .get(name)
495            .copied()
496    }
497
498    /// Get fields.
499    pub fn fields(&self) -> &[NestedFieldRef] {
500        &self.fields
501    }
502}
503
504impl PartialEq for StructType {
505    fn eq(&self, other: &Self) -> bool {
506        self.fields == other.fields
507    }
508}
509
510impl Eq for StructType {}
511
512impl Index<usize> for StructType {
513    type Output = NestedField;
514
515    fn index(&self, index: usize) -> &Self::Output {
516        &self.fields[index]
517    }
518}
519
520impl fmt::Display for StructType {
521    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
522        write!(f, "struct<")?;
523        for field in &self.fields {
524            write!(f, "{}", field.field_type)?;
525        }
526        write!(f, ">")
527    }
528}
529
/// A struct is a tuple of typed values. Each field in the tuple is named and has an integer id that is unique in the table schema.
/// Each field can be either optional or required, meaning that values can (or cannot) be null. Fields may be any type.
/// Fields may have an optional comment or doc string. Fields can have default values.
///
/// Serialization and deserialization go through the private `SerdeNestedField` mirror type.
#[derive(Debug, PartialEq, Serialize, Deserialize, Eq, Clone)]
#[serde(from = "SerdeNestedField", into = "SerdeNestedField")]
pub struct NestedField {
    /// Id unique in table schema
    pub id: i32,
    /// Field Name
    pub name: String,
    /// Optional or required
    pub required: bool,
    /// Datatype
    pub field_type: Box<Type>,
    /// Fields may have an optional comment or doc string.
    pub doc: Option<String>,
    /// Used to populate the field's value for all records that were written before the field was added to the schema
    pub initial_default: Option<Literal>,
    /// Used to populate the field's value for any records written after the field was added to the schema, if the writer does not supply the field's value
    pub write_default: Option<Literal>,
}
551
/// Serde mirror of [`NestedField`].
///
/// Keys are written in kebab-case, `field_type` is stored under the key `type`, and the
/// default values are carried as raw JSON values rather than typed literals. Optional
/// members that are `None` are omitted on serialization.
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
#[serde(rename_all = "kebab-case")]
struct SerdeNestedField {
    /// Field id.
    pub id: i32,
    /// Field name.
    pub name: String,
    /// Whether the field is required.
    pub required: bool,
    /// Field data type, serialized as `type`.
    #[serde(rename = "type")]
    pub field_type: Box<Type>,
    /// Optional doc string.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub doc: Option<String>,
    /// Initial default as raw JSON.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub initial_default: Option<JsonValue>,
    /// Write default as raw JSON.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub write_default: Option<JsonValue>,
}
567
568impl From<SerdeNestedField> for NestedField {
569    fn from(value: SerdeNestedField) -> Self {
570        NestedField {
571            id: value.id,
572            name: value.name,
573            required: value.required,
574            initial_default: value.initial_default.and_then(|x| {
575                Literal::try_from_json(x, &value.field_type)
576                    .ok()
577                    .and_then(identity)
578            }),
579            write_default: value.write_default.and_then(|x| {
580                Literal::try_from_json(x, &value.field_type)
581                    .ok()
582                    .and_then(identity)
583            }),
584            field_type: value.field_type,
585            doc: value.doc,
586        }
587    }
588}
589
590impl From<NestedField> for SerdeNestedField {
591    fn from(value: NestedField) -> Self {
592        let initial_default = value.initial_default.map(|x| x.try_into_json(&value.field_type).expect("We should have checked this in NestedField::with_initial_default, it can't be converted to json value"));
593        let write_default = value.write_default.map(|x| x.try_into_json(&value.field_type).expect("We should have checked this in NestedField::with_write_default, it can't be converted to json value"));
594        SerdeNestedField {
595            id: value.id,
596            name: value.name,
597            required: value.required,
598            field_type: value.field_type,
599            doc: value.doc,
600            initial_default,
601            write_default,
602        }
603    }
604}
605
/// Reference to nested field: a shared, reference-counted [`NestedField`].
pub type NestedFieldRef = Arc<NestedField>;
608
609impl NestedField {
610    /// Construct a new field.
611    pub fn new(id: i32, name: impl ToString, field_type: Type, required: bool) -> Self {
612        Self {
613            id,
614            name: name.to_string(),
615            required,
616            field_type: Box::new(field_type),
617            doc: None,
618            initial_default: None,
619            write_default: None,
620        }
621    }
622
623    /// Construct a required field.
624    pub fn required(id: i32, name: impl ToString, field_type: Type) -> Self {
625        Self::new(id, name, field_type, true)
626    }
627
628    /// Construct an optional field.
629    pub fn optional(id: i32, name: impl ToString, field_type: Type) -> Self {
630        Self::new(id, name, field_type, false)
631    }
632
633    /// Construct list type's element field.
634    pub fn list_element(id: i32, field_type: Type, required: bool) -> Self {
635        Self::new(id, LIST_FIELD_NAME, field_type, required)
636    }
637
638    /// Construct map type's key field.
639    pub fn map_key_element(id: i32, field_type: Type) -> Self {
640        Self::required(id, MAP_KEY_FIELD_NAME, field_type)
641    }
642
643    /// Construct map type's value field.
644    pub fn map_value_element(id: i32, field_type: Type, required: bool) -> Self {
645        Self::new(id, MAP_VALUE_FIELD_NAME, field_type, required)
646    }
647
648    /// Set the field's doc.
649    pub fn with_doc(mut self, doc: impl ToString) -> Self {
650        self.doc = Some(doc.to_string());
651        self
652    }
653
654    /// Set the field's initial default value.
655    pub fn with_initial_default(mut self, value: Literal) -> Self {
656        self.initial_default = Some(value);
657        self
658    }
659
660    /// Set the field's initial default value.
661    pub fn with_write_default(mut self, value: Literal) -> Self {
662        self.write_default = Some(value);
663        self
664    }
665
666    /// Set the id of the field.
667    pub(crate) fn with_id(mut self, id: i32) -> Self {
668        self.id = id;
669        self
670    }
671}
672
673impl fmt::Display for NestedField {
674    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
675        write!(f, "{}: ", self.id)?;
676        write!(f, "{}: ", self.name)?;
677        if self.required {
678            write!(f, "required ")?;
679        } else {
680            write!(f, "optional ")?;
681        }
682        write!(f, "{} ", self.field_type)?;
683        if let Some(doc) = &self.doc {
684            write!(f, "{doc}")?;
685        }
686        Ok(())
687    }
688}
689
/// A list is a collection of values with some element type. The element field has an integer id that is unique in the table schema.
/// Elements can be either optional or required. Element types may be any type.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct ListType {
    /// Element field of list type; carries the element's id, type and required flag.
    pub element_field: NestedFieldRef,
}
697
698impl ListType {
699    /// Construct a list type with the given element field.
700    pub fn new(element_field: NestedFieldRef) -> Self {
701        Self { element_field }
702    }
703}
704
705/// Module for type serialization/deserialization.
706pub(super) mod _serde {
707    use std::borrow::Cow;
708
709    use serde_derive::{Deserialize, Serialize};
710
711    use crate::spec::datatypes::Type::Map;
712    use crate::spec::datatypes::{
713        ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, StructType, Type,
714    };
715
716    /// List type for serialization and deserialization
717    #[derive(Serialize, Deserialize)]
718    #[serde(untagged)]
719    pub(super) enum SerdeType<'a> {
720        #[serde(rename_all = "kebab-case")]
721        List {
722            r#type: String,
723            element_id: i32,
724            element_required: bool,
725            element: Cow<'a, Type>,
726        },
727        Struct {
728            r#type: String,
729            fields: Cow<'a, [NestedFieldRef]>,
730        },
731        #[serde(rename_all = "kebab-case")]
732        Map {
733            r#type: String,
734            key_id: i32,
735            key: Cow<'a, Type>,
736            value_id: i32,
737            value_required: bool,
738            value: Cow<'a, Type>,
739        },
740        Primitive(PrimitiveType),
741    }
742
743    impl From<SerdeType<'_>> for Type {
744        fn from(value: SerdeType) -> Self {
745            match value {
746                SerdeType::List {
747                    r#type: _,
748                    element_id,
749                    element_required,
750                    element,
751                } => Self::List(ListType {
752                    element_field: NestedField::list_element(
753                        element_id,
754                        element.into_owned(),
755                        element_required,
756                    )
757                    .into(),
758                }),
759                SerdeType::Map {
760                    r#type: _,
761                    key_id,
762                    key,
763                    value_id,
764                    value_required,
765                    value,
766                } => Map(MapType {
767                    key_field: NestedField::map_key_element(key_id, key.into_owned()).into(),
768                    value_field: NestedField::map_value_element(
769                        value_id,
770                        value.into_owned(),
771                        value_required,
772                    )
773                    .into(),
774                }),
775                SerdeType::Struct { r#type: _, fields } => {
776                    Self::Struct(StructType::new(fields.into_owned()))
777                }
778                SerdeType::Primitive(p) => Self::Primitive(p),
779            }
780        }
781    }
782
783    impl<'a> From<&'a Type> for SerdeType<'a> {
784        fn from(value: &'a Type) -> Self {
785            match value {
786                Type::List(list) => SerdeType::List {
787                    r#type: "list".to_string(),
788                    element_id: list.element_field.id,
789                    element_required: list.element_field.required,
790                    element: Cow::Borrowed(&list.element_field.field_type),
791                },
792                Type::Map(map) => SerdeType::Map {
793                    r#type: "map".to_string(),
794                    key_id: map.key_field.id,
795                    key: Cow::Borrowed(&map.key_field.field_type),
796                    value_id: map.value_field.id,
797                    value_required: map.value_field.required,
798                    value: Cow::Borrowed(&map.value_field.field_type),
799                },
800                Type::Struct(s) => SerdeType::Struct {
801                    r#type: "struct".to_string(),
802                    fields: Cow::Borrowed(&s.fields),
803                },
804                Type::Primitive(p) => SerdeType::Primitive(p.clone()),
805            }
806        }
807    }
808}
809
/// A map is a collection of key-value pairs with a key type and a value type.
/// Both the key field and value field each have an integer id that is unique in the table schema.
/// Map keys are required and map values can be either optional or required.
/// Both map keys and map values may be any type, including nested types.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct MapType {
    /// Field for key; carries the key's id and type.
    pub key_field: NestedFieldRef,
    /// Field for value; carries the value's id, type and required flag.
    pub value_field: NestedFieldRef,
}
821
822impl MapType {
823    /// Construct a map type with the given key and value fields.
824    pub fn new(key_field: NestedFieldRef, value_field: NestedFieldRef) -> Self {
825        Self {
826            key_field,
827            value_field,
828        }
829    }
830}
831
832#[cfg(test)]
833mod tests {
834    use pretty_assertions::assert_eq;
835    use uuid::Uuid;
836
837    use super::*;
838    use crate::spec::values::PrimitiveLiteral;
839
840    fn check_type_serde(json: &str, expected_type: Type) {
841        let desered_type: Type = serde_json::from_str(json).unwrap();
842        assert_eq!(desered_type, expected_type);
843
844        let sered_json = serde_json::to_string(&expected_type).unwrap();
845        let parsed_json_value = serde_json::from_str::<serde_json::Value>(&sered_json).unwrap();
846        let raw_json_value = serde_json::from_str::<serde_json::Value>(json).unwrap();
847
848        assert_eq!(parsed_json_value, raw_json_value);
849    }
850
851    #[test]
852    fn primitive_type_serde() {
853        let record = r#"
854    {
855        "type": "struct",
856        "fields": [
857            {"id": 1, "name": "bool_field", "required": true, "type": "boolean"},
858            {"id": 2, "name": "int_field", "required": true, "type": "int"},
859            {"id": 3, "name": "long_field", "required": true, "type": "long"},
860            {"id": 4, "name": "float_field", "required": true, "type": "float"},
861            {"id": 5, "name": "double_field", "required": true, "type": "double"},
862            {"id": 6, "name": "decimal_field", "required": true, "type": "decimal(9,2)"},
863            {"id": 7, "name": "date_field", "required": true, "type": "date"},
864            {"id": 8, "name": "time_field", "required": true, "type": "time"},
865            {"id": 9, "name": "timestamp_field", "required": true, "type": "timestamp"},
866            {"id": 10, "name": "timestamptz_field", "required": true, "type": "timestamptz"},
867            {"id": 11, "name": "timestamp_ns_field", "required": true, "type": "timestamp_ns"},
868            {"id": 12, "name": "timestamptz_ns_field", "required": true, "type": "timestamptz_ns"},
869            {"id": 13, "name": "uuid_field", "required": true, "type": "uuid"},
870            {"id": 14, "name": "fixed_field", "required": true, "type": "fixed[10]"},
871            {"id": 15, "name": "binary_field", "required": true, "type": "binary"},
872            {"id": 16, "name": "string_field", "required": true, "type": "string"}
873        ]
874    }
875    "#;
876
877        check_type_serde(
878            record,
879            Type::Struct(StructType {
880                fields: vec![
881                    NestedField::required(1, "bool_field", Type::Primitive(PrimitiveType::Boolean))
882                        .into(),
883                    NestedField::required(2, "int_field", Type::Primitive(PrimitiveType::Int))
884                        .into(),
885                    NestedField::required(3, "long_field", Type::Primitive(PrimitiveType::Long))
886                        .into(),
887                    NestedField::required(4, "float_field", Type::Primitive(PrimitiveType::Float))
888                        .into(),
889                    NestedField::required(
890                        5,
891                        "double_field",
892                        Type::Primitive(PrimitiveType::Double),
893                    )
894                    .into(),
895                    NestedField::required(
896                        6,
897                        "decimal_field",
898                        Type::Primitive(PrimitiveType::Decimal {
899                            precision: 9,
900                            scale: 2,
901                        }),
902                    )
903                    .into(),
904                    NestedField::required(7, "date_field", Type::Primitive(PrimitiveType::Date))
905                        .into(),
906                    NestedField::required(8, "time_field", Type::Primitive(PrimitiveType::Time))
907                        .into(),
908                    NestedField::required(
909                        9,
910                        "timestamp_field",
911                        Type::Primitive(PrimitiveType::Timestamp),
912                    )
913                    .into(),
914                    NestedField::required(
915                        10,
916                        "timestamptz_field",
917                        Type::Primitive(PrimitiveType::Timestamptz),
918                    )
919                    .into(),
920                    NestedField::required(
921                        11,
922                        "timestamp_ns_field",
923                        Type::Primitive(PrimitiveType::TimestampNs),
924                    )
925                    .into(),
926                    NestedField::required(
927                        12,
928                        "timestamptz_ns_field",
929                        Type::Primitive(PrimitiveType::TimestamptzNs),
930                    )
931                    .into(),
932                    NestedField::required(13, "uuid_field", Type::Primitive(PrimitiveType::Uuid))
933                        .into(),
934                    NestedField::required(
935                        14,
936                        "fixed_field",
937                        Type::Primitive(PrimitiveType::Fixed(10)),
938                    )
939                    .into(),
940                    NestedField::required(
941                        15,
942                        "binary_field",
943                        Type::Primitive(PrimitiveType::Binary),
944                    )
945                    .into(),
946                    NestedField::required(
947                        16,
948                        "string_field",
949                        Type::Primitive(PrimitiveType::String),
950                    )
951                    .into(),
952                ],
953                id_lookup: OnceLock::default(),
954                name_lookup: OnceLock::default(),
955            }),
956        )
957    }
958
959    #[test]
960    fn struct_type() {
961        let record = r#"
962        {
963            "type": "struct",
964            "fields": [
965                {
966                    "id": 1,
967                    "name": "id",
968                    "required": true,
969                    "type": "uuid",
970                    "initial-default": "0db3e2a8-9d1d-42b9-aa7b-74ebe558dceb",
971                    "write-default": "ec5911be-b0a7-458c-8438-c9a3e53cffae"
972                }, {
973                    "id": 2,
974                    "name": "data",
975                    "required": false,
976                    "type": "int"
977                }
978            ]
979        }
980        "#;
981
982        check_type_serde(
983            record,
984            Type::Struct(StructType {
985                fields: vec![
986                    NestedField::required(1, "id", Type::Primitive(PrimitiveType::Uuid))
987                        .with_initial_default(Literal::Primitive(PrimitiveLiteral::UInt128(
988                            Uuid::parse_str("0db3e2a8-9d1d-42b9-aa7b-74ebe558dceb")
989                                .unwrap()
990                                .as_u128(),
991                        )))
992                        .with_write_default(Literal::Primitive(PrimitiveLiteral::UInt128(
993                            Uuid::parse_str("ec5911be-b0a7-458c-8438-c9a3e53cffae")
994                                .unwrap()
995                                .as_u128(),
996                        )))
997                        .into(),
998                    NestedField::optional(2, "data", Type::Primitive(PrimitiveType::Int)).into(),
999                ],
1000                id_lookup: HashMap::from([(1, 0), (2, 1)]).into(),
1001                name_lookup: HashMap::from([("id".to_string(), 0), ("data".to_string(), 1)]).into(),
1002            }),
1003        )
1004    }
1005
1006    #[test]
1007    fn test_deeply_nested_struct() {
1008        let record = r#"
1009{
1010  "type": "struct",
1011  "fields": [
1012    {
1013      "id": 1,
1014      "name": "id",
1015      "required": true,
1016      "type": "uuid",
1017      "initial-default": "0db3e2a8-9d1d-42b9-aa7b-74ebe558dceb",
1018      "write-default": "ec5911be-b0a7-458c-8438-c9a3e53cffae"
1019    },
1020    {
1021      "id": 2,
1022      "name": "data",
1023      "required": false,
1024      "type": "int"
1025    },
1026    {
1027      "id": 3,
1028      "name": "address",
1029      "required": true,
1030      "type": {
1031        "type": "struct",
1032        "fields": [
1033          {
1034            "id": 4,
1035            "name": "street",
1036            "required": true,
1037            "type": "string"
1038          },
1039          {
1040            "id": 5,
1041            "name": "province",
1042            "required": false,
1043            "type": "string"
1044          },
1045          {
1046            "id": 6,
1047            "name": "zip",
1048            "required": true,
1049            "type": "int"
1050          }
1051        ]
1052      }
1053    }
1054  ]
1055}
1056"#;
1057
1058        let struct_type = Type::Struct(StructType::new(vec![
1059            NestedField::required(1, "id", Type::Primitive(PrimitiveType::Uuid))
1060                .with_initial_default(Literal::Primitive(PrimitiveLiteral::UInt128(
1061                    Uuid::parse_str("0db3e2a8-9d1d-42b9-aa7b-74ebe558dceb")
1062                        .unwrap()
1063                        .as_u128(),
1064                )))
1065                .with_write_default(Literal::Primitive(PrimitiveLiteral::UInt128(
1066                    Uuid::parse_str("ec5911be-b0a7-458c-8438-c9a3e53cffae")
1067                        .unwrap()
1068                        .as_u128(),
1069                )))
1070                .into(),
1071            NestedField::optional(2, "data", Type::Primitive(PrimitiveType::Int)).into(),
1072            NestedField::required(
1073                3,
1074                "address",
1075                Type::Struct(StructType::new(vec![
1076                    NestedField::required(4, "street", Type::Primitive(PrimitiveType::String))
1077                        .into(),
1078                    NestedField::optional(5, "province", Type::Primitive(PrimitiveType::String))
1079                        .into(),
1080                    NestedField::required(6, "zip", Type::Primitive(PrimitiveType::Int)).into(),
1081                ])),
1082            )
1083            .into(),
1084        ]));
1085
1086        check_type_serde(record, struct_type)
1087    }
1088
1089    #[test]
1090    fn list() {
1091        let record = r#"
1092        {
1093            "type": "list",
1094            "element-id": 3,
1095            "element-required": true,
1096            "element": "string"
1097        }
1098        "#;
1099
1100        check_type_serde(
1101            record,
1102            Type::List(ListType {
1103                element_field: NestedField::list_element(
1104                    3,
1105                    Type::Primitive(PrimitiveType::String),
1106                    true,
1107                )
1108                .into(),
1109            }),
1110        );
1111    }
1112
1113    #[test]
1114    fn map() {
1115        let record = r#"
1116        {
1117            "type": "map",
1118            "key-id": 4,
1119            "key": "string",
1120            "value-id": 5,
1121            "value-required": false,
1122            "value": "double"
1123        }
1124        "#;
1125
1126        check_type_serde(
1127            record,
1128            Type::Map(MapType {
1129                key_field: NestedField::map_key_element(4, Type::Primitive(PrimitiveType::String))
1130                    .into(),
1131                value_field: NestedField::map_value_element(
1132                    5,
1133                    Type::Primitive(PrimitiveType::Double),
1134                    false,
1135                )
1136                .into(),
1137            }),
1138        );
1139    }
1140
1141    #[test]
1142    fn map_int() {
1143        let record = r#"
1144        {
1145            "type": "map",
1146            "key-id": 4,
1147            "key": "int",
1148            "value-id": 5,
1149            "value-required": false,
1150            "value": "string"
1151        }
1152        "#;
1153
1154        check_type_serde(
1155            record,
1156            Type::Map(MapType {
1157                key_field: NestedField::map_key_element(4, Type::Primitive(PrimitiveType::Int))
1158                    .into(),
1159                value_field: NestedField::map_value_element(
1160                    5,
1161                    Type::Primitive(PrimitiveType::String),
1162                    false,
1163                )
1164                .into(),
1165            }),
1166        );
1167    }
1168
1169    #[test]
1170    fn test_decimal_precision() {
1171        let expected_max_precision = [
1172            2, 4, 6, 9, 11, 14, 16, 18, 21, 23, 26, 28, 31, 33, 35, 38, 40, 43, 45, 47, 50, 52, 55,
1173            57,
1174        ];
1175        for (i, max_precision) in expected_max_precision.iter().enumerate() {
1176            assert_eq!(
1177                *max_precision,
1178                Type::decimal_max_precision(i as u32 + 1).unwrap(),
1179                "Failed calculate max precision for {i}"
1180            );
1181        }
1182
1183        assert_eq!(5, Type::decimal_required_bytes(10).unwrap());
1184        assert_eq!(16, Type::decimal_required_bytes(38).unwrap());
1185    }
1186
1187    #[test]
1188    fn test_primitive_type_compatible() {
1189        let pairs = vec![
1190            (PrimitiveType::Boolean, PrimitiveLiteral::Boolean(true)),
1191            (PrimitiveType::Int, PrimitiveLiteral::Int(1)),
1192            (PrimitiveType::Long, PrimitiveLiteral::Long(1)),
1193            (PrimitiveType::Float, PrimitiveLiteral::Float(1.0.into())),
1194            (PrimitiveType::Double, PrimitiveLiteral::Double(1.0.into())),
1195            (
1196                PrimitiveType::Decimal {
1197                    precision: 9,
1198                    scale: 2,
1199                },
1200                PrimitiveLiteral::Int128(1),
1201            ),
1202            (PrimitiveType::Date, PrimitiveLiteral::Int(1)),
1203            (PrimitiveType::Time, PrimitiveLiteral::Long(1)),
1204            (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(1)),
1205            (PrimitiveType::Timestamp, PrimitiveLiteral::Long(1)),
1206            (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(1)),
1207            (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(1)),
1208            (
1209                PrimitiveType::Uuid,
1210                PrimitiveLiteral::UInt128(Uuid::new_v4().as_u128()),
1211            ),
1212            (PrimitiveType::Fixed(8), PrimitiveLiteral::Binary(vec![1])),
1213            (PrimitiveType::Binary, PrimitiveLiteral::Binary(vec![1])),
1214        ];
1215        for (ty, literal) in pairs {
1216            assert!(ty.compatible(&literal));
1217        }
1218    }
1219
1220    #[test]
1221    fn struct_type_with_type_field() {
1222        // Test that StructType properly deserializes JSON with "type":"struct" field
1223        // This was previously broken because the deserializer wasn't consuming the type field value
1224        let json = r#"
1225        {
1226            "type": "struct",
1227            "fields": [
1228                {"id": 1, "name": "field1", "required": true, "type": "string"}
1229            ]
1230        }
1231        "#;
1232
1233        let struct_type: StructType = serde_json::from_str(json)
1234            .expect("Should successfully deserialize StructType with type field");
1235
1236        assert_eq!(struct_type.fields().len(), 1);
1237        assert_eq!(struct_type.fields()[0].name, "field1");
1238    }
1239
1240    #[test]
1241    fn struct_type_rejects_wrong_type() {
1242        // Test that StructType validation rejects incorrect type field values
1243        let json = r#"
1244        {
1245            "type": "list",
1246            "fields": [
1247                {"id": 1, "name": "field1", "required": true, "type": "string"}
1248            ]
1249        }
1250        "#;
1251
1252        let result = serde_json::from_str::<StructType>(json);
1253        assert!(
1254            result.is_err(),
1255            "Should reject StructType with wrong type field"
1256        );
1257        assert!(
1258            result
1259                .unwrap_err()
1260                .to_string()
1261                .contains("expected type 'struct'")
1262        );
1263    }
1264}