iceberg/spec/
datatypes.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18/*!
19 * Data Types
20 */
21use std::collections::HashMap;
22use std::convert::identity;
23use std::fmt;
24use std::ops::Index;
25use std::sync::{Arc, OnceLock};
26
27use ::serde::de::{MapAccess, Visitor};
28use serde::de::{Error, IntoDeserializer};
29use serde::{Deserialize, Deserializer, Serialize, Serializer, de};
30use serde_json::Value as JsonValue;
31
32use super::values::Literal;
33use crate::ensure_data_valid;
34use crate::error::Result;
35use crate::spec::PrimitiveLiteral;
36use crate::spec::datatypes::_decimal::{MAX_PRECISION, REQUIRED_LENGTH};
37
/// Name given to the single element field of a list type.
pub const LIST_FIELD_NAME: &str = "element";
/// Name given to the key field of a map type.
pub const MAP_KEY_FIELD_NAME: &str = "key";
/// Name given to the value field of a map type.
pub const MAP_VALUE_FIELD_NAME: &str = "value";

/// Number of entries in the byte-width -> max-precision lookup table.
pub(crate) const MAX_DECIMAL_BYTES: u32 = 24;
/// Largest decimal precision accepted by the decimal helpers in this module.
pub(crate) const MAX_DECIMAL_PRECISION: u32 = 38;
47
48mod _decimal {
49    use once_cell::sync::Lazy;
50
51    use crate::spec::{MAX_DECIMAL_BYTES, MAX_DECIMAL_PRECISION};
52
53    // Max precision of bytes, starts from 1
54    pub(super) static MAX_PRECISION: Lazy<[u32; MAX_DECIMAL_BYTES as usize]> = Lazy::new(|| {
55        let mut ret: [u32; 24] = [0; 24];
56        for (i, prec) in ret.iter_mut().enumerate() {
57            *prec = 2f64.powi((8 * (i + 1) - 1) as i32).log10().floor() as u32;
58        }
59
60        ret
61    });
62
63    //  Required bytes of precision, starts from 1
64    pub(super) static REQUIRED_LENGTH: Lazy<[u32; MAX_DECIMAL_PRECISION as usize]> =
65        Lazy::new(|| {
66            let mut ret: [u32; MAX_DECIMAL_PRECISION as usize] =
67                [0; MAX_DECIMAL_PRECISION as usize];
68
69            for (i, required_len) in ret.iter_mut().enumerate() {
70                for j in 0..MAX_PRECISION.len() {
71                    if MAX_PRECISION[j] >= ((i + 1) as u32) {
72                        *required_len = (j + 1) as u32;
73                        break;
74                    }
75                }
76            }
77
78            ret
79        });
80}
81
82#[derive(Debug, PartialEq, Eq, Clone)]
83/// All data types are either primitives or nested types, which are maps, lists, or structs.
84pub enum Type {
85    /// Primitive types
86    Primitive(PrimitiveType),
87    /// Struct type
88    Struct(StructType),
89    /// List type.
90    List(ListType),
91    /// Map type
92    Map(MapType),
93}
94
95impl fmt::Display for Type {
96    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
97        match self {
98            Type::Primitive(primitive) => write!(f, "{primitive}"),
99            Type::Struct(s) => write!(f, "{s}"),
100            Type::List(_) => write!(f, "list"),
101            Type::Map(_) => write!(f, "map"),
102        }
103    }
104}
105
106impl Type {
107    /// Whether the type is primitive type.
108    #[inline(always)]
109    pub fn is_primitive(&self) -> bool {
110        matches!(self, Type::Primitive(_))
111    }
112
113    /// Whether the type is struct type.
114    #[inline(always)]
115    pub fn is_struct(&self) -> bool {
116        matches!(self, Type::Struct(_))
117    }
118
119    /// Whether the type is nested type.
120    #[inline(always)]
121    pub fn is_nested(&self) -> bool {
122        matches!(self, Type::Struct(_) | Type::List(_) | Type::Map(_))
123    }
124
125    /// Convert Type to reference of PrimitiveType
126    pub fn as_primitive_type(&self) -> Option<&PrimitiveType> {
127        if let Type::Primitive(primitive_type) = self {
128            Some(primitive_type)
129        } else {
130            None
131        }
132    }
133
134    /// Convert Type to StructType
135    pub fn to_struct_type(self) -> Option<StructType> {
136        if let Type::Struct(struct_type) = self {
137            Some(struct_type)
138        } else {
139            None
140        }
141    }
142
143    /// Return max precision for decimal given [`num_bytes`] bytes.
144    #[inline(always)]
145    pub fn decimal_max_precision(num_bytes: u32) -> Result<u32> {
146        ensure_data_valid!(
147            num_bytes > 0 && num_bytes <= MAX_DECIMAL_BYTES,
148            "Decimal length larger than {MAX_DECIMAL_BYTES} is not supported: {num_bytes}",
149        );
150        Ok(MAX_PRECISION[num_bytes as usize - 1])
151    }
152
153    /// Returns minimum bytes required for decimal with [`precision`].
154    #[inline(always)]
155    pub fn decimal_required_bytes(precision: u32) -> Result<u32> {
156        ensure_data_valid!(
157            precision > 0 && precision <= MAX_DECIMAL_PRECISION,
158            "Decimals with precision larger than {MAX_DECIMAL_PRECISION} are not supported: {precision}",
159        );
160        Ok(REQUIRED_LENGTH[precision as usize - 1])
161    }
162
163    /// Creates  decimal type.
164    #[inline(always)]
165    pub fn decimal(precision: u32, scale: u32) -> Result<Self> {
166        ensure_data_valid!(
167            precision > 0 && precision <= MAX_DECIMAL_PRECISION,
168            "Decimals with precision larger than {MAX_DECIMAL_PRECISION} are not supported: {precision}",
169        );
170        Ok(Type::Primitive(PrimitiveType::Decimal { precision, scale }))
171    }
172
173    /// Check if it's float or double type.
174    #[inline(always)]
175    pub fn is_floating_type(&self) -> bool {
176        matches!(
177            self,
178            Type::Primitive(PrimitiveType::Float) | Type::Primitive(PrimitiveType::Double)
179        )
180    }
181}
182
183impl From<PrimitiveType> for Type {
184    fn from(value: PrimitiveType) -> Self {
185        Self::Primitive(value)
186    }
187}
188
189impl From<StructType> for Type {
190    fn from(value: StructType) -> Self {
191        Type::Struct(value)
192    }
193}
194
195impl From<ListType> for Type {
196    fn from(value: ListType) -> Self {
197        Type::List(value)
198    }
199}
200
201impl From<MapType> for Type {
202    fn from(value: MapType) -> Self {
203        Type::Map(value)
204    }
205}
206
207/// Primitive data types
208#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone, Hash)]
209#[serde(rename_all = "lowercase", remote = "Self")]
210pub enum PrimitiveType {
211    /// True or False
212    Boolean,
213    /// 32-bit signed integer
214    Int,
215    /// 64-bit signed integer
216    Long,
217    /// 32-bit IEEE 754 floating point.
218    Float,
219    /// 64-bit IEEE 754 floating point.
220    Double,
221    /// Fixed point decimal
222    Decimal {
223        /// Precision, must be 38 or less
224        precision: u32,
225        /// Scale
226        scale: u32,
227    },
228    /// Calendar date without timezone or time.
229    Date,
230    /// Time of day in microsecond precision, without date or timezone.
231    Time,
232    /// Timestamp in microsecond precision, without timezone
233    Timestamp,
234    /// Timestamp in microsecond precision, with timezone
235    Timestamptz,
236    /// Timestamp in nanosecond precision, without timezone
237    #[serde(rename = "timestamp_ns")]
238    TimestampNs,
239    /// Timestamp in nanosecond precision with timezone
240    #[serde(rename = "timestamptz_ns")]
241    TimestamptzNs,
242    /// Arbitrary-length character sequences encoded in utf-8
243    String,
244    /// Universally Unique Identifiers, should use 16-byte fixed
245    Uuid,
246    /// Fixed length byte array
247    Fixed(u64),
248    /// Arbitrary-length byte array.
249    Binary,
250}
251
252impl PrimitiveType {
253    /// Check whether literal is compatible with the type.
254    pub fn compatible(&self, literal: &PrimitiveLiteral) -> bool {
255        matches!(
256            (self, literal),
257            (PrimitiveType::Boolean, PrimitiveLiteral::Boolean(_))
258                | (PrimitiveType::Int, PrimitiveLiteral::Int(_))
259                | (PrimitiveType::Long, PrimitiveLiteral::Long(_))
260                | (PrimitiveType::Float, PrimitiveLiteral::Float(_))
261                | (PrimitiveType::Double, PrimitiveLiteral::Double(_))
262                | (PrimitiveType::Decimal { .. }, PrimitiveLiteral::Int128(_))
263                | (PrimitiveType::Date, PrimitiveLiteral::Int(_))
264                | (PrimitiveType::Time, PrimitiveLiteral::Long(_))
265                | (PrimitiveType::Timestamp, PrimitiveLiteral::Long(_))
266                | (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(_))
267                | (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(_))
268                | (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(_))
269                | (PrimitiveType::String, PrimitiveLiteral::String(_))
270                | (PrimitiveType::Uuid, PrimitiveLiteral::UInt128(_))
271                | (PrimitiveType::Fixed(_), PrimitiveLiteral::Binary(_))
272                | (PrimitiveType::Binary, PrimitiveLiteral::Binary(_))
273        )
274    }
275}
276
277impl Serialize for Type {
278    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
279    where S: Serializer {
280        let type_serde = _serde::SerdeType::from(self);
281        type_serde.serialize(serializer)
282    }
283}
284
285impl<'de> Deserialize<'de> for Type {
286    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
287    where D: Deserializer<'de> {
288        let type_serde = _serde::SerdeType::deserialize(deserializer)?;
289        Ok(Type::from(type_serde))
290    }
291}
292
293impl<'de> Deserialize<'de> for PrimitiveType {
294    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
295    where D: Deserializer<'de> {
296        let s = String::deserialize(deserializer)?;
297        if s.starts_with("decimal") {
298            deserialize_decimal(s.into_deserializer())
299        } else if s.starts_with("fixed") {
300            deserialize_fixed(s.into_deserializer())
301        } else {
302            PrimitiveType::deserialize(s.into_deserializer())
303        }
304    }
305}
306
307impl Serialize for PrimitiveType {
308    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
309    where S: Serializer {
310        match self {
311            PrimitiveType::Decimal { precision, scale } => {
312                serialize_decimal(precision, scale, serializer)
313            }
314            PrimitiveType::Fixed(l) => serialize_fixed(l, serializer),
315            _ => PrimitiveType::serialize(self, serializer),
316        }
317    }
318}
319
320fn deserialize_decimal<'de, D>(deserializer: D) -> std::result::Result<PrimitiveType, D::Error>
321where D: Deserializer<'de> {
322    let s = String::deserialize(deserializer)?;
323    let (precision, scale) = s
324        .trim_start_matches(r"decimal(")
325        .trim_end_matches(')')
326        .split_once(',')
327        .ok_or_else(|| D::Error::custom("Decimal requires precision and scale: {s}"))?;
328
329    Ok(PrimitiveType::Decimal {
330        precision: precision.trim().parse().map_err(D::Error::custom)?,
331        scale: scale.trim().parse().map_err(D::Error::custom)?,
332    })
333}
334
335fn serialize_decimal<S>(
336    precision: &u32,
337    scale: &u32,
338    serializer: S,
339) -> std::result::Result<S::Ok, S::Error>
340where
341    S: Serializer,
342{
343    serializer.serialize_str(&format!("decimal({precision},{scale})"))
344}
345
346fn deserialize_fixed<'de, D>(deserializer: D) -> std::result::Result<PrimitiveType, D::Error>
347where D: Deserializer<'de> {
348    let fixed = String::deserialize(deserializer)?
349        .trim_start_matches(r"fixed[")
350        .trim_end_matches(']')
351        .to_owned();
352
353    fixed
354        .parse()
355        .map(PrimitiveType::Fixed)
356        .map_err(D::Error::custom)
357}
358
359fn serialize_fixed<S>(value: &u64, serializer: S) -> std::result::Result<S::Ok, S::Error>
360where S: Serializer {
361    serializer.serialize_str(&format!("fixed[{value}]"))
362}
363
364impl fmt::Display for PrimitiveType {
365    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
366        match self {
367            PrimitiveType::Boolean => write!(f, "boolean"),
368            PrimitiveType::Int => write!(f, "int"),
369            PrimitiveType::Long => write!(f, "long"),
370            PrimitiveType::Float => write!(f, "float"),
371            PrimitiveType::Double => write!(f, "double"),
372            PrimitiveType::Decimal { precision, scale } => {
373                write!(f, "decimal({precision},{scale})")
374            }
375            PrimitiveType::Date => write!(f, "date"),
376            PrimitiveType::Time => write!(f, "time"),
377            PrimitiveType::Timestamp => write!(f, "timestamp"),
378            PrimitiveType::Timestamptz => write!(f, "timestamptz"),
379            PrimitiveType::TimestampNs => write!(f, "timestamp_ns"),
380            PrimitiveType::TimestamptzNs => write!(f, "timestamptz_ns"),
381            PrimitiveType::String => write!(f, "string"),
382            PrimitiveType::Uuid => write!(f, "uuid"),
383            PrimitiveType::Fixed(size) => write!(f, "fixed({size})"),
384            PrimitiveType::Binary => write!(f, "binary"),
385        }
386    }
387}
388
389/// DataType for a specific struct
390#[derive(Debug, Serialize, Clone, Default)]
391#[serde(rename = "struct", tag = "type")]
392pub struct StructType {
393    /// Struct fields
394    fields: Vec<NestedFieldRef>,
395    /// Lookup for index by field id
396    #[serde(skip_serializing)]
397    id_lookup: OnceLock<HashMap<i32, usize>>,
398    #[serde(skip_serializing)]
399    name_lookup: OnceLock<HashMap<String, usize>>,
400}
401
402impl<'de> Deserialize<'de> for StructType {
403    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
404    where D: Deserializer<'de> {
405        #[derive(Deserialize)]
406        #[serde(field_identifier, rename_all = "lowercase")]
407        enum Field {
408            Type,
409            Fields,
410        }
411
412        struct StructTypeVisitor;
413
414        impl<'de> Visitor<'de> for StructTypeVisitor {
415            type Value = StructType;
416
417            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
418                formatter.write_str("struct")
419            }
420
421            fn visit_map<V>(self, mut map: V) -> std::result::Result<StructType, V::Error>
422            where V: MapAccess<'de> {
423                let mut fields = None;
424                while let Some(key) = map.next_key()? {
425                    match key {
426                        Field::Type => {
427                            let type_val: String = map.next_value()?;
428                            if type_val != "struct" {
429                                return Err(serde::de::Error::custom(format!(
430                                    "expected type 'struct', got '{type_val}'"
431                                )));
432                            }
433                        }
434                        Field::Fields => {
435                            if fields.is_some() {
436                                return Err(serde::de::Error::duplicate_field("fields"));
437                            }
438                            fields = Some(map.next_value()?);
439                        }
440                    }
441                }
442                let fields: Vec<NestedFieldRef> =
443                    fields.ok_or_else(|| de::Error::missing_field("fields"))?;
444
445                Ok(StructType::new(fields))
446            }
447        }
448
449        const FIELDS: &[&str] = &["type", "fields"];
450        deserializer.deserialize_struct("struct", FIELDS, StructTypeVisitor)
451    }
452}
453
454impl StructType {
455    /// Creates a struct type with the given fields.
456    pub fn new(fields: Vec<NestedFieldRef>) -> Self {
457        Self {
458            fields,
459            id_lookup: OnceLock::new(),
460            name_lookup: OnceLock::new(),
461        }
462    }
463
464    /// Get struct field with certain id
465    pub fn field_by_id(&self, id: i32) -> Option<&NestedFieldRef> {
466        self.field_id_to_index(id).map(|idx| &self.fields[idx])
467    }
468
469    fn field_id_to_index(&self, field_id: i32) -> Option<usize> {
470        self.id_lookup
471            .get_or_init(|| {
472                HashMap::from_iter(self.fields.iter().enumerate().map(|(i, x)| (x.id, i)))
473            })
474            .get(&field_id)
475            .copied()
476    }
477
478    /// Get struct field with certain field name
479    pub fn field_by_name(&self, name: &str) -> Option<&NestedFieldRef> {
480        self.field_name_to_index(name).map(|idx| &self.fields[idx])
481    }
482
483    fn field_name_to_index(&self, name: &str) -> Option<usize> {
484        self.name_lookup
485            .get_or_init(|| {
486                HashMap::from_iter(
487                    self.fields
488                        .iter()
489                        .enumerate()
490                        .map(|(i, x)| (x.name.clone(), i)),
491                )
492            })
493            .get(name)
494            .copied()
495    }
496
497    /// Get fields.
498    pub fn fields(&self) -> &[NestedFieldRef] {
499        &self.fields
500    }
501}
502
503impl PartialEq for StructType {
504    fn eq(&self, other: &Self) -> bool {
505        self.fields == other.fields
506    }
507}
508
509impl Eq for StructType {}
510
511impl Index<usize> for StructType {
512    type Output = NestedField;
513
514    fn index(&self, index: usize) -> &Self::Output {
515        &self.fields[index]
516    }
517}
518
519impl fmt::Display for StructType {
520    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
521        write!(f, "struct<")?;
522        for field in &self.fields {
523            write!(f, "{}", field.field_type)?;
524        }
525        write!(f, ">")
526    }
527}
528
529#[derive(Debug, PartialEq, Serialize, Deserialize, Eq, Clone)]
530#[serde(from = "SerdeNestedField", into = "SerdeNestedField")]
531/// A struct is a tuple of typed values. Each field in the tuple is named and has an integer id that is unique in the table schema.
532/// Each field can be either optional or required, meaning that values can (or cannot) be null. Fields may be any type.
533/// Fields may have an optional comment or doc string. Fields can have default values.
534pub struct NestedField {
535    /// Id unique in table schema
536    pub id: i32,
537    /// Field Name
538    pub name: String,
539    /// Optional or required
540    pub required: bool,
541    /// Datatype
542    pub field_type: Box<Type>,
543    /// Fields may have an optional comment or doc string.
544    pub doc: Option<String>,
545    /// Used to populate the field’s value for all records that were written before the field was added to the schema
546    pub initial_default: Option<Literal>,
547    /// Used to populate the field’s value for any records written after the field was added to the schema, if the writer does not supply the field’s value
548    pub write_default: Option<Literal>,
549}
550
551#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
552#[serde(rename_all = "kebab-case")]
553struct SerdeNestedField {
554    pub id: i32,
555    pub name: String,
556    pub required: bool,
557    #[serde(rename = "type")]
558    pub field_type: Box<Type>,
559    #[serde(skip_serializing_if = "Option::is_none")]
560    pub doc: Option<String>,
561    #[serde(skip_serializing_if = "Option::is_none")]
562    pub initial_default: Option<JsonValue>,
563    #[serde(skip_serializing_if = "Option::is_none")]
564    pub write_default: Option<JsonValue>,
565}
566
567impl From<SerdeNestedField> for NestedField {
568    fn from(value: SerdeNestedField) -> Self {
569        NestedField {
570            id: value.id,
571            name: value.name,
572            required: value.required,
573            initial_default: value.initial_default.and_then(|x| {
574                Literal::try_from_json(x, &value.field_type)
575                    .ok()
576                    .and_then(identity)
577            }),
578            write_default: value.write_default.and_then(|x| {
579                Literal::try_from_json(x, &value.field_type)
580                    .ok()
581                    .and_then(identity)
582            }),
583            field_type: value.field_type,
584            doc: value.doc,
585        }
586    }
587}
588
589impl From<NestedField> for SerdeNestedField {
590    fn from(value: NestedField) -> Self {
591        let initial_default = value.initial_default.map(|x| x.try_into_json(&value.field_type).expect("We should have checked this in NestedField::with_initial_default, it can't be converted to json value"));
592        let write_default = value.write_default.map(|x| x.try_into_json(&value.field_type).expect("We should have checked this in NestedField::with_write_default, it can't be converted to json value"));
593        SerdeNestedField {
594            id: value.id,
595            name: value.name,
596            required: value.required,
597            field_type: value.field_type,
598            doc: value.doc,
599            initial_default,
600            write_default,
601        }
602    }
603}
604
605/// Reference to nested field.
606pub type NestedFieldRef = Arc<NestedField>;
607
608impl NestedField {
609    /// Construct a new field.
610    pub fn new(id: i32, name: impl ToString, field_type: Type, required: bool) -> Self {
611        Self {
612            id,
613            name: name.to_string(),
614            required,
615            field_type: Box::new(field_type),
616            doc: None,
617            initial_default: None,
618            write_default: None,
619        }
620    }
621
622    /// Construct a required field.
623    pub fn required(id: i32, name: impl ToString, field_type: Type) -> Self {
624        Self::new(id, name, field_type, true)
625    }
626
627    /// Construct an optional field.
628    pub fn optional(id: i32, name: impl ToString, field_type: Type) -> Self {
629        Self::new(id, name, field_type, false)
630    }
631
632    /// Construct list type's element field.
633    pub fn list_element(id: i32, field_type: Type, required: bool) -> Self {
634        Self::new(id, LIST_FIELD_NAME, field_type, required)
635    }
636
637    /// Construct map type's key field.
638    pub fn map_key_element(id: i32, field_type: Type) -> Self {
639        Self::required(id, MAP_KEY_FIELD_NAME, field_type)
640    }
641
642    /// Construct map type's value field.
643    pub fn map_value_element(id: i32, field_type: Type, required: bool) -> Self {
644        Self::new(id, MAP_VALUE_FIELD_NAME, field_type, required)
645    }
646
647    /// Set the field's doc.
648    pub fn with_doc(mut self, doc: impl ToString) -> Self {
649        self.doc = Some(doc.to_string());
650        self
651    }
652
653    /// Set the field's initial default value.
654    pub fn with_initial_default(mut self, value: Literal) -> Self {
655        self.initial_default = Some(value);
656        self
657    }
658
659    /// Set the field's initial default value.
660    pub fn with_write_default(mut self, value: Literal) -> Self {
661        self.write_default = Some(value);
662        self
663    }
664
665    /// Set the id of the field.
666    pub(crate) fn with_id(mut self, id: i32) -> Self {
667        self.id = id;
668        self
669    }
670}
671
672impl fmt::Display for NestedField {
673    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
674        write!(f, "{}: ", self.id)?;
675        write!(f, "{}: ", self.name)?;
676        if self.required {
677            write!(f, "required ")?;
678        } else {
679            write!(f, "optional ")?;
680        }
681        write!(f, "{} ", self.field_type)?;
682        if let Some(doc) = &self.doc {
683            write!(f, "{doc}")?;
684        }
685        Ok(())
686    }
687}
688
689#[derive(Debug, PartialEq, Eq, Clone)]
690/// A list is a collection of values with some element type. The element field has an integer id that is unique in the table schema.
691/// Elements can be either optional or required. Element types may be any type.
692pub struct ListType {
693    /// Element field of list type.
694    pub element_field: NestedFieldRef,
695}
696
697impl ListType {
698    /// Construct a list type with the given element field.
699    pub fn new(element_field: NestedFieldRef) -> Self {
700        Self { element_field }
701    }
702}
703
704/// Module for type serialization/deserialization.
705pub(super) mod _serde {
706    use std::borrow::Cow;
707
708    use serde_derive::{Deserialize, Serialize};
709
710    use crate::spec::datatypes::Type::Map;
711    use crate::spec::datatypes::{
712        ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, StructType, Type,
713    };
714
715    /// List type for serialization and deserialization
716    #[derive(Serialize, Deserialize)]
717    #[serde(untagged)]
718    pub(super) enum SerdeType<'a> {
719        #[serde(rename_all = "kebab-case")]
720        List {
721            r#type: String,
722            element_id: i32,
723            element_required: bool,
724            element: Cow<'a, Type>,
725        },
726        Struct {
727            r#type: String,
728            fields: Cow<'a, [NestedFieldRef]>,
729        },
730        #[serde(rename_all = "kebab-case")]
731        Map {
732            r#type: String,
733            key_id: i32,
734            key: Cow<'a, Type>,
735            value_id: i32,
736            value_required: bool,
737            value: Cow<'a, Type>,
738        },
739        Primitive(PrimitiveType),
740    }
741
742    impl From<SerdeType<'_>> for Type {
743        fn from(value: SerdeType) -> Self {
744            match value {
745                SerdeType::List {
746                    r#type: _,
747                    element_id,
748                    element_required,
749                    element,
750                } => Self::List(ListType {
751                    element_field: NestedField::list_element(
752                        element_id,
753                        element.into_owned(),
754                        element_required,
755                    )
756                    .into(),
757                }),
758                SerdeType::Map {
759                    r#type: _,
760                    key_id,
761                    key,
762                    value_id,
763                    value_required,
764                    value,
765                } => Map(MapType {
766                    key_field: NestedField::map_key_element(key_id, key.into_owned()).into(),
767                    value_field: NestedField::map_value_element(
768                        value_id,
769                        value.into_owned(),
770                        value_required,
771                    )
772                    .into(),
773                }),
774                SerdeType::Struct { r#type: _, fields } => {
775                    Self::Struct(StructType::new(fields.into_owned()))
776                }
777                SerdeType::Primitive(p) => Self::Primitive(p),
778            }
779        }
780    }
781
782    impl<'a> From<&'a Type> for SerdeType<'a> {
783        fn from(value: &'a Type) -> Self {
784            match value {
785                Type::List(list) => SerdeType::List {
786                    r#type: "list".to_string(),
787                    element_id: list.element_field.id,
788                    element_required: list.element_field.required,
789                    element: Cow::Borrowed(&list.element_field.field_type),
790                },
791                Type::Map(map) => SerdeType::Map {
792                    r#type: "map".to_string(),
793                    key_id: map.key_field.id,
794                    key: Cow::Borrowed(&map.key_field.field_type),
795                    value_id: map.value_field.id,
796                    value_required: map.value_field.required,
797                    value: Cow::Borrowed(&map.value_field.field_type),
798                },
799                Type::Struct(s) => SerdeType::Struct {
800                    r#type: "struct".to_string(),
801                    fields: Cow::Borrowed(&s.fields),
802                },
803                Type::Primitive(p) => SerdeType::Primitive(p.clone()),
804            }
805        }
806    }
807}
808
809#[derive(Debug, PartialEq, Eq, Clone)]
810/// A map is a collection of key-value pairs with a key type and a value type.
811/// Both the key field and value field each have an integer id that is unique in the table schema.
812/// Map keys are required and map values can be either optional or required.
813/// Both map keys and map values may be any type, including nested types.
814pub struct MapType {
815    /// Field for key.
816    pub key_field: NestedFieldRef,
817    /// Field for value.
818    pub value_field: NestedFieldRef,
819}
820
821impl MapType {
822    /// Construct a map type with the given key and value fields.
823    pub fn new(key_field: NestedFieldRef, value_field: NestedFieldRef) -> Self {
824        Self {
825            key_field,
826            value_field,
827        }
828    }
829
830    /// Construct an optional map type with the given key and value fields.
831    pub fn optional(key_id: i32, key_type: Type, value_id: i32, value_type: Type) -> Self {
832        Self {
833            key_field: NestedField::map_key_element(key_id, key_type).into(),
834            value_field: NestedField::map_value_element(value_id, value_type, false).into(),
835        }
836    }
837
838    /// Construct a required map type with the given key and value fields.
839    pub fn required(key_id: i32, key_type: Type, value_id: i32, value_type: Type) -> Self {
840        Self {
841            key_field: NestedField::map_key_element(key_id, key_type).into(),
842            value_field: NestedField::map_value_element(value_id, value_type, true).into(),
843        }
844    }
845}
846
847#[cfg(test)]
848mod tests {
849    use pretty_assertions::assert_eq;
850    use uuid::Uuid;
851
852    use super::*;
853    use crate::spec::values::PrimitiveLiteral;
854
855    fn check_type_serde(json: &str, expected_type: Type) {
856        let desered_type: Type = serde_json::from_str(json).unwrap();
857        assert_eq!(desered_type, expected_type);
858
859        let sered_json = serde_json::to_string(&expected_type).unwrap();
860        let parsed_json_value = serde_json::from_str::<serde_json::Value>(&sered_json).unwrap();
861        let raw_json_value = serde_json::from_str::<serde_json::Value>(json).unwrap();
862
863        assert_eq!(parsed_json_value, raw_json_value);
864    }
865
866    #[test]
867    fn primitive_type_serde() {
868        let record = r#"
869    {
870        "type": "struct",
871        "fields": [
872            {"id": 1, "name": "bool_field", "required": true, "type": "boolean"},
873            {"id": 2, "name": "int_field", "required": true, "type": "int"},
874            {"id": 3, "name": "long_field", "required": true, "type": "long"},
875            {"id": 4, "name": "float_field", "required": true, "type": "float"},
876            {"id": 5, "name": "double_field", "required": true, "type": "double"},
877            {"id": 6, "name": "decimal_field", "required": true, "type": "decimal(9,2)"},
878            {"id": 7, "name": "date_field", "required": true, "type": "date"},
879            {"id": 8, "name": "time_field", "required": true, "type": "time"},
880            {"id": 9, "name": "timestamp_field", "required": true, "type": "timestamp"},
881            {"id": 10, "name": "timestamptz_field", "required": true, "type": "timestamptz"},
882            {"id": 11, "name": "timestamp_ns_field", "required": true, "type": "timestamp_ns"},
883            {"id": 12, "name": "timestamptz_ns_field", "required": true, "type": "timestamptz_ns"},
884            {"id": 13, "name": "uuid_field", "required": true, "type": "uuid"},
885            {"id": 14, "name": "fixed_field", "required": true, "type": "fixed[10]"},
886            {"id": 15, "name": "binary_field", "required": true, "type": "binary"},
887            {"id": 16, "name": "string_field", "required": true, "type": "string"}
888        ]
889    }
890    "#;
891
892        check_type_serde(
893            record,
894            Type::Struct(StructType {
895                fields: vec![
896                    NestedField::required(1, "bool_field", Type::Primitive(PrimitiveType::Boolean))
897                        .into(),
898                    NestedField::required(2, "int_field", Type::Primitive(PrimitiveType::Int))
899                        .into(),
900                    NestedField::required(3, "long_field", Type::Primitive(PrimitiveType::Long))
901                        .into(),
902                    NestedField::required(4, "float_field", Type::Primitive(PrimitiveType::Float))
903                        .into(),
904                    NestedField::required(
905                        5,
906                        "double_field",
907                        Type::Primitive(PrimitiveType::Double),
908                    )
909                    .into(),
910                    NestedField::required(
911                        6,
912                        "decimal_field",
913                        Type::Primitive(PrimitiveType::Decimal {
914                            precision: 9,
915                            scale: 2,
916                        }),
917                    )
918                    .into(),
919                    NestedField::required(7, "date_field", Type::Primitive(PrimitiveType::Date))
920                        .into(),
921                    NestedField::required(8, "time_field", Type::Primitive(PrimitiveType::Time))
922                        .into(),
923                    NestedField::required(
924                        9,
925                        "timestamp_field",
926                        Type::Primitive(PrimitiveType::Timestamp),
927                    )
928                    .into(),
929                    NestedField::required(
930                        10,
931                        "timestamptz_field",
932                        Type::Primitive(PrimitiveType::Timestamptz),
933                    )
934                    .into(),
935                    NestedField::required(
936                        11,
937                        "timestamp_ns_field",
938                        Type::Primitive(PrimitiveType::TimestampNs),
939                    )
940                    .into(),
941                    NestedField::required(
942                        12,
943                        "timestamptz_ns_field",
944                        Type::Primitive(PrimitiveType::TimestamptzNs),
945                    )
946                    .into(),
947                    NestedField::required(13, "uuid_field", Type::Primitive(PrimitiveType::Uuid))
948                        .into(),
949                    NestedField::required(
950                        14,
951                        "fixed_field",
952                        Type::Primitive(PrimitiveType::Fixed(10)),
953                    )
954                    .into(),
955                    NestedField::required(
956                        15,
957                        "binary_field",
958                        Type::Primitive(PrimitiveType::Binary),
959                    )
960                    .into(),
961                    NestedField::required(
962                        16,
963                        "string_field",
964                        Type::Primitive(PrimitiveType::String),
965                    )
966                    .into(),
967                ],
968                id_lookup: OnceLock::default(),
969                name_lookup: OnceLock::default(),
970            }),
971        )
972    }
973
974    #[test]
975    fn struct_type() {
976        let record = r#"
977        {
978            "type": "struct",
979            "fields": [
980                {
981                    "id": 1,
982                    "name": "id",
983                    "required": true,
984                    "type": "uuid",
985                    "initial-default": "0db3e2a8-9d1d-42b9-aa7b-74ebe558dceb",
986                    "write-default": "ec5911be-b0a7-458c-8438-c9a3e53cffae"
987                }, {
988                    "id": 2,
989                    "name": "data",
990                    "required": false,
991                    "type": "int"
992                }
993            ]
994        }
995        "#;
996
997        check_type_serde(
998            record,
999            Type::Struct(StructType {
1000                fields: vec![
1001                    NestedField::required(1, "id", Type::Primitive(PrimitiveType::Uuid))
1002                        .with_initial_default(Literal::Primitive(PrimitiveLiteral::UInt128(
1003                            Uuid::parse_str("0db3e2a8-9d1d-42b9-aa7b-74ebe558dceb")
1004                                .unwrap()
1005                                .as_u128(),
1006                        )))
1007                        .with_write_default(Literal::Primitive(PrimitiveLiteral::UInt128(
1008                            Uuid::parse_str("ec5911be-b0a7-458c-8438-c9a3e53cffae")
1009                                .unwrap()
1010                                .as_u128(),
1011                        )))
1012                        .into(),
1013                    NestedField::optional(2, "data", Type::Primitive(PrimitiveType::Int)).into(),
1014                ],
1015                id_lookup: HashMap::from([(1, 0), (2, 1)]).into(),
1016                name_lookup: HashMap::from([("id".to_string(), 0), ("data".to_string(), 1)]).into(),
1017            }),
1018        )
1019    }
1020
1021    #[test]
1022    fn test_deeply_nested_struct() {
1023        let record = r#"
1024{
1025  "type": "struct",
1026  "fields": [
1027    {
1028      "id": 1,
1029      "name": "id",
1030      "required": true,
1031      "type": "uuid",
1032      "initial-default": "0db3e2a8-9d1d-42b9-aa7b-74ebe558dceb",
1033      "write-default": "ec5911be-b0a7-458c-8438-c9a3e53cffae"
1034    },
1035    {
1036      "id": 2,
1037      "name": "data",
1038      "required": false,
1039      "type": "int"
1040    },
1041    {
1042      "id": 3,
1043      "name": "address",
1044      "required": true,
1045      "type": {
1046        "type": "struct",
1047        "fields": [
1048          {
1049            "id": 4,
1050            "name": "street",
1051            "required": true,
1052            "type": "string"
1053          },
1054          {
1055            "id": 5,
1056            "name": "province",
1057            "required": false,
1058            "type": "string"
1059          },
1060          {
1061            "id": 6,
1062            "name": "zip",
1063            "required": true,
1064            "type": "int"
1065          }
1066        ]
1067      }
1068    }
1069  ]
1070}
1071"#;
1072
1073        let struct_type = Type::Struct(StructType::new(vec![
1074            NestedField::required(1, "id", Type::Primitive(PrimitiveType::Uuid))
1075                .with_initial_default(Literal::Primitive(PrimitiveLiteral::UInt128(
1076                    Uuid::parse_str("0db3e2a8-9d1d-42b9-aa7b-74ebe558dceb")
1077                        .unwrap()
1078                        .as_u128(),
1079                )))
1080                .with_write_default(Literal::Primitive(PrimitiveLiteral::UInt128(
1081                    Uuid::parse_str("ec5911be-b0a7-458c-8438-c9a3e53cffae")
1082                        .unwrap()
1083                        .as_u128(),
1084                )))
1085                .into(),
1086            NestedField::optional(2, "data", Type::Primitive(PrimitiveType::Int)).into(),
1087            NestedField::required(
1088                3,
1089                "address",
1090                Type::Struct(StructType::new(vec![
1091                    NestedField::required(4, "street", Type::Primitive(PrimitiveType::String))
1092                        .into(),
1093                    NestedField::optional(5, "province", Type::Primitive(PrimitiveType::String))
1094                        .into(),
1095                    NestedField::required(6, "zip", Type::Primitive(PrimitiveType::Int)).into(),
1096                ])),
1097            )
1098            .into(),
1099        ]));
1100
1101        check_type_serde(record, struct_type)
1102    }
1103
1104    #[test]
1105    fn list() {
1106        let record = r#"
1107        {
1108            "type": "list",
1109            "element-id": 3,
1110            "element-required": true,
1111            "element": "string"
1112        }
1113        "#;
1114
1115        check_type_serde(
1116            record,
1117            Type::List(ListType {
1118                element_field: NestedField::list_element(
1119                    3,
1120                    Type::Primitive(PrimitiveType::String),
1121                    true,
1122                )
1123                .into(),
1124            }),
1125        );
1126    }
1127
1128    #[test]
1129    fn map() {
1130        let record = r#"
1131        {
1132            "type": "map",
1133            "key-id": 4,
1134            "key": "string",
1135            "value-id": 5,
1136            "value-required": false,
1137            "value": "double"
1138        }
1139        "#;
1140
1141        check_type_serde(
1142            record,
1143            Type::Map(MapType {
1144                key_field: NestedField::map_key_element(4, Type::Primitive(PrimitiveType::String))
1145                    .into(),
1146                value_field: NestedField::map_value_element(
1147                    5,
1148                    Type::Primitive(PrimitiveType::Double),
1149                    false,
1150                )
1151                .into(),
1152            }),
1153        );
1154
1155        check_type_serde(
1156            record,
1157            Type::Map(MapType::optional(
1158                4,
1159                Type::Primitive(PrimitiveType::String),
1160                5,
1161                Type::Primitive(PrimitiveType::Double),
1162            )),
1163        );
1164    }
1165
1166    #[test]
1167    fn map_int() {
1168        let record = r#"
1169        {
1170            "type": "map",
1171            "key-id": 4,
1172            "key": "int",
1173            "value-id": 5,
1174            "value-required": false,
1175            "value": "string"
1176        }
1177        "#;
1178
1179        check_type_serde(
1180            record,
1181            Type::Map(MapType {
1182                key_field: NestedField::map_key_element(4, Type::Primitive(PrimitiveType::Int))
1183                    .into(),
1184                value_field: NestedField::map_value_element(
1185                    5,
1186                    Type::Primitive(PrimitiveType::String),
1187                    false,
1188                )
1189                .into(),
1190            }),
1191        );
1192
1193        check_type_serde(
1194            record,
1195            Type::Map(MapType::optional(
1196                4,
1197                Type::Primitive(PrimitiveType::Int),
1198                5,
1199                Type::Primitive(PrimitiveType::String),
1200            )),
1201        );
1202    }
1203
1204    #[test]
1205    fn map_required_int() {
1206        let record = r#"
1207        {
1208            "type": "map",
1209            "key-id": 4,
1210            "key": "int",
1211            "value-id": 5,
1212            "value-required": true,
1213            "value": "string"
1214        }
1215        "#;
1216
1217        check_type_serde(
1218            record,
1219            Type::Map(MapType::required(
1220                4,
1221                Type::Primitive(PrimitiveType::Int),
1222                5,
1223                Type::Primitive(PrimitiveType::String),
1224            )),
1225        );
1226    }
1227
1228    #[test]
1229    fn test_decimal_precision() {
1230        let expected_max_precision = [
1231            2, 4, 6, 9, 11, 14, 16, 18, 21, 23, 26, 28, 31, 33, 35, 38, 40, 43, 45, 47, 50, 52, 55,
1232            57,
1233        ];
1234        for (i, max_precision) in expected_max_precision.iter().enumerate() {
1235            assert_eq!(
1236                *max_precision,
1237                Type::decimal_max_precision(i as u32 + 1).unwrap(),
1238                "Failed calculate max precision for {i}"
1239            );
1240        }
1241
1242        assert_eq!(5, Type::decimal_required_bytes(10).unwrap());
1243        assert_eq!(16, Type::decimal_required_bytes(38).unwrap());
1244    }
1245
1246    #[test]
1247    fn test_primitive_type_compatible() {
1248        let pairs = vec![
1249            (PrimitiveType::Boolean, PrimitiveLiteral::Boolean(true)),
1250            (PrimitiveType::Int, PrimitiveLiteral::Int(1)),
1251            (PrimitiveType::Long, PrimitiveLiteral::Long(1)),
1252            (PrimitiveType::Float, PrimitiveLiteral::Float(1.0.into())),
1253            (PrimitiveType::Double, PrimitiveLiteral::Double(1.0.into())),
1254            (
1255                PrimitiveType::Decimal {
1256                    precision: 9,
1257                    scale: 2,
1258                },
1259                PrimitiveLiteral::Int128(1),
1260            ),
1261            (PrimitiveType::Date, PrimitiveLiteral::Int(1)),
1262            (PrimitiveType::Time, PrimitiveLiteral::Long(1)),
1263            (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(1)),
1264            (PrimitiveType::Timestamp, PrimitiveLiteral::Long(1)),
1265            (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(1)),
1266            (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(1)),
1267            (
1268                PrimitiveType::Uuid,
1269                PrimitiveLiteral::UInt128(Uuid::new_v4().as_u128()),
1270            ),
1271            (PrimitiveType::Fixed(8), PrimitiveLiteral::Binary(vec![1])),
1272            (PrimitiveType::Binary, PrimitiveLiteral::Binary(vec![1])),
1273        ];
1274        for (ty, literal) in pairs {
1275            assert!(ty.compatible(&literal));
1276        }
1277    }
1278
1279    #[test]
1280    fn struct_type_with_type_field() {
1281        // Test that StructType properly deserializes JSON with "type":"struct" field
1282        // This was previously broken because the deserializer wasn't consuming the type field value
1283        let json = r#"
1284        {
1285            "type": "struct",
1286            "fields": [
1287                {"id": 1, "name": "field1", "required": true, "type": "string"}
1288            ]
1289        }
1290        "#;
1291
1292        let struct_type: StructType = serde_json::from_str(json)
1293            .expect("Should successfully deserialize StructType with type field");
1294
1295        assert_eq!(struct_type.fields().len(), 1);
1296        assert_eq!(struct_type.fields()[0].name, "field1");
1297    }
1298
1299    #[test]
1300    fn struct_type_rejects_wrong_type() {
1301        // Test that StructType validation rejects incorrect type field values
1302        let json = r#"
1303        {
1304            "type": "list",
1305            "fields": [
1306                {"id": 1, "name": "field1", "required": true, "type": "string"}
1307            ]
1308        }
1309        "#;
1310
1311        let result = serde_json::from_str::<StructType>(json);
1312        assert!(
1313            result.is_err(),
1314            "Should reject StructType with wrong type field"
1315        );
1316        assert!(
1317            result
1318                .unwrap_err()
1319                .to_string()
1320                .contains("expected type 'struct'")
1321        );
1322    }
1323}