iceberg/spec/schema/
_serde.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
/// This is a helper module that defines types to help with serialization/deserialization.
/// For deserialization the input first gets read into either the [SchemaV1] or [SchemaV2] struct
/// and then converted into the [Schema] struct. Serialization works the other way around.
/// [SchemaV1] and [SchemaV2] are internal structs that are only used for serialization and deserialization.
22use serde::Deserialize;
/// This is a helper module that defines types to help with serialization/deserialization.
/// For deserialization the input first gets read into either the [SchemaV1] or [SchemaV2] struct
/// and then converted into the [Schema] struct. Serialization works the other way around.
/// [SchemaV1] and [SchemaV2] are internal structs that are only used for serialization and deserialization.
27use serde::Serialize;
28
29use super::{DEFAULT_SCHEMA_ID, Schema};
30use crate::spec::StructType;
31use crate::{Error, Result};
32
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
#[serde(untagged)]
/// Enum for Schema serialization/deserialization.
///
/// The enum is `#[serde(untagged)]`, so no variant tag is written to the serialized form.
/// On deserialization serde tries the variants in declaration order: `V2` is attempted
/// first and `V1` is the fallback for input that does not fit [SchemaV2] (for example,
/// input lacking the `schema-id` key that [SchemaV2] requires). Do not reorder the variants.
pub(super) enum SchemaEnum {
    /// Representation with a mandatory `schema-id`.
    V2(SchemaV2),
    /// Representation in which `schema-id` may be absent.
    V1(SchemaV1),
}
40
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
#[serde(rename_all = "kebab-case")]
/// Defines the structure of a v2 schema for serialization/deserialization.
///
/// Field names are mapped to kebab-case keys (`schema-id`, `identifier-field-ids`).
pub(crate) struct SchemaV2 {
    /// Schema identifier; not optional in this representation.
    pub schema_id: i32,
    /// Identifier field ids; the key is left out of the output when this is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub identifier_field_ids: Option<Vec<i32>>,
    /// The schema's struct type. `flatten` places its keys at the same level as the
    /// other keys of this struct instead of nesting them under a `fields` key.
    #[serde(flatten)]
    pub fields: StructType,
}
51
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
#[serde(rename_all = "kebab-case")]
/// Defines the structure of a v1 schema for serialization/deserialization.
///
/// Field names are mapped to kebab-case keys (`schema-id`, `identifier-field-ids`).
pub(crate) struct SchemaV1 {
    /// Schema identifier; optional here, and the key is left out of the output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub schema_id: Option<i32>,
    /// Identifier field ids; the key is left out of the output when this is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub identifier_field_ids: Option<Vec<i32>>,
    /// The schema's struct type. `flatten` places its keys at the same level as the
    /// other keys of this struct instead of nesting them under a `fields` key.
    #[serde(flatten)]
    pub fields: StructType,
}
63
64/// Helper to serialize/deserializa Schema
65impl TryFrom<SchemaEnum> for Schema {
66    type Error = Error;
67    fn try_from(value: SchemaEnum) -> Result<Self> {
68        match value {
69            SchemaEnum::V2(value) => value.try_into(),
70            SchemaEnum::V1(value) => value.try_into(),
71        }
72    }
73}
74
75impl From<Schema> for SchemaEnum {
76    fn from(value: Schema) -> Self {
77        SchemaEnum::V2(value.into())
78    }
79}
80
81impl TryFrom<SchemaV2> for Schema {
82    type Error = Error;
83    fn try_from(value: SchemaV2) -> Result<Self> {
84        Schema::builder()
85            .with_schema_id(value.schema_id)
86            .with_fields(value.fields.fields().iter().cloned())
87            .with_identifier_field_ids(value.identifier_field_ids.unwrap_or_default())
88            .build()
89    }
90}
91
92impl TryFrom<SchemaV1> for Schema {
93    type Error = Error;
94    fn try_from(value: SchemaV1) -> Result<Self> {
95        Schema::builder()
96            .with_schema_id(value.schema_id.unwrap_or(DEFAULT_SCHEMA_ID))
97            .with_fields(value.fields.fields().iter().cloned())
98            .with_identifier_field_ids(value.identifier_field_ids.unwrap_or_default())
99            .build()
100    }
101}
102
103impl From<Schema> for SchemaV2 {
104    fn from(value: Schema) -> Self {
105        SchemaV2 {
106            schema_id: value.schema_id,
107            identifier_field_ids: if value.identifier_field_ids.is_empty() {
108                None
109            } else {
110                Some(value.identifier_field_ids.into_iter().collect())
111            },
112            fields: value.r#struct,
113        }
114    }
115}
116
117impl From<Schema> for SchemaV1 {
118    fn from(value: Schema) -> Self {
119        SchemaV1 {
120            schema_id: Some(value.schema_id),
121            identifier_field_ids: if value.identifier_field_ids.is_empty() {
122                None
123            } else {
124                Some(value.identifier_field_ids.into_iter().collect())
125            },
126            fields: value.r#struct,
127        }
128    }
129}
130
#[cfg(test)]
mod tests {
    use super::*;
    use crate::spec::schema::tests::table_schema_simple;
    use crate::spec::{PrimitiveType, Type};

    /// Checks the serde round trip of a schema.
    ///
    /// * `json` - the serialized schema to deserialize.
    /// * `expected_type` - the [Schema] that `json` must deserialize into.
    /// * `expected_enum` - the [SchemaEnum] (including the variant) that `json` must
    ///   deserialize into.
    ///
    /// Also serializes `expected_type` and verifies that reading the result back
    /// yields the same schema.
    fn check_schema_serde(json: &str, expected_type: Schema, expected_enum: SchemaEnum) {
        let desered_type: Schema = serde_json::from_str(json).unwrap();
        assert_eq!(desered_type, expected_type);

        // Compare against the intermediate enum directly. A bare identifier inside
        // `matches!` is a binding pattern that accepts any value, so it cannot be used
        // to verify which variant (or which contents) was picked.
        let desered_enum: SchemaEnum = serde_json::from_str(json).unwrap();
        assert_eq!(desered_enum, expected_enum);

        let sered_json = serde_json::to_string(&expected_type).unwrap();
        let parsed_json_value = serde_json::from_str::<Schema>(&sered_json).unwrap();

        assert_eq!(parsed_json_value, desered_type);
    }

    /// JSON carrying a schema id must be read through the `V2` variant.
    #[test]
    fn test_serde_with_schema_id() {
        let (schema, record) = table_schema_simple();

        let x: SchemaV2 = serde_json::from_str(record).unwrap();
        check_schema_serde(record, schema, SchemaEnum::V2(x));
    }

    /// JSON without a schema id must be read through the `V1` variant and end up
    /// with `DEFAULT_SCHEMA_ID`.
    #[test]
    fn test_serde_without_schema_id() {
        let (mut schema, record) = table_schema_simple();
        // Remove the `"schema-id":1,` entry from the example JSON.
        let new_record = record.replace("\"schema-id\":1,", "");
        // By default schema_id field is set to DEFAULT_SCHEMA_ID when no value is set in json
        schema.schema_id = DEFAULT_SCHEMA_ID;

        let x: SchemaV1 = serde_json::from_str(new_record.as_str()).unwrap();
        check_schema_serde(&new_record, schema, SchemaEnum::V1(x));
    }

    /// Deserializes a small hand-written schema into [SchemaV2] and checks the
    /// schema id and each field's type, id and `required` flag.
    #[test]
    fn schema() {
        let record = r#"
        {
            "type": "struct",
            "schema-id": 1,
            "fields": [ {
            "id": 1,
            "name": "id",
            "required": true,
            "type": "uuid"
            }, {
            "id": 2,
            "name": "data",
            "required": false,
            "type": "int"
            } ]
            }
        "#;

        let result: SchemaV2 = serde_json::from_str(record).unwrap();
        assert_eq!(1, result.schema_id);
        assert_eq!(
            Box::new(Type::Primitive(PrimitiveType::Uuid)),
            result.fields[0].field_type
        );
        assert_eq!(1, result.fields[0].id);
        assert!(result.fields[0].required);

        assert_eq!(
            Box::new(Type::Primitive(PrimitiveType::Int)),
            result.fields[1].field_type
        );
        assert_eq!(2, result.fields[1].id);
        assert!(!result.fields[1].required);
    }
}