iceberg/spec/name_mapping/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Iceberg name mapping.
19
20use std::sync::Arc;
21
22use serde::{Deserialize, Serialize};
23use serde_with::{DefaultOnNull, serde_as};
24
/// Property key (`schema.name-mapping.default`) used to look up the default
/// name mapping.
pub const DEFAULT_SCHEMA_NAME_MAPPING: &str = "schema.name-mapping.default";
27
28/// Iceberg fallback field name to ID mapping.
29#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
30#[serde(transparent)]
31pub struct NameMapping {
32    root: Vec<MappedField>,
33}
34
35impl NameMapping {
36    /// Create a new [`NameMapping`] given a collection of mapped fields.
37    pub fn new(fields: Vec<MappedField>) -> Self {
38        Self { root: fields }
39    }
40
41    /// Get a reference to fields which are to be mapped from name to field ID.
42    pub fn fields(&self) -> &[MappedField] {
43        &self.root
44    }
45}
46
47/// Maps field names to IDs.
48#[serde_as]
49#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
50#[serde(rename_all = "kebab-case")]
51pub struct MappedField {
52    #[serde(skip_serializing_if = "Option::is_none")]
53    field_id: Option<i32>,
54    names: Vec<String>,
55    #[serde(default)]
56    #[serde(skip_serializing_if = "Vec::is_empty")]
57    #[serde_as(deserialize_as = "DefaultOnNull")]
58    fields: Vec<Arc<MappedField>>,
59}
60
61impl MappedField {
62    /// Create a new [`MappedField`].
63    pub fn new(field_id: Option<i32>, names: Vec<String>, fields: Vec<MappedField>) -> Self {
64        Self {
65            field_id,
66            names,
67            fields: fields.into_iter().map(Arc::new).collect(),
68        }
69    }
70
71    /// Iceberg field ID when a field's name is present within `names`.
72    pub fn field_id(&self) -> Option<i32> {
73        self.field_id
74    }
75
76    /// Get a reference to names for a mapped field.
77    pub fn names(&self) -> &[String] {
78        &self.names
79    }
80
81    /// Get a reference to the field mapping for any child fields.
82    pub fn fields(&self) -> &[Arc<MappedField>] {
83        &self.fields
84    }
85}
86
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a [`MappedField`] from borrowed names and owned
    /// children, so the expected values below stay readable.
    fn field(field_id: Option<i32>, names: &[&str], children: Vec<MappedField>) -> MappedField {
        MappedField {
            field_id,
            names: names.iter().copied().map(String::from).collect(),
            fields: children.into_iter().map(Into::into).collect(),
        }
    }

    /// Parses `json` as a single [`MappedField`] and checks it equals `expected`.
    #[track_caller]
    fn assert_parses_to(json: &str, expected: &MappedField) {
        let parsed: MappedField = serde_json::from_str(json).unwrap();
        assert_eq!(&parsed, expected);
    }

    #[test]
    fn test_json_mapped_field_deserialization() {
        let expected = field(Some(1), &["id", "record_id"], vec![]);

        // The `fields` key may be absent...
        let fields_omitted = r#"
        {
            "field-id": 1,
            "names": ["id", "record_id"]
        }
        "#;
        // ...or explicitly null; both must yield an empty child list.
        let fields_null = r#"
        {
            "field-id": 1,
            "names": ["id", "record_id"],
            "fields": null
        }
        "#;

        assert_parses_to(fields_omitted, &expected);
        assert_parses_to(fields_null, &expected);
    }

    #[test]
    fn test_json_mapped_field_no_names_deserialization() {
        let expected = field(Some(1), &[], vec![]);

        let fields_omitted = r#"
        {
            "field-id": 1,
            "names": []
        }
        "#;
        let fields_null = r#"
        {
            "field-id": 1,
            "names": [],
            "fields": null
        }
        "#;

        assert_parses_to(fields_omitted, &expected);
        assert_parses_to(fields_null, &expected);
    }

    #[test]
    fn test_json_mapped_field_no_field_id_deserialization() {
        let expected = field(None, &["id", "record_id"], vec![]);

        let fields_omitted = r#"
        {
            "names": ["id", "record_id"]
        }
        "#;
        let fields_null = r#"
        {
            "names": ["id", "record_id"],
            "fields": null
        }
        "#;

        assert_parses_to(fields_omitted, &expected);
        assert_parses_to(fields_null, &expected);
    }

    #[test]
    fn test_json_name_mapping_deserialization() {
        let json = r#"
        [
            {
                "field-id": 1,
                "names": [
                    "id",
                    "record_id"
                ]
            },
            {
                "field-id": 2,
                "names": [
                    "data"
                ]
            },
            {
                "field-id": 3,
                "names": [
                    "location"
                ],
                "fields": [
                    {
                        "field-id": 4,
                        "names": [
                            "latitude",
                            "lat"
                        ]
                    },
                    {
                        "field-id": 5,
                        "names": [
                            "longitude",
                            "long"
                        ]
                    }
                ]
            }
        ]
        "#;

        let expected = NameMapping {
            root: vec![
                field(Some(1), &["id", "record_id"], vec![]),
                field(Some(2), &["data"], vec![]),
                field(Some(3), &["location"], vec![
                    field(Some(4), &["latitude", "lat"], vec![]),
                    field(Some(5), &["longitude", "long"], vec![]),
                ]),
            ],
        };

        let parsed: NameMapping = serde_json::from_str(json).unwrap();
        assert_eq!(parsed, expected);
    }

    #[test]
    fn test_json_name_mapping_serialization() {
        let name_mapping = NameMapping {
            root: vec![
                // No field ID: the `field-id` key must be absent from the output.
                field(None, &["foo"], vec![]),
                field(Some(2), &["bar"], vec![]),
                field(Some(3), &["baz"], vec![]),
                field(Some(4), &["qux"], vec![field(Some(5), &["element"], vec![])]),
                field(Some(6), &["quux"], vec![
                    field(Some(7), &["key"], vec![]),
                    field(Some(8), &["value"], vec![
                        field(Some(9), &["key"], vec![]),
                        field(Some(10), &["value"], vec![]),
                    ]),
                ]),
                field(Some(11), &["location"], vec![field(
                    Some(12),
                    &["element"],
                    vec![
                        field(Some(13), &["latitude"], vec![]),
                        field(Some(14), &["longitude"], vec![]),
                    ],
                )]),
                field(Some(15), &["person"], vec![
                    field(Some(16), &["name"], vec![]),
                    field(Some(17), &["age"], vec![]),
                ]),
            ],
        };

        let expected = r#"[{"names":["foo"]},{"field-id":2,"names":["bar"]},{"field-id":3,"names":["baz"]},{"field-id":4,"names":["qux"],"fields":[{"field-id":5,"names":["element"]}]},{"field-id":6,"names":["quux"],"fields":[{"field-id":7,"names":["key"]},{"field-id":8,"names":["value"],"fields":[{"field-id":9,"names":["key"]},{"field-id":10,"names":["value"]}]}]},{"field-id":11,"names":["location"],"fields":[{"field-id":12,"names":["element"],"fields":[{"field-id":13,"names":["latitude"]},{"field-id":14,"names":["longitude"]}]}]},{"field-id":15,"names":["person"],"fields":[{"field-id":16,"names":["name"]},{"field-id":17,"names":["age"]}]}]"#;

        let serialized = serde_json::to_string(&name_mapping).unwrap();
        assert_eq!(serialized, expected);
    }
}