iceberg/spec/schema/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! This module defines schema in iceberg.
19
20use std::collections::{HashMap, HashSet};
21use std::fmt::{Display, Formatter};
22use std::sync::Arc;
23
24mod utils;
25mod visitor;
26pub use self::visitor::*;
27pub(super) mod _serde;
28mod id_reassigner;
29mod index;
30mod prune_columns;
31use bimap::BiHashMap;
32use itertools::{Itertools, zip_eq};
33use serde::{Deserialize, Serialize};
34
35use self::_serde::SchemaEnum;
36use self::id_reassigner::ReassignFieldIds;
37use self::index::{IndexByName, index_by_id, index_parents};
38pub use self::prune_columns::prune_columns;
39use super::NestedField;
40use crate::error::Result;
41use crate::expr::accessor::StructAccessor;
42use crate::spec::datatypes::{
43    LIST_FIELD_NAME, ListType, MAP_KEY_FIELD_NAME, MAP_VALUE_FIELD_NAME, MapType, NestedFieldRef,
44    PrimitiveType, StructType, Type,
45};
46use crate::{Error, ErrorKind, ensure_data_valid};
47
/// Type alias for schema id; schema ids are plain `i32` values.
pub type SchemaId = i32;
/// Shared, reference-counted ([`Arc`]) handle to a [`Schema`].
pub type SchemaRef = Arc<Schema>;
/// Default schema id; [`Schema::builder`] starts from this value until
/// `with_schema_id` overrides it.
pub const DEFAULT_SCHEMA_ID: SchemaId = 0;
54
/// Defines schema in iceberg.
///
/// Besides the root struct type and the ids, a schema stores several lookup
/// indexes that [`SchemaBuilder::build`] derives from the fields.
/// (De)serialization goes through `SchemaEnum` (see the `serde` attribute).
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(try_from = "SchemaEnum", into = "SchemaEnum")]
pub struct Schema {
    /// Root struct type holding the fields of this schema.
    r#struct: StructType,
    /// Id of this schema.
    schema_id: SchemaId,
    /// Largest field id found in the id index, `0` when there are no fields
    /// (as computed by [`SchemaBuilder::build`]).
    highest_field_id: i32,
    /// Ids of the fields that act as identifier fields.
    identifier_field_ids: HashSet<i32>,

    /// Bidirectional mapping between aliases and field ids.
    alias_to_id: BiHashMap<String, i32>,
    /// Field id -> field, built with `index_by_id`.
    id_to_field: HashMap<i32, NestedFieldRef>,

    /// Field name -> field id, built with `IndexByName`.
    name_to_id: HashMap<String, i32>,
    /// Same as `name_to_id` but with lowercased keys; backs the
    /// case-insensitive lookup.
    lowercase_name_to_id: HashMap<String, i32>,
    /// Field id -> field name, built with `IndexByName`.
    id_to_name: HashMap<i32, String>,

    /// Field id -> accessor, built for primitive fields reachable through
    /// structs only (no accessors for map or list types).
    field_id_to_accessor: HashMap<i32, Arc<StructAccessor>>,
}
73
74impl PartialEq for Schema {
75    fn eq(&self, other: &Self) -> bool {
76        self.r#struct == other.r#struct
77            && self.schema_id == other.schema_id
78            && self.identifier_field_ids == other.identifier_field_ids
79    }
80}
81
82impl Eq for Schema {}
83
/// Schema builder.
///
/// Collects the pieces of a [`Schema`]; `build` validates them and derives
/// the lookup indexes.
#[derive(Debug)]
pub struct SchemaBuilder {
    /// Id the built schema will carry.
    schema_id: i32,
    /// Top-level fields, in the order they were added.
    fields: Vec<NestedFieldRef>,
    /// Bidirectional mapping between aliases and field ids.
    alias_to_id: BiHashMap<String, i32>,
    /// Ids of the fields that act as identifier fields.
    identifier_field_ids: HashSet<i32>,
    /// When set, `build` reassigns all field ids starting from this value.
    reassign_field_ids_from: Option<i32>,
}
93
94impl SchemaBuilder {
95    /// Add fields to schema builder.
96    pub fn with_fields(mut self, fields: impl IntoIterator<Item = NestedFieldRef>) -> Self {
97        self.fields.extend(fields);
98        self
99    }
100
101    /// Reassign all field-ids (including nested) on build.
102    /// Reassignment starts from the field-id specified in `start_from` (inclusive).
103    ///
104    /// All specified aliases and identifier fields will be updated to the new field-ids.
105    pub(crate) fn with_reassigned_field_ids(mut self, start_from: u32) -> Self {
106        self.reassign_field_ids_from = Some(start_from.try_into().unwrap_or(i32::MAX));
107        self
108    }
109
110    /// Set schema id.
111    pub fn with_schema_id(mut self, schema_id: i32) -> Self {
112        self.schema_id = schema_id;
113        self
114    }
115
116    /// Set identifier field ids.
117    pub fn with_identifier_field_ids(mut self, ids: impl IntoIterator<Item = i32>) -> Self {
118        self.identifier_field_ids.extend(ids);
119        self
120    }
121
122    /// Set alias to filed id mapping.
123    pub fn with_alias(mut self, alias_to_id: BiHashMap<String, i32>) -> Self {
124        self.alias_to_id = alias_to_id;
125        self
126    }
127
128    /// Builds the schema.
129    pub fn build(self) -> Result<Schema> {
130        let field_id_to_accessor = self.build_accessors();
131
132        let r#struct = StructType::new(self.fields);
133        let id_to_field = index_by_id(&r#struct)?;
134
135        Self::validate_identifier_ids(
136            &r#struct,
137            &id_to_field,
138            self.identifier_field_ids.iter().copied(),
139        )?;
140
141        let (name_to_id, id_to_name) = {
142            let mut index = IndexByName::default();
143            visit_struct(&r#struct, &mut index)?;
144            index.indexes()
145        };
146
147        let lowercase_name_to_id = name_to_id
148            .iter()
149            .map(|(k, v)| (k.to_lowercase(), *v))
150            .collect();
151
152        let highest_field_id = id_to_field.keys().max().cloned().unwrap_or(0);
153
154        let mut schema = Schema {
155            r#struct,
156            schema_id: self.schema_id,
157            highest_field_id,
158            identifier_field_ids: self.identifier_field_ids,
159            alias_to_id: self.alias_to_id,
160            id_to_field,
161
162            name_to_id,
163            lowercase_name_to_id,
164            id_to_name,
165
166            field_id_to_accessor,
167        };
168
169        if let Some(start_from) = self.reassign_field_ids_from {
170            let mut id_reassigner = ReassignFieldIds::new(start_from);
171            let new_fields = id_reassigner.reassign_field_ids(schema.r#struct.fields().to_vec())?;
172            let new_identifier_field_ids =
173                id_reassigner.apply_to_identifier_fields(schema.identifier_field_ids)?;
174            let new_alias_to_id = id_reassigner.apply_to_aliases(schema.alias_to_id.clone())?;
175
176            schema = Schema::builder()
177                .with_schema_id(schema.schema_id)
178                .with_fields(new_fields)
179                .with_identifier_field_ids(new_identifier_field_ids)
180                .with_alias(new_alias_to_id)
181                .build()?;
182        }
183
184        Ok(schema)
185    }
186
187    fn build_accessors(&self) -> HashMap<i32, Arc<StructAccessor>> {
188        let mut map = HashMap::new();
189
190        for (pos, field) in self.fields.iter().enumerate() {
191            match field.field_type.as_ref() {
192                Type::Primitive(prim_type) => {
193                    // add an accessor for this field
194                    let accessor = Arc::new(StructAccessor::new(pos, prim_type.clone()));
195                    map.insert(field.id, accessor.clone());
196                }
197
198                Type::Struct(nested) => {
199                    // add accessors for nested fields
200                    for (field_id, accessor) in Self::build_accessors_nested(nested.fields()) {
201                        let new_accessor = Arc::new(StructAccessor::wrap(pos, accessor));
202                        map.insert(field_id, new_accessor.clone());
203                    }
204                }
205                _ => {
206                    // Accessors don't get built for Map or List types
207                }
208            }
209        }
210
211        map
212    }
213
214    fn build_accessors_nested(fields: &[NestedFieldRef]) -> Vec<(i32, Box<StructAccessor>)> {
215        let mut results = vec![];
216        for (pos, field) in fields.iter().enumerate() {
217            match field.field_type.as_ref() {
218                Type::Primitive(prim_type) => {
219                    let accessor = Box::new(StructAccessor::new(pos, prim_type.clone()));
220                    results.push((field.id, accessor));
221                }
222                Type::Struct(nested) => {
223                    let nested_accessors = Self::build_accessors_nested(nested.fields());
224
225                    let wrapped_nested_accessors =
226                        nested_accessors.into_iter().map(|(id, accessor)| {
227                            let new_accessor = Box::new(StructAccessor::wrap(pos, accessor));
228                            (id, new_accessor.clone())
229                        });
230
231                    results.extend(wrapped_nested_accessors);
232                }
233                _ => {
234                    // Accessors don't get built for Map or List types
235                }
236            }
237        }
238
239        results
240    }
241
242    /// According to [the spec](https://iceberg.apache.org/spec/#identifier-fields), the identifier fields
243    /// must meet the following requirements:
244    /// - Float, double, and optional fields cannot be used as identifier fields.
245    /// - Identifier fields may be nested in structs but cannot be nested within maps or lists.
246    /// - A nested field cannot be used as an identifier field if it is nested in an optional struct, to avoid null values in identifiers.
247    fn validate_identifier_ids(
248        r#struct: &StructType,
249        id_to_field: &HashMap<i32, NestedFieldRef>,
250        identifier_field_ids: impl Iterator<Item = i32>,
251    ) -> Result<()> {
252        let id_to_parent = index_parents(r#struct)?;
253        for identifier_field_id in identifier_field_ids {
254            let field = id_to_field.get(&identifier_field_id).ok_or_else(|| {
255                Error::new(
256                    ErrorKind::DataInvalid,
257                    format!(
258                        "Cannot add identifier field {identifier_field_id}: field does not exist"
259                    ),
260                )
261            })?;
262            ensure_data_valid!(
263                field.required,
264                "Cannot add identifier field: {} is an optional field",
265                field.name
266            );
267            if let Type::Primitive(p) = field.field_type.as_ref() {
268                ensure_data_valid!(
269                    !matches!(p, PrimitiveType::Double | PrimitiveType::Float),
270                    "Cannot add identifier field {}: cannot be a float or double type",
271                    field.name
272                );
273            } else {
274                return Err(Error::new(
275                    ErrorKind::DataInvalid,
276                    format!(
277                        "Cannot add field {} as an identifier field: not a primitive type field",
278                        field.name
279                    ),
280                ));
281            }
282
283            let mut cur_field_id = identifier_field_id;
284            while let Some(parent) = id_to_parent.get(&cur_field_id) {
285                let parent_field = id_to_field
286                    .get(parent)
287                    .expect("Field id should not disappear.");
288                ensure_data_valid!(
289                    parent_field.field_type.is_struct(),
290                    "Cannot add field {} as an identifier field: must not be nested in {:?}",
291                    field.name,
292                    parent_field
293                );
294                ensure_data_valid!(
295                    parent_field.required,
296                    "Cannot add field {} as an identifier field: must not be nested in an optional field {}",
297                    field.name,
298                    parent_field
299                );
300                cur_field_id = *parent;
301            }
302        }
303
304        Ok(())
305    }
306}
307
308impl Schema {
309    /// Create a schema builder.
310    pub fn builder() -> SchemaBuilder {
311        SchemaBuilder {
312            schema_id: DEFAULT_SCHEMA_ID,
313            fields: vec![],
314            identifier_field_ids: HashSet::default(),
315            alias_to_id: BiHashMap::default(),
316            reassign_field_ids_from: None,
317        }
318    }
319
320    /// Create a new schema builder from a schema.
321    pub fn into_builder(self) -> SchemaBuilder {
322        SchemaBuilder {
323            schema_id: self.schema_id,
324            fields: self.r#struct.fields().to_vec(),
325            alias_to_id: self.alias_to_id,
326            identifier_field_ids: self.identifier_field_ids,
327            reassign_field_ids_from: None,
328        }
329    }
330
    /// Get field by field id.
    ///
    /// Looks the id up in the id index; returns `None` when no field with
    /// that id is indexed.
    pub fn field_by_id(&self, field_id: i32) -> Option<&NestedFieldRef> {
        self.id_to_field.get(&field_id)
    }
335
336    /// Get field by field name.
337    ///
338    /// Both full name and short name could work here.
339    pub fn field_by_name(&self, field_name: &str) -> Option<&NestedFieldRef> {
340        self.name_to_id
341            .get(field_name)
342            .and_then(|id| self.field_by_id(*id))
343    }
344
345    /// Get field by field name, but in case-insensitive way.
346    ///
347    /// Both full name and short name could work here.
348    pub fn field_by_name_case_insensitive(&self, field_name: &str) -> Option<&NestedFieldRef> {
349        self.lowercase_name_to_id
350            .get(&field_name.to_lowercase())
351            .and_then(|id| self.field_by_id(*id))
352    }
353
354    /// Get field by alias.
355    pub fn field_by_alias(&self, alias: &str) -> Option<&NestedFieldRef> {
356        self.alias_to_id
357            .get_by_left(alias)
358            .and_then(|id| self.field_by_id(*id))
359    }
360
    /// Returns the highest field id stored on this schema.
    ///
    /// [`SchemaBuilder::build`] sets it to the largest indexed field id, or
    /// `0` when the schema has no fields.
    #[inline]
    pub fn highest_field_id(&self) -> i32 {
        self.highest_field_id
    }
366
    /// Returns the id of this schema.
    #[inline]
    pub fn schema_id(&self) -> SchemaId {
        self.schema_id
    }
372
    /// Returns the root [`StructType`] holding the fields of this schema.
    #[inline]
    pub fn as_struct(&self) -> &StructType {
        &self.r#struct
    }
378
    /// Returns an iterator over the identifier field ids.
    ///
    /// The ids are stored in a `HashSet`, so the iteration order is unspecified.
    #[inline]
    pub fn identifier_field_ids(&self) -> impl ExactSizeIterator<Item = i32> + '_ {
        self.identifier_field_ids.iter().copied()
    }
384
385    /// Get field id by full name.
386    pub fn field_id_by_name(&self, name: &str) -> Option<i32> {
387        self.name_to_id.get(name).copied()
388    }
389
390    /// Get full name by field id.
391    pub fn name_by_field_id(&self, field_id: i32) -> Option<&str> {
392        self.id_to_name.get(&field_id).map(String::as_str)
393    }
394
395    /// Get an accessor for retrieving data in a struct
396    pub fn accessor_by_field_id(&self, field_id: i32) -> Option<Arc<StructAccessor>> {
397        self.field_id_to_accessor.get(&field_id).cloned()
398    }
399
400    /// Check if this schema is identical to another schema semantically - excluding schema id.
401    pub(crate) fn is_same_schema(&self, other: &SchemaRef) -> bool {
402        self.as_struct().eq(other.as_struct())
403            && self.identifier_field_ids().eq(other.identifier_field_ids())
404    }
405
406    /// Change the schema id of this schema.
407    // This is redundant with the `with_schema_id` method on the builder, but useful
408    // as it is infallible in contrast to the builder `build()` method.
409    pub(crate) fn with_schema_id(self, schema_id: SchemaId) -> Self {
410        Self { schema_id, ..self }
411    }
412
    /// Return a reference to the map from field ids to field names.
    pub fn field_id_to_name_map(&self) -> &HashMap<i32, String> {
        &self.id_to_name
    }
417
    /// Return a reference to the map from field ids to nested fields.
    pub fn field_id_to_fields(&self) -> &HashMap<i32, NestedFieldRef> {
        &self.id_to_field
    }
422}
423
424impl Display for Schema {
425    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
426        writeln!(f, "table {{")?;
427        for field in self.as_struct().fields() {
428            writeln!(f, "  {field}")?;
429        }
430        writeln!(f, "}}")
431    }
432}
433
434#[cfg(test)]
435mod tests {
436    use std::collections::HashMap;
437
438    use bimap::BiHashMap;
439
440    use crate::spec::datatypes::Type::{List, Map, Primitive, Struct};
441    use crate::spec::datatypes::{
442        ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, StructType, Type,
443    };
444    use crate::spec::schema::Schema;
445    use crate::spec::values::Map as MapValue;
446    use crate::spec::{Datum, Literal};
447
448    #[test]
449    fn test_construct_schema() {
450        let field1: NestedFieldRef =
451            NestedField::required(1, "f1", Type::Primitive(PrimitiveType::Boolean)).into();
452        let field2: NestedFieldRef =
453            NestedField::optional(2, "f2", Type::Primitive(PrimitiveType::Int)).into();
454
455        let schema = Schema::builder()
456            .with_fields(vec![field1.clone()])
457            .with_fields(vec![field2.clone()])
458            .with_schema_id(3)
459            .build()
460            .unwrap();
461
462        assert_eq!(3, schema.schema_id());
463        assert_eq!(2, schema.highest_field_id());
464        assert_eq!(Some(&field1), schema.field_by_id(1));
465        assert_eq!(Some(&field2), schema.field_by_id(2));
466        assert_eq!(None, schema.field_by_id(3));
467    }
468
469    pub fn table_schema_simple<'a>() -> (Schema, &'a str) {
470        let schema = Schema::builder()
471            .with_schema_id(1)
472            .with_identifier_field_ids(vec![2])
473            .with_fields(vec![
474                NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(),
475                NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
476                NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
477            ])
478            .build()
479            .unwrap();
480        let record = r#"{
481            "type":"struct",
482            "schema-id":1,
483            "fields":[
484                {
485                    "id":1,
486                    "name":"foo",
487                    "required":false,
488                    "type":"string"
489                },
490                {
491                    "id":2,
492                    "name":"bar",
493                    "required":true,
494                    "type":"int"
495                },
496                {
497                    "id":3,
498                    "name":"baz",
499                    "required":false,
500                    "type":"boolean"
501                }
502            ],
503            "identifier-field-ids":[2]
504        }"#;
505        (schema, record)
506    }
507
508    pub fn table_schema_nested() -> Schema {
509        Schema::builder()
510            .with_schema_id(1)
511            .with_identifier_field_ids(vec![2])
512            .with_fields(vec![
513                NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(),
514                NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
515                NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
516                NestedField::required(
517                    4,
518                    "qux",
519                    Type::List(ListType {
520                        element_field: NestedField::list_element(
521                            5,
522                            Type::Primitive(PrimitiveType::String),
523                            true,
524                        )
525                        .into(),
526                    }),
527                )
528                .into(),
529                NestedField::required(
530                    6,
531                    "quux",
532                    Type::Map(MapType {
533                        key_field: NestedField::map_key_element(
534                            7,
535                            Type::Primitive(PrimitiveType::String),
536                        )
537                        .into(),
538                        value_field: NestedField::map_value_element(
539                            8,
540                            Type::Map(MapType {
541                                key_field: NestedField::map_key_element(
542                                    9,
543                                    Type::Primitive(PrimitiveType::String),
544                                )
545                                .into(),
546                                value_field: NestedField::map_value_element(
547                                    10,
548                                    Type::Primitive(PrimitiveType::Int),
549                                    true,
550                                )
551                                .into(),
552                            }),
553                            true,
554                        )
555                        .into(),
556                    }),
557                )
558                .into(),
559                NestedField::required(
560                    11,
561                    "location",
562                    Type::List(ListType {
563                        element_field: NestedField::list_element(
564                            12,
565                            Type::Struct(StructType::new(vec![
566                                NestedField::optional(
567                                    13,
568                                    "latitude",
569                                    Type::Primitive(PrimitiveType::Float),
570                                )
571                                .into(),
572                                NestedField::optional(
573                                    14,
574                                    "longitude",
575                                    Type::Primitive(PrimitiveType::Float),
576                                )
577                                .into(),
578                            ])),
579                            true,
580                        )
581                        .into(),
582                    }),
583                )
584                .into(),
585                NestedField::optional(
586                    15,
587                    "person",
588                    Type::Struct(StructType::new(vec![
589                        NestedField::optional(16, "name", Type::Primitive(PrimitiveType::String))
590                            .into(),
591                        NestedField::required(17, "age", Type::Primitive(PrimitiveType::Int))
592                            .into(),
593                    ])),
594                )
595                .into(),
596            ])
597            .build()
598            .unwrap()
599    }
600
601    #[test]
602    fn test_schema_display() {
603        let expected_str = "
604table {
605  1: foo: optional string\x20
606  2: bar: required int\x20
607  3: baz: optional boolean\x20
608}
609";
610
611        assert_eq!(expected_str, format!("\n{}", table_schema_simple().0));
612    }
613
614    #[test]
615    fn test_schema_build_failed_on_duplicate_names() {
616        let ret = Schema::builder()
617            .with_schema_id(1)
618            .with_identifier_field_ids(vec![1])
619            .with_fields(vec![
620                NestedField::required(1, "foo", Primitive(PrimitiveType::String)).into(),
621                NestedField::required(2, "bar", Primitive(PrimitiveType::Int)).into(),
622                NestedField::optional(3, "baz", Primitive(PrimitiveType::Boolean)).into(),
623                NestedField::optional(4, "baz", Primitive(PrimitiveType::Boolean)).into(),
624            ])
625            .build();
626
627        assert!(
628            ret.unwrap_err()
629                .message()
630                .contains("Invalid schema: multiple fields for name baz")
631        );
632    }
633
634    #[test]
635    fn test_schema_into_builder() {
636        let original_schema = table_schema_nested();
637        let builder = original_schema.clone().into_builder();
638        let schema = builder.build().unwrap();
639
640        assert_eq!(original_schema, schema);
641    }
642
643    #[test]
644    fn test_schema_index_by_name() {
645        let expected_name_to_id = HashMap::from(
646            [
647                ("foo", 1),
648                ("bar", 2),
649                ("baz", 3),
650                ("qux", 4),
651                ("qux.element", 5),
652                ("quux", 6),
653                ("quux.key", 7),
654                ("quux.value", 8),
655                ("quux.value.key", 9),
656                ("quux.value.value", 10),
657                ("location", 11),
658                ("location.element", 12),
659                ("location.element.latitude", 13),
660                ("location.element.longitude", 14),
661                ("location.latitude", 13),
662                ("location.longitude", 14),
663                ("person", 15),
664                ("person.name", 16),
665                ("person.age", 17),
666            ]
667            .map(|e| (e.0.to_string(), e.1)),
668        );
669
670        let schema = table_schema_nested();
671        assert_eq!(&expected_name_to_id, &schema.name_to_id);
672    }
673
674    #[test]
675    fn test_schema_index_by_name_case_insensitive() {
676        let expected_name_to_id = HashMap::from(
677            [
678                ("fOo", 1),
679                ("Bar", 2),
680                ("BAz", 3),
681                ("quX", 4),
682                ("quX.ELEment", 5),
683                ("qUUx", 6),
684                ("QUUX.KEY", 7),
685                ("QUUX.Value", 8),
686                ("qUUX.VALUE.Key", 9),
687                ("qUux.VaLue.Value", 10),
688                ("lOCAtION", 11),
689                ("LOCAtioN.ELeMENt", 12),
690                ("LoCATion.element.LATitude", 13),
691                ("locatION.ElemeNT.LONgitude", 14),
692                ("LOCAtiON.LATITUDE", 13),
693                ("LOCATION.LONGITUDE", 14),
694                ("PERSon", 15),
695                ("PERSON.Name", 16),
696                ("peRSON.AGe", 17),
697            ]
698            .map(|e| (e.0.to_string(), e.1)),
699        );
700
701        let schema = table_schema_nested();
702        for (name, id) in expected_name_to_id {
703            assert_eq!(
704                Some(id),
705                schema.field_by_name_case_insensitive(&name).map(|f| f.id)
706            );
707        }
708    }
709
710    #[test]
711    fn test_schema_find_column_name() {
712        let expected_column_name = HashMap::from([
713            (1, "foo"),
714            (2, "bar"),
715            (3, "baz"),
716            (4, "qux"),
717            (5, "qux.element"),
718            (6, "quux"),
719            (7, "quux.key"),
720            (8, "quux.value"),
721            (9, "quux.value.key"),
722            (10, "quux.value.value"),
723            (11, "location"),
724            (12, "location.element"),
725            (13, "location.element.latitude"),
726            (14, "location.element.longitude"),
727        ]);
728
729        let schema = table_schema_nested();
730        for (id, name) in expected_column_name {
731            assert_eq!(
732                Some(name),
733                schema.name_by_field_id(id),
734                "Column name for field id {id} not match."
735            );
736        }
737    }
738
739    #[test]
740    fn test_schema_find_column_name_not_found() {
741        let schema = table_schema_nested();
742
743        assert!(schema.name_by_field_id(99).is_none());
744    }
745
746    #[test]
747    fn test_schema_find_column_name_by_id_simple() {
748        let expected_id_to_name = HashMap::from([(1, "foo"), (2, "bar"), (3, "baz")]);
749
750        let schema = table_schema_simple().0;
751
752        for (id, name) in expected_id_to_name {
753            assert_eq!(
754                Some(name),
755                schema.name_by_field_id(id),
756                "Column name for field id {id} not match."
757            );
758        }
759    }
760
761    #[test]
762    fn test_schema_find_simple() {
763        let schema = table_schema_simple().0;
764
765        assert_eq!(
766            Some(schema.r#struct.fields()[0].clone()),
767            schema.field_by_id(1).cloned()
768        );
769        assert_eq!(
770            Some(schema.r#struct.fields()[1].clone()),
771            schema.field_by_id(2).cloned()
772        );
773        assert_eq!(
774            Some(schema.r#struct.fields()[2].clone()),
775            schema.field_by_id(3).cloned()
776        );
777
778        assert!(schema.field_by_id(4).is_none());
779        assert!(schema.field_by_name("non exist").is_none());
780    }
781
782    #[test]
783    fn test_schema_find_nested() {
784        let expected_id_to_field: HashMap<i32, NestedField> = HashMap::from([
785            (
786                1,
787                NestedField::optional(1, "foo", Primitive(PrimitiveType::String)),
788            ),
789            (
790                2,
791                NestedField::required(2, "bar", Primitive(PrimitiveType::Int)),
792            ),
793            (
794                3,
795                NestedField::optional(3, "baz", Primitive(PrimitiveType::Boolean)),
796            ),
797            (
798                4,
799                NestedField::required(
800                    4,
801                    "qux",
802                    Type::List(ListType {
803                        element_field: NestedField::list_element(
804                            5,
805                            Type::Primitive(PrimitiveType::String),
806                            true,
807                        )
808                        .into(),
809                    }),
810                ),
811            ),
812            (
813                5,
814                NestedField::required(5, "element", Primitive(PrimitiveType::String)),
815            ),
816            (
817                6,
818                NestedField::required(
819                    6,
820                    "quux",
821                    Map(MapType {
822                        key_field: NestedField::map_key_element(
823                            7,
824                            Primitive(PrimitiveType::String),
825                        )
826                        .into(),
827                        value_field: NestedField::map_value_element(
828                            8,
829                            Map(MapType {
830                                key_field: NestedField::map_key_element(
831                                    9,
832                                    Primitive(PrimitiveType::String),
833                                )
834                                .into(),
835                                value_field: NestedField::map_value_element(
836                                    10,
837                                    Primitive(PrimitiveType::Int),
838                                    true,
839                                )
840                                .into(),
841                            }),
842                            true,
843                        )
844                        .into(),
845                    }),
846                ),
847            ),
848            (
849                7,
850                NestedField::required(7, "key", Primitive(PrimitiveType::String)),
851            ),
852            (
853                8,
854                NestedField::required(
855                    8,
856                    "value",
857                    Map(MapType {
858                        key_field: NestedField::map_key_element(
859                            9,
860                            Primitive(PrimitiveType::String),
861                        )
862                        .into(),
863                        value_field: NestedField::map_value_element(
864                            10,
865                            Primitive(PrimitiveType::Int),
866                            true,
867                        )
868                        .into(),
869                    }),
870                ),
871            ),
872            (
873                9,
874                NestedField::required(9, "key", Primitive(PrimitiveType::String)),
875            ),
876            (
877                10,
878                NestedField::required(10, "value", Primitive(PrimitiveType::Int)),
879            ),
880            (
881                11,
882                NestedField::required(
883                    11,
884                    "location",
885                    List(ListType {
886                        element_field: NestedField::list_element(
887                            12,
888                            Struct(StructType::new(vec![
889                                NestedField::optional(
890                                    13,
891                                    "latitude",
892                                    Primitive(PrimitiveType::Float),
893                                )
894                                .into(),
895                                NestedField::optional(
896                                    14,
897                                    "longitude",
898                                    Primitive(PrimitiveType::Float),
899                                )
900                                .into(),
901                            ])),
902                            true,
903                        )
904                        .into(),
905                    }),
906                ),
907            ),
908            (
909                12,
910                NestedField::list_element(
911                    12,
912                    Struct(StructType::new(vec![
913                        NestedField::optional(13, "latitude", Primitive(PrimitiveType::Float))
914                            .into(),
915                        NestedField::optional(14, "longitude", Primitive(PrimitiveType::Float))
916                            .into(),
917                    ])),
918                    true,
919                ),
920            ),
921            (
922                13,
923                NestedField::optional(13, "latitude", Primitive(PrimitiveType::Float)),
924            ),
925            (
926                14,
927                NestedField::optional(14, "longitude", Primitive(PrimitiveType::Float)),
928            ),
929            (
930                15,
931                NestedField::optional(
932                    15,
933                    "person",
934                    Type::Struct(StructType::new(vec![
935                        NestedField::optional(16, "name", Type::Primitive(PrimitiveType::String))
936                            .into(),
937                        NestedField::required(17, "age", Type::Primitive(PrimitiveType::Int))
938                            .into(),
939                    ])),
940                ),
941            ),
942            (
943                16,
944                NestedField::optional(16, "name", Type::Primitive(PrimitiveType::String)),
945            ),
946            (
947                17,
948                NestedField::required(17, "age", Type::Primitive(PrimitiveType::Int)),
949            ),
950        ]);
951
952        let schema = table_schema_nested();
953        for (id, field) in expected_id_to_field {
954            assert_eq!(
955                Some(&field),
956                schema.field_by_id(id).map(|f| f.as_ref()),
957                "Field for {id} not match."
958            );
959        }
960    }
961
/// Verifies that accessors looked up by field id on the nested fixture schema
/// pull the expected values out of a matching struct literal.
///
/// Only primitive leaves are probed: field ids 1, 2 and 3, plus the `name`
/// (id 16) and `age` (id 17) members of the trailing `person` struct.
#[test]
fn test_build_accessors() {
    let schema = table_schema_nested();

    // Fourth slot: a list of two strings.
    let string_list = Literal::List(vec![
        Some(Literal::string("qux item 1")),
        Some(Literal::string("qux item 2")),
    ]);

    // Fifth slot: a map whose single value is itself a single-entry map.
    let inner_map = Literal::Map(MapValue::from([(
        Literal::string("quux nested key 1"),
        Some(Literal::int(1000)),
    )]));
    let outer_map = Literal::Map(MapValue::from([(
        Literal::string("quux key 1"),
        Some(inner_map),
    )]));

    // Sixth slot: a list holding one struct of two floats.
    let coordinates = crate::spec::Struct::from_iter(vec![
        Some(Literal::float(52.509_09)),
        Some(Literal::float(-1.885_249)),
    ]);
    let location_list = Literal::List(vec![Some(Literal::Struct(coordinates))]);

    // Seventh slot: the struct read through accessors 16 and 17 below.
    let person = Literal::Struct(crate::spec::Struct::from_iter(vec![
        Some(Literal::string("Testy McTest")),
        Some(Literal::int(33)),
    ]));

    let test_struct = crate::spec::Struct::from_iter(vec![
        Some(Literal::string("foo value")),
        Some(Literal::int(1002)),
        Some(Literal::bool(true)),
        Some(string_list),
        Some(outer_map),
        Some(location_list),
        Some(person),
    ]);

    // (field id, value the accessor for that id must produce)
    let expectations = [
        (1, Datum::string("foo value")),
        (2, Datum::int(1002)),
        (3, Datum::bool(true)),
        (16, Datum::string("Testy McTest")),
        (17, Datum::int(33)),
    ];

    for (field_id, expected) in expectations {
        let accessor = schema.accessor_by_field_id(field_id).unwrap();
        assert_eq!(accessor.get(&test_struct).unwrap(), Some(expected));
    }
}
1034
/// The highest field id must be 17 for the nested fixture schema and 3 for
/// the simple fixture schema.
#[test]
fn test_highest_field_id() {
    let nested = table_schema_nested();
    assert_eq!(17, nested.highest_field_id());

    // Only the schema (first tuple element) of the simple fixture is needed.
    let (simple, ..) = table_schema_simple();
    assert_eq!(3, simple.highest_field_id());
}
1043
/// A schema built without any fields must report 0 as its highest field id.
#[test]
fn test_highest_field_id_no_fields() {
    let empty_schema = Schema::builder().with_schema_id(1).build().unwrap();

    let highest = empty_schema.highest_field_id();

    assert_eq!(0, highest);
}
1049
/// Building a schema in which two fields share id 3 must fail, and the
/// resulting error message must name the duplicated field id.
#[test]
fn test_field_ids_must_be_unique() {
    // "bar" and "baz" deliberately collide on field id 3.
    let fields_with_duplicate_id = vec![
        NestedField::required(5, "foo", Type::Primitive(PrimitiveType::String)).into(),
        NestedField::optional(3, "bar", Type::Primitive(PrimitiveType::Int)).into(),
        NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
    ];

    let build_error = Schema::builder()
        .with_schema_id(1)
        .with_identifier_field_ids(vec![5])
        .with_alias(BiHashMap::from_iter(vec![("bar_alias".to_string(), 3)]))
        .with_fields(fields_with_duplicate_id)
        .build()
        .unwrap_err();

    let message = build_error.message();
    assert!(message.contains("'field.id' 3"));
}
1066
/// Reassigning field ids (starting at 0) on a schema without fields must
/// yield a schema equal to the original one.
#[test]
fn test_reassign_ids_empty_schema() {
    let schema = Schema::builder().with_schema_id(1).build().unwrap();

    // Round-trip a copy through the builder, requesting id reassignment.
    let reassigning_builder = schema.clone().into_builder().with_reassigned_field_ids(0);
    let reassigned_schema = reassigning_builder.build().unwrap();

    assert_eq!(schema, reassigned_schema);
    assert_eq!(schema.highest_field_id(), 0);
}
1080
/// Exercises the schema builder's validation of identifier field ids.
///
/// Building must fail when the identifier points at a map key or value, a
/// list element, a field nested in an optional struct, a float or double
/// field, or an optional field. A required top-level string field is accepted.
#[test]
fn test_identifier_field_ids() {
    // Builds a schema (schema id 1) that holds only `field` and declares
    // `identifier_id` as its single identifier field id.
    let build_with_identifier = |identifier_id: i32, field: NestedField| {
        Schema::builder()
            .with_schema_id(1)
            .with_identifier_field_ids(vec![identifier_id])
            .with_fields(vec![field.into()])
            .build()
    };

    // field in map: neither the key (id 2) nor the value (id 3) may be an identifier
    let map_field = || {
        NestedField::required(
            1,
            "Map",
            Type::Map(MapType::new(
                NestedField::map_key_element(2, Type::Primitive(PrimitiveType::String)).into(),
                NestedField::map_value_element(3, Type::Primitive(PrimitiveType::Boolean), true)
                    .into(),
            )),
        )
    };
    for nested_id in [2, 3] {
        assert!(build_with_identifier(nested_id, map_field()).is_err());
    }

    // field in list
    let list_field = NestedField::required(
        1,
        "List",
        Type::List(ListType::new(
            NestedField::list_element(2, Type::Primitive(PrimitiveType::String), true).into(),
        )),
    );
    assert!(build_with_identifier(2, list_field).is_err());

    // field in optional struct
    let optional_struct_field = NestedField::optional(
        1,
        "Struct",
        Type::Struct(StructType::new(vec![
            NestedField::required(2, "name", Type::Primitive(PrimitiveType::String)).into(),
            NestedField::optional(3, "age", Type::Primitive(PrimitiveType::Int)).into(),
        ])),
    );
    assert!(build_with_identifier(2, optional_struct_field).is_err());

    // float and double
    for (name, floating_type) in [
        ("Float", PrimitiveType::Float),
        ("Double", PrimitiveType::Double),
    ] {
        let floating_field = NestedField::required(1, name, Type::Primitive(floating_type));
        assert!(build_with_identifier(1, floating_field).is_err());
    }

    // optional field: the required variant is accepted, the optional one is rejected
    let required_field =
        NestedField::required(1, "Required", Type::Primitive(PrimitiveType::String));
    assert!(build_with_identifier(1, required_field).is_ok());

    let optional_field =
        NestedField::optional(1, "Optional", Type::Primitive(PrimitiveType::String));
    assert!(build_with_identifier(1, optional_field).is_err());
}
1231}