iceberg/spec/schema/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! This module defines schema in iceberg.
19
20use std::collections::{HashMap, HashSet};
21use std::fmt::{Display, Formatter};
22use std::sync::Arc;
23
24mod utils;
25mod visitor;
26pub use self::visitor::*;
27pub(super) mod _serde;
28mod id_reassigner;
29mod index;
30mod prune_columns;
31use bimap::BiHashMap;
32use itertools::{Itertools, zip_eq};
33use serde::{Deserialize, Serialize};
34
35use self::_serde::SchemaEnum;
36use self::id_reassigner::ReassignFieldIds;
37use self::index::{IndexByName, index_by_id, index_parents};
38pub use self::prune_columns::prune_columns;
39use super::NestedField;
40use crate::error::Result;
41use crate::expr::accessor::StructAccessor;
42use crate::spec::datatypes::{
43    LIST_FIELD_NAME, ListType, MAP_KEY_FIELD_NAME, MAP_VALUE_FIELD_NAME, MapType, NestedFieldRef,
44    PrimitiveType, StructType, Type,
45};
46use crate::{Error, ErrorKind, ensure_data_valid};
47
/// Identifier of a schema.
pub type SchemaId = i32;
/// Shared, reference-counted handle to a [`Schema`].
pub type SchemaRef = Arc<Schema>;
/// Schema id used by [`Schema::builder`] until another id is set explicitly.
pub const DEFAULT_SCHEMA_ID: SchemaId = 0;
/// Delimiter placed between the components of a nested field's full name
/// (for example `person.name`).
pub const SCHEMA_NAME_DELIMITER: &str = ".";
56
/// Defines schema in iceberg.
///
/// Besides the field tree itself, a schema carries several lookup indexes that are
/// computed once by [`SchemaBuilder::build`]. Serialization and deserialization go
/// through `SchemaEnum`.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(try_from = "SchemaEnum", into = "SchemaEnum")]
pub struct Schema {
    /// Top-level struct type holding the schema's fields.
    r#struct: StructType,
    /// Id of this schema.
    schema_id: SchemaId,
    /// Largest field id found in `id_to_field`; `0` when the schema has no fields.
    highest_field_id: i32,
    /// Ids of the identifier fields, validated when the schema is built.
    identifier_field_ids: HashSet<i32>,

    /// Bidirectional mapping between aliases and field ids.
    alias_to_id: BiHashMap<String, i32>,
    /// Field lookup by id, including nested fields.
    id_to_field: HashMap<i32, NestedFieldRef>,

    /// Field id lookup by name.
    name_to_id: HashMap<String, i32>,
    /// Same as `name_to_id`, with every key lowercased.
    lowercase_name_to_id: HashMap<String, i32>,
    /// Name lookup by field id.
    id_to_name: HashMap<i32, String>,

    /// Accessors for primitive fields that are reachable through structs only
    /// (no accessors are built for map or list fields).
    field_id_to_accessor: HashMap<i32, Arc<StructAccessor>>,
}
75
76impl PartialEq for Schema {
77    fn eq(&self, other: &Self) -> bool {
78        self.r#struct == other.r#struct
79            && self.schema_id == other.schema_id
80            && self.identifier_field_ids == other.identifier_field_ids
81    }
82}
83
84impl Eq for Schema {}
85
/// Schema builder.
///
/// Obtained from [`Schema::builder`] or [`Schema::into_builder`]; call
/// [`SchemaBuilder::build`] to validate the inputs and produce a [`Schema`].
#[derive(Debug)]
pub struct SchemaBuilder {
    /// Id the built schema will carry.
    schema_id: i32,
    /// Top-level fields collected so far.
    fields: Vec<NestedFieldRef>,
    /// Bidirectional mapping between aliases and field ids.
    alias_to_id: BiHashMap<String, i32>,
    /// Ids of the identifier fields collected so far.
    identifier_field_ids: HashSet<i32>,
    /// When set, all field ids are reassigned on build, starting from this id.
    reassign_field_ids_from: Option<i32>,
}
95
96impl SchemaBuilder {
97    /// Add fields to schema builder.
98    pub fn with_fields(mut self, fields: impl IntoIterator<Item = NestedFieldRef>) -> Self {
99        self.fields.extend(fields);
100        self
101    }
102
103    /// Reassign all field-ids (including nested) on build.
104    /// Reassignment starts from the field-id specified in `start_from` (inclusive).
105    ///
106    /// All specified aliases and identifier fields will be updated to the new field-ids.
107    pub(crate) fn with_reassigned_field_ids(mut self, start_from: i32) -> Self {
108        self.reassign_field_ids_from = Some(start_from);
109        self
110    }
111
112    /// Set schema id.
113    pub fn with_schema_id(mut self, schema_id: i32) -> Self {
114        self.schema_id = schema_id;
115        self
116    }
117
118    /// Set identifier field ids.
119    pub fn with_identifier_field_ids(mut self, ids: impl IntoIterator<Item = i32>) -> Self {
120        self.identifier_field_ids.extend(ids);
121        self
122    }
123
124    /// Set alias to filed id mapping.
125    pub fn with_alias(mut self, alias_to_id: BiHashMap<String, i32>) -> Self {
126        self.alias_to_id = alias_to_id;
127        self
128    }
129
130    /// Builds the schema.
131    pub fn build(self) -> Result<Schema> {
132        let field_id_to_accessor = self.build_accessors();
133
134        let r#struct = StructType::new(self.fields);
135        let id_to_field = index_by_id(&r#struct)?;
136
137        Self::validate_identifier_ids(
138            &r#struct,
139            &id_to_field,
140            self.identifier_field_ids.iter().copied(),
141        )?;
142
143        let (name_to_id, id_to_name) = {
144            let mut index = IndexByName::default();
145            visit_struct(&r#struct, &mut index)?;
146            index.indexes()
147        };
148
149        let lowercase_name_to_id = name_to_id
150            .iter()
151            .map(|(k, v)| (k.to_lowercase(), *v))
152            .collect();
153
154        let highest_field_id = id_to_field.keys().max().cloned().unwrap_or(0);
155
156        let mut schema = Schema {
157            r#struct,
158            schema_id: self.schema_id,
159            highest_field_id,
160            identifier_field_ids: self.identifier_field_ids,
161            alias_to_id: self.alias_to_id,
162            id_to_field,
163
164            name_to_id,
165            lowercase_name_to_id,
166            id_to_name,
167
168            field_id_to_accessor,
169        };
170
171        if let Some(start_from) = self.reassign_field_ids_from {
172            let mut id_reassigner = ReassignFieldIds::new(start_from);
173            let new_fields = id_reassigner.reassign_field_ids(schema.r#struct.fields().to_vec())?;
174            let new_identifier_field_ids =
175                id_reassigner.apply_to_identifier_fields(schema.identifier_field_ids)?;
176            let new_alias_to_id = id_reassigner.apply_to_aliases(schema.alias_to_id.clone())?;
177
178            schema = Schema::builder()
179                .with_schema_id(schema.schema_id)
180                .with_fields(new_fields)
181                .with_identifier_field_ids(new_identifier_field_ids)
182                .with_alias(new_alias_to_id)
183                .build()?;
184        }
185
186        Ok(schema)
187    }
188
189    fn build_accessors(&self) -> HashMap<i32, Arc<StructAccessor>> {
190        let mut map = HashMap::new();
191
192        for (pos, field) in self.fields.iter().enumerate() {
193            match field.field_type.as_ref() {
194                Type::Primitive(prim_type) => {
195                    // add an accessor for this field
196                    let accessor = Arc::new(StructAccessor::new(pos, prim_type.clone()));
197                    map.insert(field.id, accessor.clone());
198                }
199
200                Type::Struct(nested) => {
201                    // add accessors for nested fields
202                    for (field_id, accessor) in Self::build_accessors_nested(nested.fields()) {
203                        let new_accessor = Arc::new(StructAccessor::wrap(pos, accessor));
204                        map.insert(field_id, new_accessor.clone());
205                    }
206                }
207                _ => {
208                    // Accessors don't get built for Map or List types
209                }
210            }
211        }
212
213        map
214    }
215
216    fn build_accessors_nested(fields: &[NestedFieldRef]) -> Vec<(i32, Box<StructAccessor>)> {
217        let mut results = vec![];
218        for (pos, field) in fields.iter().enumerate() {
219            match field.field_type.as_ref() {
220                Type::Primitive(prim_type) => {
221                    let accessor = Box::new(StructAccessor::new(pos, prim_type.clone()));
222                    results.push((field.id, accessor));
223                }
224                Type::Struct(nested) => {
225                    let nested_accessors = Self::build_accessors_nested(nested.fields());
226
227                    let wrapped_nested_accessors =
228                        nested_accessors.into_iter().map(|(id, accessor)| {
229                            let new_accessor = Box::new(StructAccessor::wrap(pos, accessor));
230                            (id, new_accessor.clone())
231                        });
232
233                    results.extend(wrapped_nested_accessors);
234                }
235                _ => {
236                    // Accessors don't get built for Map or List types
237                }
238            }
239        }
240
241        results
242    }
243
244    /// According to [the spec](https://iceberg.apache.org/spec/#identifier-fields), the identifier fields
245    /// must meet the following requirements:
246    /// - Float, double, and optional fields cannot be used as identifier fields.
247    /// - Identifier fields may be nested in structs but cannot be nested within maps or lists.
248    /// - A nested field cannot be used as an identifier field if it is nested in an optional struct, to avoid null values in identifiers.
249    fn validate_identifier_ids(
250        r#struct: &StructType,
251        id_to_field: &HashMap<i32, NestedFieldRef>,
252        identifier_field_ids: impl Iterator<Item = i32>,
253    ) -> Result<()> {
254        let id_to_parent = index_parents(r#struct)?;
255        for identifier_field_id in identifier_field_ids {
256            let field = id_to_field.get(&identifier_field_id).ok_or_else(|| {
257                Error::new(
258                    ErrorKind::DataInvalid,
259                    format!(
260                        "Cannot add identifier field {identifier_field_id}: field does not exist"
261                    ),
262                )
263            })?;
264            ensure_data_valid!(
265                field.required,
266                "Cannot add identifier field: {} is an optional field",
267                field.name
268            );
269            if let Type::Primitive(p) = field.field_type.as_ref() {
270                ensure_data_valid!(
271                    !matches!(p, PrimitiveType::Double | PrimitiveType::Float),
272                    "Cannot add identifier field {}: cannot be a float or double type",
273                    field.name
274                );
275            } else {
276                return Err(Error::new(
277                    ErrorKind::DataInvalid,
278                    format!(
279                        "Cannot add field {} as an identifier field: not a primitive type field",
280                        field.name
281                    ),
282                ));
283            }
284
285            let mut cur_field_id = identifier_field_id;
286            while let Some(parent) = id_to_parent.get(&cur_field_id) {
287                let parent_field = id_to_field
288                    .get(parent)
289                    .expect("Field id should not disappear.");
290                ensure_data_valid!(
291                    parent_field.field_type.is_struct(),
292                    "Cannot add field {} as an identifier field: must not be nested in {:?}",
293                    field.name,
294                    parent_field
295                );
296                ensure_data_valid!(
297                    parent_field.required,
298                    "Cannot add field {} as an identifier field: must not be nested in an optional field {}",
299                    field.name,
300                    parent_field
301                );
302                cur_field_id = *parent;
303            }
304        }
305
306        Ok(())
307    }
308}
309
310impl Schema {
311    /// Create a schema builder.
312    pub fn builder() -> SchemaBuilder {
313        SchemaBuilder {
314            schema_id: DEFAULT_SCHEMA_ID,
315            fields: vec![],
316            identifier_field_ids: HashSet::default(),
317            alias_to_id: BiHashMap::default(),
318            reassign_field_ids_from: None,
319        }
320    }
321
322    /// Create a new schema builder from a schema.
323    pub fn into_builder(self) -> SchemaBuilder {
324        SchemaBuilder {
325            schema_id: self.schema_id,
326            fields: self.r#struct.fields().to_vec(),
327            alias_to_id: self.alias_to_id,
328            identifier_field_ids: self.identifier_field_ids,
329            reassign_field_ids_from: None,
330        }
331    }
332
333    /// Get field by field id.
334    pub fn field_by_id(&self, field_id: i32) -> Option<&NestedFieldRef> {
335        self.id_to_field.get(&field_id)
336    }
337
338    /// Get field by field name.
339    ///
340    /// Both full name and short name could work here.
341    pub fn field_by_name(&self, field_name: &str) -> Option<&NestedFieldRef> {
342        self.name_to_id
343            .get(field_name)
344            .and_then(|id| self.field_by_id(*id))
345    }
346
347    /// Get field by field name, but in case-insensitive way.
348    ///
349    /// Both full name and short name could work here.
350    pub fn field_by_name_case_insensitive(&self, field_name: &str) -> Option<&NestedFieldRef> {
351        self.lowercase_name_to_id
352            .get(&field_name.to_lowercase())
353            .and_then(|id| self.field_by_id(*id))
354    }
355
356    /// Get field by alias.
357    pub fn field_by_alias(&self, alias: &str) -> Option<&NestedFieldRef> {
358        self.alias_to_id
359            .get_by_left(alias)
360            .and_then(|id| self.field_by_id(*id))
361    }
362
363    /// Returns [`highest_field_id`].
364    #[inline]
365    pub fn highest_field_id(&self) -> i32 {
366        self.highest_field_id
367    }
368
369    /// Returns [`schema_id`].
370    #[inline]
371    pub fn schema_id(&self) -> SchemaId {
372        self.schema_id
373    }
374
375    /// Returns [`r#struct`].
376    #[inline]
377    pub fn as_struct(&self) -> &StructType {
378        &self.r#struct
379    }
380
381    /// Returns [`identifier_field_ids`].
382    #[inline]
383    pub fn identifier_field_ids(&self) -> impl ExactSizeIterator<Item = i32> + '_ {
384        self.identifier_field_ids.iter().copied()
385    }
386
387    /// Get field id by full name.
388    pub fn field_id_by_name(&self, name: &str) -> Option<i32> {
389        self.name_to_id.get(name).copied()
390    }
391
392    /// Get full name by field id.
393    pub fn name_by_field_id(&self, field_id: i32) -> Option<&str> {
394        self.id_to_name.get(&field_id).map(String::as_str)
395    }
396
397    /// Get an accessor for retrieving data in a struct
398    pub fn accessor_by_field_id(&self, field_id: i32) -> Option<Arc<StructAccessor>> {
399        self.field_id_to_accessor.get(&field_id).cloned()
400    }
401
402    /// Check if this schema is identical to another schema semantically - excluding schema id.
403    pub(crate) fn is_same_schema(&self, other: &SchemaRef) -> bool {
404        self.as_struct().eq(other.as_struct())
405            && self.identifier_field_ids().eq(other.identifier_field_ids())
406    }
407
408    /// Change the schema id of this schema.
409    // This is redundant with the `with_schema_id` method on the builder, but useful
410    // as it is infallible in contrast to the builder `build()` method.
411    pub(crate) fn with_schema_id(self, schema_id: SchemaId) -> Self {
412        Self { schema_id, ..self }
413    }
414
415    /// Return A HashMap matching field ids to field names.
416    pub fn field_id_to_name_map(&self) -> &HashMap<i32, String> {
417        &self.id_to_name
418    }
419
420    /// Return a hashmap matching field ids to nested fields.
421    pub fn field_id_to_fields(&self) -> &HashMap<i32, NestedFieldRef> {
422        &self.id_to_field
423    }
424}
425
426impl Display for Schema {
427    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
428        writeln!(f, "table {{")?;
429        for field in self.as_struct().fields() {
430            writeln!(f, "  {field}")?;
431        }
432        writeln!(f, "}}")
433    }
434}
435
436#[cfg(test)]
437mod tests {
438    use std::collections::HashMap;
439
440    use bimap::BiHashMap;
441
442    use crate::spec::datatypes::Type::{List, Map, Primitive, Struct};
443    use crate::spec::datatypes::{
444        ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, StructType, Type,
445    };
446    use crate::spec::schema::Schema;
447    use crate::spec::values::Map as MapValue;
448    use crate::spec::{Datum, Literal};
449
450    #[test]
451    fn test_construct_schema() {
452        let field1: NestedFieldRef =
453            NestedField::required(1, "f1", Type::Primitive(PrimitiveType::Boolean)).into();
454        let field2: NestedFieldRef =
455            NestedField::optional(2, "f2", Type::Primitive(PrimitiveType::Int)).into();
456
457        let schema = Schema::builder()
458            .with_fields(vec![field1.clone()])
459            .with_fields(vec![field2.clone()])
460            .with_schema_id(3)
461            .build()
462            .unwrap();
463
464        assert_eq!(3, schema.schema_id());
465        assert_eq!(2, schema.highest_field_id());
466        assert_eq!(Some(&field1), schema.field_by_id(1));
467        assert_eq!(Some(&field2), schema.field_by_id(2));
468        assert_eq!(None, schema.field_by_id(3));
469    }
470
471    pub fn table_schema_simple<'a>() -> (Schema, &'a str) {
472        let schema = Schema::builder()
473            .with_schema_id(1)
474            .with_identifier_field_ids(vec![2])
475            .with_fields(vec![
476                NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(),
477                NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
478                NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
479            ])
480            .build()
481            .unwrap();
482        let record = r#"{
483            "type":"struct",
484            "schema-id":1,
485            "fields":[
486                {
487                    "id":1,
488                    "name":"foo",
489                    "required":false,
490                    "type":"string"
491                },
492                {
493                    "id":2,
494                    "name":"bar",
495                    "required":true,
496                    "type":"int"
497                },
498                {
499                    "id":3,
500                    "name":"baz",
501                    "required":false,
502                    "type":"boolean"
503                }
504            ],
505            "identifier-field-ids":[2]
506        }"#;
507        (schema, record)
508    }
509
510    pub fn table_schema_nested() -> Schema {
511        Schema::builder()
512            .with_schema_id(1)
513            .with_identifier_field_ids(vec![2])
514            .with_fields(vec![
515                NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(),
516                NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
517                NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
518                NestedField::required(
519                    4,
520                    "qux",
521                    Type::List(ListType {
522                        element_field: NestedField::list_element(
523                            5,
524                            Type::Primitive(PrimitiveType::String),
525                            true,
526                        )
527                        .into(),
528                    }),
529                )
530                .into(),
531                NestedField::required(
532                    6,
533                    "quux",
534                    Type::Map(MapType {
535                        key_field: NestedField::map_key_element(
536                            7,
537                            Type::Primitive(PrimitiveType::String),
538                        )
539                        .into(),
540                        value_field: NestedField::map_value_element(
541                            8,
542                            Type::Map(MapType {
543                                key_field: NestedField::map_key_element(
544                                    9,
545                                    Type::Primitive(PrimitiveType::String),
546                                )
547                                .into(),
548                                value_field: NestedField::map_value_element(
549                                    10,
550                                    Type::Primitive(PrimitiveType::Int),
551                                    true,
552                                )
553                                .into(),
554                            }),
555                            true,
556                        )
557                        .into(),
558                    }),
559                )
560                .into(),
561                NestedField::required(
562                    11,
563                    "location",
564                    Type::List(ListType {
565                        element_field: NestedField::list_element(
566                            12,
567                            Type::Struct(StructType::new(vec![
568                                NestedField::optional(
569                                    13,
570                                    "latitude",
571                                    Type::Primitive(PrimitiveType::Float),
572                                )
573                                .into(),
574                                NestedField::optional(
575                                    14,
576                                    "longitude",
577                                    Type::Primitive(PrimitiveType::Float),
578                                )
579                                .into(),
580                            ])),
581                            true,
582                        )
583                        .into(),
584                    }),
585                )
586                .into(),
587                NestedField::optional(
588                    15,
589                    "person",
590                    Type::Struct(StructType::new(vec![
591                        NestedField::optional(16, "name", Type::Primitive(PrimitiveType::String))
592                            .into(),
593                        NestedField::required(17, "age", Type::Primitive(PrimitiveType::Int))
594                            .into(),
595                    ])),
596                )
597                .into(),
598            ])
599            .build()
600            .unwrap()
601    }
602
603    #[test]
604    fn test_schema_display() {
605        let expected_str = "
606table {
607  1: foo: optional string\x20
608  2: bar: required int\x20
609  3: baz: optional boolean\x20
610}
611";
612
613        assert_eq!(expected_str, format!("\n{}", table_schema_simple().0));
614    }
615
616    #[test]
617    fn test_schema_build_failed_on_duplicate_names() {
618        let ret = Schema::builder()
619            .with_schema_id(1)
620            .with_identifier_field_ids(vec![1])
621            .with_fields(vec![
622                NestedField::required(1, "foo", Primitive(PrimitiveType::String)).into(),
623                NestedField::required(2, "bar", Primitive(PrimitiveType::Int)).into(),
624                NestedField::optional(3, "baz", Primitive(PrimitiveType::Boolean)).into(),
625                NestedField::optional(4, "baz", Primitive(PrimitiveType::Boolean)).into(),
626            ])
627            .build();
628
629        assert!(
630            ret.unwrap_err()
631                .message()
632                .contains("Invalid schema: multiple fields for name baz")
633        );
634    }
635
636    #[test]
637    fn test_schema_into_builder() {
638        let original_schema = table_schema_nested();
639        let builder = original_schema.clone().into_builder();
640        let schema = builder.build().unwrap();
641
642        assert_eq!(original_schema, schema);
643    }
644
645    #[test]
646    fn test_schema_index_by_name() {
647        let expected_name_to_id = HashMap::from(
648            [
649                ("foo", 1),
650                ("bar", 2),
651                ("baz", 3),
652                ("qux", 4),
653                ("qux.element", 5),
654                ("quux", 6),
655                ("quux.key", 7),
656                ("quux.value", 8),
657                ("quux.value.key", 9),
658                ("quux.value.value", 10),
659                ("location", 11),
660                ("location.element", 12),
661                ("location.element.latitude", 13),
662                ("location.element.longitude", 14),
663                ("location.latitude", 13),
664                ("location.longitude", 14),
665                ("person", 15),
666                ("person.name", 16),
667                ("person.age", 17),
668            ]
669            .map(|e| (e.0.to_string(), e.1)),
670        );
671
672        let schema = table_schema_nested();
673        assert_eq!(&expected_name_to_id, &schema.name_to_id);
674    }
675
676    #[test]
677    fn test_schema_index_by_name_case_insensitive() {
678        let expected_name_to_id = HashMap::from(
679            [
680                ("fOo", 1),
681                ("Bar", 2),
682                ("BAz", 3),
683                ("quX", 4),
684                ("quX.ELEment", 5),
685                ("qUUx", 6),
686                ("QUUX.KEY", 7),
687                ("QUUX.Value", 8),
688                ("qUUX.VALUE.Key", 9),
689                ("qUux.VaLue.Value", 10),
690                ("lOCAtION", 11),
691                ("LOCAtioN.ELeMENt", 12),
692                ("LoCATion.element.LATitude", 13),
693                ("locatION.ElemeNT.LONgitude", 14),
694                ("LOCAtiON.LATITUDE", 13),
695                ("LOCATION.LONGITUDE", 14),
696                ("PERSon", 15),
697                ("PERSON.Name", 16),
698                ("peRSON.AGe", 17),
699            ]
700            .map(|e| (e.0.to_string(), e.1)),
701        );
702
703        let schema = table_schema_nested();
704        for (name, id) in expected_name_to_id {
705            assert_eq!(
706                Some(id),
707                schema.field_by_name_case_insensitive(&name).map(|f| f.id)
708            );
709        }
710    }
711
712    #[test]
713    fn test_schema_find_column_name() {
714        let expected_column_name = HashMap::from([
715            (1, "foo"),
716            (2, "bar"),
717            (3, "baz"),
718            (4, "qux"),
719            (5, "qux.element"),
720            (6, "quux"),
721            (7, "quux.key"),
722            (8, "quux.value"),
723            (9, "quux.value.key"),
724            (10, "quux.value.value"),
725            (11, "location"),
726            (12, "location.element"),
727            (13, "location.element.latitude"),
728            (14, "location.element.longitude"),
729        ]);
730
731        let schema = table_schema_nested();
732        for (id, name) in expected_column_name {
733            assert_eq!(
734                Some(name),
735                schema.name_by_field_id(id),
736                "Column name for field id {id} not match."
737            );
738        }
739    }
740
741    #[test]
742    fn test_schema_find_column_name_not_found() {
743        let schema = table_schema_nested();
744
745        assert!(schema.name_by_field_id(99).is_none());
746    }
747
748    #[test]
749    fn test_schema_find_column_name_by_id_simple() {
750        let expected_id_to_name = HashMap::from([(1, "foo"), (2, "bar"), (3, "baz")]);
751
752        let schema = table_schema_simple().0;
753
754        for (id, name) in expected_id_to_name {
755            assert_eq!(
756                Some(name),
757                schema.name_by_field_id(id),
758                "Column name for field id {id} not match."
759            );
760        }
761    }
762
763    #[test]
764    fn test_schema_find_simple() {
765        let schema = table_schema_simple().0;
766
767        assert_eq!(
768            Some(schema.r#struct.fields()[0].clone()),
769            schema.field_by_id(1).cloned()
770        );
771        assert_eq!(
772            Some(schema.r#struct.fields()[1].clone()),
773            schema.field_by_id(2).cloned()
774        );
775        assert_eq!(
776            Some(schema.r#struct.fields()[2].clone()),
777            schema.field_by_id(3).cloned()
778        );
779
780        assert!(schema.field_by_id(4).is_none());
781        assert!(schema.field_by_name("non exist").is_none());
782    }
783
784    #[test]
785    fn test_schema_find_nested() {
786        let expected_id_to_field: HashMap<i32, NestedField> = HashMap::from([
787            (
788                1,
789                NestedField::optional(1, "foo", Primitive(PrimitiveType::String)),
790            ),
791            (
792                2,
793                NestedField::required(2, "bar", Primitive(PrimitiveType::Int)),
794            ),
795            (
796                3,
797                NestedField::optional(3, "baz", Primitive(PrimitiveType::Boolean)),
798            ),
799            (
800                4,
801                NestedField::required(
802                    4,
803                    "qux",
804                    Type::List(ListType {
805                        element_field: NestedField::list_element(
806                            5,
807                            Type::Primitive(PrimitiveType::String),
808                            true,
809                        )
810                        .into(),
811                    }),
812                ),
813            ),
814            (
815                5,
816                NestedField::required(5, "element", Primitive(PrimitiveType::String)),
817            ),
818            (
819                6,
820                NestedField::required(
821                    6,
822                    "quux",
823                    Map(MapType {
824                        key_field: NestedField::map_key_element(
825                            7,
826                            Primitive(PrimitiveType::String),
827                        )
828                        .into(),
829                        value_field: NestedField::map_value_element(
830                            8,
831                            Map(MapType {
832                                key_field: NestedField::map_key_element(
833                                    9,
834                                    Primitive(PrimitiveType::String),
835                                )
836                                .into(),
837                                value_field: NestedField::map_value_element(
838                                    10,
839                                    Primitive(PrimitiveType::Int),
840                                    true,
841                                )
842                                .into(),
843                            }),
844                            true,
845                        )
846                        .into(),
847                    }),
848                ),
849            ),
850            (
851                7,
852                NestedField::required(7, "key", Primitive(PrimitiveType::String)),
853            ),
854            (
855                8,
856                NestedField::required(
857                    8,
858                    "value",
859                    Map(MapType {
860                        key_field: NestedField::map_key_element(
861                            9,
862                            Primitive(PrimitiveType::String),
863                        )
864                        .into(),
865                        value_field: NestedField::map_value_element(
866                            10,
867                            Primitive(PrimitiveType::Int),
868                            true,
869                        )
870                        .into(),
871                    }),
872                ),
873            ),
874            (
875                9,
876                NestedField::required(9, "key", Primitive(PrimitiveType::String)),
877            ),
878            (
879                10,
880                NestedField::required(10, "value", Primitive(PrimitiveType::Int)),
881            ),
882            (
883                11,
884                NestedField::required(
885                    11,
886                    "location",
887                    List(ListType {
888                        element_field: NestedField::list_element(
889                            12,
890                            Struct(StructType::new(vec![
891                                NestedField::optional(
892                                    13,
893                                    "latitude",
894                                    Primitive(PrimitiveType::Float),
895                                )
896                                .into(),
897                                NestedField::optional(
898                                    14,
899                                    "longitude",
900                                    Primitive(PrimitiveType::Float),
901                                )
902                                .into(),
903                            ])),
904                            true,
905                        )
906                        .into(),
907                    }),
908                ),
909            ),
910            (
911                12,
912                NestedField::list_element(
913                    12,
914                    Struct(StructType::new(vec![
915                        NestedField::optional(13, "latitude", Primitive(PrimitiveType::Float))
916                            .into(),
917                        NestedField::optional(14, "longitude", Primitive(PrimitiveType::Float))
918                            .into(),
919                    ])),
920                    true,
921                ),
922            ),
923            (
924                13,
925                NestedField::optional(13, "latitude", Primitive(PrimitiveType::Float)),
926            ),
927            (
928                14,
929                NestedField::optional(14, "longitude", Primitive(PrimitiveType::Float)),
930            ),
931            (
932                15,
933                NestedField::optional(
934                    15,
935                    "person",
936                    Type::Struct(StructType::new(vec![
937                        NestedField::optional(16, "name", Type::Primitive(PrimitiveType::String))
938                            .into(),
939                        NestedField::required(17, "age", Type::Primitive(PrimitiveType::Int))
940                            .into(),
941                    ])),
942                ),
943            ),
944            (
945                16,
946                NestedField::optional(16, "name", Type::Primitive(PrimitiveType::String)),
947            ),
948            (
949                17,
950                NestedField::required(17, "age", Type::Primitive(PrimitiveType::Int)),
951            ),
952        ]);
953
954        let schema = table_schema_nested();
955        for (id, field) in expected_id_to_field {
956            assert_eq!(
957                Some(&field),
958                schema.field_by_id(id).map(|f| f.as_ref()),
959                "Field for {id} not match."
960            );
961        }
962    }
963
964    #[test]
965    fn test_build_accessors() {
966        let schema = table_schema_nested();
967
968        let test_struct = crate::spec::Struct::from_iter(vec![
969            Some(Literal::string("foo value")),
970            Some(Literal::int(1002)),
971            Some(Literal::bool(true)),
972            Some(Literal::List(vec![
973                Some(Literal::string("qux item 1")),
974                Some(Literal::string("qux item 2")),
975            ])),
976            Some(Literal::Map(MapValue::from([(
977                Literal::string("quux key 1"),
978                Some(Literal::Map(MapValue::from([(
979                    Literal::string("quux nested key 1"),
980                    Some(Literal::int(1000)),
981                )]))),
982            )]))),
983            Some(Literal::List(vec![Some(Literal::Struct(
984                crate::spec::Struct::from_iter(vec![
985                    Some(Literal::float(52.509_09)),
986                    Some(Literal::float(-1.885_249)),
987                ]),
988            ))])),
989            Some(Literal::Struct(crate::spec::Struct::from_iter(vec![
990                Some(Literal::string("Testy McTest")),
991                Some(Literal::int(33)),
992            ]))),
993        ]);
994
995        assert_eq!(
996            schema
997                .accessor_by_field_id(1)
998                .unwrap()
999                .get(&test_struct)
1000                .unwrap(),
1001            Some(Datum::string("foo value"))
1002        );
1003        assert_eq!(
1004            schema
1005                .accessor_by_field_id(2)
1006                .unwrap()
1007                .get(&test_struct)
1008                .unwrap(),
1009            Some(Datum::int(1002))
1010        );
1011        assert_eq!(
1012            schema
1013                .accessor_by_field_id(3)
1014                .unwrap()
1015                .get(&test_struct)
1016                .unwrap(),
1017            Some(Datum::bool(true))
1018        );
1019        assert_eq!(
1020            schema
1021                .accessor_by_field_id(16)
1022                .unwrap()
1023                .get(&test_struct)
1024                .unwrap(),
1025            Some(Datum::string("Testy McTest"))
1026        );
1027        assert_eq!(
1028            schema
1029                .accessor_by_field_id(17)
1030                .unwrap()
1031                .get(&test_struct)
1032                .unwrap(),
1033            Some(Datum::int(33))
1034        );
1035    }
1036
1037    #[test]
1038    fn test_highest_field_id() {
1039        let schema = table_schema_nested();
1040        assert_eq!(17, schema.highest_field_id());
1041
1042        let schema = table_schema_simple().0;
1043        assert_eq!(3, schema.highest_field_id());
1044    }
1045
1046    #[test]
1047    fn test_highest_field_id_no_fields() {
1048        let schema = Schema::builder().with_schema_id(1).build().unwrap();
1049        assert_eq!(0, schema.highest_field_id());
1050    }
1051
1052    #[test]
1053    fn test_field_ids_must_be_unique() {
1054        let reassigned_schema = Schema::builder()
1055            .with_schema_id(1)
1056            .with_identifier_field_ids(vec![5])
1057            .with_alias(BiHashMap::from_iter(vec![("bar_alias".to_string(), 3)]))
1058            .with_fields(vec![
1059                NestedField::required(5, "foo", Type::Primitive(PrimitiveType::String)).into(),
1060                NestedField::optional(3, "bar", Type::Primitive(PrimitiveType::Int)).into(),
1061                NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
1062            ])
1063            .build()
1064            .unwrap_err();
1065
1066        assert!(reassigned_schema.message().contains("'field.id' 3"));
1067    }
1068
1069    #[test]
1070    fn test_reassign_ids_empty_schema() {
1071        let schema = Schema::builder().with_schema_id(1).build().unwrap();
1072        let reassigned_schema = schema
1073            .clone()
1074            .into_builder()
1075            .with_reassigned_field_ids(0)
1076            .build()
1077            .unwrap();
1078
1079        assert_eq!(schema, reassigned_schema);
1080        assert_eq!(schema.highest_field_id(), 0);
1081    }
1082
1083    #[test]
1084    fn test_identifier_field_ids() {
1085        // field in map
1086        assert!(
1087            Schema::builder()
1088                .with_schema_id(1)
1089                .with_identifier_field_ids(vec![2])
1090                .with_fields(vec![
1091                    NestedField::required(
1092                        1,
1093                        "Map",
1094                        Type::Map(MapType::new(
1095                            NestedField::map_key_element(2, Type::Primitive(PrimitiveType::String))
1096                                .into(),
1097                            NestedField::map_value_element(
1098                                3,
1099                                Type::Primitive(PrimitiveType::Boolean),
1100                                true,
1101                            )
1102                            .into(),
1103                        )),
1104                    )
1105                    .into()
1106                ])
1107                .build()
1108                .is_err()
1109        );
1110        assert!(
1111            Schema::builder()
1112                .with_schema_id(1)
1113                .with_identifier_field_ids(vec![3])
1114                .with_fields(vec![
1115                    NestedField::required(
1116                        1,
1117                        "Map",
1118                        Type::Map(MapType::new(
1119                            NestedField::map_key_element(2, Type::Primitive(PrimitiveType::String))
1120                                .into(),
1121                            NestedField::map_value_element(
1122                                3,
1123                                Type::Primitive(PrimitiveType::Boolean),
1124                                true,
1125                            )
1126                            .into(),
1127                        )),
1128                    )
1129                    .into()
1130                ])
1131                .build()
1132                .is_err()
1133        );
1134
1135        // field in list
1136        assert!(
1137            Schema::builder()
1138                .with_schema_id(1)
1139                .with_identifier_field_ids(vec![2])
1140                .with_fields(vec![
1141                    NestedField::required(
1142                        1,
1143                        "List",
1144                        Type::List(ListType::new(
1145                            NestedField::list_element(
1146                                2,
1147                                Type::Primitive(PrimitiveType::String),
1148                                true
1149                            )
1150                            .into(),
1151                        )),
1152                    )
1153                    .into()
1154                ])
1155                .build()
1156                .is_err()
1157        );
1158
1159        // field in optional struct
1160        assert!(
1161            Schema::builder()
1162                .with_schema_id(1)
1163                .with_identifier_field_ids(vec![2])
1164                .with_fields(vec![
1165                    NestedField::optional(
1166                        1,
1167                        "Struct",
1168                        Type::Struct(StructType::new(vec![
1169                            NestedField::required(
1170                                2,
1171                                "name",
1172                                Type::Primitive(PrimitiveType::String)
1173                            )
1174                            .into(),
1175                            NestedField::optional(3, "age", Type::Primitive(PrimitiveType::Int))
1176                                .into(),
1177                        ])),
1178                    )
1179                    .into()
1180                ])
1181                .build()
1182                .is_err()
1183        );
1184
1185        // float and double
1186        assert!(
1187            Schema::builder()
1188                .with_schema_id(1)
1189                .with_identifier_field_ids(vec![1])
1190                .with_fields(vec![
1191                    NestedField::required(1, "Float", Type::Primitive(PrimitiveType::Float),)
1192                        .into()
1193                ])
1194                .build()
1195                .is_err()
1196        );
1197        assert!(
1198            Schema::builder()
1199                .with_schema_id(1)
1200                .with_identifier_field_ids(vec![1])
1201                .with_fields(vec![
1202                    NestedField::required(1, "Double", Type::Primitive(PrimitiveType::Double),)
1203                        .into()
1204                ])
1205                .build()
1206                .is_err()
1207        );
1208
1209        // optional field
1210        assert!(
1211            Schema::builder()
1212                .with_schema_id(1)
1213                .with_identifier_field_ids(vec![1])
1214                .with_fields(vec![
1215                    NestedField::required(1, "Required", Type::Primitive(PrimitiveType::String),)
1216                        .into()
1217                ])
1218                .build()
1219                .is_ok()
1220        );
1221        assert!(
1222            Schema::builder()
1223                .with_schema_id(1)
1224                .with_identifier_field_ids(vec![1])
1225                .with_fields(vec![
1226                    NestedField::optional(1, "Optional", Type::Primitive(PrimitiveType::String),)
1227                        .into()
1228                ])
1229                .build()
1230                .is_err()
1231        );
1232    }
1233}