1use std::collections::{HashMap, HashSet};
21use std::fmt::{Display, Formatter};
22use std::sync::Arc;
23
24mod utils;
25mod visitor;
26pub use self::visitor::*;
27pub(super) mod _serde;
28mod id_reassigner;
29mod index;
30mod prune_columns;
31use bimap::BiHashMap;
32use itertools::{Itertools, zip_eq};
33use serde::{Deserialize, Serialize};
34
35use self::_serde::SchemaEnum;
36use self::id_reassigner::ReassignFieldIds;
37use self::index::{IndexByName, index_by_id, index_parents};
38pub use self::prune_columns::prune_columns;
39use super::NestedField;
40use crate::error::Result;
41use crate::expr::accessor::StructAccessor;
42use crate::spec::datatypes::{
43 LIST_FIELD_NAME, ListType, MAP_KEY_FIELD_NAME, MAP_VALUE_FIELD_NAME, MapType, NestedFieldRef,
44 PrimitiveType, StructType, Type,
45};
46use crate::{Error, ErrorKind, ensure_data_valid};
47
/// Numeric identifier of a schema.
pub type SchemaId = i32;
/// Shared, reference-counted handle to a [`Schema`].
pub type SchemaRef = Arc<Schema>;
/// Schema id that [`Schema::builder`] starts with when none is set explicitly.
pub const DEFAULT_SCHEMA_ID: SchemaId = 0;
54
/// A table schema: a root struct type together with lookup indexes derived from it.
///
/// Serialization and deserialization go through `SchemaEnum` (see the `serde` attribute).
/// Instances are produced by [`SchemaBuilder::build`], which fills in every derived index.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(try_from = "SchemaEnum", into = "SchemaEnum")]
pub struct Schema {
    /// Root struct holding the top-level fields.
    r#struct: StructType,
    /// Id of this schema.
    schema_id: SchemaId,
    /// Largest field id found in the schema, or `0` when the schema has no fields.
    highest_field_id: i32,
    /// Ids of the fields that were registered as identifier fields.
    identifier_field_ids: HashSet<i32>,

    /// Two-way mapping between alias names and field ids.
    alias_to_id: BiHashMap<String, i32>,
    /// Field lookup by id.
    id_to_field: HashMap<i32, NestedFieldRef>,

    /// Field id lookup by name.
    name_to_id: HashMap<String, i32>,
    /// Same as `name_to_id`, but with every key lowercased (case-insensitive lookups).
    lowercase_name_to_id: HashMap<String, i32>,
    /// Name lookup by field id.
    id_to_name: HashMap<i32, String>,

    /// Accessors keyed by field id; built for primitive fields that sit at the top level or
    /// inside (arbitrarily nested) struct fields.
    field_id_to_accessor: HashMap<i32, Arc<StructAccessor>>,
}
73
74impl PartialEq for Schema {
75 fn eq(&self, other: &Self) -> bool {
76 self.r#struct == other.r#struct
77 && self.schema_id == other.schema_id
78 && self.identifier_field_ids == other.identifier_field_ids
79 }
80}
81
// Marker impl: opts `Schema` into `Eq` on top of the hand-written `PartialEq` for `Schema`.
impl Eq for Schema {}
83
/// Collects the inputs of a [`Schema`] step by step.
///
/// Obtain one through [`Schema::builder`] or [`Schema::into_builder`] and finish with
/// [`SchemaBuilder::build`].
#[derive(Debug)]
pub struct SchemaBuilder {
    /// Id that the built schema will carry.
    schema_id: i32,
    /// Top-level fields, in the order they were added.
    fields: Vec<NestedFieldRef>,
    /// Two-way mapping between alias names and field ids.
    alias_to_id: BiHashMap<String, i32>,
    /// Ids of the fields to register as identifier fields.
    identifier_field_ids: HashSet<i32>,
    /// When set, `build` reassigns all field ids, starting from this value.
    reassign_field_ids_from: Option<i32>,
}
93
94impl SchemaBuilder {
95 pub fn with_fields(mut self, fields: impl IntoIterator<Item = NestedFieldRef>) -> Self {
97 self.fields.extend(fields);
98 self
99 }
100
101 pub(crate) fn with_reassigned_field_ids(mut self, start_from: u32) -> Self {
106 self.reassign_field_ids_from = Some(start_from.try_into().unwrap_or(i32::MAX));
107 self
108 }
109
110 pub fn with_schema_id(mut self, schema_id: i32) -> Self {
112 self.schema_id = schema_id;
113 self
114 }
115
116 pub fn with_identifier_field_ids(mut self, ids: impl IntoIterator<Item = i32>) -> Self {
118 self.identifier_field_ids.extend(ids);
119 self
120 }
121
122 pub fn with_alias(mut self, alias_to_id: BiHashMap<String, i32>) -> Self {
124 self.alias_to_id = alias_to_id;
125 self
126 }
127
128 pub fn build(self) -> Result<Schema> {
130 let field_id_to_accessor = self.build_accessors();
131
132 let r#struct = StructType::new(self.fields);
133 let id_to_field = index_by_id(&r#struct)?;
134
135 Self::validate_identifier_ids(
136 &r#struct,
137 &id_to_field,
138 self.identifier_field_ids.iter().copied(),
139 )?;
140
141 let (name_to_id, id_to_name) = {
142 let mut index = IndexByName::default();
143 visit_struct(&r#struct, &mut index)?;
144 index.indexes()
145 };
146
147 let lowercase_name_to_id = name_to_id
148 .iter()
149 .map(|(k, v)| (k.to_lowercase(), *v))
150 .collect();
151
152 let highest_field_id = id_to_field.keys().max().cloned().unwrap_or(0);
153
154 let mut schema = Schema {
155 r#struct,
156 schema_id: self.schema_id,
157 highest_field_id,
158 identifier_field_ids: self.identifier_field_ids,
159 alias_to_id: self.alias_to_id,
160 id_to_field,
161
162 name_to_id,
163 lowercase_name_to_id,
164 id_to_name,
165
166 field_id_to_accessor,
167 };
168
169 if let Some(start_from) = self.reassign_field_ids_from {
170 let mut id_reassigner = ReassignFieldIds::new(start_from);
171 let new_fields = id_reassigner.reassign_field_ids(schema.r#struct.fields().to_vec())?;
172 let new_identifier_field_ids =
173 id_reassigner.apply_to_identifier_fields(schema.identifier_field_ids)?;
174 let new_alias_to_id = id_reassigner.apply_to_aliases(schema.alias_to_id.clone())?;
175
176 schema = Schema::builder()
177 .with_schema_id(schema.schema_id)
178 .with_fields(new_fields)
179 .with_identifier_field_ids(new_identifier_field_ids)
180 .with_alias(new_alias_to_id)
181 .build()?;
182 }
183
184 Ok(schema)
185 }
186
187 fn build_accessors(&self) -> HashMap<i32, Arc<StructAccessor>> {
188 let mut map = HashMap::new();
189
190 for (pos, field) in self.fields.iter().enumerate() {
191 match field.field_type.as_ref() {
192 Type::Primitive(prim_type) => {
193 let accessor = Arc::new(StructAccessor::new(pos, prim_type.clone()));
195 map.insert(field.id, accessor.clone());
196 }
197
198 Type::Struct(nested) => {
199 for (field_id, accessor) in Self::build_accessors_nested(nested.fields()) {
201 let new_accessor = Arc::new(StructAccessor::wrap(pos, accessor));
202 map.insert(field_id, new_accessor.clone());
203 }
204 }
205 _ => {
206 }
208 }
209 }
210
211 map
212 }
213
214 fn build_accessors_nested(fields: &[NestedFieldRef]) -> Vec<(i32, Box<StructAccessor>)> {
215 let mut results = vec![];
216 for (pos, field) in fields.iter().enumerate() {
217 match field.field_type.as_ref() {
218 Type::Primitive(prim_type) => {
219 let accessor = Box::new(StructAccessor::new(pos, prim_type.clone()));
220 results.push((field.id, accessor));
221 }
222 Type::Struct(nested) => {
223 let nested_accessors = Self::build_accessors_nested(nested.fields());
224
225 let wrapped_nested_accessors =
226 nested_accessors.into_iter().map(|(id, accessor)| {
227 let new_accessor = Box::new(StructAccessor::wrap(pos, accessor));
228 (id, new_accessor.clone())
229 });
230
231 results.extend(wrapped_nested_accessors);
232 }
233 _ => {
234 }
236 }
237 }
238
239 results
240 }
241
242 fn validate_identifier_ids(
248 r#struct: &StructType,
249 id_to_field: &HashMap<i32, NestedFieldRef>,
250 identifier_field_ids: impl Iterator<Item = i32>,
251 ) -> Result<()> {
252 let id_to_parent = index_parents(r#struct)?;
253 for identifier_field_id in identifier_field_ids {
254 let field = id_to_field.get(&identifier_field_id).ok_or_else(|| {
255 Error::new(
256 ErrorKind::DataInvalid,
257 format!(
258 "Cannot add identifier field {identifier_field_id}: field does not exist"
259 ),
260 )
261 })?;
262 ensure_data_valid!(
263 field.required,
264 "Cannot add identifier field: {} is an optional field",
265 field.name
266 );
267 if let Type::Primitive(p) = field.field_type.as_ref() {
268 ensure_data_valid!(
269 !matches!(p, PrimitiveType::Double | PrimitiveType::Float),
270 "Cannot add identifier field {}: cannot be a float or double type",
271 field.name
272 );
273 } else {
274 return Err(Error::new(
275 ErrorKind::DataInvalid,
276 format!(
277 "Cannot add field {} as an identifier field: not a primitive type field",
278 field.name
279 ),
280 ));
281 }
282
283 let mut cur_field_id = identifier_field_id;
284 while let Some(parent) = id_to_parent.get(&cur_field_id) {
285 let parent_field = id_to_field
286 .get(parent)
287 .expect("Field id should not disappear.");
288 ensure_data_valid!(
289 parent_field.field_type.is_struct(),
290 "Cannot add field {} as an identifier field: must not be nested in {:?}",
291 field.name,
292 parent_field
293 );
294 ensure_data_valid!(
295 parent_field.required,
296 "Cannot add field {} as an identifier field: must not be nested in an optional field {}",
297 field.name,
298 parent_field
299 );
300 cur_field_id = *parent;
301 }
302 }
303
304 Ok(())
305 }
306}
307
308impl Schema {
309 pub fn builder() -> SchemaBuilder {
311 SchemaBuilder {
312 schema_id: DEFAULT_SCHEMA_ID,
313 fields: vec![],
314 identifier_field_ids: HashSet::default(),
315 alias_to_id: BiHashMap::default(),
316 reassign_field_ids_from: None,
317 }
318 }
319
320 pub fn into_builder(self) -> SchemaBuilder {
322 SchemaBuilder {
323 schema_id: self.schema_id,
324 fields: self.r#struct.fields().to_vec(),
325 alias_to_id: self.alias_to_id,
326 identifier_field_ids: self.identifier_field_ids,
327 reassign_field_ids_from: None,
328 }
329 }
330
331 pub fn field_by_id(&self, field_id: i32) -> Option<&NestedFieldRef> {
333 self.id_to_field.get(&field_id)
334 }
335
336 pub fn field_by_name(&self, field_name: &str) -> Option<&NestedFieldRef> {
340 self.name_to_id
341 .get(field_name)
342 .and_then(|id| self.field_by_id(*id))
343 }
344
345 pub fn field_by_name_case_insensitive(&self, field_name: &str) -> Option<&NestedFieldRef> {
349 self.lowercase_name_to_id
350 .get(&field_name.to_lowercase())
351 .and_then(|id| self.field_by_id(*id))
352 }
353
354 pub fn field_by_alias(&self, alias: &str) -> Option<&NestedFieldRef> {
356 self.alias_to_id
357 .get_by_left(alias)
358 .and_then(|id| self.field_by_id(*id))
359 }
360
361 #[inline]
363 pub fn highest_field_id(&self) -> i32 {
364 self.highest_field_id
365 }
366
367 #[inline]
369 pub fn schema_id(&self) -> SchemaId {
370 self.schema_id
371 }
372
373 #[inline]
375 pub fn as_struct(&self) -> &StructType {
376 &self.r#struct
377 }
378
379 #[inline]
381 pub fn identifier_field_ids(&self) -> impl ExactSizeIterator<Item = i32> + '_ {
382 self.identifier_field_ids.iter().copied()
383 }
384
385 pub fn field_id_by_name(&self, name: &str) -> Option<i32> {
387 self.name_to_id.get(name).copied()
388 }
389
390 pub fn name_by_field_id(&self, field_id: i32) -> Option<&str> {
392 self.id_to_name.get(&field_id).map(String::as_str)
393 }
394
395 pub fn accessor_by_field_id(&self, field_id: i32) -> Option<Arc<StructAccessor>> {
397 self.field_id_to_accessor.get(&field_id).cloned()
398 }
399
400 pub(crate) fn is_same_schema(&self, other: &SchemaRef) -> bool {
402 self.as_struct().eq(other.as_struct())
403 && self.identifier_field_ids().eq(other.identifier_field_ids())
404 }
405
406 pub(crate) fn with_schema_id(self, schema_id: SchemaId) -> Self {
410 Self { schema_id, ..self }
411 }
412
413 pub fn field_id_to_name_map(&self) -> &HashMap<i32, String> {
415 &self.id_to_name
416 }
417
418 pub fn field_id_to_fields(&self) -> &HashMap<i32, NestedFieldRef> {
420 &self.id_to_field
421 }
422}
423
424impl Display for Schema {
425 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
426 writeln!(f, "table {{")?;
427 for field in self.as_struct().fields() {
428 writeln!(f, " {field}")?;
429 }
430 writeln!(f, "}}")
431 }
432}
433
434#[cfg(test)]
435mod tests {
436 use std::collections::HashMap;
437
438 use bimap::BiHashMap;
439
440 use crate::spec::datatypes::Type::{List, Map, Primitive, Struct};
441 use crate::spec::datatypes::{
442 ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, StructType, Type,
443 };
444 use crate::spec::schema::Schema;
445 use crate::spec::values::Map as MapValue;
446 use crate::spec::{Datum, Literal};
447
448 #[test]
449 fn test_construct_schema() {
450 let field1: NestedFieldRef =
451 NestedField::required(1, "f1", Type::Primitive(PrimitiveType::Boolean)).into();
452 let field2: NestedFieldRef =
453 NestedField::optional(2, "f2", Type::Primitive(PrimitiveType::Int)).into();
454
455 let schema = Schema::builder()
456 .with_fields(vec![field1.clone()])
457 .with_fields(vec![field2.clone()])
458 .with_schema_id(3)
459 .build()
460 .unwrap();
461
462 assert_eq!(3, schema.schema_id());
463 assert_eq!(2, schema.highest_field_id());
464 assert_eq!(Some(&field1), schema.field_by_id(1));
465 assert_eq!(Some(&field2), schema.field_by_id(2));
466 assert_eq!(None, schema.field_by_id(3));
467 }
468
469 pub fn table_schema_simple<'a>() -> (Schema, &'a str) {
470 let schema = Schema::builder()
471 .with_schema_id(1)
472 .with_identifier_field_ids(vec![2])
473 .with_fields(vec![
474 NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(),
475 NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
476 NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
477 ])
478 .build()
479 .unwrap();
480 let record = r#"{
481 "type":"struct",
482 "schema-id":1,
483 "fields":[
484 {
485 "id":1,
486 "name":"foo",
487 "required":false,
488 "type":"string"
489 },
490 {
491 "id":2,
492 "name":"bar",
493 "required":true,
494 "type":"int"
495 },
496 {
497 "id":3,
498 "name":"baz",
499 "required":false,
500 "type":"boolean"
501 }
502 ],
503 "identifier-field-ids":[2]
504 }"#;
505 (schema, record)
506 }
507
508 pub fn table_schema_nested() -> Schema {
509 Schema::builder()
510 .with_schema_id(1)
511 .with_identifier_field_ids(vec![2])
512 .with_fields(vec![
513 NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(),
514 NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
515 NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
516 NestedField::required(
517 4,
518 "qux",
519 Type::List(ListType {
520 element_field: NestedField::list_element(
521 5,
522 Type::Primitive(PrimitiveType::String),
523 true,
524 )
525 .into(),
526 }),
527 )
528 .into(),
529 NestedField::required(
530 6,
531 "quux",
532 Type::Map(MapType {
533 key_field: NestedField::map_key_element(
534 7,
535 Type::Primitive(PrimitiveType::String),
536 )
537 .into(),
538 value_field: NestedField::map_value_element(
539 8,
540 Type::Map(MapType {
541 key_field: NestedField::map_key_element(
542 9,
543 Type::Primitive(PrimitiveType::String),
544 )
545 .into(),
546 value_field: NestedField::map_value_element(
547 10,
548 Type::Primitive(PrimitiveType::Int),
549 true,
550 )
551 .into(),
552 }),
553 true,
554 )
555 .into(),
556 }),
557 )
558 .into(),
559 NestedField::required(
560 11,
561 "location",
562 Type::List(ListType {
563 element_field: NestedField::list_element(
564 12,
565 Type::Struct(StructType::new(vec![
566 NestedField::optional(
567 13,
568 "latitude",
569 Type::Primitive(PrimitiveType::Float),
570 )
571 .into(),
572 NestedField::optional(
573 14,
574 "longitude",
575 Type::Primitive(PrimitiveType::Float),
576 )
577 .into(),
578 ])),
579 true,
580 )
581 .into(),
582 }),
583 )
584 .into(),
585 NestedField::optional(
586 15,
587 "person",
588 Type::Struct(StructType::new(vec![
589 NestedField::optional(16, "name", Type::Primitive(PrimitiveType::String))
590 .into(),
591 NestedField::required(17, "age", Type::Primitive(PrimitiveType::Int))
592 .into(),
593 ])),
594 )
595 .into(),
596 ])
597 .build()
598 .unwrap()
599 }
600
601 #[test]
602 fn test_schema_display() {
603 let expected_str = "
604table {
605 1: foo: optional string\x20
606 2: bar: required int\x20
607 3: baz: optional boolean\x20
608}
609";
610
611 assert_eq!(expected_str, format!("\n{}", table_schema_simple().0));
612 }
613
614 #[test]
615 fn test_schema_build_failed_on_duplicate_names() {
616 let ret = Schema::builder()
617 .with_schema_id(1)
618 .with_identifier_field_ids(vec![1])
619 .with_fields(vec![
620 NestedField::required(1, "foo", Primitive(PrimitiveType::String)).into(),
621 NestedField::required(2, "bar", Primitive(PrimitiveType::Int)).into(),
622 NestedField::optional(3, "baz", Primitive(PrimitiveType::Boolean)).into(),
623 NestedField::optional(4, "baz", Primitive(PrimitiveType::Boolean)).into(),
624 ])
625 .build();
626
627 assert!(
628 ret.unwrap_err()
629 .message()
630 .contains("Invalid schema: multiple fields for name baz")
631 );
632 }
633
634 #[test]
635 fn test_schema_into_builder() {
636 let original_schema = table_schema_nested();
637 let builder = original_schema.clone().into_builder();
638 let schema = builder.build().unwrap();
639
640 assert_eq!(original_schema, schema);
641 }
642
643 #[test]
644 fn test_schema_index_by_name() {
645 let expected_name_to_id = HashMap::from(
646 [
647 ("foo", 1),
648 ("bar", 2),
649 ("baz", 3),
650 ("qux", 4),
651 ("qux.element", 5),
652 ("quux", 6),
653 ("quux.key", 7),
654 ("quux.value", 8),
655 ("quux.value.key", 9),
656 ("quux.value.value", 10),
657 ("location", 11),
658 ("location.element", 12),
659 ("location.element.latitude", 13),
660 ("location.element.longitude", 14),
661 ("location.latitude", 13),
662 ("location.longitude", 14),
663 ("person", 15),
664 ("person.name", 16),
665 ("person.age", 17),
666 ]
667 .map(|e| (e.0.to_string(), e.1)),
668 );
669
670 let schema = table_schema_nested();
671 assert_eq!(&expected_name_to_id, &schema.name_to_id);
672 }
673
674 #[test]
675 fn test_schema_index_by_name_case_insensitive() {
676 let expected_name_to_id = HashMap::from(
677 [
678 ("fOo", 1),
679 ("Bar", 2),
680 ("BAz", 3),
681 ("quX", 4),
682 ("quX.ELEment", 5),
683 ("qUUx", 6),
684 ("QUUX.KEY", 7),
685 ("QUUX.Value", 8),
686 ("qUUX.VALUE.Key", 9),
687 ("qUux.VaLue.Value", 10),
688 ("lOCAtION", 11),
689 ("LOCAtioN.ELeMENt", 12),
690 ("LoCATion.element.LATitude", 13),
691 ("locatION.ElemeNT.LONgitude", 14),
692 ("LOCAtiON.LATITUDE", 13),
693 ("LOCATION.LONGITUDE", 14),
694 ("PERSon", 15),
695 ("PERSON.Name", 16),
696 ("peRSON.AGe", 17),
697 ]
698 .map(|e| (e.0.to_string(), e.1)),
699 );
700
701 let schema = table_schema_nested();
702 for (name, id) in expected_name_to_id {
703 assert_eq!(
704 Some(id),
705 schema.field_by_name_case_insensitive(&name).map(|f| f.id)
706 );
707 }
708 }
709
710 #[test]
711 fn test_schema_find_column_name() {
712 let expected_column_name = HashMap::from([
713 (1, "foo"),
714 (2, "bar"),
715 (3, "baz"),
716 (4, "qux"),
717 (5, "qux.element"),
718 (6, "quux"),
719 (7, "quux.key"),
720 (8, "quux.value"),
721 (9, "quux.value.key"),
722 (10, "quux.value.value"),
723 (11, "location"),
724 (12, "location.element"),
725 (13, "location.element.latitude"),
726 (14, "location.element.longitude"),
727 ]);
728
729 let schema = table_schema_nested();
730 for (id, name) in expected_column_name {
731 assert_eq!(
732 Some(name),
733 schema.name_by_field_id(id),
734 "Column name for field id {id} not match."
735 );
736 }
737 }
738
739 #[test]
740 fn test_schema_find_column_name_not_found() {
741 let schema = table_schema_nested();
742
743 assert!(schema.name_by_field_id(99).is_none());
744 }
745
746 #[test]
747 fn test_schema_find_column_name_by_id_simple() {
748 let expected_id_to_name = HashMap::from([(1, "foo"), (2, "bar"), (3, "baz")]);
749
750 let schema = table_schema_simple().0;
751
752 for (id, name) in expected_id_to_name {
753 assert_eq!(
754 Some(name),
755 schema.name_by_field_id(id),
756 "Column name for field id {id} not match."
757 );
758 }
759 }
760
761 #[test]
762 fn test_schema_find_simple() {
763 let schema = table_schema_simple().0;
764
765 assert_eq!(
766 Some(schema.r#struct.fields()[0].clone()),
767 schema.field_by_id(1).cloned()
768 );
769 assert_eq!(
770 Some(schema.r#struct.fields()[1].clone()),
771 schema.field_by_id(2).cloned()
772 );
773 assert_eq!(
774 Some(schema.r#struct.fields()[2].clone()),
775 schema.field_by_id(3).cloned()
776 );
777
778 assert!(schema.field_by_id(4).is_none());
779 assert!(schema.field_by_name("non exist").is_none());
780 }
781
782 #[test]
783 fn test_schema_find_nested() {
784 let expected_id_to_field: HashMap<i32, NestedField> = HashMap::from([
785 (
786 1,
787 NestedField::optional(1, "foo", Primitive(PrimitiveType::String)),
788 ),
789 (
790 2,
791 NestedField::required(2, "bar", Primitive(PrimitiveType::Int)),
792 ),
793 (
794 3,
795 NestedField::optional(3, "baz", Primitive(PrimitiveType::Boolean)),
796 ),
797 (
798 4,
799 NestedField::required(
800 4,
801 "qux",
802 Type::List(ListType {
803 element_field: NestedField::list_element(
804 5,
805 Type::Primitive(PrimitiveType::String),
806 true,
807 )
808 .into(),
809 }),
810 ),
811 ),
812 (
813 5,
814 NestedField::required(5, "element", Primitive(PrimitiveType::String)),
815 ),
816 (
817 6,
818 NestedField::required(
819 6,
820 "quux",
821 Map(MapType {
822 key_field: NestedField::map_key_element(
823 7,
824 Primitive(PrimitiveType::String),
825 )
826 .into(),
827 value_field: NestedField::map_value_element(
828 8,
829 Map(MapType {
830 key_field: NestedField::map_key_element(
831 9,
832 Primitive(PrimitiveType::String),
833 )
834 .into(),
835 value_field: NestedField::map_value_element(
836 10,
837 Primitive(PrimitiveType::Int),
838 true,
839 )
840 .into(),
841 }),
842 true,
843 )
844 .into(),
845 }),
846 ),
847 ),
848 (
849 7,
850 NestedField::required(7, "key", Primitive(PrimitiveType::String)),
851 ),
852 (
853 8,
854 NestedField::required(
855 8,
856 "value",
857 Map(MapType {
858 key_field: NestedField::map_key_element(
859 9,
860 Primitive(PrimitiveType::String),
861 )
862 .into(),
863 value_field: NestedField::map_value_element(
864 10,
865 Primitive(PrimitiveType::Int),
866 true,
867 )
868 .into(),
869 }),
870 ),
871 ),
872 (
873 9,
874 NestedField::required(9, "key", Primitive(PrimitiveType::String)),
875 ),
876 (
877 10,
878 NestedField::required(10, "value", Primitive(PrimitiveType::Int)),
879 ),
880 (
881 11,
882 NestedField::required(
883 11,
884 "location",
885 List(ListType {
886 element_field: NestedField::list_element(
887 12,
888 Struct(StructType::new(vec![
889 NestedField::optional(
890 13,
891 "latitude",
892 Primitive(PrimitiveType::Float),
893 )
894 .into(),
895 NestedField::optional(
896 14,
897 "longitude",
898 Primitive(PrimitiveType::Float),
899 )
900 .into(),
901 ])),
902 true,
903 )
904 .into(),
905 }),
906 ),
907 ),
908 (
909 12,
910 NestedField::list_element(
911 12,
912 Struct(StructType::new(vec![
913 NestedField::optional(13, "latitude", Primitive(PrimitiveType::Float))
914 .into(),
915 NestedField::optional(14, "longitude", Primitive(PrimitiveType::Float))
916 .into(),
917 ])),
918 true,
919 ),
920 ),
921 (
922 13,
923 NestedField::optional(13, "latitude", Primitive(PrimitiveType::Float)),
924 ),
925 (
926 14,
927 NestedField::optional(14, "longitude", Primitive(PrimitiveType::Float)),
928 ),
929 (
930 15,
931 NestedField::optional(
932 15,
933 "person",
934 Type::Struct(StructType::new(vec![
935 NestedField::optional(16, "name", Type::Primitive(PrimitiveType::String))
936 .into(),
937 NestedField::required(17, "age", Type::Primitive(PrimitiveType::Int))
938 .into(),
939 ])),
940 ),
941 ),
942 (
943 16,
944 NestedField::optional(16, "name", Type::Primitive(PrimitiveType::String)),
945 ),
946 (
947 17,
948 NestedField::required(17, "age", Type::Primitive(PrimitiveType::Int)),
949 ),
950 ]);
951
952 let schema = table_schema_nested();
953 for (id, field) in expected_id_to_field {
954 assert_eq!(
955 Some(&field),
956 schema.field_by_id(id).map(|f| f.as_ref()),
957 "Field for {id} not match."
958 );
959 }
960 }
961
962 #[test]
963 fn test_build_accessors() {
964 let schema = table_schema_nested();
965
966 let test_struct = crate::spec::Struct::from_iter(vec![
967 Some(Literal::string("foo value")),
968 Some(Literal::int(1002)),
969 Some(Literal::bool(true)),
970 Some(Literal::List(vec![
971 Some(Literal::string("qux item 1")),
972 Some(Literal::string("qux item 2")),
973 ])),
974 Some(Literal::Map(MapValue::from([(
975 Literal::string("quux key 1"),
976 Some(Literal::Map(MapValue::from([(
977 Literal::string("quux nested key 1"),
978 Some(Literal::int(1000)),
979 )]))),
980 )]))),
981 Some(Literal::List(vec![Some(Literal::Struct(
982 crate::spec::Struct::from_iter(vec![
983 Some(Literal::float(52.509_09)),
984 Some(Literal::float(-1.885_249)),
985 ]),
986 ))])),
987 Some(Literal::Struct(crate::spec::Struct::from_iter(vec![
988 Some(Literal::string("Testy McTest")),
989 Some(Literal::int(33)),
990 ]))),
991 ]);
992
993 assert_eq!(
994 schema
995 .accessor_by_field_id(1)
996 .unwrap()
997 .get(&test_struct)
998 .unwrap(),
999 Some(Datum::string("foo value"))
1000 );
1001 assert_eq!(
1002 schema
1003 .accessor_by_field_id(2)
1004 .unwrap()
1005 .get(&test_struct)
1006 .unwrap(),
1007 Some(Datum::int(1002))
1008 );
1009 assert_eq!(
1010 schema
1011 .accessor_by_field_id(3)
1012 .unwrap()
1013 .get(&test_struct)
1014 .unwrap(),
1015 Some(Datum::bool(true))
1016 );
1017 assert_eq!(
1018 schema
1019 .accessor_by_field_id(16)
1020 .unwrap()
1021 .get(&test_struct)
1022 .unwrap(),
1023 Some(Datum::string("Testy McTest"))
1024 );
1025 assert_eq!(
1026 schema
1027 .accessor_by_field_id(17)
1028 .unwrap()
1029 .get(&test_struct)
1030 .unwrap(),
1031 Some(Datum::int(33))
1032 );
1033 }
1034
1035 #[test]
1036 fn test_highest_field_id() {
1037 let schema = table_schema_nested();
1038 assert_eq!(17, schema.highest_field_id());
1039
1040 let schema = table_schema_simple().0;
1041 assert_eq!(3, schema.highest_field_id());
1042 }
1043
1044 #[test]
1045 fn test_highest_field_id_no_fields() {
1046 let schema = Schema::builder().with_schema_id(1).build().unwrap();
1047 assert_eq!(0, schema.highest_field_id());
1048 }
1049
1050 #[test]
1051 fn test_field_ids_must_be_unique() {
1052 let reassigned_schema = Schema::builder()
1053 .with_schema_id(1)
1054 .with_identifier_field_ids(vec![5])
1055 .with_alias(BiHashMap::from_iter(vec![("bar_alias".to_string(), 3)]))
1056 .with_fields(vec![
1057 NestedField::required(5, "foo", Type::Primitive(PrimitiveType::String)).into(),
1058 NestedField::optional(3, "bar", Type::Primitive(PrimitiveType::Int)).into(),
1059 NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
1060 ])
1061 .build()
1062 .unwrap_err();
1063
1064 assert!(reassigned_schema.message().contains("'field.id' 3"));
1065 }
1066
1067 #[test]
1068 fn test_reassign_ids_empty_schema() {
1069 let schema = Schema::builder().with_schema_id(1).build().unwrap();
1070 let reassigned_schema = schema
1071 .clone()
1072 .into_builder()
1073 .with_reassigned_field_ids(0)
1074 .build()
1075 .unwrap();
1076
1077 assert_eq!(schema, reassigned_schema);
1078 assert_eq!(schema.highest_field_id(), 0);
1079 }
1080
1081 #[test]
1082 fn test_identifier_field_ids() {
1083 assert!(
1085 Schema::builder()
1086 .with_schema_id(1)
1087 .with_identifier_field_ids(vec![2])
1088 .with_fields(vec![
1089 NestedField::required(
1090 1,
1091 "Map",
1092 Type::Map(MapType::new(
1093 NestedField::map_key_element(2, Type::Primitive(PrimitiveType::String))
1094 .into(),
1095 NestedField::map_value_element(
1096 3,
1097 Type::Primitive(PrimitiveType::Boolean),
1098 true,
1099 )
1100 .into(),
1101 )),
1102 )
1103 .into()
1104 ])
1105 .build()
1106 .is_err()
1107 );
1108 assert!(
1109 Schema::builder()
1110 .with_schema_id(1)
1111 .with_identifier_field_ids(vec![3])
1112 .with_fields(vec![
1113 NestedField::required(
1114 1,
1115 "Map",
1116 Type::Map(MapType::new(
1117 NestedField::map_key_element(2, Type::Primitive(PrimitiveType::String))
1118 .into(),
1119 NestedField::map_value_element(
1120 3,
1121 Type::Primitive(PrimitiveType::Boolean),
1122 true,
1123 )
1124 .into(),
1125 )),
1126 )
1127 .into()
1128 ])
1129 .build()
1130 .is_err()
1131 );
1132
1133 assert!(
1135 Schema::builder()
1136 .with_schema_id(1)
1137 .with_identifier_field_ids(vec![2])
1138 .with_fields(vec![
1139 NestedField::required(
1140 1,
1141 "List",
1142 Type::List(ListType::new(
1143 NestedField::list_element(
1144 2,
1145 Type::Primitive(PrimitiveType::String),
1146 true
1147 )
1148 .into(),
1149 )),
1150 )
1151 .into()
1152 ])
1153 .build()
1154 .is_err()
1155 );
1156
1157 assert!(
1159 Schema::builder()
1160 .with_schema_id(1)
1161 .with_identifier_field_ids(vec![2])
1162 .with_fields(vec![
1163 NestedField::optional(
1164 1,
1165 "Struct",
1166 Type::Struct(StructType::new(vec![
1167 NestedField::required(
1168 2,
1169 "name",
1170 Type::Primitive(PrimitiveType::String)
1171 )
1172 .into(),
1173 NestedField::optional(3, "age", Type::Primitive(PrimitiveType::Int))
1174 .into(),
1175 ])),
1176 )
1177 .into()
1178 ])
1179 .build()
1180 .is_err()
1181 );
1182
1183 assert!(
1185 Schema::builder()
1186 .with_schema_id(1)
1187 .with_identifier_field_ids(vec![1])
1188 .with_fields(vec![
1189 NestedField::required(1, "Float", Type::Primitive(PrimitiveType::Float),)
1190 .into()
1191 ])
1192 .build()
1193 .is_err()
1194 );
1195 assert!(
1196 Schema::builder()
1197 .with_schema_id(1)
1198 .with_identifier_field_ids(vec![1])
1199 .with_fields(vec![
1200 NestedField::required(1, "Double", Type::Primitive(PrimitiveType::Double),)
1201 .into()
1202 ])
1203 .build()
1204 .is_err()
1205 );
1206
1207 assert!(
1209 Schema::builder()
1210 .with_schema_id(1)
1211 .with_identifier_field_ids(vec![1])
1212 .with_fields(vec![
1213 NestedField::required(1, "Required", Type::Primitive(PrimitiveType::String),)
1214 .into()
1215 ])
1216 .build()
1217 .is_ok()
1218 );
1219 assert!(
1220 Schema::builder()
1221 .with_schema_id(1)
1222 .with_identifier_field_ids(vec![1])
1223 .with_fields(vec![
1224 NestedField::optional(1, "Optional", Type::Primitive(PrimitiveType::String),)
1225 .into()
1226 ])
1227 .build()
1228 .is_err()
1229 );
1230 }
1231}