1use std::collections::{HashMap, HashSet};
21use std::fmt::{Display, Formatter};
22use std::sync::Arc;
23
24mod utils;
25mod visitor;
26pub use self::visitor::*;
27pub(super) mod _serde;
28mod id_reassigner;
29mod index;
30mod prune_columns;
31use bimap::BiHashMap;
32use itertools::{Itertools, zip_eq};
33use serde::{Deserialize, Serialize};
34
35use self::_serde::SchemaEnum;
36use self::id_reassigner::ReassignFieldIds;
37use self::index::{IndexByName, index_by_id, index_parents};
38pub use self::prune_columns::prune_columns;
39use super::NestedField;
40use crate::error::Result;
41use crate::expr::accessor::StructAccessor;
42use crate::spec::datatypes::{
43 LIST_FIELD_NAME, ListType, MAP_KEY_FIELD_NAME, MAP_VALUE_FIELD_NAME, MapType, NestedFieldRef,
44 PrimitiveType, StructType, Type,
45};
46use crate::{Error, ErrorKind, ensure_data_valid};
47
/// Type alias for a schema id.
pub type SchemaId = i32;
/// Shared, reference-counted handle to a [`Schema`].
pub type SchemaRef = Arc<Schema>;
/// Schema id used by `Schema::builder()` when none is set explicitly.
pub const DEFAULT_SCHEMA_ID: SchemaId = 0;
/// Delimiter for schema field names.
// NOTE(review): presumably the separator joining nested name components
// (e.g. `person.name`); its use site is outside this section — confirm.
pub const SCHEMA_NAME_DELIMITER: &str = ".";
56
/// An Iceberg table schema: a top-level struct type plus the lookup indexes
/// derived from it.
///
/// Serialization and deserialization are routed through `SchemaEnum` via the
/// serde `try_from` / `into` attributes below.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(try_from = "SchemaEnum", into = "SchemaEnum")]
pub struct Schema {
    /// Top-level struct type holding the schema's fields.
    r#struct: StructType,
    /// Id of this schema.
    schema_id: SchemaId,
    /// Largest key of `id_to_field`, or `0` when the schema has no fields.
    highest_field_id: i32,
    /// Ids of the identifier fields; validated when the schema is built.
    identifier_field_ids: HashSet<i32>,

    /// Bidirectional mapping between aliases and field ids.
    alias_to_id: BiHashMap<String, i32>,
    /// Field lookup by id, produced by `index_by_id`.
    id_to_field: HashMap<i32, NestedFieldRef>,

    /// Field id lookup by name, produced by `IndexByName`.
    name_to_id: HashMap<String, i32>,
    /// Same entries as `name_to_id` with the keys lower-cased.
    lowercase_name_to_id: HashMap<String, i32>,
    /// Name lookup by field id, produced by `IndexByName`.
    id_to_name: HashMap<i32, String>,

    /// Accessor lookup by field id; built only for primitive fields that are
    /// top-level or nested inside structs.
    field_id_to_accessor: HashMap<i32, Arc<StructAccessor>>,
}
75
76impl PartialEq for Schema {
77 fn eq(&self, other: &Self) -> bool {
78 self.r#struct == other.r#struct
79 && self.schema_id == other.schema_id
80 && self.identifier_field_ids == other.identifier_field_ids
81 }
82}
83
84impl Eq for Schema {}
85
/// Schema builder; obtained from `Schema::builder()` or `Schema::into_builder()`.
#[derive(Debug)]
pub struct SchemaBuilder {
    /// Id the built schema will carry.
    schema_id: i32,
    /// Top-level fields collected so far.
    fields: Vec<NestedFieldRef>,
    /// Bidirectional mapping between aliases and field ids.
    alias_to_id: BiHashMap<String, i32>,
    /// Ids of the identifier fields; validated in `build`.
    identifier_field_ids: HashSet<i32>,
    /// When `Some`, `build` reassigns all field ids using this starting value.
    reassign_field_ids_from: Option<i32>,
}
95
96impl SchemaBuilder {
97 pub fn with_fields(mut self, fields: impl IntoIterator<Item = NestedFieldRef>) -> Self {
99 self.fields.extend(fields);
100 self
101 }
102
103 pub(crate) fn with_reassigned_field_ids(mut self, start_from: i32) -> Self {
108 self.reassign_field_ids_from = Some(start_from);
109 self
110 }
111
112 pub fn with_schema_id(mut self, schema_id: i32) -> Self {
114 self.schema_id = schema_id;
115 self
116 }
117
118 pub fn with_identifier_field_ids(mut self, ids: impl IntoIterator<Item = i32>) -> Self {
120 self.identifier_field_ids.extend(ids);
121 self
122 }
123
124 pub fn with_alias(mut self, alias_to_id: BiHashMap<String, i32>) -> Self {
126 self.alias_to_id = alias_to_id;
127 self
128 }
129
130 pub fn build(self) -> Result<Schema> {
132 let field_id_to_accessor = self.build_accessors();
133
134 let r#struct = StructType::new(self.fields);
135 let id_to_field = index_by_id(&r#struct)?;
136
137 Self::validate_identifier_ids(
138 &r#struct,
139 &id_to_field,
140 self.identifier_field_ids.iter().copied(),
141 )?;
142
143 let (name_to_id, id_to_name) = {
144 let mut index = IndexByName::default();
145 visit_struct(&r#struct, &mut index)?;
146 index.indexes()
147 };
148
149 let lowercase_name_to_id = name_to_id
150 .iter()
151 .map(|(k, v)| (k.to_lowercase(), *v))
152 .collect();
153
154 let highest_field_id = id_to_field.keys().max().cloned().unwrap_or(0);
155
156 let mut schema = Schema {
157 r#struct,
158 schema_id: self.schema_id,
159 highest_field_id,
160 identifier_field_ids: self.identifier_field_ids,
161 alias_to_id: self.alias_to_id,
162 id_to_field,
163
164 name_to_id,
165 lowercase_name_to_id,
166 id_to_name,
167
168 field_id_to_accessor,
169 };
170
171 if let Some(start_from) = self.reassign_field_ids_from {
172 let mut id_reassigner = ReassignFieldIds::new(start_from);
173 let new_fields = id_reassigner.reassign_field_ids(schema.r#struct.fields().to_vec())?;
174 let new_identifier_field_ids =
175 id_reassigner.apply_to_identifier_fields(schema.identifier_field_ids)?;
176 let new_alias_to_id = id_reassigner.apply_to_aliases(schema.alias_to_id.clone())?;
177
178 schema = Schema::builder()
179 .with_schema_id(schema.schema_id)
180 .with_fields(new_fields)
181 .with_identifier_field_ids(new_identifier_field_ids)
182 .with_alias(new_alias_to_id)
183 .build()?;
184 }
185
186 Ok(schema)
187 }
188
189 fn build_accessors(&self) -> HashMap<i32, Arc<StructAccessor>> {
190 let mut map = HashMap::new();
191
192 for (pos, field) in self.fields.iter().enumerate() {
193 match field.field_type.as_ref() {
194 Type::Primitive(prim_type) => {
195 let accessor = Arc::new(StructAccessor::new(pos, prim_type.clone()));
197 map.insert(field.id, accessor.clone());
198 }
199
200 Type::Struct(nested) => {
201 for (field_id, accessor) in Self::build_accessors_nested(nested.fields()) {
203 let new_accessor = Arc::new(StructAccessor::wrap(pos, accessor));
204 map.insert(field_id, new_accessor.clone());
205 }
206 }
207 _ => {
208 }
210 }
211 }
212
213 map
214 }
215
216 fn build_accessors_nested(fields: &[NestedFieldRef]) -> Vec<(i32, Box<StructAccessor>)> {
217 let mut results = vec![];
218 for (pos, field) in fields.iter().enumerate() {
219 match field.field_type.as_ref() {
220 Type::Primitive(prim_type) => {
221 let accessor = Box::new(StructAccessor::new(pos, prim_type.clone()));
222 results.push((field.id, accessor));
223 }
224 Type::Struct(nested) => {
225 let nested_accessors = Self::build_accessors_nested(nested.fields());
226
227 let wrapped_nested_accessors =
228 nested_accessors.into_iter().map(|(id, accessor)| {
229 let new_accessor = Box::new(StructAccessor::wrap(pos, accessor));
230 (id, new_accessor.clone())
231 });
232
233 results.extend(wrapped_nested_accessors);
234 }
235 _ => {
236 }
238 }
239 }
240
241 results
242 }
243
244 fn validate_identifier_ids(
250 r#struct: &StructType,
251 id_to_field: &HashMap<i32, NestedFieldRef>,
252 identifier_field_ids: impl Iterator<Item = i32>,
253 ) -> Result<()> {
254 let id_to_parent = index_parents(r#struct)?;
255 for identifier_field_id in identifier_field_ids {
256 let field = id_to_field.get(&identifier_field_id).ok_or_else(|| {
257 Error::new(
258 ErrorKind::DataInvalid,
259 format!(
260 "Cannot add identifier field {identifier_field_id}: field does not exist"
261 ),
262 )
263 })?;
264 ensure_data_valid!(
265 field.required,
266 "Cannot add identifier field: {} is an optional field",
267 field.name
268 );
269 if let Type::Primitive(p) = field.field_type.as_ref() {
270 ensure_data_valid!(
271 !matches!(p, PrimitiveType::Double | PrimitiveType::Float),
272 "Cannot add identifier field {}: cannot be a float or double type",
273 field.name
274 );
275 } else {
276 return Err(Error::new(
277 ErrorKind::DataInvalid,
278 format!(
279 "Cannot add field {} as an identifier field: not a primitive type field",
280 field.name
281 ),
282 ));
283 }
284
285 let mut cur_field_id = identifier_field_id;
286 while let Some(parent) = id_to_parent.get(&cur_field_id) {
287 let parent_field = id_to_field
288 .get(parent)
289 .expect("Field id should not disappear.");
290 ensure_data_valid!(
291 parent_field.field_type.is_struct(),
292 "Cannot add field {} as an identifier field: must not be nested in {:?}",
293 field.name,
294 parent_field
295 );
296 ensure_data_valid!(
297 parent_field.required,
298 "Cannot add field {} as an identifier field: must not be nested in an optional field {}",
299 field.name,
300 parent_field
301 );
302 cur_field_id = *parent;
303 }
304 }
305
306 Ok(())
307 }
308}
309
310impl Schema {
311 pub fn builder() -> SchemaBuilder {
313 SchemaBuilder {
314 schema_id: DEFAULT_SCHEMA_ID,
315 fields: vec![],
316 identifier_field_ids: HashSet::default(),
317 alias_to_id: BiHashMap::default(),
318 reassign_field_ids_from: None,
319 }
320 }
321
322 pub fn into_builder(self) -> SchemaBuilder {
324 SchemaBuilder {
325 schema_id: self.schema_id,
326 fields: self.r#struct.fields().to_vec(),
327 alias_to_id: self.alias_to_id,
328 identifier_field_ids: self.identifier_field_ids,
329 reassign_field_ids_from: None,
330 }
331 }
332
333 pub fn field_by_id(&self, field_id: i32) -> Option<&NestedFieldRef> {
335 self.id_to_field.get(&field_id)
336 }
337
338 pub fn field_by_name(&self, field_name: &str) -> Option<&NestedFieldRef> {
342 self.name_to_id
343 .get(field_name)
344 .and_then(|id| self.field_by_id(*id))
345 }
346
347 pub fn field_by_name_case_insensitive(&self, field_name: &str) -> Option<&NestedFieldRef> {
351 self.lowercase_name_to_id
352 .get(&field_name.to_lowercase())
353 .and_then(|id| self.field_by_id(*id))
354 }
355
356 pub fn field_by_alias(&self, alias: &str) -> Option<&NestedFieldRef> {
358 self.alias_to_id
359 .get_by_left(alias)
360 .and_then(|id| self.field_by_id(*id))
361 }
362
363 #[inline]
365 pub fn highest_field_id(&self) -> i32 {
366 self.highest_field_id
367 }
368
369 #[inline]
371 pub fn schema_id(&self) -> SchemaId {
372 self.schema_id
373 }
374
375 #[inline]
377 pub fn as_struct(&self) -> &StructType {
378 &self.r#struct
379 }
380
381 #[inline]
383 pub fn identifier_field_ids(&self) -> impl ExactSizeIterator<Item = i32> + '_ {
384 self.identifier_field_ids.iter().copied()
385 }
386
387 pub fn field_id_by_name(&self, name: &str) -> Option<i32> {
389 self.name_to_id.get(name).copied()
390 }
391
392 pub fn name_by_field_id(&self, field_id: i32) -> Option<&str> {
394 self.id_to_name.get(&field_id).map(String::as_str)
395 }
396
397 pub fn accessor_by_field_id(&self, field_id: i32) -> Option<Arc<StructAccessor>> {
399 self.field_id_to_accessor.get(&field_id).cloned()
400 }
401
402 pub(crate) fn is_same_schema(&self, other: &SchemaRef) -> bool {
404 self.as_struct().eq(other.as_struct())
405 && self.identifier_field_ids().eq(other.identifier_field_ids())
406 }
407
408 pub(crate) fn with_schema_id(self, schema_id: SchemaId) -> Self {
412 Self { schema_id, ..self }
413 }
414
415 pub fn field_id_to_name_map(&self) -> &HashMap<i32, String> {
417 &self.id_to_name
418 }
419
420 pub fn field_id_to_fields(&self) -> &HashMap<i32, NestedFieldRef> {
422 &self.id_to_field
423 }
424}
425
426impl Display for Schema {
427 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
428 writeln!(f, "table {{")?;
429 for field in self.as_struct().fields() {
430 writeln!(f, " {field}")?;
431 }
432 writeln!(f, "}}")
433 }
434}
435
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use bimap::BiHashMap;

    use crate::spec::datatypes::Type::{List, Map, Primitive, Struct};
    use crate::spec::datatypes::{
        ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, StructType, Type,
    };
    use crate::spec::schema::Schema;
    use crate::spec::values::Map as MapValue;
    use crate::spec::{Datum, Literal};

    // Fields added through separate `with_fields` calls accumulate, and the
    // schema id, highest field id and id lookups reflect them.
    #[test]
    fn test_construct_schema() {
        let field1: NestedFieldRef =
            NestedField::required(1, "f1", Type::Primitive(PrimitiveType::Boolean)).into();
        let field2: NestedFieldRef =
            NestedField::optional(2, "f2", Type::Primitive(PrimitiveType::Int)).into();

        let schema = Schema::builder()
            .with_fields(vec![field1.clone()])
            .with_fields(vec![field2.clone()])
            .with_schema_id(3)
            .build()
            .unwrap();

        assert_eq!(3, schema.schema_id());
        assert_eq!(2, schema.highest_field_id());
        assert_eq!(Some(&field1), schema.field_by_id(1));
        assert_eq!(Some(&field2), schema.field_by_id(2));
        assert_eq!(None, schema.field_by_id(3));
    }

    /// Fixture: a flat three-column schema (`foo`, `bar`, `baz`) with `bar` as
    /// the identifier field, paired with a JSON document describing the same
    /// schema.
    pub fn table_schema_simple<'a>() -> (Schema, &'a str) {
        let schema = Schema::builder()
            .with_schema_id(1)
            .with_identifier_field_ids(vec![2])
            .with_fields(vec![
                NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(),
                NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
                NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
            ])
            .build()
            .unwrap();
        let record = r#"{
 "type":"struct",
 "schema-id":1,
 "fields":[
 {
 "id":1,
 "name":"foo",
 "required":false,
 "type":"string"
 },
 {
 "id":2,
 "name":"bar",
 "required":true,
 "type":"int"
 },
 {
 "id":3,
 "name":"baz",
 "required":false,
 "type":"boolean"
 }
 ],
 "identifier-field-ids":[2]
 }"#;
        (schema, record)
    }

    /// Fixture: a schema with field ids 1-17 covering primitives, a list of
    /// strings, a map of maps, a list of structs and an optional struct.
    pub fn table_schema_nested() -> Schema {
        Schema::builder()
            .with_schema_id(1)
            .with_identifier_field_ids(vec![2])
            .with_fields(vec![
                NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(),
                NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
                NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
                NestedField::required(
                    4,
                    "qux",
                    Type::List(ListType {
                        element_field: NestedField::list_element(
                            5,
                            Type::Primitive(PrimitiveType::String),
                            true,
                        )
                        .into(),
                    }),
                )
                .into(),
                NestedField::required(
                    6,
                    "quux",
                    Type::Map(MapType {
                        key_field: NestedField::map_key_element(
                            7,
                            Type::Primitive(PrimitiveType::String),
                        )
                        .into(),
                        value_field: NestedField::map_value_element(
                            8,
                            Type::Map(MapType {
                                key_field: NestedField::map_key_element(
                                    9,
                                    Type::Primitive(PrimitiveType::String),
                                )
                                .into(),
                                value_field: NestedField::map_value_element(
                                    10,
                                    Type::Primitive(PrimitiveType::Int),
                                    true,
                                )
                                .into(),
                            }),
                            true,
                        )
                        .into(),
                    }),
                )
                .into(),
                NestedField::required(
                    11,
                    "location",
                    Type::List(ListType {
                        element_field: NestedField::list_element(
                            12,
                            Type::Struct(StructType::new(vec![
                                NestedField::optional(
                                    13,
                                    "latitude",
                                    Type::Primitive(PrimitiveType::Float),
                                )
                                .into(),
                                NestedField::optional(
                                    14,
                                    "longitude",
                                    Type::Primitive(PrimitiveType::Float),
                                )
                                .into(),
                            ])),
                            true,
                        )
                        .into(),
                    }),
                )
                .into(),
                NestedField::optional(
                    15,
                    "person",
                    Type::Struct(StructType::new(vec![
                        NestedField::optional(16, "name", Type::Primitive(PrimitiveType::String))
                            .into(),
                        NestedField::required(17, "age", Type::Primitive(PrimitiveType::Int))
                            .into(),
                    ])),
                )
                .into(),
            ])
            .build()
            .unwrap()
    }

    // `Display` output of the simple schema. `\x20` spells out the trailing
    // space at the end of each field line.
    #[test]
    fn test_schema_display() {
        let expected_str = "
table {
 1: foo: optional string\x20
 2: bar: required int\x20
 3: baz: optional boolean\x20
}
";

        assert_eq!(expected_str, format!("\n{}", table_schema_simple().0));
    }

    // Two fields sharing the name `baz` must make `build` fail.
    #[test]
    fn test_schema_build_failed_on_duplicate_names() {
        let ret = Schema::builder()
            .with_schema_id(1)
            .with_identifier_field_ids(vec![1])
            .with_fields(vec![
                NestedField::required(1, "foo", Primitive(PrimitiveType::String)).into(),
                NestedField::required(2, "bar", Primitive(PrimitiveType::Int)).into(),
                NestedField::optional(3, "baz", Primitive(PrimitiveType::Boolean)).into(),
                NestedField::optional(4, "baz", Primitive(PrimitiveType::Boolean)).into(),
            ])
            .build();

        assert!(
            ret.unwrap_err()
                .message()
                .contains("Invalid schema: multiple fields for name baz")
        );
    }

    // Round trip: schema -> builder -> schema yields an equal schema.
    #[test]
    fn test_schema_into_builder() {
        let original_schema = table_schema_nested();
        let builder = original_schema.clone().into_builder();
        let schema = builder.build().unwrap();

        assert_eq!(original_schema, schema);
    }

    // The name index holds full dotted names; fields inside a list-of-struct
    // element are additionally reachable without the `element` component.
    #[test]
    fn test_schema_index_by_name() {
        let expected_name_to_id = HashMap::from(
            [
                ("foo", 1),
                ("bar", 2),
                ("baz", 3),
                ("qux", 4),
                ("qux.element", 5),
                ("quux", 6),
                ("quux.key", 7),
                ("quux.value", 8),
                ("quux.value.key", 9),
                ("quux.value.value", 10),
                ("location", 11),
                ("location.element", 12),
                ("location.element.latitude", 13),
                ("location.element.longitude", 14),
                ("location.latitude", 13),
                ("location.longitude", 14),
                ("person", 15),
                ("person.name", 16),
                ("person.age", 17),
            ]
            .map(|e| (e.0.to_string(), e.1)),
        );

        let schema = table_schema_nested();
        assert_eq!(&expected_name_to_id, &schema.name_to_id);
    }

    // Same names as above in mixed case; the case-insensitive lookup must
    // resolve every one of them.
    #[test]
    fn test_schema_index_by_name_case_insensitive() {
        let expected_name_to_id = HashMap::from(
            [
                ("fOo", 1),
                ("Bar", 2),
                ("BAz", 3),
                ("quX", 4),
                ("quX.ELEment", 5),
                ("qUUx", 6),
                ("QUUX.KEY", 7),
                ("QUUX.Value", 8),
                ("qUUX.VALUE.Key", 9),
                ("qUux.VaLue.Value", 10),
                ("lOCAtION", 11),
                ("LOCAtioN.ELeMENt", 12),
                ("LoCATion.element.LATitude", 13),
                ("locatION.ElemeNT.LONgitude", 14),
                ("LOCAtiON.LATITUDE", 13),
                ("LOCATION.LONGITUDE", 14),
                ("PERSon", 15),
                ("PERSON.Name", 16),
                ("peRSON.AGe", 17),
            ]
            .map(|e| (e.0.to_string(), e.1)),
        );

        let schema = table_schema_nested();
        for (name, id) in expected_name_to_id {
            assert_eq!(
                Some(id),
                schema.field_by_name_case_insensitive(&name).map(|f| f.id)
            );
        }
    }

    // `name_by_field_id` returns the full dotted name for each id checked.
    #[test]
    fn test_schema_find_column_name() {
        let expected_column_name = HashMap::from([
            (1, "foo"),
            (2, "bar"),
            (3, "baz"),
            (4, "qux"),
            (5, "qux.element"),
            (6, "quux"),
            (7, "quux.key"),
            (8, "quux.value"),
            (9, "quux.value.key"),
            (10, "quux.value.value"),
            (11, "location"),
            (12, "location.element"),
            (13, "location.element.latitude"),
            (14, "location.element.longitude"),
        ]);

        let schema = table_schema_nested();
        for (id, name) in expected_column_name {
            assert_eq!(
                Some(name),
                schema.name_by_field_id(id),
                "Column name for field id {id} not match."
            );
        }
    }

    // An id that is not in the schema has no name.
    #[test]
    fn test_schema_find_column_name_not_found() {
        let schema = table_schema_nested();

        assert!(schema.name_by_field_id(99).is_none());
    }

    // Id-to-name lookup on the flat schema.
    #[test]
    fn test_schema_find_column_name_by_id_simple() {
        let expected_id_to_name = HashMap::from([(1, "foo"), (2, "bar"), (3, "baz")]);

        let schema = table_schema_simple().0;

        for (id, name) in expected_id_to_name {
            assert_eq!(
                Some(name),
                schema.name_by_field_id(id),
                "Column name for field id {id} not match."
            );
        }
    }

    // Id lookups on the flat schema return the fields in declaration order;
    // an unknown id and an unknown name both miss.
    #[test]
    fn test_schema_find_simple() {
        let schema = table_schema_simple().0;

        assert_eq!(
            Some(schema.r#struct.fields()[0].clone()),
            schema.field_by_id(1).cloned()
        );
        assert_eq!(
            Some(schema.r#struct.fields()[1].clone()),
            schema.field_by_id(2).cloned()
        );
        assert_eq!(
            Some(schema.r#struct.fields()[2].clone()),
            schema.field_by_id(3).cloned()
        );

        assert!(schema.field_by_id(4).is_none());
        assert!(schema.field_by_name("non exist").is_none());
    }

    // Every id of the nested schema (1-17) resolves to the expected field,
    // including list elements and map keys/values.
    #[test]
    fn test_schema_find_nested() {
        let expected_id_to_field: HashMap<i32, NestedField> = HashMap::from([
            (
                1,
                NestedField::optional(1, "foo", Primitive(PrimitiveType::String)),
            ),
            (
                2,
                NestedField::required(2, "bar", Primitive(PrimitiveType::Int)),
            ),
            (
                3,
                NestedField::optional(3, "baz", Primitive(PrimitiveType::Boolean)),
            ),
            (
                4,
                NestedField::required(
                    4,
                    "qux",
                    Type::List(ListType {
                        element_field: NestedField::list_element(
                            5,
                            Type::Primitive(PrimitiveType::String),
                            true,
                        )
                        .into(),
                    }),
                ),
            ),
            (
                5,
                NestedField::required(5, "element", Primitive(PrimitiveType::String)),
            ),
            (
                6,
                NestedField::required(
                    6,
                    "quux",
                    Map(MapType {
                        key_field: NestedField::map_key_element(
                            7,
                            Primitive(PrimitiveType::String),
                        )
                        .into(),
                        value_field: NestedField::map_value_element(
                            8,
                            Map(MapType {
                                key_field: NestedField::map_key_element(
                                    9,
                                    Primitive(PrimitiveType::String),
                                )
                                .into(),
                                value_field: NestedField::map_value_element(
                                    10,
                                    Primitive(PrimitiveType::Int),
                                    true,
                                )
                                .into(),
                            }),
                            true,
                        )
                        .into(),
                    }),
                ),
            ),
            (
                7,
                NestedField::required(7, "key", Primitive(PrimitiveType::String)),
            ),
            (
                8,
                NestedField::required(
                    8,
                    "value",
                    Map(MapType {
                        key_field: NestedField::map_key_element(
                            9,
                            Primitive(PrimitiveType::String),
                        )
                        .into(),
                        value_field: NestedField::map_value_element(
                            10,
                            Primitive(PrimitiveType::Int),
                            true,
                        )
                        .into(),
                    }),
                ),
            ),
            (
                9,
                NestedField::required(9, "key", Primitive(PrimitiveType::String)),
            ),
            (
                10,
                NestedField::required(10, "value", Primitive(PrimitiveType::Int)),
            ),
            (
                11,
                NestedField::required(
                    11,
                    "location",
                    List(ListType {
                        element_field: NestedField::list_element(
                            12,
                            Struct(StructType::new(vec![
                                NestedField::optional(
                                    13,
                                    "latitude",
                                    Primitive(PrimitiveType::Float),
                                )
                                .into(),
                                NestedField::optional(
                                    14,
                                    "longitude",
                                    Primitive(PrimitiveType::Float),
                                )
                                .into(),
                            ])),
                            true,
                        )
                        .into(),
                    }),
                ),
            ),
            (
                12,
                NestedField::list_element(
                    12,
                    Struct(StructType::new(vec![
                        NestedField::optional(13, "latitude", Primitive(PrimitiveType::Float))
                            .into(),
                        NestedField::optional(14, "longitude", Primitive(PrimitiveType::Float))
                            .into(),
                    ])),
                    true,
                ),
            ),
            (
                13,
                NestedField::optional(13, "latitude", Primitive(PrimitiveType::Float)),
            ),
            (
                14,
                NestedField::optional(14, "longitude", Primitive(PrimitiveType::Float)),
            ),
            (
                15,
                NestedField::optional(
                    15,
                    "person",
                    Type::Struct(StructType::new(vec![
                        NestedField::optional(16, "name", Type::Primitive(PrimitiveType::String))
                            .into(),
                        NestedField::required(17, "age", Type::Primitive(PrimitiveType::Int))
                            .into(),
                    ])),
                ),
            ),
            (
                16,
                NestedField::optional(16, "name", Type::Primitive(PrimitiveType::String)),
            ),
            (
                17,
                NestedField::required(17, "age", Type::Primitive(PrimitiveType::Int)),
            ),
        ]);

        let schema = table_schema_nested();
        for (id, field) in expected_id_to_field {
            assert_eq!(
                Some(&field),
                schema.field_by_id(id).map(|f| f.as_ref()),
                "Field for {id} not match."
            );
        }
    }

    // Accessors for top-level primitives (ids 1-3) and for primitives nested
    // in the `person` struct (ids 16, 17) read the expected values from a row
    // laid out like the nested schema.
    #[test]
    fn test_build_accessors() {
        let schema = table_schema_nested();

        let test_struct = crate::spec::Struct::from_iter(vec![
            Some(Literal::string("foo value")),
            Some(Literal::int(1002)),
            Some(Literal::bool(true)),
            Some(Literal::List(vec![
                Some(Literal::string("qux item 1")),
                Some(Literal::string("qux item 2")),
            ])),
            Some(Literal::Map(MapValue::from([(
                Literal::string("quux key 1"),
                Some(Literal::Map(MapValue::from([(
                    Literal::string("quux nested key 1"),
                    Some(Literal::int(1000)),
                )]))),
            )]))),
            Some(Literal::List(vec![Some(Literal::Struct(
                crate::spec::Struct::from_iter(vec![
                    Some(Literal::float(52.509_09)),
                    Some(Literal::float(-1.885_249)),
                ]),
            ))])),
            Some(Literal::Struct(crate::spec::Struct::from_iter(vec![
                Some(Literal::string("Testy McTest")),
                Some(Literal::int(33)),
            ]))),
        ]);

        assert_eq!(
            schema
                .accessor_by_field_id(1)
                .unwrap()
                .get(&test_struct)
                .unwrap(),
            Some(Datum::string("foo value"))
        );
        assert_eq!(
            schema
                .accessor_by_field_id(2)
                .unwrap()
                .get(&test_struct)
                .unwrap(),
            Some(Datum::int(1002))
        );
        assert_eq!(
            schema
                .accessor_by_field_id(3)
                .unwrap()
                .get(&test_struct)
                .unwrap(),
            Some(Datum::bool(true))
        );
        assert_eq!(
            schema
                .accessor_by_field_id(16)
                .unwrap()
                .get(&test_struct)
                .unwrap(),
            Some(Datum::string("Testy McTest"))
        );
        assert_eq!(
            schema
                .accessor_by_field_id(17)
                .unwrap()
                .get(&test_struct)
                .unwrap(),
            Some(Datum::int(33))
        );
    }

    // The highest field id covers nested fields too.
    #[test]
    fn test_highest_field_id() {
        let schema = table_schema_nested();
        assert_eq!(17, schema.highest_field_id());

        let schema = table_schema_simple().0;
        assert_eq!(3, schema.highest_field_id());
    }

    // A schema without fields reports 0 as its highest field id.
    #[test]
    fn test_highest_field_id_no_fields() {
        let schema = Schema::builder().with_schema_id(1).build().unwrap();
        assert_eq!(0, schema.highest_field_id());
    }

    // Two fields sharing id 3 must make `build` fail. Note that
    // `reassigned_schema` holds the resulting error, not a schema.
    #[test]
    fn test_field_ids_must_be_unique() {
        let reassigned_schema = Schema::builder()
            .with_schema_id(1)
            .with_identifier_field_ids(vec![5])
            .with_alias(BiHashMap::from_iter(vec![("bar_alias".to_string(), 3)]))
            .with_fields(vec![
                NestedField::required(5, "foo", Type::Primitive(PrimitiveType::String)).into(),
                NestedField::optional(3, "bar", Type::Primitive(PrimitiveType::Int)).into(),
                NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
            ])
            .build()
            .unwrap_err();

        assert!(reassigned_schema.message().contains("'field.id' 3"));
    }

    // Reassigning ids on an empty schema leaves it unchanged.
    #[test]
    fn test_reassign_ids_empty_schema() {
        let schema = Schema::builder().with_schema_id(1).build().unwrap();
        let reassigned_schema = schema
            .clone()
            .into_builder()
            .with_reassigned_field_ids(0)
            .build()
            .unwrap();

        assert_eq!(schema, reassigned_schema);
        assert_eq!(schema.highest_field_id(), 0);
    }

    // Exercises the identifier field validation rules one case at a time.
    #[test]
    fn test_identifier_field_ids() {
        // A map key (id 2) cannot be an identifier field.
        assert!(
            Schema::builder()
                .with_schema_id(1)
                .with_identifier_field_ids(vec![2])
                .with_fields(vec![
                    NestedField::required(
                        1,
                        "Map",
                        Type::Map(MapType::new(
                            NestedField::map_key_element(2, Type::Primitive(PrimitiveType::String))
                                .into(),
                            NestedField::map_value_element(
                                3,
                                Type::Primitive(PrimitiveType::Boolean),
                                true,
                            )
                            .into(),
                        )),
                    )
                    .into()
                ])
                .build()
                .is_err()
        );
        // A map value (id 3) cannot be an identifier field either.
        assert!(
            Schema::builder()
                .with_schema_id(1)
                .with_identifier_field_ids(vec![3])
                .with_fields(vec![
                    NestedField::required(
                        1,
                        "Map",
                        Type::Map(MapType::new(
                            NestedField::map_key_element(2, Type::Primitive(PrimitiveType::String))
                                .into(),
                            NestedField::map_value_element(
                                3,
                                Type::Primitive(PrimitiveType::Boolean),
                                true,
                            )
                            .into(),
                        )),
                    )
                    .into()
                ])
                .build()
                .is_err()
        );

        // A list element (id 2) cannot be an identifier field.
        assert!(
            Schema::builder()
                .with_schema_id(1)
                .with_identifier_field_ids(vec![2])
                .with_fields(vec![
                    NestedField::required(
                        1,
                        "List",
                        Type::List(ListType::new(
                            NestedField::list_element(
                                2,
                                Type::Primitive(PrimitiveType::String),
                                true
                            )
                            .into(),
                        )),
                    )
                    .into()
                ])
                .build()
                .is_err()
        );

        // A required field (id 2) nested in an optional struct is rejected.
        assert!(
            Schema::builder()
                .with_schema_id(1)
                .with_identifier_field_ids(vec![2])
                .with_fields(vec![
                    NestedField::optional(
                        1,
                        "Struct",
                        Type::Struct(StructType::new(vec![
                            NestedField::required(
                                2,
                                "name",
                                Type::Primitive(PrimitiveType::String)
                            )
                            .into(),
                            NestedField::optional(3, "age", Type::Primitive(PrimitiveType::Int))
                                .into(),
                        ])),
                    )
                    .into()
                ])
                .build()
                .is_err()
        );

        // Float and double fields are rejected.
        assert!(
            Schema::builder()
                .with_schema_id(1)
                .with_identifier_field_ids(vec![1])
                .with_fields(vec![
                    NestedField::required(1, "Float", Type::Primitive(PrimitiveType::Float),)
                        .into()
                ])
                .build()
                .is_err()
        );
        assert!(
            Schema::builder()
                .with_schema_id(1)
                .with_identifier_field_ids(vec![1])
                .with_fields(vec![
                    NestedField::required(1, "Double", Type::Primitive(PrimitiveType::Double),)
                        .into()
                ])
                .build()
                .is_err()
        );

        // A required primitive is accepted; the same field made optional is not.
        assert!(
            Schema::builder()
                .with_schema_id(1)
                .with_identifier_field_ids(vec![1])
                .with_fields(vec![
                    NestedField::required(1, "Required", Type::Primitive(PrimitiveType::String),)
                        .into()
                ])
                .build()
                .is_ok()
        );
        assert!(
            Schema::builder()
                .with_schema_id(1)
                .with_identifier_field_ids(vec![1])
                .with_fields(vec![
                    NestedField::optional(1, "Optional", Type::Primitive(PrimitiveType::String),)
                        .into()
                ])
                .build()
                .is_err()
        );
    }
}