1use super::utils::try_insert_field;
19use super::*;
20
21pub fn index_by_id(r#struct: &StructType) -> Result<HashMap<i32, NestedFieldRef>> {
23 struct IndexById(HashMap<i32, NestedFieldRef>);
24
25 impl SchemaVisitor for IndexById {
26 type T = ();
27
28 fn schema(&mut self, _schema: &Schema, _value: ()) -> Result<()> {
29 Ok(())
30 }
31
32 fn field(&mut self, field: &NestedFieldRef, _value: ()) -> Result<()> {
33 try_insert_field(&mut self.0, field.id, field.clone())
34 }
35
36 fn r#struct(&mut self, _struct: &StructType, _results: Vec<Self::T>) -> Result<Self::T> {
37 Ok(())
38 }
39
40 fn list(&mut self, list: &ListType, _value: Self::T) -> Result<Self::T> {
41 try_insert_field(
42 &mut self.0,
43 list.element_field.id,
44 list.element_field.clone(),
45 )
46 }
47
48 fn map(&mut self, map: &MapType, _key_value: Self::T, _value: Self::T) -> Result<Self::T> {
49 try_insert_field(&mut self.0, map.key_field.id, map.key_field.clone())?;
50 try_insert_field(&mut self.0, map.value_field.id, map.value_field.clone())
51 }
52
53 fn primitive(&mut self, _: &PrimitiveType) -> Result<Self::T> {
54 Ok(())
55 }
56 }
57
58 let mut index = IndexById(HashMap::new());
59 visit_struct(r#struct, &mut index)?;
60 Ok(index.0)
61}
62
63pub fn index_parents(r#struct: &StructType) -> Result<HashMap<i32, i32>> {
65 struct IndexByParent {
66 parents: Vec<i32>,
67 result: HashMap<i32, i32>,
68 }
69
70 impl SchemaVisitor for IndexByParent {
71 type T = ();
72
73 fn before_struct_field(&mut self, field: &NestedFieldRef) -> Result<()> {
74 if let Some(parent) = self.parents.last().copied() {
75 self.result.insert(field.id, parent);
76 }
77 self.parents.push(field.id);
78 Ok(())
79 }
80
81 fn after_struct_field(&mut self, _field: &NestedFieldRef) -> Result<()> {
82 self.parents.pop();
83 Ok(())
84 }
85
86 fn before_list_element(&mut self, field: &NestedFieldRef) -> Result<()> {
87 if let Some(parent) = self.parents.last().copied() {
88 self.result.insert(field.id, parent);
89 }
90 self.parents.push(field.id);
91 Ok(())
92 }
93
94 fn after_list_element(&mut self, _field: &NestedFieldRef) -> Result<()> {
95 self.parents.pop();
96 Ok(())
97 }
98
99 fn before_map_key(&mut self, field: &NestedFieldRef) -> Result<()> {
100 if let Some(parent) = self.parents.last().copied() {
101 self.result.insert(field.id, parent);
102 }
103 self.parents.push(field.id);
104 Ok(())
105 }
106
107 fn after_map_key(&mut self, _field: &NestedFieldRef) -> Result<()> {
108 self.parents.pop();
109 Ok(())
110 }
111
112 fn before_map_value(&mut self, field: &NestedFieldRef) -> Result<()> {
113 if let Some(parent) = self.parents.last().copied() {
114 self.result.insert(field.id, parent);
115 }
116 self.parents.push(field.id);
117 Ok(())
118 }
119
120 fn after_map_value(&mut self, _field: &NestedFieldRef) -> Result<()> {
121 self.parents.pop();
122 Ok(())
123 }
124
125 fn schema(&mut self, _schema: &Schema, _value: Self::T) -> Result<Self::T> {
126 Ok(())
127 }
128
129 fn field(&mut self, _field: &NestedFieldRef, _value: Self::T) -> Result<Self::T> {
130 Ok(())
131 }
132
133 fn r#struct(&mut self, _struct: &StructType, _results: Vec<Self::T>) -> Result<Self::T> {
134 Ok(())
135 }
136
137 fn list(&mut self, _list: &ListType, _value: Self::T) -> Result<Self::T> {
138 Ok(())
139 }
140
141 fn map(&mut self, _map: &MapType, _key_value: Self::T, _value: Self::T) -> Result<Self::T> {
142 Ok(())
143 }
144
145 fn primitive(&mut self, _p: &PrimitiveType) -> Result<Self::T> {
146 Ok(())
147 }
148 }
149
150 let mut index = IndexByParent {
151 parents: vec![],
152 result: HashMap::new(),
153 };
154 visit_struct(r#struct, &mut index)?;
155 Ok(index.result)
156}
157
158#[derive(Default)]
159pub struct IndexByName {
160 name_to_id: HashMap<String, i32>,
162 short_name_to_id: HashMap<String, i32>,
163
164 field_names: Vec<String>,
165 short_field_names: Vec<String>,
166}
167
168impl IndexByName {
169 fn add_field(&mut self, name: &str, field_id: i32) -> Result<()> {
170 let full_name = self
171 .field_names
172 .iter()
173 .map(String::as_str)
174 .chain(vec![name])
175 .join(".");
176 if let Some(existing_field_id) = self.name_to_id.get(full_name.as_str()) {
177 return Err(Error::new(
178 ErrorKind::DataInvalid,
179 format!(
180 "Invalid schema: multiple fields for name {full_name}: {field_id} and {existing_field_id}"
181 ),
182 ));
183 } else {
184 self.name_to_id.insert(full_name, field_id);
185 }
186
187 let full_short_name = self
188 .short_field_names
189 .iter()
190 .map(String::as_str)
191 .chain(vec![name])
192 .join(".");
193 self.short_name_to_id
194 .entry(full_short_name)
195 .or_insert_with(|| field_id);
196 Ok(())
197 }
198
199 pub fn indexes(mut self) -> (HashMap<String, i32>, HashMap<i32, String>) {
204 self.short_name_to_id.reserve(self.name_to_id.len());
205 for (name, id) in &self.name_to_id {
206 self.short_name_to_id.insert(name.clone(), *id);
207 }
208
209 let id_to_name = self.name_to_id.into_iter().map(|e| (e.1, e.0)).collect();
210 (self.short_name_to_id, id_to_name)
211 }
212}
213
214impl SchemaVisitor for IndexByName {
215 type T = ();
216
217 fn before_struct_field(&mut self, field: &NestedFieldRef) -> Result<()> {
218 self.field_names.push(field.name.to_string());
219 self.short_field_names.push(field.name.to_string());
220 Ok(())
221 }
222
223 fn after_struct_field(&mut self, _field: &NestedFieldRef) -> Result<()> {
224 self.field_names.pop();
225 self.short_field_names.pop();
226 Ok(())
227 }
228
229 fn before_list_element(&mut self, field: &NestedFieldRef) -> Result<()> {
230 self.field_names.push(field.name.clone());
231 if !field.field_type.is_struct() {
232 self.short_field_names.push(field.name.to_string());
233 }
234
235 Ok(())
236 }
237
238 fn after_list_element(&mut self, field: &NestedFieldRef) -> Result<()> {
239 self.field_names.pop();
240 if !field.field_type.is_struct() {
241 self.short_field_names.pop();
242 }
243
244 Ok(())
245 }
246
247 fn before_map_key(&mut self, field: &NestedFieldRef) -> Result<()> {
248 self.before_struct_field(field)
249 }
250
251 fn after_map_key(&mut self, field: &NestedFieldRef) -> Result<()> {
252 self.after_struct_field(field)
253 }
254
255 fn before_map_value(&mut self, field: &NestedFieldRef) -> Result<()> {
256 self.field_names.push(field.name.to_string());
257 if !field.field_type.is_struct() {
258 self.short_field_names.push(field.name.to_string());
259 }
260 Ok(())
261 }
262
263 fn after_map_value(&mut self, field: &NestedFieldRef) -> Result<()> {
264 self.field_names.pop();
265 if !field.field_type.is_struct() {
266 self.short_field_names.pop();
267 }
268
269 Ok(())
270 }
271
272 fn schema(&mut self, _schema: &Schema, _value: Self::T) -> Result<Self::T> {
273 Ok(())
274 }
275
276 fn field(&mut self, field: &NestedFieldRef, _value: Self::T) -> Result<Self::T> {
277 self.add_field(field.name.as_str(), field.id)
278 }
279
280 fn r#struct(&mut self, _struct: &StructType, _results: Vec<Self::T>) -> Result<Self::T> {
281 Ok(())
282 }
283
284 fn list(&mut self, list: &ListType, _value: Self::T) -> Result<Self::T> {
285 self.add_field(LIST_FIELD_NAME, list.element_field.id)
286 }
287
288 fn map(&mut self, map: &MapType, _key_value: Self::T, _value: Self::T) -> Result<Self::T> {
289 self.add_field(MAP_KEY_FIELD_NAME, map.key_field.id)?;
290 self.add_field(MAP_VALUE_FIELD_NAME, map.value_field.id)
291 }
292
293 fn primitive(&mut self, _p: &PrimitiveType) -> Result<Self::T> {
294 Ok(())
295 }
296}
297
298#[cfg(test)]
299mod tests {
300 use super::*;
301 use crate::spec::schema::tests::table_schema_nested;
302
303 #[test]
304 fn test_index_parent() {
305 let schema = table_schema_nested();
306 let result = index_parents(&schema.r#struct).unwrap();
307 assert_eq!(result.get(&5).unwrap(), &4);
308 assert_eq!(result.get(&7).unwrap(), &6);
309 assert_eq!(result.get(&8).unwrap(), &6);
310 assert_eq!(result.get(&9).unwrap(), &8);
311 assert_eq!(result.get(&10).unwrap(), &8);
312 assert_eq!(result.get(&12).unwrap(), &11);
313 assert_eq!(result.get(&13).unwrap(), &12);
314 assert_eq!(result.get(&14).unwrap(), &12);
315 assert_eq!(result.get(&16).unwrap(), &15);
316 assert_eq!(result.get(&17).unwrap(), &15);
317 }
318}