1use hive_metastore::FieldSchema;
19use iceberg::spec::{PrimitiveType, Schema, SchemaVisitor, visit_schema};
20use iceberg::{Error, ErrorKind, Result};
21
/// The Hive-side representation of a table schema: one [`FieldSchema`]
/// (name, type string, optional comment) per column.
type HiveSchema = Vec<FieldSchema>;
23
/// Schema visitor that converts an Iceberg [`Schema`] into a [`HiveSchema`].
///
/// Create it with `HiveSchemaBuilder::from_iceberg` and take the result with
/// `build`.
#[derive(Debug, Default)]
pub(crate) struct HiveSchemaBuilder {
    /// Columns collected so far; `field` pushes an entry here whenever it is
    /// called while `depth` is zero.
    schema: HiveSchema,
    /// Nesting counter: incremented in `before_struct_field` and decremented
    /// in `after_struct_field`. A non-zero value means the visitor is
    /// currently inside a struct field.
    depth: usize,
}
29
30impl HiveSchemaBuilder {
31 pub fn from_iceberg(schema: &Schema) -> Result<HiveSchemaBuilder> {
33 let mut builder = Self::default();
34 visit_schema(schema, &mut builder)?;
35 Ok(builder)
36 }
37
38 pub fn build(self) -> HiveSchema {
40 self.schema
41 }
42
43 fn is_inside_struct(&self) -> bool {
45 self.depth > 0
46 }
47}
48
49impl SchemaVisitor for HiveSchemaBuilder {
50 type T = String;
51
52 fn schema(
53 &mut self,
54 _schema: &iceberg::spec::Schema,
55 value: String,
56 ) -> iceberg::Result<String> {
57 Ok(value)
58 }
59
60 fn before_struct_field(
61 &mut self,
62 _field: &iceberg::spec::NestedFieldRef,
63 ) -> iceberg::Result<()> {
64 self.depth += 1;
65 Ok(())
66 }
67
68 fn r#struct(
69 &mut self,
70 r#_struct: &iceberg::spec::StructType,
71 results: Vec<String>,
72 ) -> iceberg::Result<String> {
73 Ok(format!("struct<{}>", results.join(", ")))
74 }
75
76 fn after_struct_field(
77 &mut self,
78 _field: &iceberg::spec::NestedFieldRef,
79 ) -> iceberg::Result<()> {
80 self.depth -= 1;
81 Ok(())
82 }
83
84 fn field(
85 &mut self,
86 field: &iceberg::spec::NestedFieldRef,
87 value: String,
88 ) -> iceberg::Result<String> {
89 if self.is_inside_struct() {
90 return Ok(format!("{}:{}", field.name, value));
91 }
92
93 self.schema.push(FieldSchema {
94 name: Some(field.name.clone().into()),
95 r#type: Some(value.clone().into()),
96 comment: field.doc.clone().map(|doc| doc.into()),
97 });
98
99 Ok(value)
100 }
101
102 fn list(&mut self, _list: &iceberg::spec::ListType, value: String) -> iceberg::Result<String> {
103 Ok(format!("array<{value}>"))
104 }
105
106 fn map(
107 &mut self,
108 _map: &iceberg::spec::MapType,
109 key_value: String,
110 value: String,
111 ) -> iceberg::Result<String> {
112 Ok(format!("map<{key_value},{value}>"))
113 }
114
115 fn primitive(&mut self, p: &iceberg::spec::PrimitiveType) -> iceberg::Result<String> {
116 let hive_type = match p {
117 PrimitiveType::Boolean => "boolean".to_string(),
118 PrimitiveType::Int => "int".to_string(),
119 PrimitiveType::Long => "bigint".to_string(),
120 PrimitiveType::Float => "float".to_string(),
121 PrimitiveType::Double => "double".to_string(),
122 PrimitiveType::Date => "date".to_string(),
123 PrimitiveType::Timestamp => "timestamp".to_string(),
124 PrimitiveType::TimestampNs => "timestamp_ns".to_string(),
125 PrimitiveType::TimestamptzNs => "timestamptz_ns".to_string(),
126 PrimitiveType::Time | PrimitiveType::String | PrimitiveType::Uuid => {
127 "string".to_string()
128 }
129 PrimitiveType::Binary | PrimitiveType::Fixed(_) => "binary".to_string(),
130 PrimitiveType::Decimal { precision, scale } => {
131 format!("decimal({precision},{scale})")
132 }
133 _ => {
134 return Err(Error::new(
135 ErrorKind::FeatureUnsupported,
136 "Conversion from 'Timestamptz' is not supported",
137 ));
138 }
139 };
140
141 Ok(hive_type)
142 }
143}
144
145#[cfg(test)]
146mod tests {
147 use iceberg::Result;
148 use iceberg::spec::Schema;
149
150 use super::*;
151
152 #[test]
153 fn test_schema_with_nested_maps() -> Result<()> {
154 let record = r#"
155 {
156 "schema-id": 1,
157 "type": "struct",
158 "fields": [
159 {
160 "id": 1,
161 "name": "quux",
162 "required": true,
163 "type": {
164 "type": "map",
165 "key-id": 2,
166 "key": "string",
167 "value-id": 3,
168 "value-required": true,
169 "value": {
170 "type": "map",
171 "key-id": 4,
172 "key": "string",
173 "value-id": 5,
174 "value-required": true,
175 "value": "int"
176 }
177 }
178 }
179 ]
180 }
181 "#;
182
183 let schema = serde_json::from_str::<Schema>(record)?;
184
185 let result = HiveSchemaBuilder::from_iceberg(&schema)?.build();
186
187 let expected = vec![FieldSchema {
188 name: Some("quux".into()),
189 r#type: Some("map<string,map<string,int>>".into()),
190 comment: None,
191 }];
192
193 assert_eq!(result, expected);
194
195 Ok(())
196 }
197
198 #[test]
199 fn test_schema_with_struct_inside_list() -> Result<()> {
200 let record = r#"
201 {
202 "schema-id": 1,
203 "type": "struct",
204 "fields": [
205 {
206 "id": 1,
207 "name": "location",
208 "required": true,
209 "type": {
210 "type": "list",
211 "element-id": 2,
212 "element-required": true,
213 "element": {
214 "type": "struct",
215 "fields": [
216 {
217 "id": 3,
218 "name": "latitude",
219 "required": false,
220 "type": "float"
221 },
222 {
223 "id": 4,
224 "name": "longitude",
225 "required": false,
226 "type": "float"
227 }
228 ]
229 }
230 }
231 }
232 ]
233 }
234 "#;
235
236 let schema = serde_json::from_str::<Schema>(record)?;
237
238 let result = HiveSchemaBuilder::from_iceberg(&schema)?.build();
239
240 let expected = vec![FieldSchema {
241 name: Some("location".into()),
242 r#type: Some("array<struct<latitude:float, longitude:float>>".into()),
243 comment: None,
244 }];
245
246 assert_eq!(result, expected);
247
248 Ok(())
249 }
250
251 #[test]
252 fn test_schema_with_structs() -> Result<()> {
253 let record = r#"{
254 "type": "struct",
255 "schema-id": 1,
256 "fields": [
257 {
258 "id": 1,
259 "name": "person",
260 "required": true,
261 "type": {
262 "type": "struct",
263 "fields": [
264 {
265 "id": 2,
266 "name": "name",
267 "required": true,
268 "type": "string"
269 },
270 {
271 "id": 3,
272 "name": "age",
273 "required": false,
274 "type": "int"
275 }
276 ]
277 }
278 }
279 ]
280 }"#;
281
282 let schema = serde_json::from_str::<Schema>(record)?;
283
284 let result = HiveSchemaBuilder::from_iceberg(&schema)?.build();
285
286 let expected = vec![FieldSchema {
287 name: Some("person".into()),
288 r#type: Some("struct<name:string, age:int>".into()),
289 comment: None,
290 }];
291
292 assert_eq!(result, expected);
293
294 Ok(())
295 }
296
297 #[test]
298 fn test_schema_with_simple_fields() -> Result<()> {
299 let record = r#"{
300 "type": "struct",
301 "schema-id": 1,
302 "fields": [
303 {
304 "id": 1,
305 "name": "c1",
306 "required": true,
307 "type": "boolean"
308 },
309 {
310 "id": 2,
311 "name": "c2",
312 "required": true,
313 "type": "int"
314 },
315 {
316 "id": 3,
317 "name": "c3",
318 "required": true,
319 "type": "long"
320 },
321 {
322 "id": 4,
323 "name": "c4",
324 "required": true,
325 "type": "float"
326 },
327 {
328 "id": 5,
329 "name": "c5",
330 "required": true,
331 "type": "double"
332 },
333 {
334 "id": 6,
335 "name": "c6",
336 "required": true,
337 "type": "decimal(2,2)"
338 },
339 {
340 "id": 7,
341 "name": "c7",
342 "required": true,
343 "type": "date"
344 },
345 {
346 "id": 8,
347 "name": "c8",
348 "required": true,
349 "type": "time"
350 },
351 {
352 "id": 9,
353 "name": "c9",
354 "required": true,
355 "type": "timestamp"
356 },
357 {
358 "id": 10,
359 "name": "c10",
360 "required": true,
361 "type": "string"
362 },
363 {
364 "id": 11,
365 "name": "c11",
366 "required": true,
367 "type": "uuid"
368 },
369 {
370 "id": 12,
371 "name": "c12",
372 "required": true,
373 "type": "fixed[4]"
374 },
375 {
376 "id": 13,
377 "name": "c13",
378 "required": true,
379 "type": "binary"
380 }
381 ]
382 }"#;
383
384 let schema = serde_json::from_str::<Schema>(record)?;
385
386 let result = HiveSchemaBuilder::from_iceberg(&schema)?.build();
387
388 let expected = vec![
389 FieldSchema {
390 name: Some("c1".into()),
391 r#type: Some("boolean".into()),
392 comment: None,
393 },
394 FieldSchema {
395 name: Some("c2".into()),
396 r#type: Some("int".into()),
397 comment: None,
398 },
399 FieldSchema {
400 name: Some("c3".into()),
401 r#type: Some("bigint".into()),
402 comment: None,
403 },
404 FieldSchema {
405 name: Some("c4".into()),
406 r#type: Some("float".into()),
407 comment: None,
408 },
409 FieldSchema {
410 name: Some("c5".into()),
411 r#type: Some("double".into()),
412 comment: None,
413 },
414 FieldSchema {
415 name: Some("c6".into()),
416 r#type: Some("decimal(2,2)".into()),
417 comment: None,
418 },
419 FieldSchema {
420 name: Some("c7".into()),
421 r#type: Some("date".into()),
422 comment: None,
423 },
424 FieldSchema {
425 name: Some("c8".into()),
426 r#type: Some("string".into()),
427 comment: None,
428 },
429 FieldSchema {
430 name: Some("c9".into()),
431 r#type: Some("timestamp".into()),
432 comment: None,
433 },
434 FieldSchema {
435 name: Some("c10".into()),
436 r#type: Some("string".into()),
437 comment: None,
438 },
439 FieldSchema {
440 name: Some("c11".into()),
441 r#type: Some("string".into()),
442 comment: None,
443 },
444 FieldSchema {
445 name: Some("c12".into()),
446 r#type: Some("binary".into()),
447 comment: None,
448 },
449 FieldSchema {
450 name: Some("c13".into()),
451 r#type: Some("binary".into()),
452 comment: None,
453 },
454 ];
455
456 assert_eq!(result, expected);
457
458 Ok(())
459 }
460}