1use std::any::Any;
21use std::str::FromStr;
22
23use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc};
24use ordered_float::OrderedFloat;
25use serde_json::{Map as JsonMap, Number, Value as JsonValue};
26use uuid::Uuid;
27
28use super::Map;
29use super::decimal_utils::{
30 decimal_from_str_exact, decimal_mantissa, decimal_rescale, try_decimal_from_i128_with_scale,
31};
32use super::primitive::PrimitiveLiteral;
33use super::struct_value::Struct;
34use super::temporal::{date, time, timestamp, timestamptz};
35use crate::error::Result;
36use crate::spec::datatypes::{PrimitiveType, Type};
37use crate::{Error, ErrorKind};
38
/// A value that is either a single primitive or a nested struct, list, or map.
///
/// The literal does not carry its own logical type: the constructors in this
/// file store dates as `Int` and times/timestamps as `Long`, so a separate
/// `Type` must be supplied when converting to or from JSON.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum Literal {
    /// A single primitive value.
    Primitive(PrimitiveLiteral),
    /// A struct value; its layout is defined by [`Struct`].
    Struct(Struct),
    /// An ordered sequence of elements, where `None` represents a null element.
    List(Vec<Option<Literal>>),
    /// A map value; its layout is defined by [`Map`].
    Map(Map),
}
57
58impl Literal {
59 pub fn bool<T: Into<bool>>(t: T) -> Self {
69 Self::Primitive(PrimitiveLiteral::Boolean(t.into()))
70 }
71
72 pub fn bool_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
83 let v = s.as_ref().parse::<bool>().map_err(|e| {
84 Error::new(ErrorKind::DataInvalid, "Can't parse string to bool.").with_source(e)
85 })?;
86 Ok(Self::Primitive(PrimitiveLiteral::Boolean(v)))
87 }
88
89 pub fn int<T: Into<i32>>(t: T) -> Self {
99 Self::Primitive(PrimitiveLiteral::Int(t.into()))
100 }
101
102 pub fn long<T: Into<i64>>(t: T) -> Self {
112 Self::Primitive(PrimitiveLiteral::Long(t.into()))
113 }
114
115 pub fn float<T: Into<f32>>(t: T) -> Self {
129 Self::Primitive(PrimitiveLiteral::Float(OrderedFloat(t.into())))
130 }
131
132 pub fn double<T: Into<f64>>(t: T) -> Self {
146 Self::Primitive(PrimitiveLiteral::Double(OrderedFloat(t.into())))
147 }
148
149 pub fn date(days: i32) -> Self {
151 Self::Primitive(PrimitiveLiteral::Int(days))
152 }
153
154 pub fn date_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
166 let t = s.as_ref().parse::<NaiveDate>().map_err(|e| {
167 Error::new(
168 ErrorKind::DataInvalid,
169 format!("Can't parse date from string: {}", s.as_ref()),
170 )
171 .with_source(e)
172 })?;
173
174 Ok(Self::date(date::date_from_naive_date(t)))
175 }
176
177 pub fn date_from_ymd(year: i32, month: u32, day: u32) -> Result<Self> {
190 let t = NaiveDate::from_ymd_opt(year, month, day).ok_or_else(|| {
191 Error::new(
192 ErrorKind::DataInvalid,
193 format!("Can't create date from year: {year}, month: {month}, day: {day}"),
194 )
195 })?;
196
197 Ok(Self::date(date::date_from_naive_date(t)))
198 }
199
200 pub fn time(value: i64) -> Self {
202 Self::Primitive(PrimitiveLiteral::Long(value))
203 }
204
205 fn time_from_naive_time(t: NaiveTime) -> Self {
207 let duration = t - date::unix_epoch().time();
208 let micro_secs = duration.num_microseconds().unwrap();
210
211 Literal::time(micro_secs)
212 }
213
214 pub fn time_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
232 let t = s.as_ref().parse::<NaiveTime>().map_err(|e| {
233 Error::new(
234 ErrorKind::DataInvalid,
235 format!("Can't parse time from string: {}", s.as_ref()),
236 )
237 .with_source(e)
238 })?;
239
240 Ok(Self::time_from_naive_time(t))
241 }
242
243 pub fn time_from_hms_micro(hour: u32, min: u32, sec: u32, micro: u32) -> Result<Self> {
255 let t = NaiveTime::from_hms_micro_opt(hour, min, sec, micro)
256 .ok_or_else(|| Error::new(
257 ErrorKind::DataInvalid,
258 format!("Can't create time from hour: {hour}, min: {min}, second: {sec}, microsecond: {micro}"),
259 ))?;
260 Ok(Self::time_from_naive_time(t))
261 }
262
263 pub fn timestamp(value: i64) -> Self {
265 Self::Primitive(PrimitiveLiteral::Long(value))
266 }
267
268 pub fn timestamptz(value: i64) -> Self {
270 Self::Primitive(PrimitiveLiteral::Long(value))
271 }
272
273 pub(crate) fn timestamp_nano(value: i64) -> Self {
275 Self::Primitive(PrimitiveLiteral::Long(value))
276 }
277
278 pub(crate) fn timestamptz_nano(value: i64) -> Self {
280 Self::Primitive(PrimitiveLiteral::Long(value))
281 }
282
283 pub fn timestamp_from_datetime<T: TimeZone>(dt: DateTime<T>) -> Self {
285 Self::timestamp(dt.with_timezone(&Utc).timestamp_micros())
286 }
287
288 pub fn timestamptz_from_datetime<T: TimeZone>(dt: DateTime<T>) -> Self {
290 Self::timestamptz(dt.with_timezone(&Utc).timestamp_micros())
291 }
292
293 pub fn timestamp_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
317 let dt = DateTime::<Utc>::from_str(s.as_ref()).map_err(|e| {
318 Error::new(ErrorKind::DataInvalid, "Can't parse datetime.").with_source(e)
319 })?;
320
321 Ok(Self::timestamp_from_datetime(dt))
322 }
323
324 pub fn timestamptz_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
326 let dt = DateTime::<Utc>::from_str(s.as_ref()).map_err(|e| {
327 Error::new(ErrorKind::DataInvalid, "Can't parse datetime.").with_source(e)
328 })?;
329
330 Ok(Self::timestamptz_from_datetime(dt))
331 }
332
333 pub fn string<S: ToString>(s: S) -> Self {
335 Self::Primitive(PrimitiveLiteral::String(s.to_string()))
336 }
337
338 pub fn uuid(uuid: Uuid) -> Self {
340 Self::Primitive(PrimitiveLiteral::UInt128(uuid.as_u128()))
341 }
342
343 pub fn uuid_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
356 let uuid = Uuid::parse_str(s.as_ref()).map_err(|e| {
357 Error::new(
358 ErrorKind::DataInvalid,
359 format!("Can't parse uuid from string: {}", s.as_ref()),
360 )
361 .with_source(e)
362 })?;
363 Ok(Self::uuid(uuid))
364 }
365
366 pub fn fixed<I: IntoIterator<Item = u8>>(input: I) -> Self {
378 Literal::Primitive(PrimitiveLiteral::Binary(input.into_iter().collect()))
379 }
380
381 pub fn binary<I: IntoIterator<Item = u8>>(input: I) -> Self {
393 Literal::Primitive(PrimitiveLiteral::Binary(input.into_iter().collect()))
394 }
395
396 pub fn decimal(decimal: i128) -> Self {
398 Self::Primitive(PrimitiveLiteral::Int128(decimal))
399 }
400
401 pub fn decimal_from_str<S: AsRef<str>>(s: S) -> Result<Self> {
413 let decimal = decimal_from_str_exact(s.as_ref())?;
414 Ok(Self::decimal(decimal_mantissa(&decimal)))
415 }
416
417 pub fn as_primitive_literal(&self) -> Option<PrimitiveLiteral> {
419 match self {
420 Literal::Primitive(primitive) => Some(primitive.clone()),
421 _ => None,
422 }
423 }
424
425 pub fn try_from_json(value: JsonValue, data_type: &Type) -> Result<Option<Self>> {
429 match data_type {
430 Type::Primitive(primitive) => match (primitive, value) {
431 (PrimitiveType::Boolean, JsonValue::Bool(bool)) => {
432 Ok(Some(Literal::Primitive(PrimitiveLiteral::Boolean(bool))))
433 }
434 (PrimitiveType::Int, JsonValue::Number(number)) => {
435 Ok(Some(Literal::Primitive(PrimitiveLiteral::Int(
436 number
437 .as_i64()
438 .ok_or(Error::new(
439 crate::ErrorKind::DataInvalid,
440 "Failed to convert json number to int",
441 ))?
442 .try_into()?,
443 ))))
444 }
445 (PrimitiveType::Long, JsonValue::Number(number)) => Ok(Some(Literal::Primitive(
446 PrimitiveLiteral::Long(number.as_i64().ok_or(Error::new(
447 crate::ErrorKind::DataInvalid,
448 "Failed to convert json number to long",
449 ))?),
450 ))),
451 (PrimitiveType::Float, JsonValue::Number(number)) => Ok(Some(Literal::Primitive(
452 PrimitiveLiteral::Float(OrderedFloat(number.as_f64().ok_or(Error::new(
453 crate::ErrorKind::DataInvalid,
454 "Failed to convert json number to float",
455 ))? as f32)),
456 ))),
457 (PrimitiveType::Double, JsonValue::Number(number)) => Ok(Some(Literal::Primitive(
458 PrimitiveLiteral::Double(OrderedFloat(number.as_f64().ok_or(Error::new(
459 crate::ErrorKind::DataInvalid,
460 "Failed to convert json number to double",
461 ))?)),
462 ))),
463 (PrimitiveType::Date, JsonValue::String(s)) => {
464 Ok(Some(Literal::Primitive(PrimitiveLiteral::Int(
465 date::date_to_days(&NaiveDate::parse_from_str(&s, "%Y-%m-%d")?),
466 ))))
467 }
468 (PrimitiveType::Date, JsonValue::Number(number)) => {
469 Ok(Some(Literal::Primitive(PrimitiveLiteral::Int(
470 number
471 .as_i64()
472 .ok_or(Error::new(
473 crate::ErrorKind::DataInvalid,
474 "Failed to convert json number to date (days since epoch)",
475 ))?
476 .try_into()?,
477 ))))
478 }
479 (PrimitiveType::Time, JsonValue::String(s)) => {
480 Ok(Some(Literal::Primitive(PrimitiveLiteral::Long(
481 time::time_to_microseconds(&NaiveTime::parse_from_str(&s, "%H:%M:%S%.f")?),
482 ))))
483 }
484 (PrimitiveType::Timestamp, JsonValue::String(s)) => Ok(Some(Literal::Primitive(
485 PrimitiveLiteral::Long(timestamp::datetime_to_microseconds(
486 &NaiveDateTime::parse_from_str(&s, "%Y-%m-%dT%H:%M:%S%.f")?,
487 )),
488 ))),
489 (PrimitiveType::Timestamptz, JsonValue::String(s)) => {
490 Ok(Some(Literal::Primitive(PrimitiveLiteral::Long(
491 timestamptz::datetimetz_to_microseconds(&Utc.from_utc_datetime(
492 &NaiveDateTime::parse_from_str(&s, "%Y-%m-%dT%H:%M:%S%.f+00:00")?,
493 )),
494 ))))
495 }
496 (PrimitiveType::String, JsonValue::String(s)) => {
497 Ok(Some(Literal::Primitive(PrimitiveLiteral::String(s))))
498 }
499 (PrimitiveType::Uuid, JsonValue::String(s)) => Ok(Some(Literal::Primitive(
500 PrimitiveLiteral::UInt128(Uuid::parse_str(&s)?.as_u128()),
501 ))),
502 (PrimitiveType::Fixed(size), JsonValue::String(s)) => {
503 let bytes = decode_hex_bytes(&s)?;
504 validate_fixed_size(bytes.len(), *size)?;
505 Ok(Some(Literal::Primitive(PrimitiveLiteral::Binary(bytes))))
506 }
507 (PrimitiveType::Binary, JsonValue::String(s)) => Ok(Some(Literal::Primitive(
508 PrimitiveLiteral::Binary(decode_hex_bytes(&s)?),
509 ))),
510 (
511 PrimitiveType::Decimal {
512 precision: _,
513 scale,
514 },
515 JsonValue::String(s),
516 ) => {
517 let decimal = decimal_from_str_exact(&s)?;
518 let rescaled = decimal_rescale(decimal, *scale);
519 Ok(Some(Literal::Primitive(PrimitiveLiteral::Int128(
520 decimal_mantissa(&rescaled),
521 ))))
522 }
523 (_, JsonValue::Null) => Ok(None),
524 (i, j) => Err(Error::new(
525 crate::ErrorKind::DataInvalid,
526 format!("The json value {j} doesn't fit to the iceberg type {i}."),
527 )),
528 },
529 Type::Struct(schema) => {
530 if let JsonValue::Object(mut object) = value {
531 Ok(Some(Literal::Struct(Struct::from_iter(
532 schema.fields().iter().map(|field| {
533 object.remove(&field.id.to_string()).and_then(|value| {
534 Literal::try_from_json(value, &field.field_type)
535 .and_then(|value| {
536 value.ok_or(Error::new(
537 ErrorKind::DataInvalid,
538 "Key of map cannot be null",
539 ))
540 })
541 .ok()
542 })
543 }),
544 ))))
545 } else {
546 Err(Error::new(
547 crate::ErrorKind::DataInvalid,
548 "The json value for a struct type must be an object.",
549 ))
550 }
551 }
552 Type::List(list) => {
553 if let JsonValue::Array(array) = value {
554 Ok(Some(Literal::List(
555 array
556 .into_iter()
557 .map(|value| {
558 Literal::try_from_json(value, &list.element_field.field_type)
559 })
560 .collect::<Result<Vec<_>>>()?,
561 )))
562 } else {
563 Err(Error::new(
564 crate::ErrorKind::DataInvalid,
565 "The json value for a list type must be an array.",
566 ))
567 }
568 }
569 Type::Map(map) => {
570 if let JsonValue::Object(mut object) = value {
571 if let (Some(JsonValue::Array(keys)), Some(JsonValue::Array(values))) =
572 (object.remove("keys"), object.remove("values"))
573 {
574 Ok(Some(Literal::Map(Map::from_iter(
575 keys.into_iter()
576 .zip(values.into_iter())
577 .map(|(key, value)| {
578 Ok((
579 Literal::try_from_json(key, &map.key_field.field_type)
580 .and_then(|value| {
581 value.ok_or(Error::new(
582 ErrorKind::DataInvalid,
583 "Key of map cannot be null",
584 ))
585 })?,
586 Literal::try_from_json(value, &map.value_field.field_type)?,
587 ))
588 })
589 .collect::<Result<Vec<_>>>()?,
590 ))))
591 } else {
592 Err(Error::new(
593 crate::ErrorKind::DataInvalid,
594 "The json value for a list type must be an array.",
595 ))
596 }
597 } else {
598 Err(Error::new(
599 crate::ErrorKind::DataInvalid,
600 "The json value for a list type must be an array.",
601 ))
602 }
603 }
604 }
605 }
606
607 pub fn try_into_json(self, r#type: &Type) -> Result<JsonValue> {
611 match (self, r#type) {
612 (Literal::Primitive(prim), Type::Primitive(prim_type)) => match (prim_type, prim) {
613 (PrimitiveType::Boolean, PrimitiveLiteral::Boolean(val)) => {
614 Ok(JsonValue::Bool(val))
615 }
616 (PrimitiveType::Int, PrimitiveLiteral::Int(val)) => {
617 Ok(JsonValue::Number((val).into()))
618 }
619 (PrimitiveType::Long, PrimitiveLiteral::Long(val)) => {
620 Ok(JsonValue::Number((val).into()))
621 }
622 (PrimitiveType::Float, PrimitiveLiteral::Float(val)) => {
623 match Number::from_f64(val.0 as f64) {
624 Some(number) => Ok(JsonValue::Number(number)),
625 None => Ok(JsonValue::Null),
626 }
627 }
628 (PrimitiveType::Double, PrimitiveLiteral::Double(val)) => {
629 match Number::from_f64(val.0) {
630 Some(number) => Ok(JsonValue::Number(number)),
631 None => Ok(JsonValue::Null),
632 }
633 }
634 (PrimitiveType::Date, PrimitiveLiteral::Int(val)) => {
635 Ok(JsonValue::String(date::days_to_date(val).to_string()))
636 }
637 (PrimitiveType::Time, PrimitiveLiteral::Long(val)) => Ok(JsonValue::String(
638 time::microseconds_to_time(val).to_string(),
639 )),
640 (PrimitiveType::Timestamp, PrimitiveLiteral::Long(val)) => Ok(JsonValue::String(
641 timestamp::microseconds_to_datetime(val)
642 .format("%Y-%m-%dT%H:%M:%S%.f")
643 .to_string(),
644 )),
645 (PrimitiveType::Timestamptz, PrimitiveLiteral::Long(val)) => Ok(JsonValue::String(
646 timestamptz::microseconds_to_datetimetz(val)
647 .format("%Y-%m-%dT%H:%M:%S%.f+00:00")
648 .to_string(),
649 )),
650 (PrimitiveType::TimestampNs, PrimitiveLiteral::Long(val)) => Ok(JsonValue::String(
651 timestamp::nanoseconds_to_datetime(val)
652 .format("%Y-%m-%dT%H:%M:%S%.f")
653 .to_string(),
654 )),
655 (PrimitiveType::TimestamptzNs, PrimitiveLiteral::Long(val)) => {
656 Ok(JsonValue::String(
657 timestamptz::nanoseconds_to_datetimetz(val)
658 .format("%Y-%m-%dT%H:%M:%S%.f+00:00")
659 .to_string(),
660 ))
661 }
662 (PrimitiveType::String, PrimitiveLiteral::String(val)) => {
663 Ok(JsonValue::String(val.clone()))
664 }
665 (_, PrimitiveLiteral::UInt128(val)) => {
666 Ok(JsonValue::String(Uuid::from_u128(val).to_string()))
667 }
668 (PrimitiveType::Fixed(size), PrimitiveLiteral::Binary(val)) => {
669 validate_fixed_size(val.len(), *size)?;
670 Ok(JsonValue::String(encode_hex_bytes(&val)))
671 }
672 (PrimitiveType::Binary, PrimitiveLiteral::Binary(val)) => {
673 Ok(JsonValue::String(encode_hex_bytes(&val)))
674 }
675 (_, PrimitiveLiteral::Int128(val)) => match r#type {
676 Type::Primitive(PrimitiveType::Decimal {
677 precision: _precision,
678 scale,
679 }) => {
680 let decimal = try_decimal_from_i128_with_scale(val, *scale)?;
681 Ok(JsonValue::String(decimal.to_string()))
682 }
683 _ => Err(Error::new(
684 ErrorKind::DataInvalid,
685 "The iceberg type for decimal literal must be decimal.",
686 ))?,
687 },
688 _ => Err(Error::new(
689 ErrorKind::DataInvalid,
690 "The iceberg value doesn't fit to the iceberg type.",
691 )),
692 },
693 (Literal::Struct(s), Type::Struct(struct_type)) => {
694 let mut id_and_value = Vec::with_capacity(struct_type.fields().len());
695 for (value, field) in s.into_iter().zip(struct_type.fields()) {
696 let json = match value {
697 Some(val) => val.try_into_json(&field.field_type)?,
698 None => JsonValue::Null,
699 };
700 id_and_value.push((field.id.to_string(), json));
701 }
702 Ok(JsonValue::Object(JsonMap::from_iter(id_and_value)))
703 }
704 (Literal::List(list), Type::List(list_type)) => Ok(JsonValue::Array(
705 list.into_iter()
706 .map(|opt| match opt {
707 Some(literal) => literal.try_into_json(&list_type.element_field.field_type),
708 None => Ok(JsonValue::Null),
709 })
710 .collect::<Result<Vec<JsonValue>>>()?,
711 )),
712 (Literal::Map(map), Type::Map(map_type)) => {
713 let mut object = JsonMap::with_capacity(2);
714 let mut json_keys = Vec::with_capacity(map.len());
715 let mut json_values = Vec::with_capacity(map.len());
716 for (key, value) in map.into_iter() {
717 json_keys.push(key.try_into_json(&map_type.key_field.field_type)?);
718 json_values.push(match value {
719 Some(literal) => literal.try_into_json(&map_type.value_field.field_type)?,
720 None => JsonValue::Null,
721 });
722 }
723 object.insert("keys".to_string(), JsonValue::Array(json_keys));
724 object.insert("values".to_string(), JsonValue::Array(json_values));
725 Ok(JsonValue::Object(object))
726 }
727 (value, r#type) => Err(Error::new(
728 ErrorKind::DataInvalid,
729 format!("The iceberg value {value:?} doesn't fit to the iceberg type {type}."),
730 )),
731 }
732 }
733
734 pub fn into_any(self) -> Box<dyn Any> {
736 match self {
737 Literal::Primitive(prim) => match prim {
738 PrimitiveLiteral::Boolean(any) => Box::new(any),
739 PrimitiveLiteral::Int(any) => Box::new(any),
740 PrimitiveLiteral::Long(any) => Box::new(any),
741 PrimitiveLiteral::Float(any) => Box::new(any),
742 PrimitiveLiteral::Double(any) => Box::new(any),
743 PrimitiveLiteral::Binary(any) => Box::new(any),
744 PrimitiveLiteral::String(any) => Box::new(any),
745 PrimitiveLiteral::UInt128(any) => Box::new(any),
746 PrimitiveLiteral::Int128(any) => Box::new(any),
747 PrimitiveLiteral::AboveMax | PrimitiveLiteral::BelowMin => unimplemented!(),
748 },
749 _ => unimplemented!(),
750 }
751 }
752}
753
754fn decode_hex_bytes(value: &str) -> Result<Vec<u8>> {
755 if !value.len().is_multiple_of(2) {
756 return Err(Error::new(
757 ErrorKind::DataInvalid,
758 format!("Hex string must have an even number of characters: {value:?}"),
759 ));
760 }
761
762 value
763 .as_bytes()
764 .chunks_exact(2)
765 .map(|chunk| {
766 let high = decode_hex_digit(chunk[0], value)?;
767 let low = decode_hex_digit(chunk[1], value)?;
768 Ok((high << 4) | low)
769 })
770 .collect()
771}
772
773fn decode_hex_digit(digit: u8, value: &str) -> Result<u8> {
774 match digit {
775 b'0'..=b'9' => Ok(digit - b'0'),
776 b'a'..=b'f' => Ok(digit - b'a' + 10),
777 b'A'..=b'F' => Ok(digit - b'A' + 10),
778 _ => Err(Error::new(
779 ErrorKind::DataInvalid,
780 format!("Hex string contains invalid character: {value:?}"),
781 )),
782 }
783}
784
/// Encodes bytes as a lowercase hex string, two characters per byte.
fn encode_hex_bytes(bytes: &[u8]) -> String {
    const ALPHABET: [char; 16] = [
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
    ];

    bytes
        .iter()
        .fold(String::with_capacity(bytes.len() * 2), |mut hex, &byte| {
            // High nibble first, then low nibble.
            hex.push(ALPHABET[usize::from(byte >> 4)]);
            hex.push(ALPHABET[usize::from(byte & 0x0f)]);
            hex
        })
}
795
796fn validate_fixed_size(actual: usize, expected: u64) -> Result<()> {
797 if actual as u64 == expected {
798 Ok(())
799 } else {
800 Err(Error::new(
801 ErrorKind::DataInvalid,
802 format!("Fixed type must be exactly {expected} bytes, got {actual}"),
803 ))
804 }
805}