iceberg/spec/values/map.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Map collection type for Iceberg
19
20use std::collections::HashMap;
21use std::hash::Hash;
22
23use super::Literal;
24
25/// Map is a collection of key-value pairs with a key type and a value type.
26/// It used in Literal::Map, to make it hashable, the order of key-value pairs is stored in a separate vector
27/// so that we can hash the map in a deterministic way. But it also means that the order of key-value pairs is matter
28/// for the hash value.
29///
30/// When converting to Arrow (e.g., for Iceberg), the map should be represented using
31/// the default field name "key_value".
32///
33/// Example:
34///
35/// ```text
36/// let key_value_field = Field::new(
37/// DEFAULT_MAP_FIELD_NAME,
38/// arrow_schema::DataType::Struct(vec![
39/// Arc::new(key_field.clone()),
40/// Arc::new(value_field.clone()),
41/// ].into()),
42/// false
43/// );
44/// '''
45#[derive(Clone, Debug, PartialEq, Eq)]
46pub struct Map {
47 index: HashMap<Literal, usize>,
48 pair: Vec<(Literal, Option<Literal>)>,
49}
50
51impl Map {
52 /// Creates a new empty map.
53 pub fn new() -> Self {
54 Self {
55 index: HashMap::new(),
56 pair: Vec::new(),
57 }
58 }
59
60 /// Return the number of key-value pairs in the map.
61 pub fn len(&self) -> usize {
62 self.pair.len()
63 }
64
65 /// Returns true if the map contains no elements.
66 pub fn is_empty(&self) -> bool {
67 self.pair.is_empty()
68 }
69
70 /// Inserts a key-value pair into the map.
71 /// If the map did not have this key present, None is returned.
72 /// If the map did have this key present, the value is updated, and the old value is returned.
73 pub fn insert(&mut self, key: Literal, value: Option<Literal>) -> Option<Option<Literal>> {
74 if let Some(index) = self.index.get(&key) {
75 let old_value = std::mem::replace(&mut self.pair[*index].1, value);
76 Some(old_value)
77 } else {
78 self.pair.push((key.clone(), value));
79 self.index.insert(key, self.pair.len() - 1);
80 None
81 }
82 }
83
84 /// Returns a reference to the value corresponding to the key.
85 /// If the key is not present in the map, None is returned.
86 pub fn get(&self, key: &Literal) -> Option<&Option<Literal>> {
87 self.index.get(key).map(|index| &self.pair[*index].1)
88 }
89
90 /// The order of map is matter, so this method used to compare two maps has same key-value pairs without considering the order.
91 pub fn has_same_content(&self, other: &Map) -> bool {
92 if self.len() != other.len() {
93 return false;
94 }
95
96 for (key, value) in &self.pair {
97 match other.get(key) {
98 Some(other_value) if value == other_value => (),
99 _ => return false,
100 }
101 }
102
103 true
104 }
105}
106
107impl Default for Map {
108 fn default() -> Self {
109 Self::new()
110 }
111}
112
113impl Hash for Map {
114 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
115 for (key, value) in &self.pair {
116 key.hash(state);
117 value.hash(state);
118 }
119 }
120}
121
122impl FromIterator<(Literal, Option<Literal>)> for Map {
123 fn from_iter<T: IntoIterator<Item = (Literal, Option<Literal>)>>(iter: T) -> Self {
124 let mut map = Map::new();
125 for (key, value) in iter {
126 map.insert(key, value);
127 }
128 map
129 }
130}
131
132impl IntoIterator for Map {
133 type Item = (Literal, Option<Literal>);
134 type IntoIter = std::vec::IntoIter<Self::Item>;
135
136 fn into_iter(self) -> Self::IntoIter {
137 self.pair.into_iter()
138 }
139}
140
141impl<const N: usize> From<[(Literal, Option<Literal>); N]> for Map {
142 fn from(value: [(Literal, Option<Literal>); N]) -> Self {
143 value.iter().cloned().collect()
144 }
145}