iceberg/spec/values/
map.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Map collection type for Iceberg
19
20use std::collections::HashMap;
21use std::hash::Hash;
22
23use super::Literal;
24
25/// Map is a collection of key-value pairs with a key type and a value type.
26/// It used in Literal::Map, to make it hashable, the order of key-value pairs is stored in a separate vector
27/// so that we can hash the map in a deterministic way. But it also means that the order of key-value pairs is matter
28/// for the hash value.
29///
30/// When converting to Arrow (e.g., for Iceberg), the map should be represented using
31/// the default field name "key_value".
32///
33/// Example:
34///
35/// ```text
36/// let key_value_field = Field::new(
37///     DEFAULT_MAP_FIELD_NAME,
38///     arrow_schema::DataType::Struct(vec![
39///         Arc::new(key_field.clone()),
40///         Arc::new(value_field.clone()),
41///     ].into()),
42///     false
43/// );
44/// '''
45#[derive(Clone, Debug, PartialEq, Eq)]
46pub struct Map {
47    index: HashMap<Literal, usize>,
48    pair: Vec<(Literal, Option<Literal>)>,
49}
50
51impl Map {
52    /// Creates a new empty map.
53    pub fn new() -> Self {
54        Self {
55            index: HashMap::new(),
56            pair: Vec::new(),
57        }
58    }
59
60    /// Return the number of key-value pairs in the map.
61    pub fn len(&self) -> usize {
62        self.pair.len()
63    }
64
65    /// Returns true if the map contains no elements.
66    pub fn is_empty(&self) -> bool {
67        self.pair.is_empty()
68    }
69
70    /// Inserts a key-value pair into the map.
71    /// If the map did not have this key present, None is returned.
72    /// If the map did have this key present, the value is updated, and the old value is returned.
73    pub fn insert(&mut self, key: Literal, value: Option<Literal>) -> Option<Option<Literal>> {
74        if let Some(index) = self.index.get(&key) {
75            let old_value = std::mem::replace(&mut self.pair[*index].1, value);
76            Some(old_value)
77        } else {
78            self.pair.push((key.clone(), value));
79            self.index.insert(key, self.pair.len() - 1);
80            None
81        }
82    }
83
84    /// Returns a reference to the value corresponding to the key.
85    /// If the key is not present in the map, None is returned.
86    pub fn get(&self, key: &Literal) -> Option<&Option<Literal>> {
87        self.index.get(key).map(|index| &self.pair[*index].1)
88    }
89
90    /// The order of map is matter, so this method used to compare two maps has same key-value pairs without considering the order.
91    pub fn has_same_content(&self, other: &Map) -> bool {
92        if self.len() != other.len() {
93            return false;
94        }
95
96        for (key, value) in &self.pair {
97            match other.get(key) {
98                Some(other_value) if value == other_value => (),
99                _ => return false,
100            }
101        }
102
103        true
104    }
105}
106
107impl Default for Map {
108    fn default() -> Self {
109        Self::new()
110    }
111}
112
113impl Hash for Map {
114    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
115        for (key, value) in &self.pair {
116            key.hash(state);
117            value.hash(state);
118        }
119    }
120}
121
122impl FromIterator<(Literal, Option<Literal>)> for Map {
123    fn from_iter<T: IntoIterator<Item = (Literal, Option<Literal>)>>(iter: T) -> Self {
124        let mut map = Map::new();
125        for (key, value) in iter {
126            map.insert(key, value);
127        }
128        map
129    }
130}
131
132impl IntoIterator for Map {
133    type Item = (Literal, Option<Literal>);
134    type IntoIter = std::vec::IntoIter<Self::Item>;
135
136    fn into_iter(self) -> Self::IntoIter {
137        self.pair.into_iter()
138    }
139}
140
141impl<const N: usize> From<[(Literal, Option<Literal>); N]> for Map {
142    fn from(value: [(Literal, Option<Literal>); N]) -> Self {
143        value.iter().cloned().collect()
144    }
145}