// iceberg/spec/table_properties.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

use std::collections::HashMap;

20// Helper function to parse a property from a HashMap
21// If the property is not found, use the default value
22fn parse_property<T: std::str::FromStr>(
23    properties: &HashMap<String, String>,
24    key: &str,
25    default: T,
26) -> Result<T, anyhow::Error>
27where
28    <T as std::str::FromStr>::Err: std::fmt::Display,
29{
30    properties.get(key).map_or(Ok(default), |value| {
31        value
32            .parse::<T>()
33            .map_err(|e| anyhow::anyhow!("Invalid value for {key}: {e}"))
34    })
35}
36
/// Typed view of the table properties that this module parses from a string map.
///
/// Each field corresponds to one property key declared as an associated constant on
/// `TableProperties`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TableProperties {
    /// The number of times to retry a commit (`commit.retry.num-retries`).
    pub commit_num_retries: usize,
    /// The minimum wait time between retries, in milliseconds (`commit.retry.min-wait-ms`).
    pub commit_min_retry_wait_ms: u64,
    /// The maximum wait time between retries, in milliseconds (`commit.retry.max-wait-ms`).
    pub commit_max_retry_wait_ms: u64,
    /// The total timeout for commit retries, in milliseconds
    /// (`commit.retry.total-timeout-ms`).
    pub commit_total_retry_timeout_ms: u64,
    /// The default format for files (`write.format.default`).
    pub write_format_default: String,
    /// The target file size for files, in bytes (`write.target-file-size-bytes`).
    pub write_target_file_size_bytes: usize,
}

54impl TableProperties {
55    /// Reserved table property for table format version.
56    ///
57    /// Iceberg will default a new table's format version to the latest stable and recommended
58    /// version. This reserved property keyword allows users to override the Iceberg format version of
59    /// the table metadata.
60    ///
61    /// If this table property exists when creating a table, the table will use the specified format
62    /// version. If a table updates this property, it will try to upgrade to the specified format
63    /// version.
64    pub const PROPERTY_FORMAT_VERSION: &str = "format-version";
65    /// Reserved table property for table UUID.
66    pub const PROPERTY_UUID: &str = "uuid";
67    /// Reserved table property for the total number of snapshots.
68    pub const PROPERTY_SNAPSHOT_COUNT: &str = "snapshot-count";
69    /// Reserved table property for current snapshot summary.
70    pub const PROPERTY_CURRENT_SNAPSHOT_SUMMARY: &str = "current-snapshot-summary";
71    /// Reserved table property for current snapshot id.
72    pub const PROPERTY_CURRENT_SNAPSHOT_ID: &str = "current-snapshot-id";
73    /// Reserved table property for current snapshot timestamp.
74    pub const PROPERTY_CURRENT_SNAPSHOT_TIMESTAMP: &str = "current-snapshot-timestamp-ms";
75    /// Reserved table property for the JSON representation of current schema.
76    pub const PROPERTY_CURRENT_SCHEMA: &str = "current-schema";
77    /// Reserved table property for the JSON representation of current(default) partition spec.
78    pub const PROPERTY_DEFAULT_PARTITION_SPEC: &str = "default-partition-spec";
79    /// Reserved table property for the JSON representation of current(default) sort order.
80    pub const PROPERTY_DEFAULT_SORT_ORDER: &str = "default-sort-order";
81
82    /// Property key for max number of previous versions to keep.
83    pub const PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX: &str =
84        "write.metadata.previous-versions-max";
85    /// Default value for max number of previous versions to keep.
86    pub const PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT: usize = 100;
87
88    /// Property key for max number of partitions to keep summary stats for.
89    pub const PROPERTY_WRITE_PARTITION_SUMMARY_LIMIT: &str = "write.summary.partition-limit";
90    /// Default value for the max number of partitions to keep summary stats for.
91    pub const PROPERTY_WRITE_PARTITION_SUMMARY_LIMIT_DEFAULT: u64 = 0;
92
93    /// Reserved Iceberg table properties list.
94    ///
95    /// Reserved table properties are only used to control behaviors when creating or updating a
96    /// table. The value of these properties are not persisted as a part of the table metadata.
97    pub const RESERVED_PROPERTIES: [&str; 9] = [
98        Self::PROPERTY_FORMAT_VERSION,
99        Self::PROPERTY_UUID,
100        Self::PROPERTY_SNAPSHOT_COUNT,
101        Self::PROPERTY_CURRENT_SNAPSHOT_ID,
102        Self::PROPERTY_CURRENT_SNAPSHOT_SUMMARY,
103        Self::PROPERTY_CURRENT_SNAPSHOT_TIMESTAMP,
104        Self::PROPERTY_CURRENT_SCHEMA,
105        Self::PROPERTY_DEFAULT_PARTITION_SPEC,
106        Self::PROPERTY_DEFAULT_SORT_ORDER,
107    ];
108
109    /// Property key for number of commit retries.
110    pub const PROPERTY_COMMIT_NUM_RETRIES: &str = "commit.retry.num-retries";
111    /// Default value for number of commit retries.
112    pub const PROPERTY_COMMIT_NUM_RETRIES_DEFAULT: usize = 4;
113
114    /// Property key for minimum wait time (ms) between retries.
115    pub const PROPERTY_COMMIT_MIN_RETRY_WAIT_MS: &str = "commit.retry.min-wait-ms";
116    /// Default value for minimum wait time (ms) between retries.
117    pub const PROPERTY_COMMIT_MIN_RETRY_WAIT_MS_DEFAULT: u64 = 100;
118
119    /// Property key for maximum wait time (ms) between retries.
120    pub const PROPERTY_COMMIT_MAX_RETRY_WAIT_MS: &str = "commit.retry.max-wait-ms";
121    /// Default value for maximum wait time (ms) between retries.
122    pub const PROPERTY_COMMIT_MAX_RETRY_WAIT_MS_DEFAULT: u64 = 60 * 1000; // 1 minute
123
124    /// Property key for total maximum retry time (ms).
125    pub const PROPERTY_COMMIT_TOTAL_RETRY_TIME_MS: &str = "commit.retry.total-timeout-ms";
126    /// Default value for total maximum retry time (ms).
127    pub const PROPERTY_COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT: u64 = 30 * 60 * 1000; // 30 minutes
128
129    /// Default file format for data files
130    pub const PROPERTY_DEFAULT_FILE_FORMAT: &str = "write.format.default";
131    /// Default file format for delete files
132    pub const PROPERTY_DELETE_DEFAULT_FILE_FORMAT: &str = "write.delete.format.default";
133    /// Default value for data file format
134    pub const PROPERTY_DEFAULT_FILE_FORMAT_DEFAULT: &str = "parquet";
135
136    /// Target file size for newly written files.
137    pub const PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES: &str = "write.target-file-size-bytes";
138    /// Default target file size
139    pub const PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT: usize = 512 * 1024 * 1024; // 512 MB
140}
141
142impl TryFrom<&HashMap<String, String>> for TableProperties {
143    // parse by entry key or use default value
144    type Error = anyhow::Error;
145
146    fn try_from(props: &HashMap<String, String>) -> Result<Self, Self::Error> {
147        Ok(TableProperties {
148            commit_num_retries: parse_property(
149                props,
150                TableProperties::PROPERTY_COMMIT_NUM_RETRIES,
151                TableProperties::PROPERTY_COMMIT_NUM_RETRIES_DEFAULT,
152            )?,
153            commit_min_retry_wait_ms: parse_property(
154                props,
155                TableProperties::PROPERTY_COMMIT_MIN_RETRY_WAIT_MS,
156                TableProperties::PROPERTY_COMMIT_MIN_RETRY_WAIT_MS_DEFAULT,
157            )?,
158            commit_max_retry_wait_ms: parse_property(
159                props,
160                TableProperties::PROPERTY_COMMIT_MAX_RETRY_WAIT_MS,
161                TableProperties::PROPERTY_COMMIT_MAX_RETRY_WAIT_MS_DEFAULT,
162            )?,
163            commit_total_retry_timeout_ms: parse_property(
164                props,
165                TableProperties::PROPERTY_COMMIT_TOTAL_RETRY_TIME_MS,
166                TableProperties::PROPERTY_COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT,
167            )?,
168            write_format_default: parse_property(
169                props,
170                TableProperties::PROPERTY_DEFAULT_FILE_FORMAT,
171                TableProperties::PROPERTY_DEFAULT_FILE_FORMAT_DEFAULT.to_string(),
172            )?,
173            write_target_file_size_bytes: parse_property(
174                props,
175                TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES,
176                TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT,
177            )?,
178        })
179    }
180}
181
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty map must yield the documented default for every field.
    #[test]
    fn test_table_properties_default() {
        let props = HashMap::new();
        let table_properties = TableProperties::try_from(&props).unwrap();
        assert_eq!(
            table_properties.commit_num_retries,
            TableProperties::PROPERTY_COMMIT_NUM_RETRIES_DEFAULT
        );
        assert_eq!(
            table_properties.commit_min_retry_wait_ms,
            TableProperties::PROPERTY_COMMIT_MIN_RETRY_WAIT_MS_DEFAULT
        );
        assert_eq!(
            table_properties.commit_max_retry_wait_ms,
            TableProperties::PROPERTY_COMMIT_MAX_RETRY_WAIT_MS_DEFAULT
        );
        assert_eq!(
            table_properties.commit_total_retry_timeout_ms,
            TableProperties::PROPERTY_COMMIT_TOTAL_RETRY_TIME_MS_DEFAULT
        );
        assert_eq!(
            table_properties.write_format_default,
            TableProperties::PROPERTY_DEFAULT_FILE_FORMAT_DEFAULT.to_string()
        );
        assert_eq!(
            table_properties.write_target_file_size_bytes,
            TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT
        );
    }

    /// Explicit values for every supported key must override the defaults.
    #[test]
    fn test_table_properties_valid() {
        let props = HashMap::from([
            (
                TableProperties::PROPERTY_COMMIT_NUM_RETRIES.to_string(),
                "10".to_string(),
            ),
            (
                TableProperties::PROPERTY_COMMIT_MIN_RETRY_WAIT_MS.to_string(),
                "5".to_string(),
            ),
            (
                TableProperties::PROPERTY_COMMIT_MAX_RETRY_WAIT_MS.to_string(),
                "20".to_string(),
            ),
            (
                TableProperties::PROPERTY_COMMIT_TOTAL_RETRY_TIME_MS.to_string(),
                "1000".to_string(),
            ),
            (
                TableProperties::PROPERTY_DEFAULT_FILE_FORMAT.to_string(),
                "avro".to_string(),
            ),
            (
                TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES.to_string(),
                "512".to_string(),
            ),
        ]);
        let table_properties = TableProperties::try_from(&props).unwrap();
        assert_eq!(table_properties.commit_num_retries, 10);
        assert_eq!(table_properties.commit_min_retry_wait_ms, 5);
        assert_eq!(table_properties.commit_max_retry_wait_ms, 20);
        assert_eq!(table_properties.commit_total_retry_timeout_ms, 1000);
        assert_eq!(table_properties.write_format_default, "avro".to_string());
        assert_eq!(table_properties.write_target_file_size_bytes, 512);
    }

    /// A non-numeric value for any numeric key must be rejected with an error naming the key.
    #[test]
    fn test_table_properties_invalid() {
        let numeric_keys = [
            TableProperties::PROPERTY_COMMIT_NUM_RETRIES,
            TableProperties::PROPERTY_COMMIT_MIN_RETRY_WAIT_MS,
            TableProperties::PROPERTY_COMMIT_MAX_RETRY_WAIT_MS,
            TableProperties::PROPERTY_COMMIT_TOTAL_RETRY_TIME_MS,
            TableProperties::PROPERTY_WRITE_TARGET_FILE_SIZE_BYTES,
        ];

        for key in numeric_keys {
            let props = HashMap::from([(key.to_string(), "abc".to_string())]);
            let err = TableProperties::try_from(&props).unwrap_err();
            let expected = format!("Invalid value for {key}: invalid digit found in string");
            assert!(
                err.to_string().contains(&expected),
                "unexpected error for {key}: {err}"
            );
        }
    }
}