iceberg/catalog/
metadata_location.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::collections::HashMap;
19use std::fmt::Display;
20use std::str::FromStr;
21
22use uuid::Uuid;
23
24use crate::compression::CompressionCodec;
25use crate::spec::{TableMetadata, parse_metadata_file_compression};
26use crate::{Error, ErrorKind, Result};
27
/// Helper for parsing a location of the format: `<location>/metadata/<version>-<uuid>.metadata.json`
/// or with compression: `<location>/metadata/<version>-<uuid>.gz.metadata.json`
#[derive(Clone, Debug, PartialEq)]
pub struct MetadataLocation {
    /// Everything that precedes the `/metadata/<file name>` part of the location.
    table_location: String,
    /// Metadata file version; rendered left-padded with zeros to at least five digits.
    version: i32,
    /// Unique id that follows the version in the file name.
    id: Uuid,
    /// Codec whose suffix (if any) is placed in front of `.metadata.json`.
    compression_codec: CompressionCodec,
}
37
38impl MetadataLocation {
39    /// Determines the compression codec from table properties.
40    /// Parse errors result in CompressionCodec::None.
41    fn compression_from_properties(properties: &HashMap<String, String>) -> CompressionCodec {
42        parse_metadata_file_compression(properties).unwrap_or(CompressionCodec::None)
43    }
44
45    /// Creates a completely new metadata location starting at version 0.
46    /// Only used for creating a new table. For updates, see `next_version`.
47    #[deprecated(
48        since = "0.8.0",
49        note = "Use new_with_metadata instead to properly handle compression settings"
50    )]
51    pub fn new_with_table_location(table_location: impl ToString) -> Self {
52        Self {
53            table_location: table_location.to_string(),
54            version: 0,
55            id: Uuid::new_v4(),
56            compression_codec: CompressionCodec::None,
57        }
58    }
59
60    /// Creates a completely new metadata location starting at version 0,
61    /// with compression settings from the table metadata.
62    /// Only used for creating a new table. For updates, see `next_version`.
63    pub fn new_with_metadata(table_location: impl ToString, metadata: &TableMetadata) -> Self {
64        Self {
65            table_location: table_location.to_string(),
66            version: 0,
67            id: Uuid::new_v4(),
68            compression_codec: Self::compression_from_properties(metadata.properties()),
69        }
70    }
71
72    /// Creates a new metadata location for an updated metadata file.
73    /// Increments the version number and generates a new UUID.
74    pub fn with_next_version(&self) -> Self {
75        Self {
76            table_location: self.table_location.clone(),
77            version: self.version + 1,
78            id: Uuid::new_v4(),
79            compression_codec: self.compression_codec,
80        }
81    }
82
83    /// Updates the metadata location with compression settings from the new metadata.
84    pub fn with_new_metadata(&self, new_metadata: &TableMetadata) -> Self {
85        Self {
86            table_location: self.table_location.clone(),
87            version: self.version,
88            id: self.id,
89            compression_codec: Self::compression_from_properties(new_metadata.properties()),
90        }
91    }
92
93    /// Returns the compression codec used for this metadata location.
94    pub fn compression_codec(&self) -> CompressionCodec {
95        self.compression_codec
96    }
97
98    fn parse_metadata_path_prefix(path: &str) -> Result<String> {
99        let prefix = path.strip_suffix("/metadata").ok_or(Error::new(
100            ErrorKind::Unexpected,
101            format!("Metadata location not under \"/metadata\" subdirectory: {path}"),
102        ))?;
103
104        Ok(prefix.to_string())
105    }
106
107    /// Parses a file name of the format `<version>-<uuid>.metadata.json`
108    /// or with compression: `<version>-<uuid>.gz.metadata.json`.
109    /// Parse errors for compression codec result in CompressionCodec::None.
110    fn parse_file_name(file_name: &str) -> Result<(i32, Uuid, CompressionCodec)> {
111        let stripped = file_name.strip_suffix(".metadata.json").ok_or(Error::new(
112            ErrorKind::Unexpected,
113            format!("Invalid metadata file ending: {file_name}"),
114        ))?;
115
116        // Check for compression suffix (e.g., .gz)
117        let gzip_suffix = CompressionCodec::Gzip.suffix()?;
118        let (stripped, compression_codec) = if let Some(s) = stripped.strip_suffix(gzip_suffix) {
119            (s, CompressionCodec::Gzip)
120        } else {
121            (stripped, CompressionCodec::None)
122        };
123
124        let (version, id) = stripped.split_once('-').ok_or(Error::new(
125            ErrorKind::Unexpected,
126            format!("Invalid metadata file name format: {file_name}"),
127        ))?;
128
129        Ok((
130            version.parse::<i32>()?,
131            Uuid::parse_str(id)?,
132            compression_codec,
133        ))
134    }
135}
136
137impl Display for MetadataLocation {
138    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
139        let suffix = self.compression_codec.suffix().unwrap_or("");
140        write!(
141            f,
142            "{}/metadata/{:0>5}-{}{}.metadata.json",
143            self.table_location, self.version, self.id, suffix
144        )
145    }
146}
147
148impl FromStr for MetadataLocation {
149    type Err = Error;
150
151    fn from_str(s: &str) -> Result<Self> {
152        let (path, file_name) = s.rsplit_once('/').ok_or(Error::new(
153            ErrorKind::Unexpected,
154            format!("Invalid metadata location: {s}"),
155        ))?;
156
157        let prefix = Self::parse_metadata_path_prefix(path)?;
158        let (version, id, compression_codec) = Self::parse_file_name(file_name)?;
159
160        Ok(MetadataLocation {
161            table_location: prefix,
162            version,
163            id,
164            compression_codec,
165        })
166    }
167}
168
#[cfg(test)]
mod test {
    use std::collections::HashMap;
    use std::str::FromStr;

    use uuid::Uuid;

    use crate::compression::CompressionCodec;
    use crate::spec::{Schema, TableMetadata, TableMetadataBuilder};
    use crate::{MetadataLocation, TableCreation};

    /// First UUID used in the test file names.
    const ID_A: &str = "2cd22b57-5127-4198-92ba-e4e67c79821b";
    /// Second UUID used in the test file names.
    const ID_B: &str = "81056704-ce5b-41c4-bb83-eb6408081af6";

    /// Builds table metadata for an empty-schema table at `/test/table`
    /// carrying the given table properties.
    fn create_test_metadata(properties: HashMap<String, String>) -> TableMetadata {
        let table_creation = TableCreation::builder()
            .name("test_table".to_string())
            .location("/test/table".to_string())
            .schema(Schema::builder().build().unwrap())
            .properties(properties)
            .build();
        TableMetadataBuilder::from_table_creation(table_creation)
            .unwrap()
            .build()
            .unwrap()
            .metadata
    }

    /// Builds the expected result for an input that must parse successfully.
    fn parsed(
        table_location: &str,
        version: i32,
        id: &str,
        compression_codec: CompressionCodec,
    ) -> Option<MetadataLocation> {
        Some(MetadataLocation {
            table_location: table_location.to_string(),
            version,
            id: Uuid::from_str(id).unwrap(),
            compression_codec,
        })
    }

    #[test]
    fn test_metadata_location_from_string() {
        // Each case pairs an input with the expected parse result; `None`
        // means the input must be rejected.
        let test_cases = vec![
            // No prefix
            (
                "/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json",
                parsed("", 1234567, ID_A, CompressionCodec::None),
            ),
            // Some prefix
            (
                "/abc/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json",
                parsed("/abc", 1234567, ID_A, CompressionCodec::None),
            ),
            // Longer prefix
            (
                "/abc/def/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json",
                parsed("/abc/def", 1234567, ID_A, CompressionCodec::None),
            ),
            // Prefix with special characters
            (
                "https://127.0.0.1/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json",
                parsed("https://127.0.0.1", 1234567, ID_A, CompressionCodec::None),
            ),
            // Another id
            (
                "/abc/metadata/1234567-81056704-ce5b-41c4-bb83-eb6408081af6.metadata.json",
                parsed("/abc", 1234567, ID_B, CompressionCodec::None),
            ),
            // Version 0
            (
                "/abc/metadata/00000-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json",
                parsed("/abc", 0, ID_A, CompressionCodec::None),
            ),
            // With gzip compression
            (
                "/abc/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.gz.metadata.json",
                parsed("/abc", 1234567, ID_A, CompressionCodec::Gzip),
            ),
            // Negative version
            (
                "/metadata/-123-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json",
                None,
            ),
            // Invalid uuid
            ("/metadata/1234567-no-valid-id.metadata.json", None),
            // Non-numeric version
            (
                "/metadata/noversion-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json",
                None,
            ),
            // No /metadata subdirectory
            (
                "/wrongsubdir/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json",
                None,
            ),
            // No .metadata.json suffix
            (
                "/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata",
                None,
            ),
            (
                "/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.wrong.file",
                None,
            ),
        ];

        for (input, expected) in test_cases {
            let actual = MetadataLocation::from_str(input).ok();
            // Include the input so a failure points at the offending case.
            assert_eq!(actual, expected, "unexpected parse result for {input:?}");
        }
    }

    #[test]
    fn test_metadata_location_with_next_version() {
        let metadata = create_test_metadata(HashMap::new());
        let test_cases = vec![
            MetadataLocation::new_with_metadata("/abc", &metadata),
            MetadataLocation::from_str(
                "/abc/def/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json",
            )
            .unwrap(),
        ];

        for input in test_cases {
            // Round-trip through the string form before bumping the version.
            let next = MetadataLocation::from_str(&input.to_string())
                .unwrap()
                .with_next_version();
            assert_eq!(next.table_location, input.table_location);
            assert_eq!(next.version, input.version + 1);
            assert_ne!(next.id, input.id);
        }
    }

    #[test]
    fn test_with_next_version_preserves_compression() {
        // Start from a parsed location with no compression
        let location_none = MetadataLocation::from_str(
            "/test/table/metadata/00000-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json",
        )
        .unwrap();
        assert_eq!(location_none.compression_codec, CompressionCodec::None);

        let next_none = location_none.with_next_version();
        assert_eq!(next_none.compression_codec, CompressionCodec::None);
        assert_eq!(next_none.version, 1);

        // Start from a parsed location with gzip compression
        let location_gzip = MetadataLocation::from_str(
            "/test/table/metadata/00005-81056704-ce5b-41c4-bb83-eb6408081af6.gz.metadata.json",
        )
        .unwrap();
        assert_eq!(location_gzip.compression_codec, CompressionCodec::Gzip);

        let next_gzip = location_gzip.with_next_version();
        assert_eq!(next_gzip.compression_codec, CompressionCodec::Gzip);
        assert_eq!(next_gzip.version, 6);
    }

    #[test]
    fn test_with_new_metadata_updates_compression() {
        // Start from a parsed location with no compression
        let location = MetadataLocation::from_str(
            "/test/table/metadata/00000-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json",
        )
        .unwrap();
        assert_eq!(location.compression_codec, CompressionCodec::None);

        // Update to gzip compression
        let mut props_gzip = HashMap::new();
        props_gzip.insert(
            "write.metadata.compression-codec".to_string(),
            "gzip".to_string(),
        );
        let metadata_gzip = create_test_metadata(props_gzip);
        let updated_gzip = location.with_new_metadata(&metadata_gzip);
        assert_eq!(updated_gzip.compression_codec, CompressionCodec::Gzip);
        assert_eq!(updated_gzip.version, 0);
        assert_eq!(
            updated_gzip.to_string(),
            "/test/table/metadata/00000-2cd22b57-5127-4198-92ba-e4e67c79821b.gz.metadata.json"
        );

        // Update back to no compression
        let props_none = HashMap::new();
        let metadata_none = create_test_metadata(props_none);
        let updated_none = updated_gzip.with_new_metadata(&metadata_none);
        assert_eq!(updated_none.compression_codec, CompressionCodec::None);
        assert_eq!(updated_none.version, 0);
        assert_eq!(
            updated_none.to_string(),
            "/test/table/metadata/00000-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json"
        );

        // Test explicit "none" codec
        let mut props_explicit_none = HashMap::new();
        props_explicit_none.insert(
            "write.metadata.compression-codec".to_string(),
            "none".to_string(),
        );
        let metadata_explicit_none = create_test_metadata(props_explicit_none);
        let updated_explicit = updated_gzip.with_new_metadata(&metadata_explicit_none);
        assert_eq!(updated_explicit.compression_codec, CompressionCodec::None);
        assert_eq!(
            updated_explicit.to_string(),
            "/test/table/metadata/00000-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json"
        );
    }
}