1use std::collections::HashMap;
19use std::fmt::Display;
20use std::str::FromStr;
21
22use uuid::Uuid;
23
24use crate::compression::CompressionCodec;
25use crate::spec::{TableMetadata, parse_metadata_file_compression};
26use crate::{Error, ErrorKind, Result};
27
28#[derive(Clone, Debug, PartialEq)]
31pub struct MetadataLocation {
32 table_location: String,
33 version: i32,
34 id: Uuid,
35 compression_codec: CompressionCodec,
36}
37
38impl MetadataLocation {
39 fn compression_from_properties(properties: &HashMap<String, String>) -> CompressionCodec {
42 parse_metadata_file_compression(properties).unwrap_or(CompressionCodec::None)
43 }
44
45 #[deprecated(
48 since = "0.8.0",
49 note = "Use new_with_metadata instead to properly handle compression settings"
50 )]
51 pub fn new_with_table_location(table_location: impl ToString) -> Self {
52 Self {
53 table_location: table_location.to_string(),
54 version: 0,
55 id: Uuid::new_v4(),
56 compression_codec: CompressionCodec::None,
57 }
58 }
59
60 pub fn new_with_metadata(table_location: impl ToString, metadata: &TableMetadata) -> Self {
64 Self {
65 table_location: table_location.to_string(),
66 version: 0,
67 id: Uuid::new_v4(),
68 compression_codec: Self::compression_from_properties(metadata.properties()),
69 }
70 }
71
72 pub fn with_next_version(&self) -> Self {
75 Self {
76 table_location: self.table_location.clone(),
77 version: self.version + 1,
78 id: Uuid::new_v4(),
79 compression_codec: self.compression_codec,
80 }
81 }
82
83 pub fn with_new_metadata(&self, new_metadata: &TableMetadata) -> Self {
85 Self {
86 table_location: self.table_location.clone(),
87 version: self.version,
88 id: self.id,
89 compression_codec: Self::compression_from_properties(new_metadata.properties()),
90 }
91 }
92
93 pub fn compression_codec(&self) -> CompressionCodec {
95 self.compression_codec
96 }
97
98 fn parse_metadata_path_prefix(path: &str) -> Result<String> {
99 let prefix = path.strip_suffix("/metadata").ok_or(Error::new(
100 ErrorKind::Unexpected,
101 format!("Metadata location not under \"/metadata\" subdirectory: {path}"),
102 ))?;
103
104 Ok(prefix.to_string())
105 }
106
107 fn parse_file_name(file_name: &str) -> Result<(i32, Uuid, CompressionCodec)> {
111 let stripped = file_name.strip_suffix(".metadata.json").ok_or(Error::new(
112 ErrorKind::Unexpected,
113 format!("Invalid metadata file ending: {file_name}"),
114 ))?;
115
116 let gzip_suffix = CompressionCodec::Gzip.suffix()?;
118 let (stripped, compression_codec) = if let Some(s) = stripped.strip_suffix(gzip_suffix) {
119 (s, CompressionCodec::Gzip)
120 } else {
121 (stripped, CompressionCodec::None)
122 };
123
124 let (version, id) = stripped.split_once('-').ok_or(Error::new(
125 ErrorKind::Unexpected,
126 format!("Invalid metadata file name format: {file_name}"),
127 ))?;
128
129 Ok((
130 version.parse::<i32>()?,
131 Uuid::parse_str(id)?,
132 compression_codec,
133 ))
134 }
135}
136
137impl Display for MetadataLocation {
138 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
139 let suffix = self.compression_codec.suffix().unwrap_or("");
140 write!(
141 f,
142 "{}/metadata/{:0>5}-{}{}.metadata.json",
143 self.table_location, self.version, self.id, suffix
144 )
145 }
146}
147
148impl FromStr for MetadataLocation {
149 type Err = Error;
150
151 fn from_str(s: &str) -> Result<Self> {
152 let (path, file_name) = s.rsplit_once('/').ok_or(Error::new(
153 ErrorKind::Unexpected,
154 format!("Invalid metadata location: {s}"),
155 ))?;
156
157 let prefix = Self::parse_metadata_path_prefix(path)?;
158 let (version, id, compression_codec) = Self::parse_file_name(file_name)?;
159
160 Ok(MetadataLocation {
161 table_location: prefix,
162 version,
163 id,
164 compression_codec,
165 })
166 }
167}
168
#[cfg(test)]
mod test {
    use std::collections::HashMap;
    use std::str::FromStr;

    use uuid::Uuid;

    use crate::compression::CompressionCodec;
    use crate::spec::{Schema, TableMetadata, TableMetadataBuilder};
    use crate::{MetadataLocation, TableCreation};

    /// Builds minimal table metadata carrying the given table properties.
    fn create_test_metadata(properties: HashMap<String, String>) -> TableMetadata {
        let table_creation = TableCreation::builder()
            .name("test_table".to_string())
            .location("/test/table".to_string())
            .schema(Schema::builder().build().unwrap())
            .properties(properties)
            .build();
        TableMetadataBuilder::from_table_creation(table_creation)
            .unwrap()
            .build()
            .unwrap()
            .metadata
    }

    /// Expected value for an input that must parse successfully.
    fn parsed(
        table_location: &str,
        version: i32,
        id: &str,
        compression_codec: CompressionCodec,
    ) -> Option<MetadataLocation> {
        Some(MetadataLocation {
            table_location: table_location.to_string(),
            version,
            id: Uuid::from_str(id).unwrap(),
            compression_codec,
        })
    }

    #[test]
    fn test_metadata_location_from_string() {
        const ID: &str = "2cd22b57-5127-4198-92ba-e4e67c79821b";

        // `None` marks an input that must be rejected.
        let test_cases = vec![
            // No table location prefix.
            (
                "/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json",
                parsed("", 1234567, ID, CompressionCodec::None),
            ),
            // Single-segment prefix.
            (
                "/abc/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json",
                parsed("/abc", 1234567, ID, CompressionCodec::None),
            ),
            // Multi-segment prefix.
            (
                "/abc/def/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json",
                parsed("/abc/def", 1234567, ID, CompressionCodec::None),
            ),
            // Prefix with a URL scheme.
            (
                "https://127.0.0.1/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json",
                parsed("https://127.0.0.1", 1234567, ID, CompressionCodec::None),
            ),
            // A different id.
            (
                "/abc/metadata/1234567-81056704-ce5b-41c4-bb83-eb6408081af6.metadata.json",
                parsed(
                    "/abc",
                    1234567,
                    "81056704-ce5b-41c4-bb83-eb6408081af6",
                    CompressionCodec::None,
                ),
            ),
            // Zero-padded version.
            (
                "/abc/metadata/00000-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json",
                parsed("/abc", 0, ID, CompressionCodec::None),
            ),
            // Gzip-compressed metadata file.
            (
                "/abc/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.gz.metadata.json",
                parsed("/abc", 1234567, ID, CompressionCodec::Gzip),
            ),
            // Leading dash leaves an empty version.
            (
                "/metadata/-123-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json",
                None,
            ),
            // Id is not a UUID.
            ("/metadata/1234567-no-valid-id.metadata.json", None),
            // Version is not a number.
            (
                "/metadata/noversion-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json",
                None,
            ),
            // Not under a "/metadata" directory.
            (
                "/wrongsubdir/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json",
                None,
            ),
            // Wrong file endings.
            (
                "/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata",
                None,
            ),
            (
                "/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.wrong.file",
                None,
            ),
        ];

        for (input, expected) in test_cases {
            let actual = MetadataLocation::from_str(input).ok();
            assert_eq!(actual, expected, "unexpected parse result for {input:?}");
        }
    }

    #[test]
    fn test_metadata_location_with_next_version() {
        let metadata = create_test_metadata(HashMap::new());
        let test_cases = vec![
            MetadataLocation::new_with_metadata("/abc", &metadata),
            MetadataLocation::from_str(
                "/abc/def/metadata/1234567-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json",
            )
            .unwrap(),
        ];

        for input in test_cases {
            // Round-trip through the string form before bumping the version.
            let rendered = input.to_string();
            let next = MetadataLocation::from_str(&rendered)
                .unwrap()
                .with_next_version();
            assert_eq!(
                next.table_location, input.table_location,
                "table location changed for {rendered}"
            );
            assert_eq!(
                next.version,
                input.version + 1,
                "version not incremented for {rendered}"
            );
            assert_ne!(next.id, input.id, "id not regenerated for {rendered}");
        }
    }

    #[test]
    fn test_with_next_version_preserves_compression() {
        // Uncompressed location stays uncompressed.
        let location_none = MetadataLocation::from_str(
            "/test/table/metadata/00000-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json",
        )
        .unwrap();
        assert_eq!(location_none.compression_codec, CompressionCodec::None);

        let next_none = location_none.with_next_version();
        assert_eq!(next_none.compression_codec, CompressionCodec::None);
        assert_eq!(next_none.version, 1);

        // Gzip location stays gzip.
        let location_gzip = MetadataLocation::from_str(
            "/test/table/metadata/00005-81056704-ce5b-41c4-bb83-eb6408081af6.gz.metadata.json",
        )
        .unwrap();
        assert_eq!(location_gzip.compression_codec, CompressionCodec::Gzip);

        let next_gzip = location_gzip.with_next_version();
        assert_eq!(next_gzip.compression_codec, CompressionCodec::Gzip);
        assert_eq!(next_gzip.version, 6);
    }

    #[test]
    fn test_with_new_metadata_updates_compression() {
        let location = MetadataLocation::from_str(
            "/test/table/metadata/00000-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json",
        )
        .unwrap();
        assert_eq!(location.compression_codec, CompressionCodec::None);

        // Switching the table property to gzip changes the codec and the file name.
        let mut props_gzip = HashMap::new();
        props_gzip.insert(
            "write.metadata.compression-codec".to_string(),
            "gzip".to_string(),
        );
        let metadata_gzip = create_test_metadata(props_gzip);
        let updated_gzip = location.with_new_metadata(&metadata_gzip);
        assert_eq!(updated_gzip.compression_codec, CompressionCodec::Gzip);
        assert_eq!(updated_gzip.version, 0);
        assert_eq!(
            updated_gzip.to_string(),
            "/test/table/metadata/00000-2cd22b57-5127-4198-92ba-e4e67c79821b.gz.metadata.json"
        );

        // Metadata without the property falls back to no compression.
        let props_none = HashMap::new();
        let metadata_none = create_test_metadata(props_none);
        let updated_none = updated_gzip.with_new_metadata(&metadata_none);
        assert_eq!(updated_none.compression_codec, CompressionCodec::None);
        assert_eq!(updated_none.version, 0);
        assert_eq!(
            updated_none.to_string(),
            "/test/table/metadata/00000-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json"
        );

        // Explicitly setting the property to "none" also disables compression.
        let mut props_explicit_none = HashMap::new();
        props_explicit_none.insert(
            "write.metadata.compression-codec".to_string(),
            "none".to_string(),
        );
        let metadata_explicit_none = create_test_metadata(props_explicit_none);
        let updated_explicit = updated_gzip.with_new_metadata(&metadata_explicit_none);
        assert_eq!(updated_explicit.compression_codec, CompressionCodec::None);
        assert_eq!(
            updated_explicit.to_string(),
            "/test/table/metadata/00000-2cd22b57-5127-4198-92ba-e4e67c79821b.metadata.json"
        );
    }
}