iceberg/spec/manifest/
metadata.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
17
18use std::collections::HashMap;
19use std::sync::Arc;
20
21use typed_builder::TypedBuilder;
22
23use super::{FormatVersion, ManifestContentType, PartitionSpec, Schema};
24use crate::error::Result;
25use crate::spec::{PartitionField, SchemaId, SchemaRef};
26use crate::{Error, ErrorKind};
27
28/// Meta data of a manifest that is stored in the key-value metadata of the Avro file
29#[derive(Debug, PartialEq, Clone, Eq, TypedBuilder)]
30pub struct ManifestMetadata {
31    /// The table schema at the time the manifest
32    /// was written
33    pub schema: SchemaRef,
34    /// ID of the schema used to write the manifest as a string
35    pub schema_id: SchemaId,
36    /// The partition spec used to write the manifest
37    pub partition_spec: PartitionSpec,
38    /// Table format version number of the manifest as a string
39    pub format_version: FormatVersion,
40    /// Type of content files tracked by the manifest: “data” or “deletes”
41    pub content: ManifestContentType,
42}
43
44impl ManifestMetadata {
45    /// Parse from metadata in avro file.
46    pub fn parse(meta: &HashMap<String, Vec<u8>>) -> Result<Self> {
47        let schema = Arc::new({
48            let bs = meta.get("schema").ok_or_else(|| {
49                Error::new(
50                    ErrorKind::DataInvalid,
51                    "schema is required in manifest metadata but not found",
52                )
53            })?;
54            serde_json::from_slice::<Schema>(bs).map_err(|err| {
55                Error::new(
56                    ErrorKind::DataInvalid,
57                    "Fail to parse schema in manifest metadata",
58                )
59                .with_source(err)
60            })?
61        });
62        let schema_id: i32 = meta
63            .get("schema-id")
64            .map(|bs| {
65                String::from_utf8_lossy(bs).parse().map_err(|err| {
66                    Error::new(
67                        ErrorKind::DataInvalid,
68                        "Fail to parse schema id in manifest metadata",
69                    )
70                    .with_source(err)
71                })
72            })
73            .transpose()?
74            .unwrap_or(0);
75        let partition_spec = {
76            let fields = {
77                let bs = meta.get("partition-spec").ok_or_else(|| {
78                    Error::new(
79                        ErrorKind::DataInvalid,
80                        "partition-spec is required in manifest metadata but not found",
81                    )
82                })?;
83                serde_json::from_slice::<Vec<PartitionField>>(bs).map_err(|err| {
84                    Error::new(
85                        ErrorKind::DataInvalid,
86                        "Fail to parse partition spec in manifest metadata",
87                    )
88                    .with_source(err)
89                })?
90            };
91            let spec_id = meta
92                .get("partition-spec-id")
93                .map(|bs| {
94                    String::from_utf8_lossy(bs).parse().map_err(|err| {
95                        Error::new(
96                            ErrorKind::DataInvalid,
97                            "Fail to parse partition spec id in manifest metadata",
98                        )
99                        .with_source(err)
100                    })
101                })
102                .transpose()?
103                .unwrap_or(0);
104            PartitionSpec::builder(schema.clone())
105                .with_spec_id(spec_id)
106                .add_unbound_fields(fields.into_iter().map(|f| f.into_unbound()))?
107                .build()?
108        };
109        let format_version = if let Some(bs) = meta.get("format-version") {
110            serde_json::from_slice::<FormatVersion>(bs).map_err(|err| {
111                Error::new(
112                    ErrorKind::DataInvalid,
113                    "Fail to parse format version in manifest metadata",
114                )
115                .with_source(err)
116            })?
117        } else {
118            FormatVersion::V1
119        };
120        let content = if let Some(v) = meta.get("content") {
121            let v = String::from_utf8_lossy(v);
122            v.parse()?
123        } else {
124            ManifestContentType::Data
125        };
126        Ok(ManifestMetadata {
127            schema,
128            schema_id,
129            partition_spec,
130            format_version,
131            content,
132        })
133    }
134
135    /// Get the schema of table at the time manifest was written
136    pub fn schema(&self) -> &SchemaRef {
137        &self.schema
138    }
139
140    /// Get the ID of schema used to write the manifest
141    pub fn schema_id(&self) -> SchemaId {
142        self.schema_id
143    }
144
145    /// Get the partition spec used to write manifest
146    pub fn partition_spec(&self) -> &PartitionSpec {
147        &self.partition_spec
148    }
149
150    /// Get the table format version
151    pub fn format_version(&self) -> &FormatVersion {
152        &self.format_version
153    }
154
155    /// Get the type of content files tracked by manifest
156    pub fn content(&self) -> &ManifestContentType {
157        &self.content
158    }
159}