iceberg/spec/manifest_list/
manifest_file.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
17
18use std::str::FromStr;
19
20use serde_derive::{Deserialize, Serialize};
21
22use super::ByteBuf;
23use crate::error::Result;
24use crate::io::FileIO;
25use crate::spec::Manifest;
26use crate::{Error, ErrorKind};
27
28/// Entry in a manifest list.
29#[derive(Debug, PartialEq, Clone, Eq, Hash)]
30pub struct ManifestFile {
31    /// field: 500
32    ///
33    /// Location of the manifest file
34    pub manifest_path: String,
35    /// field: 501
36    ///
37    /// Length of the manifest file in bytes
38    pub manifest_length: i64,
39    /// field: 502
40    ///
41    /// ID of a partition spec used to write the manifest; must be listed
42    /// in table metadata partition-specs
43    pub partition_spec_id: i32,
44    /// field: 517
45    ///
46    /// The type of files tracked by the manifest, either data or delete
47    /// files; 0 for all v1 manifests
48    pub content: ManifestContentType,
49    /// field: 515
50    ///
51    /// The sequence number when the manifest was added to the table; use 0
52    /// when reading v1 manifest lists
53    pub sequence_number: i64,
54    /// field: 516
55    ///
56    /// The minimum data sequence number of all live data or delete files in
57    /// the manifest; use 0 when reading v1 manifest lists
58    pub min_sequence_number: i64,
59    /// field: 503
60    ///
61    /// ID of the snapshot where the manifest file was added
62    pub added_snapshot_id: i64,
63    /// field: 504
64    ///
65    /// Number of entries in the manifest that have status ADDED, when null
66    /// this is assumed to be non-zero
67    pub added_files_count: Option<u32>,
68    /// field: 505
69    ///
70    /// Number of entries in the manifest that have status EXISTING (0),
71    /// when null this is assumed to be non-zero
72    pub existing_files_count: Option<u32>,
73    /// field: 506
74    ///
75    /// Number of entries in the manifest that have status DELETED (2),
76    /// when null this is assumed to be non-zero
77    pub deleted_files_count: Option<u32>,
78    /// field: 512
79    ///
80    /// Number of rows in all of files in the manifest that have status
81    /// ADDED, when null this is assumed to be non-zero
82    pub added_rows_count: Option<u64>,
83    /// field: 513
84    ///
85    /// Number of rows in all of files in the manifest that have status
86    /// EXISTING, when null this is assumed to be non-zero
87    pub existing_rows_count: Option<u64>,
88    /// field: 514
89    ///
90    /// Number of rows in all of files in the manifest that have status
91    /// DELETED, when null this is assumed to be non-zero
92    pub deleted_rows_count: Option<u64>,
93    /// field: 507
94    /// element_field: 508
95    ///
96    /// A list of field summaries for each partition field in the spec. Each
97    /// field in the list corresponds to a field in the manifest file’s
98    /// partition spec.
99    pub partitions: Option<Vec<FieldSummary>>,
100    /// field: 519
101    ///
102    /// Implementation-specific key metadata for encryption
103    pub key_metadata: Option<Vec<u8>>,
104    /// field 520
105    ///
106    /// The starting _row_id to assign to rows added by ADDED data files
107    pub first_row_id: Option<u64>,
108}
109
110impl ManifestFile {
111    /// Checks if the manifest file has any added files.
112    pub fn has_added_files(&self) -> bool {
113        self.added_files_count.map(|c| c > 0).unwrap_or(true)
114    }
115
116    /// Checks whether this manifest contains entries with DELETED status.
117    pub fn has_deleted_files(&self) -> bool {
118        self.deleted_files_count.map(|c| c > 0).unwrap_or(true)
119    }
120
121    /// Checks if the manifest file has any existed files.
122    pub fn has_existing_files(&self) -> bool {
123        self.existing_files_count.map(|c| c > 0).unwrap_or(true)
124    }
125}
126
/// Kind of files a manifest tracks: data files or delete files.
///
/// Every v1 manifest uses `Data` (0), which is also the default.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
pub enum ManifestContentType {
    /// The manifest tracks data files.
    #[default]
    Data = 0,
    /// The manifest tracks delete files.
    Deletes = 1,
}
136
137impl FromStr for ManifestContentType {
138    type Err = Error;
139
140    fn from_str(s: &str) -> Result<Self> {
141        match s {
142            "data" => Ok(ManifestContentType::Data),
143            "deletes" => Ok(ManifestContentType::Deletes),
144            _ => Err(Error::new(
145                ErrorKind::DataInvalid,
146                format!("Invalid manifest content type: {s}"),
147            )),
148        }
149    }
150}
151
152impl std::fmt::Display for ManifestContentType {
153    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
154        match self {
155            ManifestContentType::Data => write!(f, "data"),
156            ManifestContentType::Deletes => write!(f, "deletes"),
157        }
158    }
159}
160
161impl TryFrom<i32> for ManifestContentType {
162    type Error = Error;
163
164    fn try_from(value: i32) -> std::result::Result<Self, Self::Error> {
165        match value {
166            0 => Ok(ManifestContentType::Data),
167            1 => Ok(ManifestContentType::Deletes),
168            _ => Err(Error::new(
169                crate::ErrorKind::DataInvalid,
170                format!("Invalid manifest content type. Expected 0 or 1, got {value}"),
171            )),
172        }
173    }
174}
175
176impl ManifestFile {
177    /// Load [`Manifest`].
178    ///
179    /// This method will also initialize inherited values of [`ManifestEntry`](crate::spec::ManifestEntry), such as `sequence_number`.
180    pub async fn load_manifest(&self, file_io: &FileIO) -> Result<Manifest> {
181        let avro = file_io.new_input(&self.manifest_path)?.read().await?;
182
183        let (metadata, mut entries) = Manifest::try_from_avro_bytes(&avro)?;
184
185        // Let entries inherit values from the manifest list entry.
186        for entry in &mut entries {
187            entry.inherit_data(self);
188        }
189
190        Ok(Manifest::new(metadata, entries))
191    }
192}
193
194/// Field summary for partition field in the spec.
195///
196/// Each field in the list corresponds to a field in the manifest file’s partition spec.
197#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone, Default, Hash)]
198pub struct FieldSummary {
199    /// field: 509
200    ///
201    /// Whether the manifest contains at least one partition with a null
202    /// value for the field
203    pub contains_null: bool,
204    /// field: 518
205    /// Whether the manifest contains at least one partition with a NaN
206    /// value for the field
207    pub contains_nan: Option<bool>,
208    /// field: 510
209    /// The minimum value for the field in the manifests
210    /// partitions.
211    pub lower_bound: Option<ByteBuf>,
212    /// field: 511
213    /// The maximum value for the field in the manifests
214    /// partitions.
215    pub upper_bound: Option<ByteBuf>,
216}
217
#[cfg(test)]
mod test {
    use super::ManifestContentType;

    /// The default content type is the `Data` variant.
    #[test]
    fn test_manifest_content_type_default() {
        let default_type = ManifestContentType::default();
        assert_eq!(default_type, ManifestContentType::Data);
    }

    /// The default content type has the integer value 0.
    #[test]
    fn test_manifest_content_type_default_value() {
        let discriminant = ManifestContentType::default() as i32;
        assert_eq!(discriminant, 0);
    }
}