iceberg/spec/manifest_list/manifest_file.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::str::FromStr;
19
20use serde_derive::{Deserialize, Serialize};
21
22use super::ByteBuf;
23use crate::error::Result;
24use crate::io::FileIO;
25use crate::spec::Manifest;
26use crate::{Error, ErrorKind};
27
/// Entry in a manifest list.
///
/// Describes one manifest file: where it is stored, what kind of files it
/// tracks, and summary counts for the entries it contains. The `field: N` tag
/// that opens each field's documentation is the numeric ID of that field.
#[derive(Debug, PartialEq, Clone, Eq, Hash)]
pub struct ManifestFile {
    /// field: 500
    ///
    /// Location of the manifest file
    pub manifest_path: String,
    /// field: 501
    ///
    /// Length of the manifest file in bytes
    pub manifest_length: i64,
    /// field: 502
    ///
    /// ID of a partition spec used to write the manifest; must be listed
    /// in table metadata partition-specs
    pub partition_spec_id: i32,
    /// field: 517
    ///
    /// The type of files tracked by the manifest, either data or delete
    /// files; 0 for all v1 manifests
    pub content: ManifestContentType,
    /// field: 515
    ///
    /// The sequence number when the manifest was added to the table; use 0
    /// when reading v1 manifest lists
    pub sequence_number: i64,
    /// field: 516
    ///
    /// The minimum data sequence number of all live data or delete files in
    /// the manifest; use 0 when reading v1 manifest lists
    pub min_sequence_number: i64,
    /// field: 503
    ///
    /// ID of the snapshot where the manifest file was added
    pub added_snapshot_id: i64,
    /// field: 504
    ///
    /// Number of entries in the manifest that have status ADDED; when null
    /// this is assumed to be non-zero
    pub added_files_count: Option<u32>,
    /// field: 505
    ///
    /// Number of entries in the manifest that have status EXISTING (0);
    /// when null this is assumed to be non-zero
    pub existing_files_count: Option<u32>,
    /// field: 506
    ///
    /// Number of entries in the manifest that have status DELETED (2);
    /// when null this is assumed to be non-zero
    pub deleted_files_count: Option<u32>,
    /// field: 512
    ///
    /// Number of rows in all files in the manifest that have status
    /// ADDED; when null this is assumed to be non-zero
    pub added_rows_count: Option<u64>,
    /// field: 513
    ///
    /// Number of rows in all files in the manifest that have status
    /// EXISTING; when null this is assumed to be non-zero
    pub existing_rows_count: Option<u64>,
    /// field: 514
    ///
    /// Number of rows in all files in the manifest that have status
    /// DELETED; when null this is assumed to be non-zero
    pub deleted_rows_count: Option<u64>,
    /// field: 507
    /// element_field: 508
    ///
    /// A list of field summaries for each partition field in the spec. Each
    /// field in the list corresponds to a field in the manifest file's
    /// partition spec.
    pub partitions: Option<Vec<FieldSummary>>,
    /// field: 519
    ///
    /// Implementation-specific key metadata for encryption
    pub key_metadata: Option<Vec<u8>>,
    /// field: 520
    ///
    /// The starting _row_id to assign to rows added by ADDED data files
    pub first_row_id: Option<u64>,
}
109
110impl ManifestFile {
111 /// Checks if the manifest file has any added files.
112 pub fn has_added_files(&self) -> bool {
113 self.added_files_count.map(|c| c > 0).unwrap_or(true)
114 }
115
116 /// Checks whether this manifest contains entries with DELETED status.
117 pub fn has_deleted_files(&self) -> bool {
118 self.deleted_files_count.map(|c| c > 0).unwrap_or(true)
119 }
120
121 /// Checks if the manifest file has any existed files.
122 pub fn has_existing_files(&self) -> bool {
123 self.existing_files_count.map(|c| c > 0).unwrap_or(true)
124 }
125}
126
/// Kind of files a manifest tracks: data files or delete files.
///
/// `Data` (discriminant `0`) is the default and is the value used for all v1 manifests.
#[derive(Clone, Copy, Debug, Default)]
#[derive(PartialEq, Eq, Hash)]
pub enum ManifestContentType {
    /// The manifest tracks data files.
    #[default]
    Data = 0,
    /// The manifest tracks delete files.
    Deletes = 1,
}
136
137impl FromStr for ManifestContentType {
138 type Err = Error;
139
140 fn from_str(s: &str) -> Result<Self> {
141 match s {
142 "data" => Ok(ManifestContentType::Data),
143 "deletes" => Ok(ManifestContentType::Deletes),
144 _ => Err(Error::new(
145 ErrorKind::DataInvalid,
146 format!("Invalid manifest content type: {s}"),
147 )),
148 }
149 }
150}
151
152impl std::fmt::Display for ManifestContentType {
153 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
154 match self {
155 ManifestContentType::Data => write!(f, "data"),
156 ManifestContentType::Deletes => write!(f, "deletes"),
157 }
158 }
159}
160
impl TryFrom<i32> for ManifestContentType {
    type Error = Error;

    /// Converts the integer encoding of a manifest content type into the enum.
    ///
    /// `0` maps to `Data` and `1` maps to `Deletes`.
    ///
    /// # Errors
    ///
    /// Any other value produces a `DataInvalid` error whose message includes
    /// the rejected value.
    fn try_from(value: i32) -> std::result::Result<Self, Self::Error> {
        // Match against the enum's own discriminants so the accepted codes
        // cannot drift from the variant definitions.
        const DATA: i32 = ManifestContentType::Data as i32;
        const DELETES: i32 = ManifestContentType::Deletes as i32;

        let content_type = match value {
            DATA => Self::Data,
            DELETES => Self::Deletes,
            _ => {
                return Err(Error::new(
                    crate::ErrorKind::DataInvalid,
                    format!("Invalid manifest content type. Expected 0 or 1, got {value}"),
                ));
            }
        };

        Ok(content_type)
    }
}
175
176impl ManifestFile {
177 /// Load [`Manifest`].
178 ///
179 /// This method will also initialize inherited values of [`ManifestEntry`](crate::spec::ManifestEntry), such as `sequence_number`.
180 pub async fn load_manifest(&self, file_io: &FileIO) -> Result<Manifest> {
181 let avro = file_io.new_input(&self.manifest_path)?.read().await?;
182
183 let (metadata, mut entries) = Manifest::try_from_avro_bytes(&avro)?;
184
185 // Let entries inherit values from the manifest list entry.
186 for entry in &mut entries {
187 entry.inherit_data(self);
188 }
189
190 Ok(Manifest::new(metadata, entries))
191 }
192}
193
/// Field summary for a partition field in the spec.
///
/// Each field in the list corresponds to a field in the manifest file's partition spec.
/// The `field: N` tag that opens each field's documentation is the numeric ID
/// of that field.
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone, Default, Hash)]
pub struct FieldSummary {
    /// field: 509
    ///
    /// Whether the manifest contains at least one partition with a null
    /// value for the field
    pub contains_null: bool,
    /// field: 518
    ///
    /// Whether the manifest contains at least one partition with a NaN
    /// value for the field
    pub contains_nan: Option<bool>,
    /// field: 510
    ///
    /// The minimum value for the field in the manifest's
    /// partitions.
    pub lower_bound: Option<ByteBuf>,
    /// field: 511
    ///
    /// The maximum value for the field in the manifest's
    /// partitions.
    pub upper_bound: Option<ByteBuf>,
}
217
#[cfg(test)]
mod test {
    use super::ManifestContentType;

    /// The default content type must be `Data`.
    #[test]
    fn test_manifest_content_type_default() {
        let default_content = ManifestContentType::default();
        assert_eq!(default_content, ManifestContentType::Data);
    }

    /// The default content type must encode as the integer `0`.
    #[test]
    fn test_manifest_content_type_default_value() {
        let default_code = ManifestContentType::default() as i32;
        assert_eq!(default_code, 0);
    }
}