iceberg/
table.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Table API for Apache Iceberg
19
20use std::sync::Arc;
21
22use crate::arrow::ArrowReaderBuilder;
23use crate::inspect::MetadataTable;
24use crate::io::FileIO;
25use crate::io::object_cache::ObjectCache;
26use crate::scan::TableScanBuilder;
27use crate::spec::{SchemaRef, TableMetadata, TableMetadataRef};
28use crate::{Error, ErrorKind, Result, TableIdent};
29
/// Builder for constructing a [`Table`].
///
/// Obtained through [`Table::builder`]. A `FileIO`, table metadata and a table
/// identifier must be supplied before calling [`TableBuilder::build`]; every
/// other setting is optional.
pub struct TableBuilder {
    /// File IO handed to the built table; `build` fails when this is unset.
    file_io: Option<FileIO>,
    /// Optional location of the table's metadata file.
    metadata_location: Option<String>,
    /// Table metadata; `build` fails when this is unset.
    metadata: Option<TableMetadataRef>,
    /// Table identifier; `build` fails when this is unset.
    identifier: Option<TableIdent>,
    /// Whether the built table is flagged as readonly (`false` by default).
    readonly: bool,
    /// When `true`, the table is built with a disabled object cache.
    disable_cache: bool,
    /// Optional capacity forwarded to `ObjectCache::new_with_capacity`;
    /// not consulted when `disable_cache` is `true`.
    cache_size_bytes: Option<u64>,
}
40
41impl TableBuilder {
42    pub(crate) fn new() -> Self {
43        Self {
44            file_io: None,
45            metadata_location: None,
46            metadata: None,
47            identifier: None,
48            readonly: false,
49            disable_cache: false,
50            cache_size_bytes: None,
51        }
52    }
53
54    /// required - sets the necessary FileIO to use for the table
55    pub fn file_io(mut self, file_io: FileIO) -> Self {
56        self.file_io = Some(file_io);
57        self
58    }
59
60    /// optional - sets the tables metadata location
61    pub fn metadata_location<T: Into<String>>(mut self, metadata_location: T) -> Self {
62        self.metadata_location = Some(metadata_location.into());
63        self
64    }
65
66    /// required - passes in the TableMetadata to use for the Table
67    pub fn metadata<T: Into<TableMetadataRef>>(mut self, metadata: T) -> Self {
68        self.metadata = Some(metadata.into());
69        self
70    }
71
72    /// required - passes in the TableIdent to use for the Table
73    pub fn identifier(mut self, identifier: TableIdent) -> Self {
74        self.identifier = Some(identifier);
75        self
76    }
77
78    /// specifies if the Table is readonly or not (default not)
79    pub fn readonly(mut self, readonly: bool) -> Self {
80        self.readonly = readonly;
81        self
82    }
83
84    /// specifies if the Table's metadata cache will be disabled,
85    /// so that reads of Manifests and ManifestLists will never
86    /// get cached.
87    pub fn disable_cache(mut self) -> Self {
88        self.disable_cache = true;
89        self
90    }
91
92    /// optionally set a non-default metadata cache size
93    pub fn cache_size_bytes(mut self, cache_size_bytes: u64) -> Self {
94        self.cache_size_bytes = Some(cache_size_bytes);
95        self
96    }
97
98    /// build the Table
99    pub fn build(self) -> Result<Table> {
100        let Self {
101            file_io,
102            metadata_location,
103            metadata,
104            identifier,
105            readonly,
106            disable_cache,
107            cache_size_bytes,
108        } = self;
109
110        let Some(file_io) = file_io else {
111            return Err(Error::new(
112                ErrorKind::DataInvalid,
113                "FileIO must be provided with TableBuilder.file_io()",
114            ));
115        };
116
117        let Some(metadata) = metadata else {
118            return Err(Error::new(
119                ErrorKind::DataInvalid,
120                "TableMetadataRef must be provided with TableBuilder.metadata()",
121            ));
122        };
123
124        let Some(identifier) = identifier else {
125            return Err(Error::new(
126                ErrorKind::DataInvalid,
127                "TableIdent must be provided with TableBuilder.identifier()",
128            ));
129        };
130
131        let object_cache = if disable_cache {
132            Arc::new(ObjectCache::with_disabled_cache(file_io.clone()))
133        } else if let Some(cache_size_bytes) = cache_size_bytes {
134            Arc::new(ObjectCache::new_with_capacity(
135                file_io.clone(),
136                cache_size_bytes,
137            ))
138        } else {
139            Arc::new(ObjectCache::new(file_io.clone()))
140        };
141
142        Ok(Table {
143            file_io,
144            metadata_location,
145            metadata,
146            identifier,
147            readonly,
148            object_cache,
149        })
150    }
151}
152
/// Table represents a table in the catalog.
///
/// Bundles the table's identifier and metadata with the `FileIO` and object
/// cache used to access it. Instances can be created through [`Table::builder`].
#[derive(Debug, Clone)]
pub struct Table {
    /// File IO used by this table.
    file_io: FileIO,
    /// Location of the metadata file, when known.
    metadata_location: Option<String>,
    /// Current table metadata.
    metadata: TableMetadataRef,
    /// Identifier of this table.
    identifier: TableIdent,
    /// Flag indicating whether the table is readonly.
    readonly: bool,
    /// Object cache created for this table by the builder.
    object_cache: Arc<ObjectCache>,
}
163
164impl Table {
165    /// Sets the [`Table`] metadata and returns an updated instance with the new metadata applied.
166    pub(crate) fn with_metadata(mut self, metadata: TableMetadataRef) -> Self {
167        self.metadata = metadata;
168        self
169    }
170
171    /// Sets the [`Table`] metadata location and returns an updated instance.
172    pub(crate) fn with_metadata_location(mut self, metadata_location: String) -> Self {
173        self.metadata_location = Some(metadata_location);
174        self
175    }
176
177    /// Returns a TableBuilder to build a table
178    pub fn builder() -> TableBuilder {
179        TableBuilder::new()
180    }
181
182    /// Returns table identifier.
183    pub fn identifier(&self) -> &TableIdent {
184        &self.identifier
185    }
186    /// Returns current metadata.
187    pub fn metadata(&self) -> &TableMetadata {
188        &self.metadata
189    }
190
191    /// Returns current metadata ref.
192    pub fn metadata_ref(&self) -> TableMetadataRef {
193        self.metadata.clone()
194    }
195
196    /// Returns current metadata location.
197    pub fn metadata_location(&self) -> Option<&str> {
198        self.metadata_location.as_deref()
199    }
200
201    /// Returns current metadata location in a result.
202    pub fn metadata_location_result(&self) -> Result<&str> {
203        self.metadata_location.as_deref().ok_or(Error::new(
204            ErrorKind::DataInvalid,
205            format!(
206                "Metadata location does not exist for table: {}",
207                self.identifier
208            ),
209        ))
210    }
211
212    /// Returns file io used in this table.
213    pub fn file_io(&self) -> &FileIO {
214        &self.file_io
215    }
216
217    /// Returns this table's object cache
218    pub(crate) fn object_cache(&self) -> Arc<ObjectCache> {
219        self.object_cache.clone()
220    }
221
222    /// Creates a table scan.
223    pub fn scan(&self) -> TableScanBuilder<'_> {
224        TableScanBuilder::new(self)
225    }
226
227    /// Creates a metadata table which provides table-like APIs for inspecting metadata.
228    /// See [`MetadataTable`] for more details.
229    pub fn inspect(&self) -> MetadataTable<'_> {
230        MetadataTable::new(self)
231    }
232
233    /// Returns the flag indicating whether the `Table` is readonly or not
234    pub fn readonly(&self) -> bool {
235        self.readonly
236    }
237
238    /// Returns the current schema as a shared reference.
239    pub fn current_schema_ref(&self) -> SchemaRef {
240        self.metadata.current_schema().clone()
241    }
242
243    /// Create a reader for the table.
244    pub fn reader_builder(&self) -> ArrowReaderBuilder {
245        ArrowReaderBuilder::new(self.file_io.clone())
246    }
247}
248
/// `StaticTable` is a read-only table that can be created from a metadata file or
/// directly from a `TableMetadata`, without a catalog.
/// It can only be used to read metadata and to scan the table.
///
/// # Examples
///
/// ```rust, no_run
/// # use iceberg::io::FileIO;
/// # use iceberg::table::StaticTable;
/// # use iceberg::TableIdent;
/// # async fn example() {
/// let metadata_file_location = "s3://bucket_name/path/to/metadata.json";
/// let file_io = FileIO::from_path(&metadata_file_location)
///     .unwrap()
///     .build()
///     .unwrap();
/// let static_identifier = TableIdent::from_strs(["static_ns", "static_table"]).unwrap();
/// let static_table =
///     StaticTable::from_metadata_file(&metadata_file_location, static_identifier, file_io)
///         .await
///         .unwrap();
/// let snapshot_id = static_table
///     .metadata()
///     .current_snapshot()
///     .unwrap()
///     .snapshot_id();
/// # }
/// ```
#[derive(Debug, Clone)]
pub struct StaticTable(Table);
277
278impl StaticTable {
279    /// Creates a static table from a given `TableMetadata` and `FileIO`
280    pub async fn from_metadata(
281        metadata: TableMetadata,
282        table_ident: TableIdent,
283        file_io: FileIO,
284    ) -> Result<Self> {
285        let table = Table::builder()
286            .metadata(metadata)
287            .identifier(table_ident)
288            .file_io(file_io.clone())
289            .readonly(true)
290            .build();
291
292        Ok(Self(table?))
293    }
294    /// Creates a static table directly from metadata file and `FileIO`
295    pub async fn from_metadata_file(
296        metadata_location: &str,
297        table_ident: TableIdent,
298        file_io: FileIO,
299    ) -> Result<Self> {
300        let metadata = TableMetadata::read_from(&file_io, metadata_location).await?;
301
302        let table = Table::builder()
303            .metadata(metadata)
304            .metadata_location(metadata_location)
305            .identifier(table_ident)
306            .file_io(file_io.clone())
307            .readonly(true)
308            .build();
309
310        Ok(Self(table?))
311    }
312
313    /// Create a TableScanBuilder for the static table.
314    pub fn scan(&self) -> TableScanBuilder<'_> {
315        self.0.scan()
316    }
317
318    /// Get TableMetadataRef for the static table
319    pub fn metadata(&self) -> TableMetadataRef {
320        self.0.metadata_ref()
321    }
322
323    /// Consumes the `StaticTable` and return it as a `Table`
324    /// Please use this method carefully as the Table it returns remains detached from a catalog
325    /// and can't be used to perform modifications on the table.
326    pub fn into_table(self) -> Table {
327        self.0
328    }
329
330    /// Create a reader for the table.
331    pub fn reader_builder(&self) -> ArrowReaderBuilder {
332        ArrowReaderBuilder::new(self.0.file_io.clone())
333    }
334}
335
#[cfg(test)]
mod tests {
    use super::*;

    /// Path of the valid V2 table metadata fixture inside the crate's `testdata` directory.
    fn fixture_metadata_path() -> String {
        let fixture_name = "TableMetadataV2Valid.json";
        format!(
            "{}/testdata/table_metadata/{}",
            env!("CARGO_MANIFEST_DIR"),
            fixture_name
        )
    }

    /// Builds a `FileIO` from `path` via `FileIO::from_path`.
    fn file_io_for(path: &str) -> FileIO {
        FileIO::from_path(path).unwrap().build().unwrap()
    }

    /// Loads the fixture as a `StaticTable` identified as `static_ns.static_table`
    /// and returns it together with the fixture path.
    async fn load_static_table() -> (StaticTable, String) {
        let path = fixture_metadata_path();
        let file_io = file_io_for(&path);
        let ident = TableIdent::from_strs(["static_ns", "static_table"]).unwrap();
        let table = StaticTable::from_metadata_file(&path, ident, file_io)
            .await
            .unwrap();
        (table, path)
    }

    #[tokio::test]
    async fn test_static_table_from_file() {
        let (static_table, _path) = load_static_table().await;

        let metadata = static_table.metadata();
        let current_snapshot = metadata.current_snapshot().unwrap();

        assert_eq!(
            current_snapshot.snapshot_id(),
            3055729675574597004,
            "snapshot id from metadata don't match"
        );
    }

    #[tokio::test]
    async fn test_static_into_table() {
        let (static_table, path) = load_static_table().await;

        let table = static_table.into_table();

        assert!(table.readonly());
        assert_eq!(table.identifier.name(), "static_table");
        assert_eq!(table.metadata_location(), Some(path.as_str()));
    }

    #[tokio::test]
    async fn test_table_readonly_flag() {
        let path = fixture_metadata_path();
        let file_io = file_io_for(&path);

        // Parse the fixture by hand so the table goes through the plain builder path.
        let raw_metadata = file_io.new_input(path).unwrap().read().await.unwrap();
        let table_metadata = serde_json::from_slice::<TableMetadata>(&raw_metadata).unwrap();

        let ident = TableIdent::from_strs(["ns", "table"]).unwrap();
        let table = Table::builder()
            .metadata(table_metadata)
            .identifier(ident)
            .file_io(file_io.clone())
            .build()
            .unwrap();

        assert!(!table.readonly());
        assert_eq!(table.identifier.name(), "table");
    }
}