iceberg/
table.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Table API for Apache Iceberg
19
20use std::sync::Arc;
21
22use crate::arrow::ArrowReaderBuilder;
23use crate::inspect::MetadataTable;
24use crate::io::FileIO;
25use crate::io::object_cache::ObjectCache;
26use crate::runtime::Runtime;
27use crate::scan::TableScanBuilder;
28use crate::spec::{SchemaRef, TableMetadata, TableMetadataRef};
29use crate::{Error, ErrorKind, Result, TableIdent};
30
31/// Builder to create table scan.
32pub struct TableBuilder {
33    file_io: Option<FileIO>,
34    metadata_location: Option<String>,
35    metadata: Option<TableMetadataRef>,
36    identifier: Option<TableIdent>,
37    readonly: bool,
38    disable_cache: bool,
39    cache_size_bytes: Option<u64>,
40    runtime: Option<Runtime>,
41}
42
43impl TableBuilder {
44    pub(crate) fn new() -> Self {
45        Self {
46            file_io: None,
47            metadata_location: None,
48            metadata: None,
49            identifier: None,
50            readonly: false,
51            disable_cache: false,
52            cache_size_bytes: None,
53            runtime: None,
54        }
55    }
56
57    /// required - sets the necessary FileIO to use for the table
58    pub fn file_io(mut self, file_io: FileIO) -> Self {
59        self.file_io = Some(file_io);
60        self
61    }
62
63    /// optional - sets the tables metadata location
64    pub fn metadata_location<T: Into<String>>(mut self, metadata_location: T) -> Self {
65        self.metadata_location = Some(metadata_location.into());
66        self
67    }
68
69    /// required - passes in the TableMetadata to use for the Table
70    pub fn metadata<T: Into<TableMetadataRef>>(mut self, metadata: T) -> Self {
71        self.metadata = Some(metadata.into());
72        self
73    }
74
75    /// required - passes in the TableIdent to use for the Table
76    pub fn identifier(mut self, identifier: TableIdent) -> Self {
77        self.identifier = Some(identifier);
78        self
79    }
80
81    /// specifies if the Table is readonly or not (default not)
82    pub fn readonly(mut self, readonly: bool) -> Self {
83        self.readonly = readonly;
84        self
85    }
86
87    /// specifies if the Table's metadata cache will be disabled,
88    /// so that reads of Manifests and ManifestLists will never
89    /// get cached.
90    pub fn disable_cache(mut self) -> Self {
91        self.disable_cache = true;
92        self
93    }
94
95    /// optionally set a non-default metadata cache size
96    pub fn cache_size_bytes(mut self, cache_size_bytes: u64) -> Self {
97        self.cache_size_bytes = Some(cache_size_bytes);
98        self
99    }
100
101    /// Set the Runtime for this table to use when spawning tasks.
102    pub fn runtime(mut self, runtime: Runtime) -> Self {
103        self.runtime = Some(runtime);
104        self
105    }
106
107    /// build the Table
108    pub fn build(self) -> Result<Table> {
109        let Self {
110            file_io,
111            metadata_location,
112            metadata,
113            identifier,
114            readonly,
115            disable_cache,
116            cache_size_bytes,
117            runtime,
118        } = self;
119
120        let Some(file_io) = file_io else {
121            return Err(Error::new(
122                ErrorKind::DataInvalid,
123                "FileIO must be provided with TableBuilder.file_io()",
124            ));
125        };
126
127        let Some(metadata) = metadata else {
128            return Err(Error::new(
129                ErrorKind::DataInvalid,
130                "TableMetadataRef must be provided with TableBuilder.metadata()",
131            ));
132        };
133
134        let Some(identifier) = identifier else {
135            return Err(Error::new(
136                ErrorKind::DataInvalid,
137                "TableIdent must be provided with TableBuilder.identifier()",
138            ));
139        };
140
141        let Some(runtime) = runtime else {
142            return Err(Error::new(
143                ErrorKind::DataInvalid,
144                "Runtime must be provided with TableBuilder.runtime()",
145            ));
146        };
147
148        let object_cache = if disable_cache {
149            Arc::new(ObjectCache::with_disabled_cache(file_io.clone()))
150        } else if let Some(cache_size_bytes) = cache_size_bytes {
151            Arc::new(ObjectCache::new_with_capacity(
152                file_io.clone(),
153                cache_size_bytes,
154            ))
155        } else {
156            Arc::new(ObjectCache::new(file_io.clone()))
157        };
158
159        Ok(Table {
160            file_io,
161            metadata_location,
162            metadata,
163            identifier,
164            readonly,
165            object_cache,
166            runtime,
167        })
168    }
169}
170
171/// Table represents a table in the catalog.
172#[derive(Debug, Clone)]
173pub struct Table {
174    file_io: FileIO,
175    metadata_location: Option<String>,
176    metadata: TableMetadataRef,
177    identifier: TableIdent,
178    readonly: bool,
179    object_cache: Arc<ObjectCache>,
180    runtime: Runtime,
181}
182
183impl Table {
184    /// Sets the [`Table`] metadata and returns an updated instance with the new metadata applied.
185    pub(crate) fn with_metadata(mut self, metadata: TableMetadataRef) -> Self {
186        self.metadata = metadata;
187        self
188    }
189
190    /// Sets the [`Table`] metadata location and returns an updated instance.
191    pub(crate) fn with_metadata_location(mut self, metadata_location: String) -> Self {
192        self.metadata_location = Some(metadata_location);
193        self
194    }
195
196    /// Returns a TableBuilder to build a table
197    pub fn builder() -> TableBuilder {
198        TableBuilder::new()
199    }
200
201    /// Returns table identifier.
202    pub fn identifier(&self) -> &TableIdent {
203        &self.identifier
204    }
205    /// Returns current metadata.
206    pub fn metadata(&self) -> &TableMetadata {
207        &self.metadata
208    }
209
210    /// Returns current metadata ref.
211    pub fn metadata_ref(&self) -> TableMetadataRef {
212        self.metadata.clone()
213    }
214
215    /// Returns current metadata location.
216    pub fn metadata_location(&self) -> Option<&str> {
217        self.metadata_location.as_deref()
218    }
219
220    /// Returns current metadata location in a result.
221    pub fn metadata_location_result(&self) -> Result<&str> {
222        self.metadata_location.as_deref().ok_or(Error::new(
223            ErrorKind::DataInvalid,
224            format!(
225                "Metadata location does not exist for table: {}",
226                self.identifier
227            ),
228        ))
229    }
230
231    /// Returns file io used in this table.
232    pub fn file_io(&self) -> &FileIO {
233        &self.file_io
234    }
235
236    /// Returns this table's object cache
237    pub(crate) fn object_cache(&self) -> Arc<ObjectCache> {
238        self.object_cache.clone()
239    }
240
241    /// Creates a table scan.
242    pub fn scan(&self) -> TableScanBuilder<'_> {
243        TableScanBuilder::new(self)
244    }
245
246    /// Creates a metadata table which provides table-like APIs for inspecting metadata.
247    /// See [`MetadataTable`] for more details.
248    pub fn inspect(&self) -> MetadataTable<'_> {
249        MetadataTable::new(self)
250    }
251
252    /// Returns the [`Runtime`] for this table.
253    pub(crate) fn runtime(&self) -> &Runtime {
254        &self.runtime
255    }
256
257    /// Returns the flag indicating whether the `Table` is readonly or not
258    pub fn readonly(&self) -> bool {
259        self.readonly
260    }
261
262    /// Returns the current schema as a shared reference.
263    pub fn current_schema_ref(&self) -> SchemaRef {
264        self.metadata.current_schema().clone()
265    }
266
267    /// Create a reader for the table.
268    pub fn reader_builder(&self) -> ArrowReaderBuilder {
269        ArrowReaderBuilder::new(self.file_io.clone(), self.runtime().clone())
270    }
271}
272
273/// `StaticTable` is a read-only table struct that can be created from a metadata file or from `TableMetaData` without a catalog.
274/// It can only be used to read metadata and for table scan.
275/// # Examples
276///
277/// ```rust, no_run
278/// # use iceberg::io::FileIO;
279/// # use iceberg::table::StaticTable;
280/// # use iceberg::TableIdent;
281/// # async fn example() {
282/// let metadata_file_location = "s3://bucket_name/path/to/metadata.json";
283/// let file_io = FileIO::new_with_fs();
284/// let static_identifier = TableIdent::from_strs(["static_ns", "static_table"]).unwrap();
285/// let static_table =
286///     StaticTable::from_metadata_file(&metadata_file_location, static_identifier, file_io)
287///         .await
288///         .unwrap();
289/// let snapshot_id = static_table
290///     .metadata()
291///     .current_snapshot()
292///     .unwrap()
293///     .snapshot_id();
294/// # }
295/// ```
296#[derive(Debug, Clone)]
297pub struct StaticTable(Table);
298
299impl StaticTable {
300    /// Creates a static table from a given `TableMetadata` and `FileIO`
301    pub async fn from_metadata(
302        metadata: TableMetadata,
303        table_ident: TableIdent,
304        file_io: FileIO,
305    ) -> Result<Self> {
306        let table = Table::builder()
307            .metadata(metadata)
308            .identifier(table_ident)
309            .file_io(file_io.clone())
310            .runtime(Runtime::try_current()?)
311            .readonly(true)
312            .build();
313
314        Ok(Self(table?))
315    }
316    /// Creates a static table directly from metadata file and `FileIO`
317    pub async fn from_metadata_file(
318        metadata_location: &str,
319        table_ident: TableIdent,
320        file_io: FileIO,
321    ) -> Result<Self> {
322        let metadata = TableMetadata::read_from(&file_io, metadata_location).await?;
323
324        let table = Table::builder()
325            .metadata(metadata)
326            .metadata_location(metadata_location)
327            .identifier(table_ident)
328            .file_io(file_io.clone())
329            .runtime(Runtime::try_current()?)
330            .readonly(true)
331            .build();
332
333        Ok(Self(table?))
334    }
335
336    /// Create a TableScanBuilder for the static table.
337    pub fn scan(&self) -> TableScanBuilder<'_> {
338        self.0.scan()
339    }
340
341    /// Get TableMetadataRef for the static table
342    pub fn metadata(&self) -> TableMetadataRef {
343        self.0.metadata_ref()
344    }
345
346    /// Consumes the `StaticTable` and return it as a `Table`
347    /// Please use this method carefully as the Table it returns remains detached from a catalog
348    /// and can't be used to perform modifications on the table.
349    pub fn into_table(self) -> Table {
350        self.0
351    }
352
353    /// Create a reader for the table.
354    pub fn reader_builder(&self) -> ArrowReaderBuilder {
355        self.0.reader_builder()
356    }
357}
358
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute path of the V2 table-metadata fixture shared by the tests below.
    fn metadata_fixture_path() -> String {
        format!(
            "{}/testdata/table_metadata/{}",
            env!("CARGO_MANIFEST_DIR"),
            "TableMetadataV2Valid.json"
        )
    }

    /// A static table loaded from the fixture exposes the fixture's current snapshot id.
    #[tokio::test]
    async fn test_static_table_from_file() {
        let fixture_path = metadata_fixture_path();
        let io = FileIO::new_with_fs();
        let ident = TableIdent::from_strs(["static_ns", "static_table"]).unwrap();

        let static_table = StaticTable::from_metadata_file(&fixture_path, ident, io)
            .await
            .unwrap();

        let metadata = static_table.metadata();
        let snapshot_id = metadata.current_snapshot().unwrap().snapshot_id();
        assert_eq!(
            snapshot_id, 3055729675574597004,
            "snapshot id from metadata don't match"
        );
    }

    /// Converting a static table into a `Table` keeps the read-only flag, the
    /// identifier, and the metadata location.
    #[tokio::test]
    async fn test_static_into_table() {
        let fixture_path = metadata_fixture_path();
        let io = FileIO::new_with_fs();
        let ident = TableIdent::from_strs(["static_ns", "static_table"]).unwrap();

        let table = StaticTable::from_metadata_file(&fixture_path, ident, io)
            .await
            .unwrap()
            .into_table();

        assert!(table.readonly());
        assert_eq!(table.identifier.name(), "static_table");
        assert_eq!(table.metadata_location(), Some(fixture_path.as_str()));
    }

    /// A table built without calling `readonly(..)` is not read-only.
    #[tokio::test]
    async fn test_table_readonly_flag() {
        let io = FileIO::new_with_fs();
        let raw_metadata = io
            .new_input(metadata_fixture_path())
            .unwrap()
            .read()
            .await
            .unwrap();
        let parsed_metadata = serde_json::from_slice::<TableMetadata>(&raw_metadata).unwrap();
        let ident = TableIdent::from_strs(["ns", "table"]).unwrap();

        let table = Table::builder()
            .metadata(parsed_metadata)
            .identifier(ident)
            .file_io(io.clone())
            .runtime(Runtime::try_current().unwrap())
            .build()
            .unwrap();

        assert!(!table.readonly());
        assert_eq!(table.identifier.name(), "table");
    }
}