iceberg/
table.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Table API for Apache Iceberg
19
20use std::sync::Arc;
21
22use crate::arrow::ArrowReaderBuilder;
23use crate::inspect::MetadataTable;
24use crate::io::FileIO;
25use crate::io::object_cache::ObjectCache;
26use crate::runtime::Runtime;
27use crate::scan::TableScanBuilder;
28use crate::spec::{ManifestListReader, SchemaRef, SnapshotRef, TableMetadata, TableMetadataRef};
29use crate::{Error, ErrorKind, Result, TableIdent};
30
/// Builder to create a [`Table`].
///
/// Obtain one via [`Table::builder`], configure it with the chained setter
/// methods, then call [`TableBuilder::build`].
pub struct TableBuilder {
    /// FileIO handed to the table; `build` fails if it is not set.
    file_io: Option<FileIO>,
    /// Location of the table's metadata file, if one was supplied.
    metadata_location: Option<String>,
    /// Table metadata; `build` fails if it is not set.
    metadata: Option<TableMetadataRef>,
    /// Table identifier; `build` fails if it is not set.
    identifier: Option<TableIdent>,
    /// Readonly flag copied onto the built table.
    readonly: bool,
    /// When `true`, `build` creates the object cache in its disabled mode.
    disable_cache: bool,
    /// Explicit object cache capacity; not consulted when `disable_cache` is `true`.
    cache_size_bytes: Option<u64>,
    /// Runtime handed to the table; `build` fails if it is not set.
    runtime: Option<Runtime>,
}
42
43impl TableBuilder {
44    pub(crate) fn new() -> Self {
45        Self {
46            file_io: None,
47            metadata_location: None,
48            metadata: None,
49            identifier: None,
50            readonly: false,
51            disable_cache: false,
52            cache_size_bytes: None,
53            runtime: None,
54        }
55    }
56
57    /// required - sets the necessary FileIO to use for the table
58    pub fn file_io(mut self, file_io: FileIO) -> Self {
59        self.file_io = Some(file_io);
60        self
61    }
62
63    /// optional - sets the tables metadata location
64    pub fn metadata_location<T: Into<String>>(mut self, metadata_location: T) -> Self {
65        self.metadata_location = Some(metadata_location.into());
66        self
67    }
68
69    /// required - passes in the TableMetadata to use for the Table
70    pub fn metadata<T: Into<TableMetadataRef>>(mut self, metadata: T) -> Self {
71        self.metadata = Some(metadata.into());
72        self
73    }
74
75    /// required - passes in the TableIdent to use for the Table
76    pub fn identifier(mut self, identifier: TableIdent) -> Self {
77        self.identifier = Some(identifier);
78        self
79    }
80
81    /// specifies if the Table is readonly or not (default not)
82    pub fn readonly(mut self, readonly: bool) -> Self {
83        self.readonly = readonly;
84        self
85    }
86
87    /// specifies if the Table's metadata cache will be disabled,
88    /// so that reads of Manifests and ManifestLists will never
89    /// get cached.
90    pub fn disable_cache(mut self) -> Self {
91        self.disable_cache = true;
92        self
93    }
94
95    /// optionally set a non-default metadata cache size
96    pub fn cache_size_bytes(mut self, cache_size_bytes: u64) -> Self {
97        self.cache_size_bytes = Some(cache_size_bytes);
98        self
99    }
100
101    /// Set the Runtime for this table to use when spawning tasks.
102    pub fn runtime(mut self, runtime: Runtime) -> Self {
103        self.runtime = Some(runtime);
104        self
105    }
106
107    /// build the Table
108    pub fn build(self) -> Result<Table> {
109        let Self {
110            file_io,
111            metadata_location,
112            metadata,
113            identifier,
114            readonly,
115            disable_cache,
116            cache_size_bytes,
117            runtime,
118        } = self;
119
120        let Some(file_io) = file_io else {
121            return Err(Error::new(
122                ErrorKind::DataInvalid,
123                "FileIO must be provided with TableBuilder.file_io()",
124            ));
125        };
126
127        let Some(metadata) = metadata else {
128            return Err(Error::new(
129                ErrorKind::DataInvalid,
130                "TableMetadataRef must be provided with TableBuilder.metadata()",
131            ));
132        };
133
134        let Some(identifier) = identifier else {
135            return Err(Error::new(
136                ErrorKind::DataInvalid,
137                "TableIdent must be provided with TableBuilder.identifier()",
138            ));
139        };
140
141        let Some(runtime) = runtime else {
142            return Err(Error::new(
143                ErrorKind::DataInvalid,
144                "Runtime must be provided with TableBuilder.runtime()",
145            ));
146        };
147
148        let object_cache = if disable_cache {
149            Arc::new(ObjectCache::with_disabled_cache(file_io.clone()))
150        } else if let Some(cache_size_bytes) = cache_size_bytes {
151            Arc::new(ObjectCache::new_with_capacity(
152                file_io.clone(),
153                cache_size_bytes,
154            ))
155        } else {
156            Arc::new(ObjectCache::new(file_io.clone()))
157        };
158
159        Ok(Table {
160            file_io,
161            metadata_location,
162            metadata,
163            identifier,
164            readonly,
165            object_cache,
166            runtime,
167        })
168    }
169}
170
/// Table represents a table in the catalog.
///
/// Instances are assembled through [`TableBuilder`]; see [`Table::builder`].
#[derive(Debug, Clone)]
pub struct Table {
    /// FileIO used by this table.
    file_io: FileIO,
    /// Location of the metadata file, when one is known.
    metadata_location: Option<String>,
    /// Current table metadata.
    metadata: TableMetadataRef,
    /// Identifier of this table.
    identifier: TableIdent,
    /// Whether this table is flagged as readonly.
    readonly: bool,
    /// Shared object cache created by the builder from `file_io`.
    object_cache: Arc<ObjectCache>,
    /// Runtime associated with this table.
    runtime: Runtime,
}
182
183impl Table {
184    /// Sets the [`Table`] metadata and returns an updated instance with the new metadata applied.
185    pub(crate) fn with_metadata(mut self, metadata: TableMetadataRef) -> Self {
186        self.metadata = metadata;
187        self
188    }
189
190    /// Sets the [`Table`] metadata location and returns an updated instance.
191    pub(crate) fn with_metadata_location(mut self, metadata_location: String) -> Self {
192        self.metadata_location = Some(metadata_location);
193        self
194    }
195
196    /// Returns a TableBuilder to build a table
197    pub fn builder() -> TableBuilder {
198        TableBuilder::new()
199    }
200
201    /// Returns table identifier.
202    pub fn identifier(&self) -> &TableIdent {
203        &self.identifier
204    }
205    /// Returns current metadata.
206    pub fn metadata(&self) -> &TableMetadata {
207        &self.metadata
208    }
209
210    /// Returns current metadata ref.
211    pub fn metadata_ref(&self) -> TableMetadataRef {
212        self.metadata.clone()
213    }
214
215    /// Returns current metadata location.
216    pub fn metadata_location(&self) -> Option<&str> {
217        self.metadata_location.as_deref()
218    }
219
220    /// Returns current metadata location in a result.
221    pub fn metadata_location_result(&self) -> Result<&str> {
222        self.metadata_location.as_deref().ok_or(Error::new(
223            ErrorKind::DataInvalid,
224            format!(
225                "Metadata location does not exist for table: {}",
226                self.identifier
227            ),
228        ))
229    }
230
231    /// Returns file io used in this table.
232    pub fn file_io(&self) -> &FileIO {
233        &self.file_io
234    }
235
236    /// Returns this table's object cache
237    pub(crate) fn object_cache(&self) -> Arc<ObjectCache> {
238        self.object_cache.clone()
239    }
240
241    /// Creates a table scan.
242    pub fn scan(&self) -> TableScanBuilder<'_> {
243        TableScanBuilder::new(self)
244    }
245
246    /// Creates a metadata table which provides table-like APIs for inspecting metadata.
247    /// See [`MetadataTable`] for more details.
248    pub fn inspect(&self) -> MetadataTable<'_> {
249        MetadataTable::new(self)
250    }
251
252    /// Returns the [`Runtime`] for this table.
253    pub(crate) fn runtime(&self) -> &Runtime {
254        &self.runtime
255    }
256
257    /// Returns the flag indicating whether the `Table` is readonly or not
258    pub fn readonly(&self) -> bool {
259        self.readonly
260    }
261
262    /// Returns the current schema as a shared reference.
263    pub fn current_schema_ref(&self) -> SchemaRef {
264        self.metadata.current_schema().clone()
265    }
266
267    /// Creates a [`ManifestListReader`] for the given snapshot.
268    pub fn manifest_list_reader(&self, snapshot: &SnapshotRef) -> ManifestListReader {
269        ManifestListReader::new(
270            snapshot.clone(),
271            self.file_io.clone(),
272            self.metadata.clone(),
273        )
274    }
275
276    /// Create a reader for the table.
277    pub fn reader_builder(&self) -> ArrowReaderBuilder {
278        ArrowReaderBuilder::new(self.file_io.clone(), self.runtime().clone())
279    }
280}
281
/// `StaticTable` is a read-only table struct that can be created from a metadata file or from `TableMetadata` without a catalog.
/// It can only be used to read metadata and for table scan.
/// # Examples
///
/// ```rust, no_run
/// # use iceberg::io::FileIO;
/// # use iceberg::table::StaticTable;
/// # use iceberg::TableIdent;
/// # async fn example() {
/// let metadata_file_location = "s3://bucket_name/path/to/metadata.json";
/// let file_io = FileIO::new_with_fs();
/// let static_identifier = TableIdent::from_strs(["static_ns", "static_table"]).unwrap();
/// let static_table =
///     StaticTable::from_metadata_file(metadata_file_location, static_identifier, file_io)
///         .await
///         .unwrap();
/// let snapshot_id = static_table
///     .metadata()
///     .current_snapshot()
///     .unwrap()
///     .snapshot_id();
/// # }
/// ```
#[derive(Debug, Clone)]
pub struct StaticTable(Table);
307
308impl StaticTable {
309    /// Creates a static table from a given `TableMetadata` and `FileIO`
310    pub async fn from_metadata(
311        metadata: TableMetadata,
312        table_ident: TableIdent,
313        file_io: FileIO,
314    ) -> Result<Self> {
315        let table = Table::builder()
316            .metadata(metadata)
317            .identifier(table_ident)
318            .file_io(file_io.clone())
319            .runtime(Runtime::try_current()?)
320            .readonly(true)
321            .build();
322
323        Ok(Self(table?))
324    }
325    /// Creates a static table directly from metadata file and `FileIO`
326    pub async fn from_metadata_file(
327        metadata_location: &str,
328        table_ident: TableIdent,
329        file_io: FileIO,
330    ) -> Result<Self> {
331        let metadata = TableMetadata::read_from(&file_io, metadata_location).await?;
332
333        let table = Table::builder()
334            .metadata(metadata)
335            .metadata_location(metadata_location)
336            .identifier(table_ident)
337            .file_io(file_io.clone())
338            .runtime(Runtime::try_current()?)
339            .readonly(true)
340            .build();
341
342        Ok(Self(table?))
343    }
344
345    /// Create a TableScanBuilder for the static table.
346    pub fn scan(&self) -> TableScanBuilder<'_> {
347        self.0.scan()
348    }
349
350    /// Get TableMetadataRef for the static table
351    pub fn metadata(&self) -> TableMetadataRef {
352        self.0.metadata_ref()
353    }
354
355    /// Consumes the `StaticTable` and return it as a `Table`
356    /// Please use this method carefully as the Table it returns remains detached from a catalog
357    /// and can't be used to perform modifications on the table.
358    pub fn into_table(self) -> Table {
359        self.0
360    }
361
362    /// Create a reader for the table.
363    pub fn reader_builder(&self) -> ArrowReaderBuilder {
364        self.0.reader_builder()
365    }
366}
367
#[cfg(test)]
mod tests {
    use super::*;

    /// Name of the metadata fixture shared by every test in this module.
    const METADATA_FILE_NAME: &str = "TableMetadataV2Valid.json";

    /// Builds the path of the metadata fixture under the crate's `testdata` directory.
    fn fixture_path() -> String {
        format!(
            "{}/testdata/table_metadata/{}",
            env!("CARGO_MANIFEST_DIR"),
            METADATA_FILE_NAME
        )
    }

    /// Loads the fixture at `path` as a `StaticTable` named `static_ns.static_table`.
    async fn load_static_table(path: &str) -> StaticTable {
        let file_io = FileIO::new_with_fs();
        let ident = TableIdent::from_strs(["static_ns", "static_table"]).unwrap();
        StaticTable::from_metadata_file(path, ident, file_io)
            .await
            .unwrap()
    }

    #[tokio::test]
    async fn test_static_table_from_file() {
        let path = fixture_path();
        let static_table = load_static_table(&path).await;

        let metadata = static_table.metadata();
        let current = metadata.current_snapshot().unwrap();
        assert_eq!(
            current.snapshot_id(),
            3055729675574597004,
            "snapshot id from metadata don't match"
        );
    }

    #[tokio::test]
    async fn test_static_into_table() {
        let path = fixture_path();
        let table = load_static_table(&path).await.into_table();

        assert!(table.readonly());
        assert_eq!(table.identifier.name(), "static_table");
        assert_eq!(table.metadata_location(), Some(path.as_str()));
    }

    #[tokio::test]
    async fn test_table_readonly_flag() {
        let file_io = FileIO::new_with_fs();

        // Parse the fixture by hand so the table goes through `Table::builder`
        // directly, without `readonly(true)` being applied.
        let input = file_io.new_input(fixture_path()).unwrap();
        let bytes = input.read().await.unwrap();
        let table_metadata: TableMetadata = serde_json::from_slice(&bytes).unwrap();

        let ident = TableIdent::from_strs(["ns", "table"]).unwrap();
        let table = Table::builder()
            .metadata(table_metadata)
            .identifier(ident)
            .file_io(file_io.clone())
            .runtime(Runtime::try_current().unwrap())
            .build()
            .unwrap();

        assert!(!table.readonly());
        assert_eq!(table.identifier.name(), "table");
    }
}
444}