iceberg/io/storage/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Storage interfaces for Iceberg.
19
20mod config;
21mod local_fs;
22mod memory;
23
24use std::fmt::Debug;
25use std::sync::Arc;
26
27use async_trait::async_trait;
28use bytes::Bytes;
29pub use config::*;
30use futures::stream::BoxStream;
31pub use local_fs::{LocalFsStorage, LocalFsStorageFactory};
32pub use memory::{MemoryStorage, MemoryStorageFactory};
33
34use super::{FileMetadata, FileRead, FileWrite, InputFile, OutputFile};
35use crate::Result;
36
37/// Storage backend abstraction used by Iceberg for file I/O.
38///
39/// The trait is annotated with `#[typetag::serde(tag = "type")]`, so `dyn Storage` values can be
40/// serialized and deserialized (e.g. across process boundaries) with a `"type"` tag field.
41///
42/// Third-party crates can implement this trait to provide custom storage backends.
43///
44/// # Implementing Custom Storage
45///
46/// To implement a custom storage backend:
47///
48/// 1. Create a struct that is `Debug + Send + Sync` (the supertraits required by this trait)
49/// 2. Add `#[async_trait]` and `#[typetag::serde]` to the `impl Storage for ...` block
50/// 3. Implement every method; none of them has a default implementation
51///
52/// # Example
53///
54/// ```rust,ignore
55/// #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
56/// struct MyStorage {
57///     // custom fields
58/// }
59///
60/// #[async_trait]
61/// #[typetag::serde]
62/// impl Storage for MyStorage {
63///     async fn exists(&self, path: &str) -> Result<bool> {
64///         // implementation
65///         todo!()
66///     }
67///     // ... implement other methods
68/// }
69/// ```
70#[async_trait]
71#[typetag::serde(tag = "type")]
72pub trait Storage: Debug + Send + Sync {
73    /// Checks whether a file exists at `path`.
74    async fn exists(&self, path: &str) -> Result<bool>;
75
76    /// Returns the [`FileMetadata`] of the file at `path`.
77    async fn metadata(&self, path: &str) -> Result<FileMetadata>;
78
79    /// Reads the file at `path` and returns its content as [`Bytes`].
80    async fn read(&self, path: &str) -> Result<Bytes>;
81
82    /// Opens `path` for reading and returns a boxed [`FileRead`].
83    async fn reader(&self, path: &str) -> Result<Box<dyn FileRead>>;
84
85    /// Writes the bytes `bs` to the output location `path`.
86    async fn write(&self, path: &str, bs: Bytes) -> Result<()>;
87
88    /// Opens `path` for writing and returns a boxed [`FileWrite`].
89    async fn writer(&self, path: &str) -> Result<Box<dyn FileWrite>>;
90
91    /// Deletes the file at `path`.
92    async fn delete(&self, path: &str) -> Result<()>;
93
94    /// Deletes all files whose location starts with the prefix `path`.
95    async fn delete_prefix(&self, path: &str) -> Result<()>;
96
97    /// Deletes every file whose path is yielded by `paths`, a boxed `'static` stream of `String`s.
98    async fn delete_stream(&self, paths: BoxStream<'static, String>) -> Result<()>;
99
100    /// Creates an [`InputFile`] for reading `path` (synchronous, unlike the I/O methods above).
101    fn new_input(&self, path: &str) -> Result<InputFile>;
102
103    /// Creates an [`OutputFile`] for writing `path` (synchronous, unlike the I/O methods above).
104    fn new_output(&self, path: &str) -> Result<OutputFile>;
105}
106
107/// Factory that builds [`Storage`] instances from a [`StorageConfig`].
108///
109/// Implement this trait to provide custom storage backends. The factory pattern allows for lazy
110/// initialization of storage instances and lets users inject custom storage implementations into
111/// catalogs. Like [`Storage`], factories are (de)serializable via `typetag` with a `"type"` tag.
112///
113/// # Example
114///
115/// ```rust,ignore
116/// #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
117/// struct MyCustomStorageFactory {
118///     // custom configuration
119/// }
120///
121/// #[typetag::serde]
122/// impl StorageFactory for MyCustomStorageFactory {
123///     fn build(&self, config: &StorageConfig) -> Result<Arc<dyn Storage>> {
124///         // Create and return custom storage implementation
125///         todo!()
126///     }
127/// }
128/// ```
129#[typetag::serde(tag = "type")]
130pub trait StorageFactory: Debug + Send + Sync {
131    /// Builds a new [`Storage`] instance from the given configuration.
132    ///
133    /// # Arguments
134    ///
135    /// * `config` - The [`StorageConfig`] containing scheme and properties
136    ///
137    /// # Returns
138    ///
139    /// `Ok` with a shared `Arc<dyn Storage>` on success, or an `Err`
140    /// if the storage could not be created.
141    fn build(&self, config: &StorageConfig) -> Result<Arc<dyn Storage>>;
142}