iceberg/io/storage/mod.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Storage interfaces for Iceberg.
19
20mod config;
21mod local_fs;
22mod memory;
23mod opendal;
24
25use std::fmt::Debug;
26use std::sync::Arc;
27
28use async_trait::async_trait;
29use bytes::Bytes;
30pub use config::*;
31pub use local_fs::{LocalFsStorage, LocalFsStorageFactory};
32pub use memory::{MemoryStorage, MemoryStorageFactory};
33#[cfg(feature = "storage-s3")]
34pub use opendal::CustomAwsCredentialLoader;
35pub use opendal::{OpenDalStorage, OpenDalStorageFactory};
36
37use super::{FileMetadata, FileRead, FileWrite, InputFile, OutputFile};
38use crate::Result;
39
40/// Trait for storage operations in Iceberg.
41///
42/// The trait supports serialization via `typetag`, allowing storage instances to be
43/// serialized and deserialized across process boundaries.
44///
45/// Third-party implementations can implement this trait to provide custom storage backends.
46///
47/// # Implementing Custom Storage
48///
49/// To implement a custom storage backend:
50///
51/// 1. Create a struct that implements this trait
52/// 2. Add `#[typetag::serde]` attribute for serialization support
53/// 3. Implement all required methods
54///
55/// # Example
56///
57/// ```rust,ignore
58/// #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
59/// struct MyStorage {
60/// // custom fields
61/// }
62///
63/// #[async_trait]
64/// #[typetag::serde]
65/// impl Storage for MyStorage {
66/// async fn exists(&self, path: &str) -> Result<bool> {
67/// // implementation
68/// todo!()
69/// }
70/// // ... implement other methods
71/// }
72///
73/// TODO remove below when the trait is integrated with FileIO and Catalog
74/// # NOTE
75/// This trait is under heavy development and is not used anywhere as of now
76/// Please DO NOT implement it
77/// ```
78#[async_trait]
79#[typetag::serde(tag = "type")]
80pub trait Storage: Debug + Send + Sync {
81 /// Check if a file exists at the given path
82 async fn exists(&self, path: &str) -> Result<bool>;
83
84 /// Get metadata from an input path
85 async fn metadata(&self, path: &str) -> Result<FileMetadata>;
86
87 /// Read bytes from a path
88 async fn read(&self, path: &str) -> Result<Bytes>;
89
90 /// Get FileRead from a path
91 async fn reader(&self, path: &str) -> Result<Box<dyn FileRead>>;
92
93 /// Write bytes to an output path
94 async fn write(&self, path: &str, bs: Bytes) -> Result<()>;
95
96 /// Get FileWrite from a path
97 async fn writer(&self, path: &str) -> Result<Box<dyn FileWrite>>;
98
99 /// Delete a file at the given path
100 async fn delete(&self, path: &str) -> Result<()>;
101
102 /// Delete all files with the given prefix
103 async fn delete_prefix(&self, path: &str) -> Result<()>;
104
105 /// Create a new input file for reading
106 fn new_input(&self, path: &str) -> Result<InputFile>;
107
108 /// Create a new output file for writing
109 fn new_output(&self, path: &str) -> Result<OutputFile>;
110}
111
112/// Factory for creating Storage instances from configuration.
113///
114/// Implement this trait to provide custom storage backends. The factory pattern
115/// allows for lazy initialization of storage instances and enables users to
116/// inject custom storage implementations into catalogs.
117///
118/// # Example
119///
120/// ```rust,ignore
121/// #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
122/// struct MyCustomStorageFactory {
123/// // custom configuration
124/// }
125///
126/// #[typetag::serde]
127/// impl StorageFactory for MyCustomStorageFactory {
128/// fn build(&self, config: &StorageConfig) -> Result<Arc<dyn Storage>> {
129/// // Create and return custom storage implementation
130/// todo!()
131/// }
132/// }
133///
134/// TODO remove below when the trait is integrated with FileIO and Catalog
135/// # NOTE
136/// This trait is under heavy development and is not used anywhere as of now
137/// Please DO NOT implement it
138/// ```
139#[typetag::serde(tag = "type")]
140pub trait StorageFactory: Debug + Send + Sync {
141 /// Build a new Storage instance from the given configuration.
142 ///
143 /// # Arguments
144 ///
145 /// * `config` - The storage configuration containing scheme and properties
146 ///
147 /// # Returns
148 ///
149 /// A `Result` containing an `Arc<dyn Storage>` on success, or an error
150 /// if the storage could not be created.
151 fn build(&self, config: &StorageConfig) -> Result<Arc<dyn Storage>>;
152}