iceberg/io/storage/config/
s3.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Amazon S3 storage configuration.
19//!
20//! This module provides configuration constants and types for Amazon S3 storage.
21//! These are based on the [Iceberg S3 FileIO configuration](https://py.iceberg.apache.org/configuration/#s3).
22
23use serde::{Deserialize, Serialize};
24use typed_builder::TypedBuilder;
25
26use super::StorageConfig;
27use crate::io::is_truthy;
28use crate::{Error, ErrorKind, Result};
29
/// S3 endpoint URL.
///
/// Copied verbatim into `S3Config::endpoint` when present.
pub const S3_ENDPOINT: &str = "s3.endpoint";
/// S3 access key ID.
pub const S3_ACCESS_KEY_ID: &str = "s3.access-key-id";
/// S3 secret access key.
pub const S3_SECRET_ACCESS_KEY: &str = "s3.secret-access-key";
/// S3 session token (required when using temporary credentials).
pub const S3_SESSION_TOKEN: &str = "s3.session-token";
/// S3 region.
///
/// Overridden by [`CLIENT_REGION`] when both properties are set.
pub const S3_REGION: &str = "s3.region";
/// Region to use for the S3 client (takes precedence over [`S3_REGION`]).
pub const CLIENT_REGION: &str = "client.region";
/// S3 path-style access.
///
/// When this property is present, virtual-host-style addressing is enabled
/// only if the value is *not* truthy. The value is lowercased before the
/// truthiness check.
pub const S3_PATH_STYLE_ACCESS: &str = "s3.path-style-access";
/// S3 server-side encryption type.
///
/// Accepted values (compared case-insensitively) are `none`, `s3`, `kms` and
/// `custom`. Any other value makes the conversion from `StorageConfig` fail.
pub const S3_SSE_TYPE: &str = "s3.sse.type";
/// S3 server-side encryption key.
///
/// If the S3 encryption type is `kms`, the input is a KMS key ID; when this
/// property is not set, the default key "aws/s3" is used.
/// If the encryption type is `custom`, the input is a custom base-64 AES256
/// symmetric key.
pub const S3_SSE_KEY: &str = "s3.sse.key";
/// S3 server-side encryption MD5.
///
/// Only read when the encryption type is `custom`.
pub const S3_SSE_MD5: &str = "s3.sse.md5";
/// If set, all AWS clients will assume a role of the given ARN, instead of using the default
/// credential chain.
pub const S3_ASSUME_ROLE_ARN: &str = "client.assume-role.arn";
/// Optional external ID used to assume an IAM role.
pub const S3_ASSUME_ROLE_EXTERNAL_ID: &str = "client.assume-role.external-id";
/// Optional session name used to assume an IAM role.
pub const S3_ASSUME_ROLE_SESSION_NAME: &str = "client.assume-role.session-name";
/// Option to skip signing requests (e.g. for public buckets/folders).
///
/// Enabled only when the (lowercased) value is truthy.
pub const S3_ALLOW_ANONYMOUS: &str = "s3.allow-anonymous";
/// Option to skip loading the credential from EC2 metadata (typically used in conjunction with
/// `S3_ALLOW_ANONYMOUS`).
///
/// Enabled only when the (lowercased) value is truthy.
pub const S3_DISABLE_EC2_METADATA: &str = "s3.disable-ec2-metadata";
/// Option to skip loading configuration from config file and the env.
///
/// Enabled only when the (lowercased) value is truthy.
pub const S3_DISABLE_CONFIG_LOAD: &str = "s3.disable-config-load";
67
/// Amazon S3 storage configuration.
///
/// This struct contains all the configuration options for connecting to Amazon S3.
/// Use the builder pattern via `S3Config::builder()` to construct instances, or
/// convert from a [`StorageConfig`] with `S3Config::try_from`.
///
/// Defaults follow the Iceberg `S3FileIOProperties` spec (see
/// [`PATH_STYLE_ACCESS_DEFAULT = false`](https://github.com/apache/iceberg/blob/main/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java)),
/// i.e. virtual-host-style addressing is enabled unless
/// `s3.path-style-access=true` is explicitly set. This matches what
/// Java clients do out of the box and is required for a number of
/// S3-compatible stores that do not support path-style URLs.
///
/// Every `Option` field defaults to `None` and every `bool` field other than
/// `enable_virtual_host_style` defaults to `false`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, TypedBuilder)]
pub struct S3Config {
    /// S3 endpoint URL (property [`S3_ENDPOINT`]).
    #[builder(default, setter(strip_option, into))]
    pub endpoint: Option<String>,
    /// S3 access key ID (property [`S3_ACCESS_KEY_ID`]).
    #[builder(default, setter(strip_option, into))]
    pub access_key_id: Option<String>,
    /// S3 secret access key (property [`S3_SECRET_ACCESS_KEY`]).
    #[builder(default, setter(strip_option, into))]
    pub secret_access_key: Option<String>,
    /// S3 session token (property [`S3_SESSION_TOKEN`]).
    #[builder(default, setter(strip_option, into))]
    pub session_token: Option<String>,
    /// S3 region.
    ///
    /// When converted from properties, [`CLIENT_REGION`] wins over [`S3_REGION`].
    #[builder(default, setter(strip_option, into))]
    pub region: Option<String>,
    /// Enable virtual host style (opposite of path style access).
    ///
    /// Defaults to `true` to match Iceberg `S3FileIOProperties.PATH_STYLE_ACCESS_DEFAULT = false`.
    #[builder(default = true)]
    pub enable_virtual_host_style: bool,
    /// Server side encryption type.
    ///
    /// The property conversion stores `AES256` for `s3.sse.type=s3` and
    /// `aws:kms` for `s3.sse.type=kms`.
    #[builder(default, setter(strip_option, into))]
    pub server_side_encryption: Option<String>,
    /// Server side encryption AWS KMS key ID.
    ///
    /// Filled from [`S3_SSE_KEY`] when `s3.sse.type=kms`.
    #[builder(default, setter(strip_option, into))]
    pub server_side_encryption_aws_kms_key_id: Option<String>,
    /// Server side encryption customer algorithm.
    ///
    /// The property conversion stores `AES256` when `s3.sse.type=custom`.
    #[builder(default, setter(strip_option, into))]
    pub server_side_encryption_customer_algorithm: Option<String>,
    /// Server side encryption customer key.
    ///
    /// Filled from [`S3_SSE_KEY`] when `s3.sse.type=custom`.
    #[builder(default, setter(strip_option, into))]
    pub server_side_encryption_customer_key: Option<String>,
    /// Server side encryption customer key MD5.
    ///
    /// Filled from [`S3_SSE_MD5`] when `s3.sse.type=custom`.
    #[builder(default, setter(strip_option, into))]
    pub server_side_encryption_customer_key_md5: Option<String>,
    /// Role ARN for assuming a role (property [`S3_ASSUME_ROLE_ARN`]).
    #[builder(default, setter(strip_option, into))]
    pub role_arn: Option<String>,
    /// External ID for assuming a role (property [`S3_ASSUME_ROLE_EXTERNAL_ID`]).
    #[builder(default, setter(strip_option, into))]
    pub external_id: Option<String>,
    /// Session name for assuming a role (property [`S3_ASSUME_ROLE_SESSION_NAME`]).
    #[builder(default, setter(strip_option, into))]
    pub role_session_name: Option<String>,
    /// Allow anonymous access (property [`S3_ALLOW_ANONYMOUS`]).
    #[builder(default)]
    pub allow_anonymous: bool,
    /// Disable EC2 metadata (property [`S3_DISABLE_EC2_METADATA`]).
    #[builder(default)]
    pub disable_ec2_metadata: bool,
    /// Disable config load (property [`S3_DISABLE_CONFIG_LOAD`]).
    #[builder(default)]
    pub disable_config_load: bool,
}
135
136impl Default for S3Config {
137    fn default() -> Self {
138        Self::builder().build()
139    }
140}
141
142impl TryFrom<&StorageConfig> for S3Config {
143    type Error = crate::Error;
144
145    fn try_from(config: &StorageConfig) -> Result<Self> {
146        let props = config.props();
147
148        let mut cfg = S3Config::default();
149
150        if let Some(endpoint) = props.get(S3_ENDPOINT) {
151            cfg.endpoint = Some(endpoint.clone());
152        }
153        if let Some(access_key_id) = props.get(S3_ACCESS_KEY_ID) {
154            cfg.access_key_id = Some(access_key_id.clone());
155        }
156        if let Some(secret_access_key) = props.get(S3_SECRET_ACCESS_KEY) {
157            cfg.secret_access_key = Some(secret_access_key.clone());
158        }
159        if let Some(session_token) = props.get(S3_SESSION_TOKEN) {
160            cfg.session_token = Some(session_token.clone());
161        }
162        if let Some(region) = props.get(S3_REGION) {
163            cfg.region = Some(region.clone());
164        }
165        // CLIENT_REGION takes precedence over S3_REGION
166        if let Some(region) = props.get(CLIENT_REGION) {
167            cfg.region = Some(region.clone());
168        }
169        if let Some(path_style_access) = props.get(S3_PATH_STYLE_ACCESS) {
170            cfg.enable_virtual_host_style = !is_truthy(path_style_access.to_lowercase().as_str());
171        }
172        if let Some(arn) = props.get(S3_ASSUME_ROLE_ARN) {
173            cfg.role_arn = Some(arn.clone());
174        }
175        if let Some(external_id) = props.get(S3_ASSUME_ROLE_EXTERNAL_ID) {
176            cfg.external_id = Some(external_id.clone());
177        }
178        if let Some(session_name) = props.get(S3_ASSUME_ROLE_SESSION_NAME) {
179            cfg.role_session_name = Some(session_name.clone());
180        }
181
182        // Handle SSE configuration
183        let s3_sse_key = props.get(S3_SSE_KEY).cloned();
184        if let Some(sse_type) = props.get(S3_SSE_TYPE) {
185            match sse_type.to_lowercase().as_str() {
186                // No Server Side Encryption
187                "none" => {}
188                // S3 SSE-S3 encryption (S3 managed keys). https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingServerSideEncryption.html
189                "s3" => {
190                    cfg.server_side_encryption = Some("AES256".to_string());
191                }
192                // S3 SSE KMS, either using default or custom KMS key. https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingKMSEncryption.html
193                "kms" => {
194                    cfg.server_side_encryption = Some("aws:kms".to_string());
195                    cfg.server_side_encryption_aws_kms_key_id = s3_sse_key;
196                }
197                // S3 SSE-C, using customer managed keys. https://docs.aws.amazon.com/AmazonS3/latest/dev/ServerSideEncryptionCustomerKeys.html
198                "custom" => {
199                    cfg.server_side_encryption_customer_algorithm = Some("AES256".to_string());
200                    cfg.server_side_encryption_customer_key = s3_sse_key;
201                    cfg.server_side_encryption_customer_key_md5 = props.get(S3_SSE_MD5).cloned();
202                }
203                _ => {
204                    return Err(Error::new(
205                        ErrorKind::DataInvalid,
206                        format!(
207                            "Invalid {S3_SSE_TYPE}: {sse_type}. Expected one of (custom, kms, s3, none)"
208                        ),
209                    ));
210                }
211            }
212        }
213
214        if let Some(allow_anonymous) = props.get(S3_ALLOW_ANONYMOUS)
215            && is_truthy(allow_anonymous.to_lowercase().as_str())
216        {
217            cfg.allow_anonymous = true;
218        }
219        if let Some(disable_ec2_metadata) = props.get(S3_DISABLE_EC2_METADATA)
220            && is_truthy(disable_ec2_metadata.to_lowercase().as_str())
221        {
222            cfg.disable_ec2_metadata = true;
223        }
224        if let Some(disable_config_load) = props.get(S3_DISABLE_CONFIG_LOAD)
225            && is_truthy(disable_config_load.to_lowercase().as_str())
226        {
227            cfg.disable_config_load = true;
228        }
229
230        Ok(cfg)
231    }
232}
233
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts the given properties into an `S3Config`, panicking on failure.
    fn parse(storage_config: StorageConfig) -> S3Config {
        S3Config::try_from(&storage_config).unwrap()
    }

    /// Shorthand for the owned optional string stored in the config fields.
    fn some(value: &str) -> Option<String> {
        Some(value.to_string())
    }

    #[test]
    fn test_s3_config_builder() {
        let built = S3Config::builder()
            .region("us-east-1")
            .access_key_id("my-access-key")
            .secret_access_key("my-secret-key")
            .endpoint("http://localhost:9000")
            .build();

        assert_eq!(built.region, some("us-east-1"));
        assert_eq!(built.access_key_id, some("my-access-key"));
        assert_eq!(built.secret_access_key, some("my-secret-key"));
        assert_eq!(built.endpoint, some("http://localhost:9000"));
    }

    #[test]
    fn test_s3_config_from_storage_config() {
        let parsed = parse(
            StorageConfig::new()
                .with_prop(S3_REGION, "us-east-1")
                .with_prop(S3_ACCESS_KEY_ID, "my-access-key")
                .with_prop(S3_SECRET_ACCESS_KEY, "my-secret-key")
                .with_prop(S3_ENDPOINT, "http://localhost:9000"),
        );

        assert_eq!(parsed.region, some("us-east-1"));
        assert_eq!(parsed.access_key_id, some("my-access-key"));
        assert_eq!(parsed.secret_access_key, some("my-secret-key"));
        assert_eq!(parsed.endpoint, some("http://localhost:9000"));
    }

    #[test]
    fn test_s3_config_client_region_precedence() {
        let parsed = parse(
            StorageConfig::new()
                .with_prop(S3_REGION, "us-east-1")
                .with_prop(CLIENT_REGION, "eu-west-1"),
        );

        // When both keys are present, CLIENT_REGION must win.
        assert_eq!(parsed.region, some("eu-west-1"));
    }

    #[test]
    fn test_s3_config_default_is_virtual_host_style() {
        // Matches Iceberg S3FileIOProperties.PATH_STYLE_ACCESS_DEFAULT = false.
        let from_default = S3Config::default();
        assert!(from_default.enable_virtual_host_style);

        let from_empty_props = parse(StorageConfig::new());
        assert!(from_empty_props.enable_virtual_host_style);
    }

    #[test]
    fn test_s3_config_path_style_access() {
        let parsed = parse(StorageConfig::new().with_prop(S3_PATH_STYLE_ACCESS, "true"));

        // Requesting path-style access switches virtual-host style off.
        assert!(!parsed.enable_virtual_host_style);
    }

    #[test]
    fn test_s3_config_sse_kms() {
        let parsed = parse(
            StorageConfig::new()
                .with_prop(S3_SSE_TYPE, "kms")
                .with_prop(S3_SSE_KEY, "my-kms-key-id"),
        );

        assert_eq!(parsed.server_side_encryption, some("aws:kms"));
        assert_eq!(
            parsed.server_side_encryption_aws_kms_key_id,
            some("my-kms-key-id")
        );
    }

    #[test]
    fn test_s3_config_allow_anonymous() {
        let parsed = parse(StorageConfig::new().with_prop(S3_ALLOW_ANONYMOUS, "true"));

        assert!(parsed.allow_anonymous);
    }
}