iceberg/io/config/
s3.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Amazon S3 storage configuration.
19//!
20//! This module provides configuration constants and types for Amazon S3 storage.
21//! These are based on the [Iceberg S3 FileIO configuration](https://py.iceberg.apache.org/configuration/#s3).
22
23use serde::{Deserialize, Serialize};
24use typed_builder::TypedBuilder;
25
26use super::StorageConfig;
27use crate::io::is_truthy;
28use crate::{Error, ErrorKind, Result};
29
/// S3 endpoint URL.
pub const S3_ENDPOINT: &str = "s3.endpoint";
/// S3 access key ID.
pub const S3_ACCESS_KEY_ID: &str = "s3.access-key-id";
/// S3 secret access key.
pub const S3_SECRET_ACCESS_KEY: &str = "s3.secret-access-key";
/// S3 session token (required when using temporary credentials).
pub const S3_SESSION_TOKEN: &str = "s3.session-token";
/// S3 region.
///
/// Overridden by [`CLIENT_REGION`] when both properties are present.
pub const S3_REGION: &str = "s3.region";
/// Region to use for the S3 client (takes precedence over [`S3_REGION`]).
pub const CLIENT_REGION: &str = "client.region";
/// S3 Path Style Access.
///
/// When this property is present, [`S3Config::enable_virtual_host_style`] is set to the
/// negation of its truthiness: a truthy value keeps virtual host style off, any other value
/// turns it on.
pub const S3_PATH_STYLE_ACCESS: &str = "s3.path-style-access";
/// S3 Server Side Encryption Type.
///
/// Matched case-insensitively; the accepted values are `none`, `s3`, `kms` and `custom`.
/// Any other value makes the conversion into [`S3Config`] fail.
pub const S3_SSE_TYPE: &str = "s3.sse.type";
/// S3 Server Side Encryption Key.
/// If S3 encryption type is kms, input is a KMS Key ID.
/// In case this property is not set, default key "aws/s3" is used.
/// If encryption type is custom, input is a custom base-64 AES256 symmetric key.
pub const S3_SSE_KEY: &str = "s3.sse.key";
/// S3 Server Side Encryption MD5.
///
/// Only read when the encryption type is `custom`.
pub const S3_SSE_MD5: &str = "s3.sse.md5";
/// If set, all AWS clients will assume a role of the given ARN, instead of using the default
/// credential chain.
pub const S3_ASSUME_ROLE_ARN: &str = "client.assume-role.arn";
/// Optional external ID used to assume an IAM role.
pub const S3_ASSUME_ROLE_EXTERNAL_ID: &str = "client.assume-role.external-id";
/// Optional session name used to assume an IAM role.
pub const S3_ASSUME_ROLE_SESSION_NAME: &str = "client.assume-role.session-name";
/// Option to skip signing requests (e.g. for public buckets/folders).
pub const S3_ALLOW_ANONYMOUS: &str = "s3.allow-anonymous";
/// Option to skip loading the credential from EC2 metadata (typically used in conjunction with
/// [`S3_ALLOW_ANONYMOUS`]).
pub const S3_DISABLE_EC2_METADATA: &str = "s3.disable-ec2-metadata";
/// Option to skip loading configuration from config file and the env.
pub const S3_DISABLE_CONFIG_LOAD: &str = "s3.disable-config-load";
67
/// Amazon S3 storage configuration.
///
/// This struct contains all the configuration options for connecting to Amazon S3.
/// Use the builder pattern via `S3Config::builder()` to construct instances, or convert
/// from a `StorageConfig` with `S3Config::try_from(&storage_config)`.
///
/// Every field is optional for the builder: `Option` fields default to `None` and
/// boolean fields default to `false`.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize, TypedBuilder)]
pub struct S3Config {
    /// S3 endpoint URL.
    #[builder(default, setter(strip_option, into))]
    pub endpoint: Option<String>,
    /// S3 access key ID.
    #[builder(default, setter(strip_option, into))]
    pub access_key_id: Option<String>,
    /// S3 secret access key.
    #[builder(default, setter(strip_option, into))]
    pub secret_access_key: Option<String>,
    /// S3 session token.
    #[builder(default, setter(strip_option, into))]
    pub session_token: Option<String>,
    /// S3 region.
    ///
    /// When converted from a `StorageConfig`, `client.region` wins over `s3.region`.
    #[builder(default, setter(strip_option, into))]
    pub region: Option<String>,
    /// Enable virtual host style (opposite of path style access).
    ///
    /// When converted from a `StorageConfig`, this is the negation of the truthiness of
    /// `s3.path-style-access`; it stays `false` if that property is absent.
    #[builder(default)]
    pub enable_virtual_host_style: bool,
    /// Server side encryption type.
    ///
    /// The `StorageConfig` conversion sets this to `AES256` for `s3.sse.type = s3` and to
    /// `aws:kms` for `s3.sse.type = kms`.
    #[builder(default, setter(strip_option, into))]
    pub server_side_encryption: Option<String>,
    /// Server side encryption AWS KMS key ID.
    ///
    /// Taken from `s3.sse.key` when `s3.sse.type = kms`.
    #[builder(default, setter(strip_option, into))]
    pub server_side_encryption_aws_kms_key_id: Option<String>,
    /// Server side encryption customer algorithm.
    ///
    /// The `StorageConfig` conversion sets this to `AES256` when `s3.sse.type = custom`.
    #[builder(default, setter(strip_option, into))]
    pub server_side_encryption_customer_algorithm: Option<String>,
    /// Server side encryption customer key.
    ///
    /// Taken from `s3.sse.key` when `s3.sse.type = custom`.
    #[builder(default, setter(strip_option, into))]
    pub server_side_encryption_customer_key: Option<String>,
    /// Server side encryption customer key MD5.
    ///
    /// Taken from `s3.sse.md5` when `s3.sse.type = custom`.
    #[builder(default, setter(strip_option, into))]
    pub server_side_encryption_customer_key_md5: Option<String>,
    /// Role ARN for assuming a role.
    #[builder(default, setter(strip_option, into))]
    pub role_arn: Option<String>,
    /// External ID for assuming a role.
    #[builder(default, setter(strip_option, into))]
    pub external_id: Option<String>,
    /// Session name for assuming a role.
    #[builder(default, setter(strip_option, into))]
    pub role_session_name: Option<String>,
    /// Allow anonymous access.
    #[builder(default)]
    pub allow_anonymous: bool,
    /// Disable EC2 metadata.
    #[builder(default)]
    pub disable_ec2_metadata: bool,
    /// Disable config load.
    #[builder(default)]
    pub disable_config_load: bool,
}
127
128impl TryFrom<&StorageConfig> for S3Config {
129    type Error = crate::Error;
130
131    fn try_from(config: &StorageConfig) -> Result<Self> {
132        let props = config.props();
133
134        let mut cfg = S3Config::default();
135
136        if let Some(endpoint) = props.get(S3_ENDPOINT) {
137            cfg.endpoint = Some(endpoint.clone());
138        }
139        if let Some(access_key_id) = props.get(S3_ACCESS_KEY_ID) {
140            cfg.access_key_id = Some(access_key_id.clone());
141        }
142        if let Some(secret_access_key) = props.get(S3_SECRET_ACCESS_KEY) {
143            cfg.secret_access_key = Some(secret_access_key.clone());
144        }
145        if let Some(session_token) = props.get(S3_SESSION_TOKEN) {
146            cfg.session_token = Some(session_token.clone());
147        }
148        if let Some(region) = props.get(S3_REGION) {
149            cfg.region = Some(region.clone());
150        }
151        // CLIENT_REGION takes precedence over S3_REGION
152        if let Some(region) = props.get(CLIENT_REGION) {
153            cfg.region = Some(region.clone());
154        }
155        if let Some(path_style_access) = props.get(S3_PATH_STYLE_ACCESS) {
156            cfg.enable_virtual_host_style = !is_truthy(path_style_access.to_lowercase().as_str());
157        }
158        if let Some(arn) = props.get(S3_ASSUME_ROLE_ARN) {
159            cfg.role_arn = Some(arn.clone());
160        }
161        if let Some(external_id) = props.get(S3_ASSUME_ROLE_EXTERNAL_ID) {
162            cfg.external_id = Some(external_id.clone());
163        }
164        if let Some(session_name) = props.get(S3_ASSUME_ROLE_SESSION_NAME) {
165            cfg.role_session_name = Some(session_name.clone());
166        }
167
168        // Handle SSE configuration
169        let s3_sse_key = props.get(S3_SSE_KEY).cloned();
170        if let Some(sse_type) = props.get(S3_SSE_TYPE) {
171            match sse_type.to_lowercase().as_str() {
172                // No Server Side Encryption
173                "none" => {}
174                // S3 SSE-S3 encryption (S3 managed keys). https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingServerSideEncryption.html
175                "s3" => {
176                    cfg.server_side_encryption = Some("AES256".to_string());
177                }
178                // S3 SSE KMS, either using default or custom KMS key. https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingKMSEncryption.html
179                "kms" => {
180                    cfg.server_side_encryption = Some("aws:kms".to_string());
181                    cfg.server_side_encryption_aws_kms_key_id = s3_sse_key;
182                }
183                // S3 SSE-C, using customer managed keys. https://docs.aws.amazon.com/AmazonS3/latest/dev/ServerSideEncryptionCustomerKeys.html
184                "custom" => {
185                    cfg.server_side_encryption_customer_algorithm = Some("AES256".to_string());
186                    cfg.server_side_encryption_customer_key = s3_sse_key;
187                    cfg.server_side_encryption_customer_key_md5 = props.get(S3_SSE_MD5).cloned();
188                }
189                _ => {
190                    return Err(Error::new(
191                        ErrorKind::DataInvalid,
192                        format!(
193                            "Invalid {S3_SSE_TYPE}: {sse_type}. Expected one of (custom, kms, s3, none)"
194                        ),
195                    ));
196                }
197            }
198        }
199
200        if let Some(allow_anonymous) = props.get(S3_ALLOW_ANONYMOUS)
201            && is_truthy(allow_anonymous.to_lowercase().as_str())
202        {
203            cfg.allow_anonymous = true;
204        }
205        if let Some(disable_ec2_metadata) = props.get(S3_DISABLE_EC2_METADATA)
206            && is_truthy(disable_ec2_metadata.to_lowercase().as_str())
207        {
208            cfg.disable_ec2_metadata = true;
209        }
210        if let Some(disable_config_load) = props.get(S3_DISABLE_CONFIG_LOAD)
211            && is_truthy(disable_config_load.to_lowercase().as_str())
212        {
213            cfg.disable_config_load = true;
214        }
215
216        Ok(cfg)
217    }
218}
219
#[cfg(test)]
mod tests {
    use super::*;

    /// The builder accepts `&str` for the optional string fields and wraps them in `Some`.
    #[test]
    fn test_s3_config_builder() {
        let config = S3Config::builder()
            .region("us-east-1")
            .access_key_id("my-access-key")
            .secret_access_key("my-secret-key")
            .endpoint("http://localhost:9000")
            .build();

        assert_eq!(config.region.as_deref(), Some("us-east-1"));
        assert_eq!(config.access_key_id.as_deref(), Some("my-access-key"));
        assert_eq!(config.secret_access_key.as_deref(), Some("my-secret-key"));
        assert_eq!(config.endpoint.as_deref(), Some("http://localhost:9000"));
    }

    /// Plain string properties are copied verbatim into the matching fields.
    #[test]
    fn test_s3_config_from_storage_config() {
        let storage_config = StorageConfig::new()
            .with_prop(S3_REGION, "us-east-1")
            .with_prop(S3_ACCESS_KEY_ID, "my-access-key")
            .with_prop(S3_SECRET_ACCESS_KEY, "my-secret-key")
            .with_prop(S3_ENDPOINT, "http://localhost:9000");

        let s3_config = S3Config::try_from(&storage_config).unwrap();

        assert_eq!(s3_config.region.as_deref(), Some("us-east-1"));
        assert_eq!(s3_config.access_key_id.as_deref(), Some("my-access-key"));
        assert_eq!(
            s3_config.secret_access_key.as_deref(),
            Some("my-secret-key")
        );
        assert_eq!(s3_config.endpoint.as_deref(), Some("http://localhost:9000"));
    }

    #[test]
    fn test_s3_config_client_region_precedence() {
        let storage_config = StorageConfig::new()
            .with_prop(S3_REGION, "us-east-1")
            .with_prop(CLIENT_REGION, "eu-west-1");

        let s3_config = S3Config::try_from(&storage_config).unwrap();

        // CLIENT_REGION should take precedence
        assert_eq!(s3_config.region.as_deref(), Some("eu-west-1"));
    }

    #[test]
    fn test_s3_config_path_style_access() {
        // path style access = true means virtual host style = false
        let path_style_on = StorageConfig::new().with_prop(S3_PATH_STYLE_ACCESS, "true");
        let s3_config = S3Config::try_from(&path_style_on).unwrap();
        assert!(!s3_config.enable_virtual_host_style);

        // `false` is also the field's default, so the assertion above would pass even if the
        // property were never read. The inverse direction proves the value is really parsed.
        let path_style_off = StorageConfig::new().with_prop(S3_PATH_STYLE_ACCESS, "false");
        let s3_config = S3Config::try_from(&path_style_off).unwrap();
        assert!(s3_config.enable_virtual_host_style);
    }

    #[test]
    fn test_s3_config_sse_kms() {
        let storage_config = StorageConfig::new()
            .with_prop(S3_SSE_TYPE, "kms")
            .with_prop(S3_SSE_KEY, "my-kms-key-id");

        let s3_config = S3Config::try_from(&storage_config).unwrap();

        assert_eq!(s3_config.server_side_encryption.as_deref(), Some("aws:kms"));
        assert_eq!(
            s3_config.server_side_encryption_aws_kms_key_id.as_deref(),
            Some("my-kms-key-id")
        );
    }

    /// SSE-C populates the customer-key fields and leaves the managed-key fields unset.
    #[test]
    fn test_s3_config_sse_custom() {
        let storage_config = StorageConfig::new()
            .with_prop(S3_SSE_TYPE, "custom")
            .with_prop(S3_SSE_KEY, "my-customer-key")
            .with_prop(S3_SSE_MD5, "my-customer-key-md5");

        let s3_config = S3Config::try_from(&storage_config).unwrap();

        assert_eq!(s3_config.server_side_encryption, None);
        assert_eq!(s3_config.server_side_encryption_aws_kms_key_id, None);
        assert_eq!(
            s3_config
                .server_side_encryption_customer_algorithm
                .as_deref(),
            Some("AES256")
        );
        assert_eq!(
            s3_config.server_side_encryption_customer_key.as_deref(),
            Some("my-customer-key")
        );
        assert_eq!(
            s3_config.server_side_encryption_customer_key_md5.as_deref(),
            Some("my-customer-key-md5")
        );
    }

    /// An unrecognised SSE type is rejected instead of being silently ignored.
    #[test]
    fn test_s3_config_sse_invalid_type() {
        let storage_config = StorageConfig::new().with_prop(S3_SSE_TYPE, "not-a-real-sse-type");

        assert!(S3Config::try_from(&storage_config).is_err());
    }

    #[test]
    fn test_s3_config_allow_anonymous() {
        let storage_config = StorageConfig::new().with_prop(S3_ALLOW_ANONYMOUS, "true");

        let s3_config = S3Config::try_from(&storage_config).unwrap();

        assert!(s3_config.allow_anonymous);
    }
}
303}