iceberg/encryption/
crypto.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Core cryptographic operations for Iceberg encryption.
19
20use std::fmt;
21use std::str::FromStr;
22
23use aes_gcm::aead::generic_array::typenum::U12;
24use aes_gcm::aead::rand_core::RngCore;
25use aes_gcm::aead::{Aead, AeadCore, KeyInit, OsRng, Payload};
26use aes_gcm::{Aes128Gcm, Aes256Gcm, AesGcm, Nonce};
27use zeroize::Zeroizing;
28
29/// AES-192-GCM with 96-bit nonce. Not provided by `aes-gcm` but constructible
30/// from the underlying primitives, same as `Aes128Gcm` and `Aes256Gcm`.
31type Aes192Gcm = AesGcm<aes_gcm::aes::Aes192, U12>;
32
33use crate::{Error, ErrorKind, Result};
34
35/// Wrapper for sensitive byte data (encryption keys, DEKs, etc.) that:
36/// - Zeroizes memory on drop
37/// - Redacts content in [`Debug`] and [`Display`] output
38/// - Provides only `&[u8]` access via [`as_bytes()`](Self::as_bytes)
39/// - Uses `Box<[u8]>` (immutable boxed slice) since key bytes never grow
40///
41/// Use this type for any struct field that holds plaintext key material.
42/// Because its [`Debug`] impl always prints `[N bytes REDACTED]`, structs
43/// containing `SensitiveBytes` can safely derive or implement `Debug`
44/// without risk of leaking key material.
45#[derive(Clone, PartialEq, Eq)]
46struct SensitiveBytes(Zeroizing<Box<[u8]>>);
47
48impl SensitiveBytes {
49    /// Wraps the given bytes as sensitive material.
50    pub fn new(bytes: impl Into<Box<[u8]>>) -> Self {
51        Self(Zeroizing::new(bytes.into()))
52    }
53
54    /// Returns the underlying bytes.
55    pub fn as_bytes(&self) -> &[u8] {
56        &self.0
57    }
58
59    /// Returns the number of bytes.
60    #[allow(dead_code)] // Encryption work is ongoing so currently unused
61    pub fn len(&self) -> usize {
62        self.0.len()
63    }
64
65    /// Returns `true` if the byte slice is empty.
66    #[allow(dead_code)] // Encryption work is ongoing so currently unused
67    pub fn is_empty(&self) -> bool {
68        self.0.is_empty()
69    }
70}
71
72impl fmt::Debug for SensitiveBytes {
73    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
74        write!(f, "[{} bytes REDACTED]", self.0.len())
75    }
76}
77
78impl fmt::Display for SensitiveBytes {
79    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
80        write!(f, "[{} bytes REDACTED]", self.0.len())
81    }
82}
83
/// AES key sizes accepted for AES-GCM encryption.
///
/// The Iceberg spec allows 128-, 192-, and 256-bit keys for AES-GCM.
/// See: <https://iceberg.apache.org/gcm-stream-spec/#goals>
///
/// Each variant's discriminant is its key width in bits.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AesKeySize {
    /// AES with a 128-bit key (16 bytes).
    Bits128 = 128,
    /// AES with a 192-bit key (24 bytes).
    Bits192 = 192,
    /// AES with a 256-bit key (32 bytes).
    Bits256 = 256,
}
97
98impl AesKeySize {
99    /// Returns the key length in bytes for this key size.
100    pub fn key_length(&self) -> usize {
101        match self {
102            Self::Bits128 => 16,
103            Self::Bits192 => 24,
104            Self::Bits256 => 32,
105        }
106    }
107
108    /// Returns the key size for a given DEK length in bytes.
109    ///
110    /// Matches Java's `encryption.data-key-length` property semantics:
111    /// 16 → 128-bit, 24 → 192-bit, 32 → 256-bit.
112    pub fn from_key_length(len: usize) -> Result<Self> {
113        match len {
114            16 => Ok(Self::Bits128),
115            24 => Ok(Self::Bits192),
116            32 => Ok(Self::Bits256),
117            _ => Err(Error::new(
118                ErrorKind::FeatureUnsupported,
119                format!("Unsupported data key length: {len} (must be 16, 24, or 32)"),
120            )),
121        }
122    }
123}
124
125impl FromStr for AesKeySize {
126    type Err = Error;
127
128    fn from_str(s: &str) -> Result<Self> {
129        match s {
130            "128" | "AES_GCM_128" | "AES128_GCM" => Ok(Self::Bits128),
131            "192" | "AES_GCM_192" | "AES192_GCM" => Ok(Self::Bits192),
132            "256" | "AES_GCM_256" | "AES256_GCM" => Ok(Self::Bits256),
133            _ => Err(Error::new(
134                ErrorKind::FeatureUnsupported,
135                format!("Unsupported AES key size: {s}"),
136            )),
137        }
138    }
139}
140
141/// A secure encryption key that zeroes its memory on drop.
142pub struct SecureKey {
143    key: SensitiveBytes,
144    key_size: AesKeySize,
145}
146
147impl SecureKey {
148    /// Creates a new secure key with the specified key size.
149    ///
150    /// # Errors
151    /// Returns an error if the key length doesn't match the key size requirements.
152    pub fn new(key: &[u8]) -> Result<Self> {
153        let key_size = AesKeySize::from_key_length(key.len())?;
154        Ok(Self {
155            key: SensitiveBytes::new(key),
156            key_size,
157        })
158    }
159
160    /// Generates a new random key for the specified key size.
161    pub fn generate(key_size: AesKeySize) -> Self {
162        let mut key = vec![0u8; key_size.key_length()];
163        OsRng.fill_bytes(&mut key);
164        Self {
165            key: SensitiveBytes::new(key),
166            key_size,
167        }
168    }
169
170    /// Returns the AES key size.
171    pub fn key_size(&self) -> AesKeySize {
172        self.key_size
173    }
174
175    /// Returns the key bytes.
176    pub fn as_bytes(&self) -> &[u8] {
177        self.key.as_bytes()
178    }
179}
180
181/// AES-GCM cipher for encrypting and decrypting data.
182pub struct AesGcmCipher {
183    key: SensitiveBytes,
184    key_size: AesKeySize,
185}
186
187impl AesGcmCipher {
188    /// AES-GCM nonce length in bytes (96 bits).
189    pub const NONCE_LEN: usize = 12;
190    /// AES-GCM authentication tag length in bytes (128 bits).
191    pub const TAG_LEN: usize = 16;
192
193    /// Creates a new cipher with the specified key.
194    pub fn new(key: SecureKey) -> Self {
195        Self {
196            key: SensitiveBytes::new(key.as_bytes()),
197            key_size: key.key_size(),
198        }
199    }
200
201    /// Encrypts data using AES-GCM.
202    ///
203    /// # Arguments
204    /// * `plaintext` - The data to encrypt
205    /// * `aad` - Additional authenticated data (optional)
206    ///
207    /// # Returns
208    /// The encrypted data in the format: [12-byte nonce][ciphertext][16-byte auth tag]
209    /// This matches the Java implementation format for compatibility.
210    pub fn encrypt(&self, plaintext: &[u8], aad: Option<&[u8]>) -> Result<Vec<u8>> {
211        match self.key_size {
212            AesKeySize::Bits128 => {
213                encrypt_aes_gcm::<Aes128Gcm>(self.key.as_bytes(), plaintext, aad)
214            }
215            AesKeySize::Bits192 => {
216                encrypt_aes_gcm::<Aes192Gcm>(self.key.as_bytes(), plaintext, aad)
217            }
218            AesKeySize::Bits256 => {
219                encrypt_aes_gcm::<Aes256Gcm>(self.key.as_bytes(), plaintext, aad)
220            }
221        }
222    }
223
224    /// Decrypts data using AES-GCM.
225    ///
226    /// # Arguments
227    /// * `ciphertext` - The encrypted data with format: [12-byte nonce][encrypted data][16-byte auth tag]
228    /// * `aad` - Additional authenticated data (must match encryption)
229    ///
230    /// # Returns
231    /// The decrypted plaintext.
232    pub fn decrypt(&self, ciphertext: &[u8], aad: Option<&[u8]>) -> Result<Vec<u8>> {
233        if ciphertext.len() < Self::NONCE_LEN + Self::TAG_LEN {
234            return Err(Error::new(
235                ErrorKind::DataInvalid,
236                format!(
237                    "Ciphertext too short: expected at least {} bytes, got {}",
238                    Self::NONCE_LEN + Self::TAG_LEN,
239                    ciphertext.len()
240                ),
241            ));
242        }
243
244        match self.key_size {
245            AesKeySize::Bits128 => {
246                decrypt_aes_gcm::<Aes128Gcm>(self.key.as_bytes(), ciphertext, aad)
247            }
248            AesKeySize::Bits192 => {
249                decrypt_aes_gcm::<Aes192Gcm>(self.key.as_bytes(), ciphertext, aad)
250            }
251            AesKeySize::Bits256 => {
252                decrypt_aes_gcm::<Aes256Gcm>(self.key.as_bytes(), ciphertext, aad)
253            }
254        }
255    }
256}
257
258fn encrypt_aes_gcm<C>(key_bytes: &[u8], plaintext: &[u8], aad: Option<&[u8]>) -> Result<Vec<u8>>
259where C: Aead + AeadCore + KeyInit {
260    let cipher = C::new_from_slice(key_bytes).map_err(|e| {
261        Error::new(ErrorKind::DataInvalid, "Invalid AES key").with_source(anyhow::anyhow!(e))
262    })?;
263    let nonce = C::generate_nonce(&mut OsRng);
264
265    let ciphertext = if let Some(aad) = aad {
266        cipher.encrypt(&nonce, Payload {
267            msg: plaintext,
268            aad,
269        })
270    } else {
271        cipher.encrypt(&nonce, plaintext.as_ref())
272    }
273    .map_err(|e| {
274        Error::new(ErrorKind::Unexpected, "AES-GCM encryption failed")
275            .with_source(anyhow::anyhow!(e))
276    })?;
277
278    // Prepend nonce to ciphertext (Java compatible format)
279    let mut result = Vec::with_capacity(nonce.len() + ciphertext.len());
280    result.extend_from_slice(&nonce);
281    result.extend_from_slice(&ciphertext);
282    Ok(result)
283}
284
285fn decrypt_aes_gcm<C>(key_bytes: &[u8], ciphertext: &[u8], aad: Option<&[u8]>) -> Result<Vec<u8>>
286where C: Aead + AeadCore + KeyInit {
287    let cipher = C::new_from_slice(key_bytes).map_err(|e| {
288        Error::new(ErrorKind::DataInvalid, "Invalid AES key").with_source(anyhow::anyhow!(e))
289    })?;
290
291    let nonce = Nonce::from_slice(&ciphertext[..AesGcmCipher::NONCE_LEN]);
292    let encrypted_data = &ciphertext[AesGcmCipher::NONCE_LEN..];
293
294    let plaintext = if let Some(aad) = aad {
295        cipher.decrypt(nonce, Payload {
296            msg: encrypted_data,
297            aad,
298        })
299    } else {
300        cipher.decrypt(nonce, encrypted_data)
301    }
302    .map_err(|e| {
303        Error::new(ErrorKind::Unexpected, "AES-GCM decryption failed")
304            .with_source(anyhow::anyhow!(e))
305    })?;
306
307    Ok(plaintext)
308}
309
#[cfg(test)]
mod tests {
    use super::*;

    /// Message shared by the round-trip tests.
    const PLAINTEXT: &[u8] = b"Hello, Iceberg encryption!";
    /// Associated data shared by the round-trip tests.
    const AAD: &[u8] = b"additional authenticated data";

    /// Builds a cipher around a freshly generated random key of `key_size`.
    fn random_cipher(key_size: AesKeySize) -> AesGcmCipher {
        AesGcmCipher::new(SecureKey::generate(key_size))
    }

    /// Encrypts then decrypts with the same AAD, asserts the plaintext is
    /// recovered, and hands back the ciphertext for further checks.
    fn assert_roundtrip(cipher: &AesGcmCipher, plaintext: &[u8], aad: Option<&[u8]>) -> Vec<u8> {
        let ciphertext = cipher.encrypt(plaintext, aad).unwrap();
        let decrypted = cipher.decrypt(&ciphertext, aad).unwrap();
        assert_eq!(decrypted, plaintext);
        ciphertext
    }

    /// Round-trips with AAD, then checks that a different AAD is rejected.
    fn assert_aad_is_authenticated(cipher: &AesGcmCipher) {
        let ciphertext = assert_roundtrip(cipher, PLAINTEXT, Some(AAD));
        assert!(cipher.decrypt(&ciphertext, Some(b"wrong aad")).is_err());
    }

    #[test]
    fn test_aes_key_size() {
        // Size <-> byte-length mapping, in both directions.
        let lengths = [
            (AesKeySize::Bits128, 16),
            (AesKeySize::Bits192, 24),
            (AesKeySize::Bits256, 32),
        ];
        for (size, byte_len) in lengths {
            assert_eq!(size.key_length(), byte_len);
            assert_eq!(AesKeySize::from_key_length(byte_len).unwrap(), size);
        }
        assert!(AesKeySize::from_key_length(8).is_err());

        // String parsing.
        let spellings = [
            ("128", AesKeySize::Bits128),
            ("AES_GCM_128", AesKeySize::Bits128),
            ("AES_GCM_256", AesKeySize::Bits256),
        ];
        for (text, expected) in spellings {
            assert_eq!(text.parse::<AesKeySize>().unwrap(), expected);
        }
        assert!("INVALID".parse::<AesKeySize>().is_err());
    }

    #[test]
    fn test_secure_key() {
        // Generated keys have the requested size.
        let generated = SecureKey::generate(AesKeySize::Bits128);
        assert_eq!(generated.as_bytes().len(), 16);
        assert_eq!(generated.key_size(), AesKeySize::Bits128);

        // Supplied keys are validated by length.
        assert!(SecureKey::new(&[0u8; 16]).is_ok());
        assert!(SecureKey::new(&[0u8; 33]).is_err());
    }

    #[test]
    fn test_aes128_gcm_encryption_roundtrip() {
        let cipher = random_cipher(AesKeySize::Bits128);

        // Without AAD
        let ciphertext = assert_roundtrip(&cipher, PLAINTEXT, None);
        assert!(ciphertext.len() > PLAINTEXT.len() + 12); // nonce + tag
        assert_ne!(&ciphertext[12..], PLAINTEXT); // encrypted portion differs

        // With AAD, and with the wrong AAD
        assert_aad_is_authenticated(&cipher);
    }

    #[test]
    fn test_aes192_gcm_encryption_roundtrip() {
        let cipher = random_cipher(AesKeySize::Bits192);

        // Without AAD
        assert_roundtrip(&cipher, PLAINTEXT, None);

        // With AAD, and with the wrong AAD
        assert_aad_is_authenticated(&cipher);
    }

    #[test]
    fn test_aes256_gcm_encryption_roundtrip() {
        let cipher = random_cipher(AesKeySize::Bits256);

        // Without AAD
        assert_roundtrip(&cipher, PLAINTEXT, None);

        // With AAD, and with the wrong AAD
        assert_aad_is_authenticated(&cipher);
    }

    #[test]
    fn test_cross_key_size_incompatibility() {
        let cipher128 = random_cipher(AesKeySize::Bits128);
        let cipher256 = random_cipher(AesKeySize::Bits256);

        // Ciphertext from 128-bit key should not decrypt with 256-bit key
        let ciphertext = cipher128.encrypt(b"Cross-key test", None).unwrap();
        assert!(cipher256.decrypt(&ciphertext, None).is_err());
    }

    #[test]
    fn test_encryption_with_empty_plaintext() {
        let cipher = random_cipher(AesKeySize::Bits128);

        let plaintext: &[u8] = b"";
        let ciphertext = cipher.encrypt(plaintext, None).unwrap();

        // Even empty plaintext produces nonce + tag
        assert_eq!(ciphertext.len(), 12 + 16); // 12-byte nonce + 16-byte tag

        let decrypted = cipher.decrypt(&ciphertext, None).unwrap();
        assert_eq!(decrypted, plaintext);
    }

    #[test]
    fn test_decryption_with_tampered_ciphertext() {
        let cipher = random_cipher(AesKeySize::Bits128);
        let mut ciphertext = cipher.encrypt(b"Sensitive data", None).unwrap();

        // Tamper with the encrypted portion (first byte after the nonce)
        if let Some(byte) = ciphertext.get_mut(12) {
            *byte ^= 0xFF;
        }

        // Decryption should fail due to authentication tag mismatch
        assert!(cipher.decrypt(&ciphertext, None).is_err());
    }

    #[test]
    fn test_different_keys_produce_different_ciphertexts() {
        let cipher1 = random_cipher(AesKeySize::Bits128);
        let cipher2 = random_cipher(AesKeySize::Bits128);

        let plaintext = b"Same plaintext";
        let ciphertext1 = cipher1.encrypt(plaintext, None).unwrap();
        let ciphertext2 = cipher2.encrypt(plaintext, None).unwrap();

        // Different keys should produce different ciphertexts (comparing the encrypted portion)
        // Note: The nonces will also be different, but we're mainly interested in the encrypted data
        assert_ne!(&ciphertext1[12..], &ciphertext2[12..]);
    }

    #[test]
    fn test_ciphertext_format_java_compatible() {
        // Test that our ciphertext format matches Java's: [12-byte nonce][ciphertext][16-byte tag]
        let cipher = random_cipher(AesKeySize::Bits128);

        let plaintext = b"Test data";
        let ciphertext = cipher.encrypt(plaintext, None).unwrap();

        // Format should be: [12-byte nonce][encrypted_data + 16-byte GCM tag]
        assert_eq!(
            ciphertext.len(),
            12 + plaintext.len() + 16,
            "Ciphertext should be nonce + plaintext + tag length"
        );

        // The nonce occupies the first 12 bytes; the rest is encrypted data + tag
        let (nonce, encrypted_with_tag) = ciphertext.split_at(12);
        assert_eq!(nonce.len(), 12, "Nonce should be 12 bytes");
        assert_eq!(
            encrypted_with_tag.len(),
            plaintext.len() + 16,
            "Encrypted portion should be plaintext length + 16-byte tag"
        );
    }
}