iceberg/encryption/
crypto.rs

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

//! Core cryptographic operations for Iceberg encryption.
19
20use std::fmt;
21use std::str::FromStr;
22
23use aes_gcm::aead::generic_array::typenum::U12;
24use aes_gcm::aead::rand_core::RngCore;
25use aes_gcm::aead::{Aead, AeadCore, KeyInit, OsRng, Payload};
26use aes_gcm::{Aes128Gcm, Aes256Gcm, AesGcm, Nonce};
27use zeroize::Zeroizing;
28
29/// AES-192-GCM with 96-bit nonce. Not provided by `aes-gcm` but constructible
30/// from the underlying primitives, same as `Aes128Gcm` and `Aes256Gcm`.
31type Aes192Gcm = AesGcm<aes_gcm::aes::Aes192, U12>;
32
33use crate::{Error, ErrorKind, Result};
34
35/// Wrapper for sensitive byte data (encryption keys, DEKs, etc.) that:
36/// - Zeroizes memory on drop
37/// - Redacts content in [`Debug`] and [`Display`] output
38/// - Provides only `&[u8]` access via [`as_bytes()`](Self::as_bytes)
39/// - Uses `Box<[u8]>` (immutable boxed slice) since key bytes never grow
40///
41/// Use this type for any struct field that holds plaintext key material.
42/// Because its [`Debug`] impl always prints `[N bytes REDACTED]`, structs
43/// containing `SensitiveBytes` can safely derive or implement `Debug`
44/// without risk of leaking key material.
45#[derive(Clone, PartialEq, Eq)]
46pub struct SensitiveBytes(Zeroizing<Box<[u8]>>);
47
48impl SensitiveBytes {
49    /// Wraps the given bytes as sensitive material.
50    pub fn new(bytes: impl Into<Box<[u8]>>) -> Self {
51        Self(Zeroizing::new(bytes.into()))
52    }
53
54    /// Returns the underlying bytes.
55    pub fn as_bytes(&self) -> &[u8] {
56        &self.0
57    }
58
59    /// Returns the number of bytes.
60    pub fn len(&self) -> usize {
61        self.0.len()
62    }
63
64    /// Returns `true` if the byte slice is empty.
65    pub fn is_empty(&self) -> bool {
66        self.0.is_empty()
67    }
68}
69
70impl fmt::Debug for SensitiveBytes {
71    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
72        write!(f, "[{} bytes REDACTED]", self.0.len())
73    }
74}
75
76impl fmt::Display for SensitiveBytes {
77    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
78        write!(f, "[{} bytes REDACTED]", self.0.len())
79    }
80}
81
/// AES key sizes accepted for AES-GCM encryption.
///
/// Per the Iceberg spec, AES-GCM keys may be 128, 192, or 256 bits long.
/// See: <https://iceberg.apache.org/gcm-stream-spec/#goals>
///
/// Each variant's discriminant is its key size in bits.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum AesKeySize {
    /// AES-128, i.e. a 16-byte key. This is the default per the Iceberg spec.
    #[default]
    Bits128 = 128,
    /// AES-192, i.e. a 24-byte key.
    Bits192 = 192,
    /// AES-256, i.e. a 32-byte key.
    Bits256 = 256,
}
96
97impl AesKeySize {
98    /// Returns the key length in bytes for this key size.
99    pub fn key_length(&self) -> usize {
100        match self {
101            Self::Bits128 => 16,
102            Self::Bits192 => 24,
103            Self::Bits256 => 32,
104        }
105    }
106
107    /// Returns the key size for a given DEK length in bytes.
108    ///
109    /// Matches Java's `encryption.data-key-length` property semantics:
110    /// 16 → 128-bit, 24 → 192-bit, 32 → 256-bit.
111    pub fn from_key_length(len: usize) -> Result<Self> {
112        match len {
113            16 => Ok(Self::Bits128),
114            24 => Ok(Self::Bits192),
115            32 => Ok(Self::Bits256),
116            _ => Err(Error::new(
117                ErrorKind::FeatureUnsupported,
118                format!("Unsupported data key length: {len} (must be 16, 24, or 32)"),
119            )),
120        }
121    }
122}
123
124impl FromStr for AesKeySize {
125    type Err = Error;
126
127    fn from_str(s: &str) -> Result<Self> {
128        match s {
129            "128" | "AES_GCM_128" | "AES128_GCM" => Ok(Self::Bits128),
130            "192" | "AES_GCM_192" | "AES192_GCM" => Ok(Self::Bits192),
131            "256" | "AES_GCM_256" | "AES256_GCM" => Ok(Self::Bits256),
132            _ => Err(Error::new(
133                ErrorKind::FeatureUnsupported,
134                format!("Unsupported AES key size: {s}"),
135            )),
136        }
137    }
138}
139
140/// A secure encryption key that zeroes its memory on drop.
141pub struct SecureKey {
142    key: SensitiveBytes,
143    key_size: AesKeySize,
144}
145
146impl SecureKey {
147    /// Creates a new secure key with the specified key size.
148    ///
149    /// # Errors
150    /// Returns an error if the key length doesn't match the key size requirements.
151    pub fn new(key: &[u8]) -> Result<Self> {
152        let key_size = AesKeySize::from_key_length(key.len())?;
153        Ok(Self {
154            key: SensitiveBytes::new(key),
155            key_size,
156        })
157    }
158
159    /// Generates a new random key for the specified key size.
160    pub fn generate(key_size: AesKeySize) -> Self {
161        let mut key = vec![0u8; key_size.key_length()];
162        OsRng.fill_bytes(&mut key);
163        Self {
164            key: SensitiveBytes::new(key),
165            key_size,
166        }
167    }
168
169    /// Returns the AES key size.
170    pub fn key_size(&self) -> AesKeySize {
171        self.key_size
172    }
173
174    /// Returns the key bytes.
175    pub fn as_bytes(&self) -> &[u8] {
176        self.key.as_bytes()
177    }
178}
179
180impl TryFrom<SensitiveBytes> for SecureKey {
181    type Error = Error;
182
183    fn try_from(key: SensitiveBytes) -> Result<Self> {
184        let key_size = AesKeySize::from_key_length(key.len())?;
185        Ok(Self { key, key_size })
186    }
187}
188
189/// AES-GCM cipher for encrypting and decrypting data.
190pub struct AesGcmCipher {
191    key: SensitiveBytes,
192    key_size: AesKeySize,
193}
194
195impl AesGcmCipher {
196    /// AES-GCM nonce length in bytes (96 bits).
197    pub const NONCE_LEN: usize = 12;
198    /// AES-GCM authentication tag length in bytes (128 bits).
199    pub const TAG_LEN: usize = 16;
200
201    /// Creates a new cipher with the specified key.
202    pub fn new(key: SecureKey) -> Self {
203        Self {
204            key: SensitiveBytes::new(key.as_bytes()),
205            key_size: key.key_size(),
206        }
207    }
208
209    /// Encrypts data using AES-GCM.
210    ///
211    /// # Arguments
212    /// * `plaintext` - The data to encrypt
213    /// * `aad` - Additional authenticated data (optional)
214    ///
215    /// # Returns
216    /// The encrypted data in the format: [12-byte nonce][ciphertext][16-byte auth tag]
217    /// This matches the Java implementation format for compatibility.
218    pub fn encrypt(&self, plaintext: &[u8], aad: Option<&[u8]>) -> Result<Vec<u8>> {
219        match self.key_size {
220            AesKeySize::Bits128 => {
221                encrypt_aes_gcm::<Aes128Gcm>(self.key.as_bytes(), plaintext, aad)
222            }
223            AesKeySize::Bits192 => {
224                encrypt_aes_gcm::<Aes192Gcm>(self.key.as_bytes(), plaintext, aad)
225            }
226            AesKeySize::Bits256 => {
227                encrypt_aes_gcm::<Aes256Gcm>(self.key.as_bytes(), plaintext, aad)
228            }
229        }
230    }
231
232    /// Decrypts data using AES-GCM.
233    ///
234    /// # Arguments
235    /// * `ciphertext` - The encrypted data with format: [12-byte nonce][encrypted data][16-byte auth tag]
236    /// * `aad` - Additional authenticated data (must match encryption)
237    ///
238    /// # Returns
239    /// The decrypted plaintext.
240    pub fn decrypt(&self, ciphertext: &[u8], aad: Option<&[u8]>) -> Result<Vec<u8>> {
241        if ciphertext.len() < Self::NONCE_LEN + Self::TAG_LEN {
242            return Err(Error::new(
243                ErrorKind::DataInvalid,
244                format!(
245                    "Ciphertext too short: expected at least {} bytes, got {}",
246                    Self::NONCE_LEN + Self::TAG_LEN,
247                    ciphertext.len()
248                ),
249            ));
250        }
251
252        match self.key_size {
253            AesKeySize::Bits128 => {
254                decrypt_aes_gcm::<Aes128Gcm>(self.key.as_bytes(), ciphertext, aad)
255            }
256            AesKeySize::Bits192 => {
257                decrypt_aes_gcm::<Aes192Gcm>(self.key.as_bytes(), ciphertext, aad)
258            }
259            AesKeySize::Bits256 => {
260                decrypt_aes_gcm::<Aes256Gcm>(self.key.as_bytes(), ciphertext, aad)
261            }
262        }
263    }
264}
265
266fn encrypt_aes_gcm<C>(key_bytes: &[u8], plaintext: &[u8], aad: Option<&[u8]>) -> Result<Vec<u8>>
267where C: Aead + AeadCore + KeyInit {
268    let cipher = C::new_from_slice(key_bytes).map_err(|e| {
269        Error::new(ErrorKind::DataInvalid, "Invalid AES key").with_source(anyhow::anyhow!(e))
270    })?;
271    let nonce = C::generate_nonce(&mut OsRng);
272
273    let ciphertext = if let Some(aad) = aad {
274        cipher.encrypt(&nonce, Payload {
275            msg: plaintext,
276            aad,
277        })
278    } else {
279        cipher.encrypt(&nonce, plaintext.as_ref())
280    }
281    .map_err(|e| {
282        Error::new(ErrorKind::Unexpected, "AES-GCM encryption failed")
283            .with_source(anyhow::anyhow!(e))
284    })?;
285
286    // Prepend nonce to ciphertext (Java compatible format)
287    let mut result = Vec::with_capacity(nonce.len() + ciphertext.len());
288    result.extend_from_slice(&nonce);
289    result.extend_from_slice(&ciphertext);
290    Ok(result)
291}
292
293fn decrypt_aes_gcm<C>(key_bytes: &[u8], ciphertext: &[u8], aad: Option<&[u8]>) -> Result<Vec<u8>>
294where C: Aead + AeadCore + KeyInit {
295    let cipher = C::new_from_slice(key_bytes).map_err(|e| {
296        Error::new(ErrorKind::DataInvalid, "Invalid AES key").with_source(anyhow::anyhow!(e))
297    })?;
298
299    let nonce = Nonce::from_slice(&ciphertext[..AesGcmCipher::NONCE_LEN]);
300    let encrypted_data = &ciphertext[AesGcmCipher::NONCE_LEN..];
301
302    let plaintext = if let Some(aad) = aad {
303        cipher.decrypt(nonce, Payload {
304            msg: encrypted_data,
305            aad,
306        })
307    } else {
308        cipher.decrypt(nonce, encrypted_data)
309    }
310    .map_err(|e| {
311        Error::new(ErrorKind::Unexpected, "AES-GCM decryption failed")
312            .with_source(anyhow::anyhow!(e))
313    })?;
314
315    Ok(plaintext)
316}
317
#[cfg(test)]
mod tests {
    use super::*;

    const NONCE_LEN: usize = AesGcmCipher::NONCE_LEN;
    const TAG_LEN: usize = AesGcmCipher::TAG_LEN;

    /// Builds a cipher around a freshly generated key of the given size.
    fn cipher_for(key_size: AesKeySize) -> AesGcmCipher {
        AesGcmCipher::new(SecureKey::generate(key_size))
    }

    /// Round-trips a fixed message through a fresh key of `key_size`, with
    /// and without AAD, and checks that a mismatched AAD is rejected.
    fn assert_roundtrip(key_size: AesKeySize) {
        let cipher = cipher_for(key_size);

        let plaintext = b"Hello, Iceberg encryption!";
        let aad = b"additional authenticated data";

        // Without AAD
        let ciphertext = cipher.encrypt(plaintext, None).unwrap();
        assert!(ciphertext.len() > plaintext.len() + NONCE_LEN); // nonce + tag
        assert_ne!(&ciphertext[NONCE_LEN..], plaintext); // encrypted portion differs

        let decrypted = cipher.decrypt(&ciphertext, None).unwrap();
        assert_eq!(decrypted, plaintext);

        // With AAD
        let ciphertext = cipher.encrypt(plaintext, Some(aad)).unwrap();
        let decrypted = cipher.decrypt(&ciphertext, Some(aad)).unwrap();
        assert_eq!(decrypted, plaintext);

        // Wrong or missing AAD must fail authentication
        assert!(cipher.decrypt(&ciphertext, Some(b"wrong aad")).is_err());
        assert!(cipher.decrypt(&ciphertext, None).is_err());
    }

    #[test]
    fn test_aes_key_size() {
        assert_eq!(AesKeySize::Bits128.key_length(), 16);
        assert_eq!(AesKeySize::Bits192.key_length(), 24);
        assert_eq!(AesKeySize::Bits256.key_length(), 32);

        assert_eq!(
            AesKeySize::from_key_length(16).unwrap(),
            AesKeySize::Bits128
        );
        assert_eq!(
            AesKeySize::from_key_length(24).unwrap(),
            AesKeySize::Bits192
        );
        assert_eq!(
            AesKeySize::from_key_length(32).unwrap(),
            AesKeySize::Bits256
        );
        assert!(AesKeySize::from_key_length(8).is_err());
        assert!(AesKeySize::from_key_length(0).is_err());

        // Every accepted spelling of every size
        for (name, expected) in [
            ("128", AesKeySize::Bits128),
            ("AES_GCM_128", AesKeySize::Bits128),
            ("AES128_GCM", AesKeySize::Bits128),
            ("192", AesKeySize::Bits192),
            ("AES_GCM_192", AesKeySize::Bits192),
            ("AES192_GCM", AesKeySize::Bits192),
            ("256", AesKeySize::Bits256),
            ("AES_GCM_256", AesKeySize::Bits256),
            ("AES256_GCM", AesKeySize::Bits256),
        ] {
            assert_eq!(AesKeySize::from_str(name).unwrap(), expected);
        }
        assert!(AesKeySize::from_str("INVALID").is_err());
        assert!(AesKeySize::from_str("").is_err());
    }

    #[test]
    fn test_secure_key() {
        // Test key generation
        let key1 = SecureKey::generate(AesKeySize::Bits128);
        assert_eq!(key1.as_bytes().len(), 16);
        assert_eq!(key1.key_size(), AesKeySize::Bits128);

        // Test key creation with validation
        let valid_key = [0u8; 16];
        assert!(SecureKey::new(valid_key.as_slice()).is_ok());

        let invalid_key = [0u8; 33];
        assert!(SecureKey::new(invalid_key.as_slice()).is_err());
    }

    #[test]
    fn test_secure_key_try_from_sensitive_bytes() {
        let key = SecureKey::try_from(SensitiveBytes::new(vec![0u8; 24])).unwrap();
        assert_eq!(key.key_size(), AesKeySize::Bits192);
        assert_eq!(key.as_bytes().len(), 24);

        assert!(SecureKey::try_from(SensitiveBytes::new(vec![0u8; 17])).is_err());
    }

    #[test]
    fn test_sensitive_bytes_redacts_output() {
        let secret = SensitiveBytes::new(vec![0xAAu8; 16]);
        assert_eq!(secret.len(), 16);
        assert!(!secret.is_empty());
        assert_eq!(format!("{secret:?}"), "[16 bytes REDACTED]");
        assert_eq!(format!("{secret}"), "[16 bytes REDACTED]");
    }

    #[test]
    fn test_aes128_gcm_encryption_roundtrip() {
        assert_roundtrip(AesKeySize::Bits128);
    }

    #[test]
    fn test_aes192_gcm_encryption_roundtrip() {
        assert_roundtrip(AesKeySize::Bits192);
    }

    #[test]
    fn test_aes256_gcm_encryption_roundtrip() {
        assert_roundtrip(AesKeySize::Bits256);
    }

    #[test]
    fn test_cross_key_size_incompatibility() {
        let plaintext = b"Cross-key test";

        let cipher128 = cipher_for(AesKeySize::Bits128);
        let cipher256 = cipher_for(AesKeySize::Bits256);

        // Ciphertext from 128-bit key should not decrypt with 256-bit key
        let ciphertext = cipher128.encrypt(plaintext, None).unwrap();
        assert!(cipher256.decrypt(&ciphertext, None).is_err());
    }

    #[test]
    fn test_encryption_with_empty_plaintext() {
        let cipher = cipher_for(AesKeySize::Bits128);

        let plaintext = b"";
        let ciphertext = cipher.encrypt(plaintext, None).unwrap();

        // Even empty plaintext produces nonce + tag
        assert_eq!(ciphertext.len(), NONCE_LEN + TAG_LEN);

        let decrypted = cipher.decrypt(&ciphertext, None).unwrap();
        assert_eq!(decrypted, plaintext);
    }

    #[test]
    fn test_decrypt_rejects_short_ciphertext() {
        let cipher = cipher_for(AesKeySize::Bits128);

        // One byte short of the minimum (nonce + tag) must be an error, not a panic
        let too_short = [0u8; NONCE_LEN + TAG_LEN - 1];
        assert!(cipher.decrypt(&too_short, None).is_err());
        assert!(cipher.decrypt(b"", None).is_err());
    }

    #[test]
    fn test_decryption_with_tampered_ciphertext() {
        let cipher = cipher_for(AesKeySize::Bits128);

        let plaintext = b"Sensitive data";
        let ciphertext = cipher.encrypt(plaintext, None).unwrap();

        // Tamper with the first encrypted byte (right after the nonce)
        let mut tampered_body = ciphertext.clone();
        tampered_body[NONCE_LEN] ^= 0xFF;
        assert!(cipher.decrypt(&tampered_body, None).is_err());

        // Tamper with the last byte of the authentication tag
        let mut tampered_tag = ciphertext;
        let last = tampered_tag.len() - 1;
        tampered_tag[last] ^= 0xFF;
        assert!(cipher.decrypt(&tampered_tag, None).is_err());
    }

    #[test]
    fn test_different_keys_produce_different_ciphertexts() {
        let cipher1 = cipher_for(AesKeySize::Bits128);
        let cipher2 = cipher_for(AesKeySize::Bits128);

        let plaintext = b"Same plaintext";

        let ciphertext1 = cipher1.encrypt(plaintext, None).unwrap();
        let ciphertext2 = cipher2.encrypt(plaintext, None).unwrap();

        // Different keys should produce different ciphertexts (comparing the encrypted portion)
        // Note: The nonces will also be different, but we're mainly interested in the encrypted data
        assert_ne!(&ciphertext1[NONCE_LEN..], &ciphertext2[NONCE_LEN..]);
    }

    #[test]
    fn test_ciphertext_format_java_compatible() {
        // Test that our ciphertext format matches Java's: [12-byte nonce][ciphertext][16-byte tag]
        let cipher = cipher_for(AesKeySize::Bits128);

        let plaintext = b"Test data";
        let ciphertext = cipher.encrypt(plaintext, None).unwrap();

        // Format should be: [12-byte nonce][encrypted_data + 16-byte GCM tag]
        assert_eq!(
            ciphertext.len(),
            NONCE_LEN + plaintext.len() + TAG_LEN,
            "Ciphertext should be nonce + plaintext + tag length"
        );

        // Split off the leading nonce; the rest is encrypted data + tag
        let (nonce, encrypted_with_tag) = ciphertext.split_at(NONCE_LEN);
        assert_eq!(nonce.len(), 12, "Nonce should be 12 bytes");
        assert_eq!(
            encrypted_with_tag.len(),
            plaintext.len() + 16,
            "Encrypted portion should be plaintext length + 16-byte tag"
        );
    }
}