Skip to content

Commit 11158ba

Browse files
rok and adamreeve authored
Support writing encrypted Parquet files with plaintext footers (#7439)
* Initial commit * Lint and clippy * Plaintext layout is different to encrypted one * Lint and expected memory size at decryption * Apply suggestions from code review Co-authored-by: Adam Reeve <[email protected]> * Review feedback * Lint * Update parquet/tests/encryption/encryption.rs Co-authored-by: Adam Reeve <[email protected]> * Review feedback * Initial commit * Encrypt plaintext to extract nonce and footer * fix * Add encryption algorithm to file_metadata before writing * fix * Apply suggestions from code review Co-authored-by: Adam Reeve <[email protected]> * Fix * lint * Move get_footer_encryption_algorithm into MetadataObjectWriter * Fix * Review feedback * Avoid unwraps in file_crypto_metadata method * Minor tidy * Test writing and reading with a different footer key --------- Co-authored-by: Adam Reeve <[email protected]>
1 parent fb72b8f commit 11158ba

File tree

4 files changed

+111
-36
lines changed

4 files changed

+111
-36
lines changed

parquet/src/encryption/encrypt.rs

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717

1818
//! Configuration and utilities for Parquet Modular Encryption
1919
20-
use crate::encryption::ciphers::{BlockEncryptor, RingGcmBlockEncryptor};
20+
use crate::encryption::ciphers::{
21+
BlockEncryptor, RingGcmBlockEncryptor, NONCE_LEN, SIZE_LEN, TAG_LEN,
22+
};
2123
use crate::errors::{ParquetError, Result};
2224
use crate::file::column_crypto_metadata::{ColumnCryptoMetaData, EncryptionWithColumnKey};
2325
use crate::schema::types::{ColumnDescPtr, SchemaDescriptor};
@@ -374,6 +376,29 @@ pub(crate) fn encrypt_object<T: TSerializable, W: Write>(
374376
Ok(())
375377
}
376378

379+
/// Write a Thrift serializable object in plaintext, followed by a signature.
///
/// The object is serialized with the Thrift compact protocol and the plaintext
/// bytes are written to `sink` unchanged. The same bytes are then encrypted with
/// `encryptor` and `module_aad`; the ciphertext itself is discarded and only the
/// nonce and the authentication tag are appended to `sink`, in that order.
///
/// # Errors
///
/// Returns an error if serialization, encryption, or any write to `sink` fails.
/// The plaintext is written before encryption is attempted, so `sink` may already
/// contain the serialized object when an encryption error is returned.
///
/// # Panics
///
/// NOTE(review): the slicing below panics if the encrypted buffer is shorter than
/// `SIZE_LEN + NONCE_LEN` or `TAG_LEN` bytes; this assumes the encryptor always
/// produces the layout described in the comment below. Confirm against
/// `BlockEncryptor::encrypt`.
pub(crate) fn write_signed_plaintext_object<T: TSerializable, W: Write>(
380+
object: &T,
381+
encryptor: &mut Box<dyn BlockEncryptor>,
382+
sink: &mut W,
383+
module_aad: &[u8],
384+
) -> Result<()> {
385+
let mut buffer: Vec<u8> = vec![];
386+
{
387+
let mut protocol = TCompactOutputProtocol::new(&mut buffer);
388+
object.write_to_out_protocol(&mut protocol)?;
389+
}
390+
// The serialized object itself is stored unencrypted.
sink.write_all(&buffer)?;
391+
// Encrypt only to derive the nonce and tag; `buffer` is reused for the output.
buffer = encryptor.encrypt(buffer.as_ref(), module_aad)?;
392+
393+
// Format of encrypted buffer is: [ciphertext size, nonce, ciphertext, authentication tag]
394+
let nonce = &buffer[SIZE_LEN..SIZE_LEN + NONCE_LEN];
395+
let tag = &buffer[buffer.len() - TAG_LEN..];
396+
sink.write_all(nonce)?;
397+
sink.write_all(tag)?;
398+
399+
Ok(())
400+
}
401+
377402
/// Encrypt a Thrift serializable object to a byte vector
378403
pub(crate) fn encrypt_object_to_vec<T: TSerializable>(
379404
object: &T,

parquet/src/file/metadata/writer.rs

Lines changed: 43 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,21 @@
1616
// under the License.
1717

1818
#[cfg(feature = "encryption")]
19-
use crate::encryption::encrypt::{encrypt_object, encrypt_object_to_vec, FileEncryptor};
20-
#[cfg(feature = "encryption")]
21-
use crate::encryption::modules::{create_footer_aad, create_module_aad, ModuleType};
19+
use crate::encryption::{
20+
encrypt::{
21+
encrypt_object, encrypt_object_to_vec, write_signed_plaintext_object, FileEncryptor,
22+
},
23+
modules::{create_footer_aad, create_module_aad, ModuleType},
24+
};
2225
#[cfg(feature = "encryption")]
2326
use crate::errors::ParquetError;
2427
use crate::errors::Result;
2528
use crate::file::metadata::{KeyValue, ParquetMetaData};
2629
use crate::file::page_index::index::Index;
2730
use crate::file::writer::{get_file_magic, TrackedWrite};
31+
use crate::format::EncryptionAlgorithm;
2832
#[cfg(feature = "encryption")]
29-
use crate::format::{AesGcmV1, ColumnCryptoMetaData, EncryptionAlgorithm};
33+
use crate::format::{AesGcmV1, ColumnCryptoMetaData};
3034
use crate::format::{ColumnChunk, ColumnIndex, FileMetaData, OffsetIndex, RowGroup};
3135
use crate::schema::types;
3236
use crate::schema::types::{SchemaDescPtr, SchemaDescriptor, TypePtr};
@@ -149,7 +153,7 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
149153
schema: types::to_thrift(self.schema.as_ref())?,
150154
created_by: self.created_by.clone(),
151155
column_orders,
152-
encryption_algorithm: None,
156+
encryption_algorithm: self.object_writer.get_footer_encryption_algorithm(),
153157
footer_signing_key_metadata: None,
154158
};
155159

@@ -474,6 +478,10 @@ impl MetadataObjectWriter {
474478
pub fn get_file_magic(&self) -> &[u8; 4] {
475479
get_file_magic()
476480
}
481+
482+
/// Returns the encryption algorithm to record in the file metadata.
///
/// This variant always returns `None`.
/// NOTE(review): presumably this is the implementation compiled when the
/// `encryption` feature is disabled — confirm against the enclosing impl's
/// `cfg` attribute, which is not visible in this hunk.
fn get_footer_encryption_algorithm(&self) -> Option<EncryptionAlgorithm> {
483+
None
484+
}
477485
}
478486

479487
/// Implementations of [`MetadataObjectWriter`] methods that rely on encryption being enabled
@@ -503,6 +511,11 @@ impl MetadataObjectWriter {
503511
let mut encryptor = file_encryptor.get_footer_encryptor()?;
504512
encrypt_object(file_metadata, &mut encryptor, &mut sink, &aad)
505513
}
514+
Some(file_encryptor) if file_metadata.encryption_algorithm.is_some() => {
515+
let aad = create_footer_aad(file_encryptor.file_aad())?;
516+
let mut encryptor = file_encryptor.get_footer_encryptor()?;
517+
write_signed_plaintext_object(file_metadata, &mut encryptor, &mut sink, &aad)
518+
}
506519
_ => Self::write_object(file_metadata, &mut sink),
507520
}
508521
}
@@ -622,25 +635,36 @@ impl MetadataObjectWriter {
622635
}
623636
}
624637

625-
fn file_crypto_metadata(
626-
file_encryptor: &FileEncryptor,
627-
) -> Result<crate::format::FileCryptoMetaData> {
628-
let properties = file_encryptor.properties();
629-
let supply_aad_prefix = properties
638+
/// Returns the encryption algorithm to record in the file metadata.
///
/// When a file encryptor is configured, the algorithm is built from it via
/// `encryption_algorithm_from_encryptor`; otherwise `None` is returned.
fn get_footer_encryption_algorithm(&self) -> Option<EncryptionAlgorithm> {
639+
if let Some(file_encryptor) = &self.file_encryptor {
640+
return Some(Self::encryption_algorithm_from_encryptor(file_encryptor));
641+
}
642+
None
643+
}
644+
645+
fn encryption_algorithm_from_encryptor(file_encryptor: &FileEncryptor) -> EncryptionAlgorithm {
646+
let supply_aad_prefix = file_encryptor
647+
.properties()
630648
.aad_prefix()
631-
.map(|_| !properties.store_aad_prefix());
632-
let encryption_algorithm = AesGcmV1 {
633-
aad_prefix: if properties.store_aad_prefix() {
634-
properties.aad_prefix().cloned()
635-
} else {
636-
None
637-
},
649+
.map(|_| !file_encryptor.properties().store_aad_prefix());
650+
let aad_prefix = if file_encryptor.properties().store_aad_prefix() {
651+
file_encryptor.properties().aad_prefix().cloned()
652+
} else {
653+
None
654+
};
655+
EncryptionAlgorithm::AESGCMV1(AesGcmV1 {
656+
aad_prefix,
638657
aad_file_unique: Some(file_encryptor.aad_file_unique().clone()),
639658
supply_aad_prefix,
640-
};
659+
})
660+
}
641661

662+
fn file_crypto_metadata(
663+
file_encryptor: &FileEncryptor,
664+
) -> Result<crate::format::FileCryptoMetaData> {
665+
let properties = file_encryptor.properties();
642666
Ok(crate::format::FileCryptoMetaData {
643-
encryption_algorithm: EncryptionAlgorithm::AESGCMV1(encryption_algorithm),
667+
encryption_algorithm: Self::encryption_algorithm_from_encryptor(file_encryptor),
644668
key_metadata: properties.footer_key_metadata().cloned(),
645669
})
646670
}

parquet/src/file/writer.rs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -212,12 +212,6 @@ impl<W: Write + Send> SerializedFileWriter<W> {
212212
if let Some(file_encryption_properties) = &properties.file_encryption_properties {
213213
file_encryption_properties.validate_encrypted_column_names(schema_descriptor)?;
214214

215-
if !file_encryption_properties.encrypt_footer() {
216-
return Err(general_err!(
217-
"Writing encrypted files with plaintext footers is not supported yet"
218-
));
219-
}
220-
221215
Ok(Some(Arc::new(FileEncryptor::new(
222216
file_encryption_properties.clone(),
223217
)?)))

parquet/tests/encryption/encryption.rs

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -433,18 +433,23 @@ fn test_write_non_uniform_encryption() {
433433
read_and_roundtrip_to_encrypted_file(&path, decryption_properties, file_encryption_properties);
434434
}
435435

436-
// todo: currently we raise if writing with plaintext footer, but we should support it
437-
// for uniform and non-uniform encryption (see https://github.com/apache/arrow-rs/issues/7320)
438436
#[test]
439437
fn test_write_uniform_encryption_plaintext_footer() {
440438
let testdata = arrow::util::test_util::parquet_test_data();
441-
let path = format!("{testdata}/encrypt_columns_and_footer.parquet.encrypted");
439+
let path = format!("{testdata}/encrypt_columns_plaintext_footer.parquet.encrypted");
442440

443441
let footer_key = b"0123456789012345".to_vec(); // 128bit/16
442+
let wrong_footer_key = b"0000000000000000".to_vec(); // 128bit/16
444443
let column_1_key = b"1234567890123450".to_vec();
445444
let column_2_key = b"1234567890123451".to_vec();
446445

447446
let decryption_properties = FileDecryptionProperties::builder(footer_key.clone())
447+
.with_column_key("double_field", column_1_key.clone())
448+
.with_column_key("float_field", column_2_key.clone())
449+
.build()
450+
.unwrap();
451+
452+
let wrong_decryption_properties = FileDecryptionProperties::builder(wrong_footer_key)
448453
.with_column_key("double_field", column_1_key)
449454
.with_column_key("float_field", column_2_key)
450455
.build()
@@ -455,26 +460,53 @@ fn test_write_uniform_encryption_plaintext_footer() {
455460
.build()
456461
.unwrap();
457462

463+
// Try writing plaintext footer and then reading it with the correct footer key
464+
read_and_roundtrip_to_encrypted_file(
465+
&path,
466+
decryption_properties.clone(),
467+
file_encryption_properties.clone(),
468+
);
469+
470+
// Try writing plaintext footer and then reading it with the wrong footer key
471+
let temp_file = tempfile::tempfile().unwrap();
472+
473+
// read example data
458474
let file = File::open(path).unwrap();
459475
let options = ArrowReaderOptions::default()
460476
.with_file_decryption_properties(decryption_properties.clone());
461477
let metadata = ArrowReaderMetadata::load(&file, options.clone()).unwrap();
462478

479+
let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options).unwrap();
480+
let batch_reader = builder.build().unwrap();
481+
let batches = batch_reader
482+
.collect::<parquet::errors::Result<Vec<RecordBatch>, _>>()
483+
.unwrap();
484+
485+
// write example data
463486
let props = WriterProperties::builder()
464487
.with_file_encryption_properties(file_encryption_properties)
465488
.build();
466-
let temp_file = tempfile::tempfile().unwrap();
467489

468-
let writer = ArrowWriter::try_new(
490+
let mut writer = ArrowWriter::try_new(
469491
temp_file.try_clone().unwrap(),
470492
metadata.schema().clone(),
471493
Some(props),
472-
);
473-
assert!(writer.is_err());
474-
assert_eq!(
475-
writer.unwrap_err().to_string(),
476-
"Parquet error: Writing encrypted files with plaintext footers is not supported yet"
477494
)
495+
.unwrap();
496+
for batch in batches {
497+
writer.write(&batch).unwrap();
498+
}
499+
writer.close().unwrap();
500+
501+
// Try reading plaintext footer with the wrong footer key
502+
let options =
503+
ArrowReaderOptions::default().with_file_decryption_properties(wrong_decryption_properties);
504+
let result = ArrowReaderMetadata::load(&temp_file, options.clone());
505+
assert!(result.is_err());
506+
assert!(result
507+
.unwrap_err()
508+
.to_string()
509+
.starts_with("Parquet error: Footer signature verification failed. Computed: ["));
478510
}
479511

480512
#[test]

0 commit comments

Comments
 (0)