From 67c16dd63181bfb6dafa8fe442f6bbed91a8f71d Mon Sep 17 00:00:00 2001 From: "Lei, HUANG" <6406592+v0y4g3r@users.noreply.github.com> Date: Mon, 12 Jun 2023 11:46:45 +0800 Subject: [PATCH] feat: optimize some parquet writer parameter (#1758) --- src/storage/src/sst/parquet.rs | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/storage/src/sst/parquet.rs b/src/storage/src/sst/parquet.rs index 47943a9d30..43a4a81a34 100644 --- a/src/storage/src/sst/parquet.rs +++ b/src/storage/src/sst/parquet.rs @@ -43,8 +43,9 @@ use parquet::basic::{Compression, Encoding, ZstdLevel}; use parquet::file::metadata::KeyValue; use parquet::file::properties::WriterProperties; use parquet::format::FileMetaData; -use parquet::schema::types::SchemaDescriptor; +use parquet::schema::types::{ColumnPath, SchemaDescriptor}; use snafu::{OptionExt, ResultExt}; +use store_api::storage::consts::SEQUENCE_COLUMN_NAME; use table::predicate::Predicate; use tokio::io::BufReader; @@ -87,7 +88,8 @@ impl<'a> ParquetWriter<'a> { opts: &sst::WriteOptions, ) -> Result> { let schema = self.source.schema(); - let writer_props = WriterProperties::builder() + + let mut props_builder = WriterProperties::builder() .set_compression(Compression::ZSTD(ZstdLevel::default())) .set_encoding(Encoding::PLAIN) .set_max_row_group_size(self.max_row_group_size) @@ -96,7 +98,23 @@ impl<'a> ParquetWriter<'a> { .map(|(k, v)| KeyValue::new(k.clone(), v.clone())) .collect::>() })) - .build(); + .set_column_encoding( + ColumnPath::new(vec![SEQUENCE_COLUMN_NAME.to_string()]), + Encoding::DELTA_BINARY_PACKED, + ) + .set_column_dictionary_enabled( + ColumnPath::new(vec![SEQUENCE_COLUMN_NAME.to_string()]), + false, + ); + + if let Some(ts_col) = schema.timestamp_column() { + props_builder = props_builder.set_column_encoding( + ColumnPath::new(vec![ts_col.name.clone()]), + Encoding::DELTA_BINARY_PACKED, + ); + } + + let writer_props = props_builder.build(); let mut buffered_writer = BufferedWriter::try_new( self.file_path.to_string(),