From aab7367804bdcc6cd027c030879ef095417efbe4 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Tue, 9 Apr 2024 21:53:38 +0900 Subject: [PATCH] feat: try get pk values from cache when applying predicate to parquet (#3286) Signed-off-by: Ruihang Xia Co-authored-by: tison --- src/mito2/src/sst/parquet/reader.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/mito2/src/sst/parquet/reader.rs b/src/mito2/src/sst/parquet/reader.rs index 827e5d851c..60aa0afa54 100644 --- a/src/mito2/src/sst/parquet/reader.rs +++ b/src/mito2/src/sst/parquet/reader.rs @@ -698,7 +698,12 @@ impl ParquetReader { }; let result = match column_metadata.semantic_type { SemanticType::Tag => { - let pk_values = self.codec.decode(input.primary_key())?; + let pk_values = if let Some(pk_values) = input.pk_values() { + pk_values + } else { + input.set_pk_values(self.codec.decode(input.primary_key())?); + input.pk_values().unwrap() + }; // Safety: this is a primary key let pk_index = self .read_format @@ -712,7 +717,6 @@ impl ParquetReader { .evaluate_scalar(&pk_value) .context(FilterRecordBatchSnafu)? { - input.set_pk_values(pk_values); continue; } else { // PK not match means the entire batch is filtered out.