fix: also sort parquet batches by primary key

This commit is contained in:
discord9
2025-03-09 14:47:34 +08:00
parent a57e263e5a
commit 77dee84a75
2 changed files with 6 additions and 4 deletions

View File

@@ -14,9 +14,9 @@ common-macro.workspace = true
common-meta.workspace = true
common-recordbatch.workspace = true
datatypes.workspace = true
futures.workspace = true
futures-util.workspace = true
meta-client.workspace = true
futures.workspace = true
metric-engine.workspace = true
mito2.workspace = true
object-store.workspace = true

View File

@@ -14,7 +14,7 @@
//! Parquet file format support.
use std::collections::{HashMap, VecDeque};
use std::collections::{BTreeMap, HashMap, VecDeque};
use std::pin::Pin;
use std::sync::Arc;
@@ -212,10 +212,12 @@ pub fn extract_to_batches(
builder.push_row(&row).map_err(BoxedError::new)?;
}
let mut batches = Vec::new();
// Collect the finished batches into a BTreeMap keyed by primary key, so that they come out in ascending primary-key order.
let mut batches = BTreeMap::new();
for (pk, builder) in pk_to_batchs {
batches.push(builder.finish(pk).map_err(BoxedError::new)?);
batches.insert(pk.clone(), builder.finish(pk).map_err(BoxedError::new)?);
}
let batches = batches.into_values().collect();
Ok(batches)
}