refactor: skip re-taking arrays in memtable if possible (#5779)

experiment: skip sorting and re-taking arrays if possible when scanning memtable
This commit is contained in:
LFC
2025-03-28 17:58:55 +08:00
committed by GitHub
parent 7e1ba49d3d
commit a9e990768d
2 changed files with 16 additions and 10 deletions

View File

@@ -666,17 +666,15 @@ impl Series {
/// Freezes the active part into the frozen part and compacts the frozen part to reduce memory fragmentation.
/// Returns the frozen and compacted values.
fn compact(&mut self, region_metadata: &RegionMetadataRef) -> Result<Values> {
fn compact(&mut self, region_metadata: &RegionMetadataRef) -> Result<&Values> {
self.freeze(region_metadata);
let mut frozen = self.frozen.clone();
let frozen = &self.frozen;
// Each series must contain at least one row
debug_assert!(!frozen.is_empty());
let values = if frozen.len() == 1 {
frozen.pop().unwrap()
} else {
if frozen.len() > 1 {
// TODO(hl): We should keep track of the min/max timestamps of each `Values` and avoid
// cloning and sorting when they do not overlap with each other.
@@ -700,10 +698,9 @@ impl Series {
debug_assert_eq!(concatenated.len(), column_size);
let values = Values::from_columns(&concatenated)?;
self.frozen = vec![values.clone()];
values
self.frozen = vec![values];
};
Ok(values)
Ok(&self.frozen[0])
}
}
@@ -1008,7 +1005,7 @@ mod tests {
vec![ValueRef::Int64(v0), ValueRef::Float64(OrderedFloat(v1))].into_iter()
}
fn check_values(values: Values, expect: &[(i64, u64, u8, i64, f64)]) {
fn check_values(values: &Values, expect: &[(i64, u64, u8, i64, f64)]) {
let ts = values
.timestamp
.as_any()

View File

@@ -382,8 +382,13 @@ impl Batch {
];
let rows = converter.convert_columns(&columns).unwrap();
let mut to_sort: Vec<_> = rows.iter().enumerate().collect();
to_sort.sort_unstable_by(|left, right| left.1.cmp(&right.1));
let was_sorted = to_sort.is_sorted_by_key(|x| x.1);
if !was_sorted {
to_sort.sort_unstable_by_key(|x| x.1);
}
let num_rows = to_sort.len();
if dedup {
// Dedup by timestamps.
to_sort.dedup_by(|left, right| {
@@ -394,7 +399,11 @@ impl Batch {
left_key[..TIMESTAMP_KEY_LEN] == right_key[..TIMESTAMP_KEY_LEN]
});
}
let no_dedup = to_sort.len() == num_rows;
if was_sorted && no_dedup {
return Ok(());
}
let indices = UInt32Vector::from_iter_values(to_sort.iter().map(|v| v.0 as u32));
self.take_in_place(&indices)
}