mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-30 02:10:40 +00:00
fix: include _rowid in hash and calculated split projections (#2965)
## Summary

- PR #2957 changed the permutation builder to select only `_rowid` from the base table, but `Splitter::project()` replaced the selection entirely for hash and calculated splits, dropping `_rowid`.
- Include `_rowid` in the column selections for hash and calculated split projections.
- Fix a Python test that queried the permutation table for base table columns that are no longer materialized.

Fixes the `test_split_hash`, `test_split_hash_with_discard`, `test_split_calculated`, `test_shuffle_combined_with_splits`, and `test_filter_with_splits` failures in `test_permutation.py`.

## Test plan

- [x] `cargo test -p lancedb -- permutation` (22 passed)
- [x] `pytest python/tests/test_permutation.py` (46 passed)
- [x] `npm test __test__/permutation.test.ts` (20 passed)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -12,6 +12,8 @@ use datafusion_common::hash_utils::create_hashes;
|
||||
use futures::{StreamExt, TryStreamExt};
|
||||
use lance_arrow::SchemaExt;
|
||||
|
||||
use lance_core::ROW_ID;
|
||||
|
||||
use crate::{
|
||||
arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
|
||||
dataloader::{
|
||||
@@ -360,11 +362,15 @@ impl Splitter {
|
||||
|
||||
pub fn project(&self, query: Query) -> Query {
|
||||
match &self.strategy {
|
||||
SplitStrategy::Calculated { calculation } => query.select(Select::Dynamic(vec![(
|
||||
SPLIT_ID_COLUMN.to_string(),
|
||||
calculation.clone(),
|
||||
)])),
|
||||
SplitStrategy::Hash { columns, .. } => query.select(Select::Columns(columns.clone())),
|
||||
SplitStrategy::Calculated { calculation } => query.select(Select::Dynamic(vec![
|
||||
(SPLIT_ID_COLUMN.to_string(), calculation.clone()),
|
||||
(ROW_ID.to_string(), ROW_ID.to_string()),
|
||||
])),
|
||||
SplitStrategy::Hash { columns, .. } => {
|
||||
let mut cols = columns.clone();
|
||||
cols.push(ROW_ID.to_string());
|
||||
query.select(Select::Columns(cols))
|
||||
}
|
||||
_ => query,
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user