mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-28 09:20:39 +00:00
feat: add use_index parameter to merge insert operations (#2674)
## Summary Exposes `use_index` Merge Insert parameter, which was created upstream in https://github.com/lancedb/lance/pull/4688. ## API Examples ### Python ```python # Force table scan table.merge_insert(["id"]) \ .when_not_matched_insert_all() \ .use_index(False) \ .execute(data) ``` ### Node.js/TypeScript ```typescript // Force table scan await table.mergeInsert("id") .whenNotMatchedInsertAll() .useIndex(false) .execute(data); ``` ### Rust ```rust // Force table scan let mut builder = table.merge_insert(&["id"]); builder.when_not_matched_insert_all() .use_index(false); builder.execute(data).await?; ``` 🤖 Generated with [Claude Code](https://claude.ai/code) Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1452,6 +1452,14 @@ struct MergeInsertRequest {
|
||||
when_not_matched_insert_all: bool,
|
||||
when_not_matched_by_source_delete: bool,
|
||||
when_not_matched_by_source_delete_filt: Option<String>,
|
||||
// For backwards compatibility, only serialize use_index when it's false
|
||||
// (the default is true)
|
||||
#[serde(skip_serializing_if = "is_true")]
|
||||
use_index: bool,
|
||||
}
|
||||
|
||||
fn is_true(b: &bool) -> bool {
|
||||
*b
|
||||
}
|
||||
|
||||
impl TryFrom<MergeInsertBuilder> for MergeInsertRequest {
|
||||
@@ -1476,6 +1484,8 @@ impl TryFrom<MergeInsertBuilder> for MergeInsertRequest {
|
||||
when_not_matched_insert_all: value.when_not_matched_insert_all,
|
||||
when_not_matched_by_source_delete: value.when_not_matched_by_source_delete,
|
||||
when_not_matched_by_source_delete_filt: value.when_not_matched_by_source_delete_filt,
|
||||
// Only serialize use_index when it's false for backwards compatibility
|
||||
use_index: value.use_index,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1942,6 +1952,7 @@ mod tests {
|
||||
assert_eq!(params["when_not_matched_by_source_delete"], "false");
|
||||
assert!(!params.contains_key("when_matched_update_all_filt"));
|
||||
assert!(!params.contains_key("when_not_matched_by_source_delete_filt"));
|
||||
assert!(!params.contains_key("use_index"));
|
||||
|
||||
if old_server {
|
||||
http::Response::builder().status(200).body("{}").unwrap()
|
||||
|
||||
@@ -2399,6 +2399,7 @@ impl BaseTable for NativeTable {
|
||||
} else {
|
||||
builder.when_not_matched_by_source(WhenNotMatchedBySource::Keep);
|
||||
}
|
||||
builder.use_index(params.use_index);
|
||||
|
||||
let future = if let Some(timeout) = params.timeout {
|
||||
// The default retry timeout is 30s, so we pass the full timeout down
|
||||
@@ -2906,6 +2907,38 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_merge_insert_use_index() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
let conn = connect(uri).execute().await.unwrap();
|
||||
|
||||
// Create a dataset with i=0..10
|
||||
let batches = merge_insert_test_batches(0, 0);
|
||||
let table = conn
|
||||
.create_table("my_table", batches)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(table.count_rows(None).await.unwrap(), 10);
|
||||
|
||||
// Test use_index=true (default behavior)
|
||||
let new_batches = Box::new(merge_insert_test_batches(5, 1));
|
||||
let mut merge_insert_builder = table.merge_insert(&["i"]);
|
||||
merge_insert_builder.when_not_matched_insert_all();
|
||||
merge_insert_builder.use_index(true);
|
||||
merge_insert_builder.execute(new_batches).await.unwrap();
|
||||
assert_eq!(table.count_rows(None).await.unwrap(), 15);
|
||||
|
||||
// Test use_index=false (force table scan)
|
||||
let new_batches = Box::new(merge_insert_test_batches(15, 2));
|
||||
let mut merge_insert_builder = table.merge_insert(&["i"]);
|
||||
merge_insert_builder.when_not_matched_insert_all();
|
||||
merge_insert_builder.use_index(false);
|
||||
merge_insert_builder.execute(new_batches).await.unwrap();
|
||||
assert_eq!(table.count_rows(None).await.unwrap(), 25);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_add_overwrite() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
|
||||
@@ -22,6 +22,7 @@ pub struct MergeInsertBuilder {
|
||||
pub(crate) when_not_matched_by_source_delete: bool,
|
||||
pub(crate) when_not_matched_by_source_delete_filt: Option<String>,
|
||||
pub(crate) timeout: Option<Duration>,
|
||||
pub(crate) use_index: bool,
|
||||
}
|
||||
|
||||
impl MergeInsertBuilder {
|
||||
@@ -35,6 +36,7 @@ impl MergeInsertBuilder {
|
||||
when_not_matched_by_source_delete: false,
|
||||
when_not_matched_by_source_delete_filt: None,
|
||||
timeout: None,
|
||||
use_index: true,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -101,6 +103,19 @@ impl MergeInsertBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Controls whether to use indexes for the merge operation.
|
||||
///
|
||||
/// When set to `true` (the default), the operation will use an index if available
|
||||
/// on the join key for improved performance. When set to `false`, it forces a full
|
||||
/// table scan even if an index exists. This can be useful for benchmarking or when
|
||||
/// the query optimizer chooses a suboptimal path.
|
||||
///
|
||||
/// If not set, defaults to `true` (use index if available).
|
||||
pub fn use_index(&mut self, use_index: bool) -> &mut Self {
|
||||
self.use_index = use_index;
|
||||
self
|
||||
}
|
||||
|
||||
/// Executes the merge insert operation
|
||||
///
|
||||
/// Returns version and statistics about the merge operation including the number of rows
|
||||
|
||||
Reference in New Issue
Block a user