From 3c6c21c1371a76df3795f920a4dfaecfb15d89b9 Mon Sep 17 00:00:00 2001 From: Lei Xu Date: Sun, 7 Jul 2024 09:46:41 -0700 Subject: [PATCH] feat(rust): enable fast search flag in rust (#1432) --- rust/lancedb/src/connection.rs | 2 ++ rust/lancedb/src/query.rs | 39 ++++++++++++++++++++++++++++++++++ rust/lancedb/src/table.rs | 13 ++++++++++++ 3 files changed, 54 insertions(+) diff --git a/rust/lancedb/src/connection.rs b/rust/lancedb/src/connection.rs index eaa790627..2d4f9cc98 100644 --- a/rust/lancedb/src/connection.rs +++ b/rust/lancedb/src/connection.rs @@ -1191,6 +1191,7 @@ mod tests { .query() .execute_with_options(QueryExecutionOptions { max_batch_length: 50000, + ..Default::default() }) .await .unwrap() @@ -1211,6 +1212,7 @@ mod tests { .query() .execute_with_options(QueryExecutionOptions { max_batch_length: 50000, + ..Default::default() }) .await .unwrap() diff --git a/rust/lancedb/src/query.rs b/rust/lancedb/src/query.rs index 87a829d45..bcbc7980e 100644 --- a/rust/lancedb/src/query.rs +++ b/rust/lancedb/src/query.rs @@ -374,6 +374,16 @@ pub trait QueryBase { /// Columns will always be returned in the order given, even if that order is different than /// the order used when adding the data. fn select(self, selection: Select) -> Self; + + /// Only execute the query over indexed data. + /// + /// This allows a weakly consistent fast path for queries that only need to access the indexed data. + /// + /// Users can use [`crate::Table::optimize`] to merge new data into the index, and make the + /// new data available for fast search. + /// + /// By default, it is false. + fn fast_search(self) -> Self; } pub trait HasQuery { @@ -395,6 +405,11 @@ impl QueryBase for T { self.mut_query().select = select; self } + + fn fast_search(mut self) -> Self { + self.mut_query().fast_search = true; + self + } } /// Options for controlling the execution of a query @@ -491,6 +506,12 @@ pub struct Query { pub(crate) filter: Option, /// Select column projection. 
pub(crate) select: Select, + + /// If set to true, the query is executed only on the indexed data, + /// and yields faster results. + /// + /// By default, this is false. + pub(crate) fast_search: bool, } impl Query { @@ -500,6 +521,7 @@ impl Query { limit: None, filter: None, select: Select::All, + fast_search: false, } } @@ -1001,6 +1023,7 @@ mod tests { .query() .execute_with_options(QueryExecutionOptions { max_batch_length: 10, + ..Default::default() }) .await .unwrap(); @@ -1065,4 +1088,20 @@ mod tests { .to_string() .contains("No vector column found to match with the query vector dimension: 3")); } + + #[tokio::test] + async fn test_fast_search_plan() { + let tmp_dir = tempdir().unwrap(); + let table = make_test_table(&tmp_dir).await; + let plan = table + .query() + .select(Select::columns(&["_distance"])) + .nearest_to(vec![0.1, 0.2, 0.3, 0.4]) + .unwrap() + .fast_search() + .explain_plan(true) + .await + .unwrap(); + assert!(!plan.contains("Take")); + } } diff --git a/rust/lancedb/src/table.rs b/rust/lancedb/src/table.rs index 68eb29858..4a1eb7ab6 100644 --- a/rust/lancedb/src/table.rs +++ b/rust/lancedb/src/table.rs @@ -1735,10 +1735,22 @@ impl TableInternal for NativeTable { scanner.nprobs(query.nprobes); scanner.use_index(query.use_index); scanner.prefilter(query.prefilter); + match query.base.select { + Select::Columns(ref columns) => { + scanner.project(columns.as_slice())?; + } + Select::Dynamic(ref select_with_transform) => { + scanner.project_with_transform(select_with_transform.as_slice())?; + } + Select::All => {} + } if let Some(opts) = options { scanner.batch_size(opts.max_batch_length as usize); } + if query.base.fast_search { + scanner.fast_search(); + } Ok(scanner) } @@ -1773,6 +1785,7 @@ impl TableInternal for NativeTable { if let Some(distance_type) = query.distance_type { scanner.distance_metric(distance_type.into()); } + Ok(scanner.create_plan().await?) }