Compare commits

...

3 Commits

Author SHA1 Message Date
albertlockett
ffcf632abb use local path for lance 14.1 2024-07-11 15:16:36 -03:00
Lei Xu
3c6c21c137 feat(rust): enable fast search flag in rust (#1432) 2024-07-07 09:46:41 -07:00
Lei Xu
fd5ca20f34 chore: bump lance to 0.14 (#1430) 2024-07-06 14:10:42 -07:00
6 changed files with 79 additions and 11 deletions

View File

@@ -20,11 +20,18 @@ keywords = ["lancedb", "lance", "database", "vector", "search"]
categories = ["database-implementations"]
[workspace.dependencies]
lance = { "version" = "=0.13.0", "features" = ["dynamodb"] }
lance-index = { "version" = "=0.13.0" }
lance-linalg = { "version" = "=0.13.0" }
lance-testing = { "version" = "=0.13.0" }
lance-datafusion = { "version" = "=0.13.0" }
# lance = { "version" = "=0.14.0", "features" = ["dynamodb"] }
# lance-index = { "version" = "=0.14.0" }
# lance-linalg = { "version" = "=0.14.0" }
# lance-testing = { "version" = "=0.14.0" }
# lance-datafusion = { "version" = "=0.14.0" }
lance = { path = "../lance/rust/lance", "features" = ["dynamodb"] }
lance-index = { path = "../lance/rust/lance-index" }
lance-linalg = { path = "../lance/rust/lance-linalg" }
lance-testing = { path = "../lance/rust/lance-testing" }
lance-datafusion = { path = "../lance/rust/lance-datafusion" }
# Note that this one does not include pyarrow
arrow = { version = "51.0", optional = false }
arrow-array = "51.0"

View File

@@ -3,7 +3,7 @@ name = "lancedb"
# version in Cargo.toml
dependencies = [
"deprecation",
"pylance==0.13.0",
"pylance==0.14.0",
"ratelimiter~=1.0",
"requests>=2.31.0",
"retry>=0.9.2",

View File

@@ -429,8 +429,10 @@ class LanceQueryBuilder(ABC):
>>> plan = table.search(query).explain_plan(True)
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
Projection: fields=[vector, _distance]
KNNFlat: k=10 metric=l2
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST]
KNNVectorDistance: metric=l2
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
Parameters
----------
@@ -1213,8 +1215,10 @@ class AsyncQueryBase(object):
... print(plan)
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
Projection: fields=[vector, _distance]
KNNFlat: k=10 metric=l2
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST]
KNNVectorDistance: metric=l2
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
Parameters
----------

View File

@@ -1191,6 +1191,7 @@ mod tests {
.query()
.execute_with_options(QueryExecutionOptions {
max_batch_length: 50000,
..Default::default()
})
.await
.unwrap()
@@ -1211,6 +1212,7 @@ mod tests {
.query()
.execute_with_options(QueryExecutionOptions {
max_batch_length: 50000,
..Default::default()
})
.await
.unwrap()

View File

@@ -374,6 +374,16 @@ pub trait QueryBase {
/// Columns will always be returned in the order given, even if that order is different than
/// the order used when adding the data.
fn select(self, selection: Select) -> Self;
/// Only execute the query over indexed data.
///
/// This enables a weakly consistent fast path for queries that only need to access data
/// that has already been indexed.
///
/// Users can call [`crate::Table::optimize`] to merge new data into the index and make that
/// data available to fast search.
///
/// Fast search is disabled by default.
fn fast_search(self) -> Self;
}
pub trait HasQuery {
@@ -395,6 +405,11 @@ impl<T: HasQuery> QueryBase for T {
self.mut_query().select = select;
self
}
/// Sets the `fast_search` flag on the underlying query and returns the builder.
fn fast_search(mut self) -> Self {
    let query = self.mut_query();
    query.fast_search = true;
    self
}
}
/// Options for controlling the execution of a query
@@ -491,6 +506,12 @@ pub struct Query {
pub(crate) filter: Option<String>,
/// Select column projection.
pub(crate) select: Select,
/// If set to true, the query is executed only on the indexed data,
/// and yields faster results.
///
/// By default, this is false.
pub(crate) fast_search: bool,
}
impl Query {
@@ -500,6 +521,7 @@ impl Query {
limit: None,
filter: None,
select: Select::All,
fast_search: false,
}
}
@@ -1001,6 +1023,7 @@ mod tests {
.query()
.execute_with_options(QueryExecutionOptions {
max_batch_length: 10,
..Default::default()
})
.await
.unwrap();
@@ -1065,4 +1088,20 @@ mod tests {
.to_string()
.contains("No vector column found to match with the query vector dimension: 3"));
}
/// Builds a nearest-neighbour query with `fast_search()` enabled and checks that the
/// explained plan text does not contain `Take`.
#[tokio::test]
async fn test_fast_search_plan() {
    let tmp_dir = tempdir().unwrap();
    let table = make_test_table(&tmp_dir).await;

    // Assemble the query first; the plan is requested as a separate step below.
    let fast_query = table
        .query()
        .select(Select::columns(&["_distance"]))
        .nearest_to(vec![0.1, 0.2, 0.3, 0.4])
        .unwrap()
        .fast_search();

    // `true` is passed through to `explain_plan` unchanged from the original test.
    let plan = fast_query.explain_plan(true).await.unwrap();

    assert!(!plan.contains("Take"));
}
}

View File

@@ -35,6 +35,7 @@ use lance::dataset::{
Dataset, UpdateBuilder as LanceUpdateBuilder, WhenMatched, WriteMode, WriteParams,
};
use lance::dataset::{MergeInsertBuilder as LanceMergeInsertBuilder, WhenNotMatchedBySource};
use lance::index::scalar::ScalarIndexType;
use lance::io::WrappingObjectStore;
use lance_datafusion::exec::execute_plan;
use lance_index::vector::hnsw::builder::HnswBuildParams;
@@ -1503,7 +1504,9 @@ impl NativeTable {
}
let mut dataset = self.dataset.get_mut().await?;
let lance_idx_params = lance::index::scalar::ScalarIndexParams {};
let lance_idx_params = lance::index::scalar::ScalarIndexParams {
force_index_type: Some(ScalarIndexType::BTree),
};
dataset
.create_index(
&[field.name()],
@@ -1735,10 +1738,22 @@ impl TableInternal for NativeTable {
scanner.nprobs(query.nprobes);
scanner.use_index(query.use_index);
scanner.prefilter(query.prefilter);
match query.base.select {
Select::Columns(ref columns) => {
scanner.project(columns.as_slice())?;
}
Select::Dynamic(ref select_with_transform) => {
scanner.project_with_transform(select_with_transform.as_slice())?;
}
Select::All => {}
}
if let Some(opts) = options {
scanner.batch_size(opts.max_batch_length as usize);
}
if query.base.fast_search {
scanner.fast_search();
}
Ok(scanner)
}
@@ -1773,6 +1788,7 @@ impl TableInternal for NativeTable {
if let Some(distance_type) = query.distance_type {
scanner.distance_metric(distance_type.into());
}
Ok(scanner.create_plan().await?)
}