Compare commits

..

2 Commits

Author SHA1 Message Date
Lei Xu
4278862205 local ref 2024-07-04 14:54:09 -07:00
Lei Xu
e06a63f427 bump lance version 2024-07-04 14:54:09 -07:00
6 changed files with 7 additions and 75 deletions

View File

@@ -20,18 +20,11 @@ keywords = ["lancedb", "lance", "database", "vector", "search"]
categories = ["database-implementations"]
[workspace.dependencies]
# lance = { "version" = "=0.14.0", "features" = ["dynamodb"] }
# lance-index = { "version" = "=0.14.0" }
# lance-linalg = { "version" = "=0.14.0" }
# lance-testing = { "version" = "=0.14.0" }
# lance-datafusion = { "version" = "=0.14.0" }
lance = { path = "../lance/rust/lance", "features" = ["dynamodb"] }
lance = { path = "../lance/rust/lance", features = ["dynamodb"] }
lance-index = { path = "../lance/rust/lance-index" }
lance-linalg = { path = "../lance/rust/lance-linalg" }
lance-testing = { path = "../lance/rust/lance-testing" }
lance-datafusion = { path = "../lance/rust/lance-datafusion" }
# Note that this one does not include pyarrow
arrow = { version = "51.0", optional = false }
arrow-array = "51.0"

View File

@@ -3,7 +3,7 @@ name = "lancedb"
# version in Cargo.toml
dependencies = [
"deprecation",
"pylance==0.14.0",
"pylance==0.13.0",
"ratelimiter~=1.0",
"requests>=2.31.0",
"retry>=0.9.2",

View File

@@ -429,10 +429,8 @@ class LanceQueryBuilder(ABC):
>>> plan = table.search(query).explain_plan(True)
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
Projection: fields=[vector, _distance]
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST]
KNNVectorDistance: metric=l2
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
KNNFlat: k=10 metric=l2
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
Parameters
----------
@@ -1215,10 +1213,8 @@ class AsyncQueryBase(object):
... print(plan)
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
Projection: fields=[vector, _distance]
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST]
KNNVectorDistance: metric=l2
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
KNNFlat: k=10 metric=l2
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
Parameters
----------

View File

@@ -1191,7 +1191,6 @@ mod tests {
.query()
.execute_with_options(QueryExecutionOptions {
max_batch_length: 50000,
..Default::default()
})
.await
.unwrap()
@@ -1212,7 +1211,6 @@ mod tests {
.query()
.execute_with_options(QueryExecutionOptions {
max_batch_length: 50000,
..Default::default()
})
.await
.unwrap()

View File

@@ -374,16 +374,6 @@ pub trait QueryBase {
/// Columns will always be returned in the order given, even if that order is different than
/// the order used when adding the data.
fn select(self, selection: Select) -> Self;
/// Only execute the query over indexed data.
///
/// This enables a weakly consistent fast path for queries that only need to access the
/// indexed data.
///
/// Users can call [`crate::Table::optimize`] to merge new data into the index and make the
/// new data available for fast search.
///
/// Fast search is disabled by default.
fn fast_search(self) -> Self;
}
pub trait HasQuery {
@@ -405,11 +395,6 @@ impl<T: HasQuery> QueryBase for T {
self.mut_query().select = select;
self
}
/// Turns on the `fast_search` flag of the underlying query and hands the builder back
/// so that further calls can be chained.
fn fast_search(mut self) -> Self {
    let query = self.mut_query();
    query.fast_search = true;
    self
}
}
/// Options for controlling the execution of a query
@@ -506,12 +491,6 @@ pub struct Query {
pub(crate) filter: Option<String>,
/// Select column projection.
pub(crate) select: Select,
/// If set to true, the query is executed only on the indexed data,
/// and yields faster results.
///
/// By default, this is false.
pub(crate) fast_search: bool,
}
impl Query {
@@ -521,7 +500,6 @@ impl Query {
limit: None,
filter: None,
select: Select::All,
fast_search: false,
}
}
@@ -1023,7 +1001,6 @@ mod tests {
.query()
.execute_with_options(QueryExecutionOptions {
max_batch_length: 10,
..Default::default()
})
.await
.unwrap();
@@ -1088,20 +1065,4 @@ mod tests {
.to_string()
.contains("No vector column found to match with the query vector dimension: 3"));
}
/// Builds a nearest-neighbour query with `fast_search()` enabled and checks that the
/// verbose explained plan does not mention a "Take" step.
#[tokio::test]
async fn test_fast_search_plan() {
    let tmp_dir = tempdir().unwrap();
    let table = make_test_table(&tmp_dir).await;

    // Assemble the query first, then ask for its plan in a separate step.
    let vector_query = table
        .query()
        .select(Select::columns(&["_distance"]))
        .nearest_to(vec![0.1, 0.2, 0.3, 0.4])
        .unwrap()
        .fast_search();
    let plan = vector_query.explain_plan(true).await.unwrap();

    assert!(!plan.contains("Take"));
}
}

View File

@@ -35,7 +35,6 @@ use lance::dataset::{
Dataset, UpdateBuilder as LanceUpdateBuilder, WhenMatched, WriteMode, WriteParams,
};
use lance::dataset::{MergeInsertBuilder as LanceMergeInsertBuilder, WhenNotMatchedBySource};
use lance::index::scalar::ScalarIndexType;
use lance::io::WrappingObjectStore;
use lance_datafusion::exec::execute_plan;
use lance_index::vector::hnsw::builder::HnswBuildParams;
@@ -1504,9 +1503,7 @@ impl NativeTable {
}
let mut dataset = self.dataset.get_mut().await?;
let lance_idx_params = lance::index::scalar::ScalarIndexParams {
force_index_type: Some(ScalarIndexType::BTree),
};
let lance_idx_params = lance::index::scalar::ScalarIndexParams {};
dataset
.create_index(
&[field.name()],
@@ -1738,22 +1735,10 @@ impl TableInternal for NativeTable {
scanner.nprobs(query.nprobes);
scanner.use_index(query.use_index);
scanner.prefilter(query.prefilter);
match query.base.select {
Select::Columns(ref columns) => {
scanner.project(columns.as_slice())?;
}
Select::Dynamic(ref select_with_transform) => {
scanner.project_with_transform(select_with_transform.as_slice())?;
}
Select::All => {}
}
if let Some(opts) = options {
scanner.batch_size(opts.max_batch_length as usize);
}
if query.base.fast_search {
scanner.fast_search();
}
Ok(scanner)
}
@@ -1788,7 +1773,6 @@ impl TableInternal for NativeTable {
if let Some(distance_type) = query.distance_type {
scanner.distance_metric(distance_type.into());
}
Ok(scanner.create_plan().await?)
}