mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-05 11:22:58 +00:00
Compare commits
2 Commits
lance-14.1
...
lei/lance-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4278862205 | ||
|
|
e06a63f427 |
@@ -20,18 +20,11 @@ keywords = ["lancedb", "lance", "database", "vector", "search"]
|
||||
categories = ["database-implementations"]
|
||||
|
||||
[workspace.dependencies]
|
||||
# lance = { "version" = "=0.14.0", "features" = ["dynamodb"] }
|
||||
# lance-index = { "version" = "=0.14.0" }
|
||||
# lance-linalg = { "version" = "=0.14.0" }
|
||||
# lance-testing = { "version" = "=0.14.0" }
|
||||
# lance-datafusion = { "version" = "=0.14.0" }
|
||||
|
||||
lance = { path = "../lance/rust/lance", "features" = ["dynamodb"] }
|
||||
lance = { path = "../lance/rust/lance", features = ["dynamodb"] }
|
||||
lance-index = { path = "../lance/rust/lance-index" }
|
||||
lance-linalg = { path = "../lance/rust/lance-linalg" }
|
||||
lance-testing = { path = "../lance/rust/lance-testing" }
|
||||
lance-datafusion = { path = "../lance/rust/lance-datafusion" }
|
||||
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "51.0", optional = false }
|
||||
arrow-array = "51.0"
|
||||
|
||||
@@ -3,7 +3,7 @@ name = "lancedb"
|
||||
# version in Cargo.toml
|
||||
dependencies = [
|
||||
"deprecation",
|
||||
"pylance==0.14.0",
|
||||
"pylance==0.13.0",
|
||||
"ratelimiter~=1.0",
|
||||
"requests>=2.31.0",
|
||||
"retry>=0.9.2",
|
||||
|
||||
@@ -429,10 +429,8 @@ class LanceQueryBuilder(ABC):
|
||||
>>> plan = table.search(query).explain_plan(True)
|
||||
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
||||
Projection: fields=[vector, _distance]
|
||||
FilterExec: _distance@2 IS NOT NULL
|
||||
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST]
|
||||
KNNVectorDistance: metric=l2
|
||||
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
|
||||
KNNFlat: k=10 metric=l2
|
||||
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
|
||||
|
||||
Parameters
|
||||
----------
|
||||
@@ -1215,10 +1213,8 @@ class AsyncQueryBase(object):
|
||||
... print(plan)
|
||||
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
||||
Projection: fields=[vector, _distance]
|
||||
FilterExec: _distance@2 IS NOT NULL
|
||||
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST]
|
||||
KNNVectorDistance: metric=l2
|
||||
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
|
||||
KNNFlat: k=10 metric=l2
|
||||
LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
||||
@@ -1191,7 +1191,6 @@ mod tests {
|
||||
.query()
|
||||
.execute_with_options(QueryExecutionOptions {
|
||||
max_batch_length: 50000,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap()
|
||||
@@ -1212,7 +1211,6 @@ mod tests {
|
||||
.query()
|
||||
.execute_with_options(QueryExecutionOptions {
|
||||
max_batch_length: 50000,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap()
|
||||
|
||||
@@ -374,16 +374,6 @@ pub trait QueryBase {
|
||||
/// Columns will always be returned in the order given, even if that order is different than
|
||||
/// the order used when adding the data.
|
||||
fn select(self, selection: Select) -> Self;
|
||||
|
||||
/// Only execute the query over indexed data.
|
||||
///
|
||||
/// This allows weak-consistent fast path for queries that only need to access the indexed data.
|
||||
///
|
||||
/// Users can use [`crate::Table::optimize`] to merge new data into the index, and make the
|
||||
/// new data available for fast search.
|
||||
///
|
||||
/// By default, it is false.
|
||||
fn fast_search(self) -> Self;
|
||||
}
|
||||
|
||||
pub trait HasQuery {
|
||||
@@ -405,11 +395,6 @@ impl<T: HasQuery> QueryBase for T {
|
||||
self.mut_query().select = select;
|
||||
self
|
||||
}
|
||||
|
||||
fn fast_search(mut self) -> Self {
|
||||
self.mut_query().fast_search = true;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Options for controlling the execution of a query
|
||||
@@ -506,12 +491,6 @@ pub struct Query {
|
||||
pub(crate) filter: Option<String>,
|
||||
/// Select column projection.
|
||||
pub(crate) select: Select,
|
||||
|
||||
/// If set to true, the query is executed only on the indexed data,
|
||||
/// and yields faster results.
|
||||
///
|
||||
/// By default, this is false.
|
||||
pub(crate) fast_search: bool,
|
||||
}
|
||||
|
||||
impl Query {
|
||||
@@ -521,7 +500,6 @@ impl Query {
|
||||
limit: None,
|
||||
filter: None,
|
||||
select: Select::All,
|
||||
fast_search: false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1023,7 +1001,6 @@ mod tests {
|
||||
.query()
|
||||
.execute_with_options(QueryExecutionOptions {
|
||||
max_batch_length: 10,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1088,20 +1065,4 @@ mod tests {
|
||||
.to_string()
|
||||
.contains("No vector column found to match with the query vector dimension: 3"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_fast_search_plan() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let table = make_test_table(&tmp_dir).await;
|
||||
let plan = table
|
||||
.query()
|
||||
.select(Select::columns(&["_distance"]))
|
||||
.nearest_to(vec![0.1, 0.2, 0.3, 0.4])
|
||||
.unwrap()
|
||||
.fast_search()
|
||||
.explain_plan(true)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(!plan.contains("Take"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,7 +35,6 @@ use lance::dataset::{
|
||||
Dataset, UpdateBuilder as LanceUpdateBuilder, WhenMatched, WriteMode, WriteParams,
|
||||
};
|
||||
use lance::dataset::{MergeInsertBuilder as LanceMergeInsertBuilder, WhenNotMatchedBySource};
|
||||
use lance::index::scalar::ScalarIndexType;
|
||||
use lance::io::WrappingObjectStore;
|
||||
use lance_datafusion::exec::execute_plan;
|
||||
use lance_index::vector::hnsw::builder::HnswBuildParams;
|
||||
@@ -1504,9 +1503,7 @@ impl NativeTable {
|
||||
}
|
||||
|
||||
let mut dataset = self.dataset.get_mut().await?;
|
||||
let lance_idx_params = lance::index::scalar::ScalarIndexParams {
|
||||
force_index_type: Some(ScalarIndexType::BTree),
|
||||
};
|
||||
let lance_idx_params = lance::index::scalar::ScalarIndexParams {};
|
||||
dataset
|
||||
.create_index(
|
||||
&[field.name()],
|
||||
@@ -1738,22 +1735,10 @@ impl TableInternal for NativeTable {
|
||||
scanner.nprobs(query.nprobes);
|
||||
scanner.use_index(query.use_index);
|
||||
scanner.prefilter(query.prefilter);
|
||||
match query.base.select {
|
||||
Select::Columns(ref columns) => {
|
||||
scanner.project(columns.as_slice())?;
|
||||
}
|
||||
Select::Dynamic(ref select_with_transform) => {
|
||||
scanner.project_with_transform(select_with_transform.as_slice())?;
|
||||
}
|
||||
Select::All => {}
|
||||
}
|
||||
|
||||
if let Some(opts) = options {
|
||||
scanner.batch_size(opts.max_batch_length as usize);
|
||||
}
|
||||
if query.base.fast_search {
|
||||
scanner.fast_search();
|
||||
}
|
||||
|
||||
Ok(scanner)
|
||||
}
|
||||
@@ -1788,7 +1773,6 @@ impl TableInternal for NativeTable {
|
||||
if let Some(distance_type) = query.distance_type {
|
||||
scanner.distance_metric(distance_type.into());
|
||||
}
|
||||
|
||||
Ok(scanner.create_plan().await?)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user