mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-26 14:49:57 +00:00
Compare commits
5 Commits
python-v0.
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a2c2cf31d2 | ||
|
|
3bc6d0ee82 | ||
|
|
6602a86dcb | ||
|
|
2025aefe80 | ||
|
|
68724c5d57 |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -41,5 +41,3 @@ dist
|
|||||||
target
|
target
|
||||||
|
|
||||||
**/sccache.log
|
**/sccache.log
|
||||||
|
|
||||||
Cargo.lock
|
|
||||||
|
|||||||
7145
Cargo.lock
generated
Normal file
7145
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -45,7 +45,8 @@ log = "0.4"
|
|||||||
object_store = "0.9.0"
|
object_store = "0.9.0"
|
||||||
pin-project = "1.0.7"
|
pin-project = "1.0.7"
|
||||||
snafu = "0.7.4"
|
snafu = "0.7.4"
|
||||||
url = "2"
|
url = "=2.3.1"
|
||||||
num-traits = "0.2"
|
num-traits = "0.2"
|
||||||
regex = "1.10"
|
regex = "1.10"
|
||||||
lazy_static = "1"
|
lazy_static = "1"
|
||||||
|
napi-build = "=2.1.2"
|
||||||
|
|||||||
@@ -15,14 +15,15 @@ crate-type = ["cdylib"]
|
|||||||
arrow-ipc.workspace = true
|
arrow-ipc.workspace = true
|
||||||
futures.workspace = true
|
futures.workspace = true
|
||||||
lancedb = { path = "../rust/lancedb" }
|
lancedb = { path = "../rust/lancedb" }
|
||||||
napi = { version = "2.15", default-features = false, features = [
|
napi = { version = "=2.16.1", default-features = false, features = [
|
||||||
"napi7",
|
"napi7",
|
||||||
"async",
|
"async",
|
||||||
] }
|
] }
|
||||||
napi-derive = "2"
|
napi-derive = "=2.16.1"
|
||||||
|
napi-build = "=2.1.2"
|
||||||
|
|
||||||
# Prevent dynamic linking of lzma, which comes from datafusion
|
# Prevent dynamic linking of lzma, which comes from datafusion
|
||||||
lzma-sys = { version = "*", features = ["static"] }
|
lzma-sys = { version = "*", features = ["static"] }
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
napi-build = "2.1"
|
napi-build = "=2.1.2"
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.9.0-beta.8"
|
current_version = "0.9.0-beta.10"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-python"
|
name = "lancedb-python"
|
||||||
version = "0.9.0-beta.8"
|
version = "0.9.0-beta.10"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "Python bindings for LanceDB"
|
description = "Python bindings for LanceDB"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
|
|||||||
@@ -119,6 +119,8 @@ class Query(pydantic.BaseModel):
|
|||||||
|
|
||||||
fast_search: bool = False
|
fast_search: bool = False
|
||||||
|
|
||||||
|
bypass_vector_index: Optional[bool] = None
|
||||||
|
|
||||||
|
|
||||||
class LanceQueryBuilder(ABC):
|
class LanceQueryBuilder(ABC):
|
||||||
"""An abstract query builder. Subclasses are defined for vector search,
|
"""An abstract query builder. Subclasses are defined for vector search,
|
||||||
@@ -135,6 +137,7 @@ class LanceQueryBuilder(ABC):
|
|||||||
ordering_field_name: Optional[str] = None,
|
ordering_field_name: Optional[str] = None,
|
||||||
fts_columns: Union[str, List[str]] = [],
|
fts_columns: Union[str, List[str]] = [],
|
||||||
fast_search: bool = False,
|
fast_search: bool = False,
|
||||||
|
bypass_vector_index: Optional[bool] = None,
|
||||||
) -> LanceQueryBuilder:
|
) -> LanceQueryBuilder:
|
||||||
"""
|
"""
|
||||||
Create a query builder based on the given query and query type.
|
Create a query builder based on the given query and query type.
|
||||||
@@ -153,6 +156,8 @@ class LanceQueryBuilder(ABC):
|
|||||||
The name of the vector column to use for vector search.
|
The name of the vector column to use for vector search.
|
||||||
fast_search: bool
|
fast_search: bool
|
||||||
Skip flat search of unindexed data.
|
Skip flat search of unindexed data.
|
||||||
|
bypass_vector_index: Optional[bool]
|
||||||
|
Bypass the vector index and use a brute force search.
|
||||||
"""
|
"""
|
||||||
# Check hybrid search first as it supports empty query pattern
|
# Check hybrid search first as it supports empty query pattern
|
||||||
if query_type == "hybrid":
|
if query_type == "hybrid":
|
||||||
@@ -195,7 +200,12 @@ class LanceQueryBuilder(ABC):
|
|||||||
raise TypeError(f"Unsupported query type: {type(query)}")
|
raise TypeError(f"Unsupported query type: {type(query)}")
|
||||||
|
|
||||||
return LanceVectorQueryBuilder(
|
return LanceVectorQueryBuilder(
|
||||||
table, query, vector_column_name, str_query, fast_search
|
table,
|
||||||
|
query,
|
||||||
|
vector_column_name,
|
||||||
|
str_query,
|
||||||
|
fast_search,
|
||||||
|
bypass_vector_index,
|
||||||
)
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -563,6 +573,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
|||||||
vector_column: str,
|
vector_column: str,
|
||||||
str_query: Optional[str] = None,
|
str_query: Optional[str] = None,
|
||||||
fast_search: bool = False,
|
fast_search: bool = False,
|
||||||
|
bypass_vector_index: Optional[bool] = None,
|
||||||
):
|
):
|
||||||
super().__init__(table)
|
super().__init__(table)
|
||||||
self._query = query
|
self._query = query
|
||||||
@@ -574,6 +585,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
|||||||
self._reranker = None
|
self._reranker = None
|
||||||
self._str_query = str_query
|
self._str_query = str_query
|
||||||
self._fast_search = fast_search
|
self._fast_search = fast_search
|
||||||
|
self._bypass_vector_index = bypass_vector_index
|
||||||
|
|
||||||
def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceVectorQueryBuilder:
|
def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceVectorQueryBuilder:
|
||||||
"""Set the distance metric to use.
|
"""Set the distance metric to use.
|
||||||
@@ -697,6 +709,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
|||||||
with_row_id=self._with_row_id,
|
with_row_id=self._with_row_id,
|
||||||
offset=self._offset,
|
offset=self._offset,
|
||||||
fast_search=self._fast_search,
|
fast_search=self._fast_search,
|
||||||
|
bypass_vector_index=self._bypass_vector_index,
|
||||||
ef=self._ef,
|
ef=self._ef,
|
||||||
)
|
)
|
||||||
result_set = self._table._execute_query(query, batch_size)
|
result_set = self._table._execute_query(query, batch_size)
|
||||||
@@ -947,7 +960,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
|||||||
def _validate_fts_index(self):
|
def _validate_fts_index(self):
|
||||||
if self._table._get_fts_index_path() is None:
|
if self._table._get_fts_index_path() is None:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Please create a full-text search index " "to perform hybrid search."
|
"Please create a full-text search index to perform hybrid search."
|
||||||
)
|
)
|
||||||
|
|
||||||
def _validate_query(self, query):
|
def _validate_query(self, query):
|
||||||
|
|||||||
@@ -47,6 +47,8 @@ class VectorQuery(BaseModel):
|
|||||||
|
|
||||||
vector_column: str = VECTOR_COLUMN_NAME
|
vector_column: str = VECTOR_COLUMN_NAME
|
||||||
|
|
||||||
|
bypass_vector_index: Optional[bool] = None
|
||||||
|
|
||||||
|
|
||||||
@attrs.define
|
@attrs.define
|
||||||
class VectorQueryResult:
|
class VectorQueryResult:
|
||||||
|
|||||||
@@ -234,8 +234,11 @@ class RemoteTable(Table):
|
|||||||
query_type: str = "vector",
|
query_type: str = "vector",
|
||||||
vector_column_name: Optional[str] = None,
|
vector_column_name: Optional[str] = None,
|
||||||
fast_search: bool = False,
|
fast_search: bool = False,
|
||||||
|
bypass_vector_index: Optional[bool] = None,
|
||||||
) -> LanceVectorQueryBuilder:
|
) -> LanceVectorQueryBuilder:
|
||||||
return self.search(query, query_type, vector_column_name, fast_search)
|
return self.search(
|
||||||
|
query, query_type, vector_column_name, fast_search, bypass_vector_index
|
||||||
|
)
|
||||||
|
|
||||||
def search(
|
def search(
|
||||||
self,
|
self,
|
||||||
@@ -243,6 +246,7 @@ class RemoteTable(Table):
|
|||||||
query_type: str = "vector",
|
query_type: str = "vector",
|
||||||
vector_column_name: Optional[str] = None,
|
vector_column_name: Optional[str] = None,
|
||||||
fast_search: bool = False,
|
fast_search: bool = False,
|
||||||
|
bypass_vector_index: Optional[bool] = None,
|
||||||
) -> LanceVectorQueryBuilder:
|
) -> LanceVectorQueryBuilder:
|
||||||
"""Create a search query to find the nearest neighbors
|
"""Create a search query to find the nearest neighbors
|
||||||
of the given query vector. We currently support [vector search][search]
|
of the given query vector. We currently support [vector search][search]
|
||||||
@@ -294,6 +298,15 @@ class RemoteTable(Table):
|
|||||||
search performance but search results will not include unindexed data.
|
search performance but search results will not include unindexed data.
|
||||||
|
|
||||||
- *default False*.
|
- *default False*.
|
||||||
|
|
||||||
|
bypass_vector_index: bool, optional
|
||||||
|
If True, the query will bypass the vector index and perform a full scan.
|
||||||
|
An exhaustive (flat) search will be performed. The query vector will
|
||||||
|
be compared to every vector in the table. At high scales this can be
|
||||||
|
expensive. However, this is often still useful. For example, skipping
|
||||||
|
the vector index can give you ground truth results which you can use to
|
||||||
|
calculate your recall to select an appropriate value for nprobes.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
LanceQueryBuilder
|
LanceQueryBuilder
|
||||||
@@ -316,6 +329,7 @@ class RemoteTable(Table):
|
|||||||
query_type,
|
query_type,
|
||||||
vector_column_name=vector_column_name,
|
vector_column_name=vector_column_name,
|
||||||
fast_search=fast_search,
|
fast_search=fast_search,
|
||||||
|
bypass_vector_index=bypass_vector_index,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _execute_query(
|
def _execute_query(
|
||||||
@@ -377,9 +391,9 @@ class RemoteTable(Table):
|
|||||||
params["on"] = merge._on[0]
|
params["on"] = merge._on[0]
|
||||||
params["when_matched_update_all"] = str(merge._when_matched_update_all).lower()
|
params["when_matched_update_all"] = str(merge._when_matched_update_all).lower()
|
||||||
if merge._when_matched_update_all_condition is not None:
|
if merge._when_matched_update_all_condition is not None:
|
||||||
params[
|
params["when_matched_update_all_filt"] = (
|
||||||
"when_matched_update_all_filt"
|
merge._when_matched_update_all_condition
|
||||||
] = merge._when_matched_update_all_condition
|
)
|
||||||
params["when_not_matched_insert_all"] = str(
|
params["when_not_matched_insert_all"] = str(
|
||||||
merge._when_not_matched_insert_all
|
merge._when_not_matched_insert_all
|
||||||
).lower()
|
).lower()
|
||||||
@@ -387,9 +401,9 @@ class RemoteTable(Table):
|
|||||||
merge._when_not_matched_by_source_delete
|
merge._when_not_matched_by_source_delete
|
||||||
).lower()
|
).lower()
|
||||||
if merge._when_not_matched_by_source_condition is not None:
|
if merge._when_not_matched_by_source_condition is not None:
|
||||||
params[
|
params["when_not_matched_by_source_delete_filt"] = (
|
||||||
"when_not_matched_by_source_delete_filt"
|
merge._when_not_matched_by_source_condition
|
||||||
] = merge._when_not_matched_by_source_condition
|
)
|
||||||
|
|
||||||
self._conn._client.post(
|
self._conn._client.post(
|
||||||
f"/v1/table/{self._name}/merge_insert/",
|
f"/v1/table/{self._name}/merge_insert/",
|
||||||
|
|||||||
@@ -57,4 +57,23 @@ def test_fast_search_query_with_filter():
|
|||||||
|
|
||||||
table = conn["test"]
|
table = conn["test"]
|
||||||
table.schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), 2))])
|
table.schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), 2))])
|
||||||
print(table.query([0, 0], fast_search=True).select(["vector"]).where("foo == bar").to_arrow())
|
print(
|
||||||
|
table.query([0, 0], fast_search=True)
|
||||||
|
.select(["vector"])
|
||||||
|
.where("foo == bar")
|
||||||
|
.to_arrow()
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_bypass_vector_query_with_filter():
|
||||||
|
conn = lancedb.connect("db://client-will-be-injected", api_key="fake")
|
||||||
|
setattr(conn, "_client", FakeLanceDBClient())
|
||||||
|
|
||||||
|
table = conn["test"]
|
||||||
|
table.schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), 2))])
|
||||||
|
print(
|
||||||
|
table.query([0, 0], bypass_vector_index=True)
|
||||||
|
.select(["vector"])
|
||||||
|
.where("foo == bar")
|
||||||
|
.to_arrow()
|
||||||
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user