mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-23 21:39:57 +00:00
Compare commits
5 Commits
python-v0.
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a2c2cf31d2 | ||
|
|
3bc6d0ee82 | ||
|
|
6602a86dcb | ||
|
|
2025aefe80 | ||
|
|
68724c5d57 |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -41,5 +41,3 @@ dist
|
||||
target
|
||||
|
||||
**/sccache.log
|
||||
|
||||
Cargo.lock
|
||||
|
||||
7145
Cargo.lock
generated
Normal file
7145
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -45,7 +45,8 @@ log = "0.4"
|
||||
object_store = "0.9.0"
|
||||
pin-project = "1.0.7"
|
||||
snafu = "0.7.4"
|
||||
url = "2"
|
||||
url = "=2.3.1"
|
||||
num-traits = "0.2"
|
||||
regex = "1.10"
|
||||
lazy_static = "1"
|
||||
napi-build = "=2.1.2"
|
||||
|
||||
@@ -15,14 +15,15 @@ crate-type = ["cdylib"]
|
||||
arrow-ipc.workspace = true
|
||||
futures.workspace = true
|
||||
lancedb = { path = "../rust/lancedb" }
|
||||
napi = { version = "2.15", default-features = false, features = [
|
||||
napi = { version = "=2.16.1", default-features = false, features = [
|
||||
"napi7",
|
||||
"async",
|
||||
] }
|
||||
napi-derive = "2"
|
||||
napi-derive = "=2.16.1"
|
||||
napi-build = "=2.1.2"
|
||||
|
||||
# Prevent dynamic linking of lzma, which comes from datafusion
|
||||
lzma-sys = { version = "*", features = ["static"] }
|
||||
|
||||
[build-dependencies]
|
||||
napi-build = "2.1"
|
||||
napi-build = "=2.1.2"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.9.0-beta.8"
|
||||
current_version = "0.9.0-beta.10"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.9.0-beta.8"
|
||||
version = "0.9.0-beta.10"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
|
||||
@@ -119,6 +119,8 @@ class Query(pydantic.BaseModel):
|
||||
|
||||
fast_search: bool = False
|
||||
|
||||
bypass_vector_index: Optional[bool] = None
|
||||
|
||||
|
||||
class LanceQueryBuilder(ABC):
|
||||
"""An abstract query builder. Subclasses are defined for vector search,
|
||||
@@ -127,14 +129,15 @@ class LanceQueryBuilder(ABC):
|
||||
|
||||
@classmethod
|
||||
def create(
|
||||
cls,
|
||||
table: "Table",
|
||||
query: Optional[Union[np.ndarray, str, "PIL.Image.Image", Tuple]],
|
||||
query_type: str,
|
||||
vector_column_name: str,
|
||||
ordering_field_name: Optional[str] = None,
|
||||
fts_columns: Union[str, List[str]] = [],
|
||||
fast_search: bool = False,
|
||||
cls,
|
||||
table: "Table",
|
||||
query: Optional[Union[np.ndarray, str, "PIL.Image.Image", Tuple]],
|
||||
query_type: str,
|
||||
vector_column_name: str,
|
||||
ordering_field_name: Optional[str] = None,
|
||||
fts_columns: Union[str, List[str]] = [],
|
||||
fast_search: bool = False,
|
||||
bypass_vector_index: Optional[bool] = None,
|
||||
) -> LanceQueryBuilder:
|
||||
"""
|
||||
Create a query builder based on the given query and query type.
|
||||
@@ -153,6 +156,8 @@ class LanceQueryBuilder(ABC):
|
||||
The name of the vector column to use for vector search.
|
||||
fast_search: bool
|
||||
Skip flat search of unindexed data.
|
||||
bypass_vector_index: Optional[bool]
|
||||
Bypass the vector index and use a brute force search.
|
||||
"""
|
||||
# Check hybrid search first as it supports empty query pattern
|
||||
if query_type == "hybrid":
|
||||
@@ -195,7 +200,12 @@ class LanceQueryBuilder(ABC):
|
||||
raise TypeError(f"Unsupported query type: {type(query)}")
|
||||
|
||||
return LanceVectorQueryBuilder(
|
||||
table, query, vector_column_name, str_query, fast_search
|
||||
table,
|
||||
query,
|
||||
vector_column_name,
|
||||
str_query,
|
||||
fast_search,
|
||||
bypass_vector_index,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
@@ -557,12 +567,13 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
table: "Table",
|
||||
query: Union[np.ndarray, list, "PIL.Image.Image"],
|
||||
vector_column: str,
|
||||
str_query: Optional[str] = None,
|
||||
fast_search: bool = False,
|
||||
self,
|
||||
table: "Table",
|
||||
query: Union[np.ndarray, list, "PIL.Image.Image"],
|
||||
vector_column: str,
|
||||
str_query: Optional[str] = None,
|
||||
fast_search: bool = False,
|
||||
bypass_vector_index: Optional[bool] = None,
|
||||
):
|
||||
super().__init__(table)
|
||||
self._query = query
|
||||
@@ -574,6 +585,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
self._reranker = None
|
||||
self._str_query = str_query
|
||||
self._fast_search = fast_search
|
||||
self._bypass_vector_index = bypass_vector_index
|
||||
|
||||
def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceVectorQueryBuilder:
|
||||
"""Set the distance metric to use.
|
||||
@@ -697,6 +709,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
with_row_id=self._with_row_id,
|
||||
offset=self._offset,
|
||||
fast_search=self._fast_search,
|
||||
bypass_vector_index=self._bypass_vector_index,
|
||||
ef=self._ef,
|
||||
)
|
||||
result_set = self._table._execute_query(query, batch_size)
|
||||
@@ -728,7 +741,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
return self
|
||||
|
||||
def rerank(
|
||||
self, reranker: Reranker, query_string: Optional[str] = None
|
||||
self, reranker: Reranker, query_string: Optional[str] = None
|
||||
) -> LanceVectorQueryBuilder:
|
||||
"""Rerank the results using the specified reranker.
|
||||
|
||||
@@ -947,7 +960,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
||||
def _validate_fts_index(self):
|
||||
if self._table._get_fts_index_path() is None:
|
||||
raise ValueError(
|
||||
"Please create a full-text search index " "to perform hybrid search."
|
||||
"Please create a full-text search index to perform hybrid search."
|
||||
)
|
||||
|
||||
def _validate_query(self, query):
|
||||
|
||||
@@ -47,6 +47,8 @@ class VectorQuery(BaseModel):
|
||||
|
||||
vector_column: str = VECTOR_COLUMN_NAME
|
||||
|
||||
bypass_vector_index: Optional[bool] = None
|
||||
|
||||
|
||||
@attrs.define
|
||||
class VectorQueryResult:
|
||||
|
||||
@@ -234,8 +234,11 @@ class RemoteTable(Table):
|
||||
query_type: str = "vector",
|
||||
vector_column_name: Optional[str] = None,
|
||||
fast_search: bool = False,
|
||||
bypass_vector_index: Optional[bool] = None,
|
||||
) -> LanceVectorQueryBuilder:
|
||||
return self.search(query, query_type, vector_column_name, fast_search)
|
||||
return self.search(
|
||||
query, query_type, vector_column_name, fast_search, bypass_vector_index
|
||||
)
|
||||
|
||||
def search(
|
||||
self,
|
||||
@@ -243,6 +246,7 @@ class RemoteTable(Table):
|
||||
query_type: str = "vector",
|
||||
vector_column_name: Optional[str] = None,
|
||||
fast_search: bool = False,
|
||||
bypass_vector_index: Optional[bool] = None,
|
||||
) -> LanceVectorQueryBuilder:
|
||||
"""Create a search query to find the nearest neighbors
|
||||
of the given query vector. We currently support [vector search][search]
|
||||
@@ -294,6 +298,15 @@ class RemoteTable(Table):
|
||||
search performance but search results will not include unindexed data.
|
||||
|
||||
- *default False*.
|
||||
|
||||
bypass_vector_index: bool, optional
|
||||
If True, the query will bypass the vector index and perform a full scan.
|
||||
An exhaustive (flat) search will be performed. The query vector will
|
||||
be compared to every vector in the table. At high scales this can be
|
||||
expensive. However, this is often still useful. For example, skipping
|
||||
the vector index can give you ground truth results which you can use to
|
||||
calculate your recall to select an appropriate value for nprobes.
|
||||
|
||||
Returns
|
||||
-------
|
||||
LanceQueryBuilder
|
||||
@@ -316,6 +329,7 @@ class RemoteTable(Table):
|
||||
query_type,
|
||||
vector_column_name=vector_column_name,
|
||||
fast_search=fast_search,
|
||||
bypass_vector_index=bypass_vector_index,
|
||||
)
|
||||
|
||||
def _execute_query(
|
||||
@@ -377,9 +391,9 @@ class RemoteTable(Table):
|
||||
params["on"] = merge._on[0]
|
||||
params["when_matched_update_all"] = str(merge._when_matched_update_all).lower()
|
||||
if merge._when_matched_update_all_condition is not None:
|
||||
params[
|
||||
"when_matched_update_all_filt"
|
||||
] = merge._when_matched_update_all_condition
|
||||
params["when_matched_update_all_filt"] = (
|
||||
merge._when_matched_update_all_condition
|
||||
)
|
||||
params["when_not_matched_insert_all"] = str(
|
||||
merge._when_not_matched_insert_all
|
||||
).lower()
|
||||
@@ -387,9 +401,9 @@ class RemoteTable(Table):
|
||||
merge._when_not_matched_by_source_delete
|
||||
).lower()
|
||||
if merge._when_not_matched_by_source_condition is not None:
|
||||
params[
|
||||
"when_not_matched_by_source_delete_filt"
|
||||
] = merge._when_not_matched_by_source_condition
|
||||
params["when_not_matched_by_source_delete_filt"] = (
|
||||
merge._when_not_matched_by_source_condition
|
||||
)
|
||||
|
||||
self._conn._client.post(
|
||||
f"/v1/table/{self._name}/merge_insert/",
|
||||
|
||||
@@ -57,4 +57,23 @@ def test_fast_search_query_with_filter():
|
||||
|
||||
table = conn["test"]
|
||||
table.schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), 2))])
|
||||
print(table.query([0, 0], fast_search=True).select(["vector"]).where("foo == bar").to_arrow())
|
||||
print(
|
||||
table.query([0, 0], fast_search=True)
|
||||
.select(["vector"])
|
||||
.where("foo == bar")
|
||||
.to_arrow()
|
||||
)
|
||||
|
||||
|
||||
def test_bypass_vector_query_with_filter():
|
||||
conn = lancedb.connect("db://client-will-be-injected", api_key="fake")
|
||||
setattr(conn, "_client", FakeLanceDBClient())
|
||||
|
||||
table = conn["test"]
|
||||
table.schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), 2))])
|
||||
print(
|
||||
table.query([0, 0], bypass_vector_index=True)
|
||||
.select(["vector"])
|
||||
.where("foo == bar")
|
||||
.to_arrow()
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user