Compare commits

...

8 Commits

Author SHA1 Message Date
Lance Release
a2c2cf31d2 Bump version: 0.9.0-beta.9 → 0.9.0-beta.10 2025-05-28 19:23:35 +00:00
Lu Qiu
3bc6d0ee82 Support bypass_vector_index 2025-05-28 12:22:27 -07:00
Lance Release
6602a86dcb Bump version: 0.9.0-beta.8 → 0.9.0-beta.9 2025-05-28 16:53:20 +00:00
Lu Qiu
2025aefe80 Fix build issues 2025-05-28 09:50:42 -07:00
Lu Qiu
68724c5d57 add bypass_vector_column 2025-05-27 20:46:04 -07:00
Lance Release
1884fe8a3e Bump version: 0.9.0-beta.7 → 0.9.0-beta.8 2025-02-26 15:03:57 +00:00
Ryan Green
d8111b259c Merge remote-tracking branch 'origin/python-v0.9.4-patch' into python-v0.9.4-patch 2025-02-26 11:31:34 -03:30
Ryan Green
3c74bf5c7a Pin chrono version 2025-02-26 11:31:29 -03:30
10 changed files with 7228 additions and 34 deletions

2
.gitignore vendored
View File

@@ -41,5 +41,3 @@ dist
target
**/sccache.log
Cargo.lock

7145
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -35,7 +35,7 @@ arrow-schema = "51.0"
arrow-arith = "51.0"
arrow-cast = "51.0"
async-trait = "0"
chrono = "0.4.35"
chrono = "=0.4.39"
datafusion-physical-plan = "37.1"
half = { "version" = "=2.4.1", default-features = false, features = [
"num-traits",
@@ -45,7 +45,8 @@ log = "0.4"
object_store = "0.9.0"
pin-project = "1.0.7"
snafu = "0.7.4"
url = "2"
url = "=2.3.1"
num-traits = "0.2"
regex = "1.10"
lazy_static = "1"
napi-build = "=2.1.2"

View File

@@ -15,14 +15,15 @@ crate-type = ["cdylib"]
arrow-ipc.workspace = true
futures.workspace = true
lancedb = { path = "../rust/lancedb" }
napi = { version = "2.15", default-features = false, features = [
napi = { version = "=2.16.1", default-features = false, features = [
"napi7",
"async",
] }
napi-derive = "2"
napi-derive = "=2.16.1"
napi-build = "=2.1.2"
# Prevent dynamic linking of lzma, which comes from datafusion
lzma-sys = { version = "*", features = ["static"] }
[build-dependencies]
napi-build = "2.1"
napi-build = "=2.1.2"

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.9.0-beta.7"
current_version = "0.9.0-beta.10"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.9.0-beta.7"
version = "0.9.0-beta.10"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true
@@ -20,6 +20,7 @@ env_logger = "0.10"
pyo3 = { version = "0.20", features = ["extension-module", "abi3-py38"] }
pyo3-asyncio = { version = "0.20", features = ["attributes", "tokio-runtime"] }
base64ct = "=1.6.0" # workaround for https://github.com/RustCrypto/formats/issues/1684
chrono = "=0.4.39"
# Prevent dynamic linking of lzma, which comes from datafusion
lzma-sys = { version = "*", features = ["static"] }

View File

@@ -119,6 +119,8 @@ class Query(pydantic.BaseModel):
fast_search: bool = False
bypass_vector_index: Optional[bool] = None
class LanceQueryBuilder(ABC):
"""An abstract query builder. Subclasses are defined for vector search,
@@ -127,14 +129,15 @@ class LanceQueryBuilder(ABC):
@classmethod
def create(
cls,
table: "Table",
query: Optional[Union[np.ndarray, str, "PIL.Image.Image", Tuple]],
query_type: str,
vector_column_name: str,
ordering_field_name: Optional[str] = None,
fts_columns: Union[str, List[str]] = [],
fast_search: bool = False,
cls,
table: "Table",
query: Optional[Union[np.ndarray, str, "PIL.Image.Image", Tuple]],
query_type: str,
vector_column_name: str,
ordering_field_name: Optional[str] = None,
fts_columns: Union[str, List[str]] = [],
fast_search: bool = False,
bypass_vector_index: Optional[bool] = None,
) -> LanceQueryBuilder:
"""
Create a query builder based on the given query and query type.
@@ -153,6 +156,8 @@ class LanceQueryBuilder(ABC):
The name of the vector column to use for vector search.
fast_search: bool
Skip flat search of unindexed data.
bypass_vector_index: Optional[bool]
Bypass the vector index and use a brute force search.
"""
# Check hybrid search first as it supports empty query pattern
if query_type == "hybrid":
@@ -195,7 +200,12 @@ class LanceQueryBuilder(ABC):
raise TypeError(f"Unsupported query type: {type(query)}")
return LanceVectorQueryBuilder(
table, query, vector_column_name, str_query, fast_search
table,
query,
vector_column_name,
str_query,
fast_search,
bypass_vector_index,
)
@classmethod
@@ -557,12 +567,13 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
"""
def __init__(
self,
table: "Table",
query: Union[np.ndarray, list, "PIL.Image.Image"],
vector_column: str,
str_query: Optional[str] = None,
fast_search: bool = False,
self,
table: "Table",
query: Union[np.ndarray, list, "PIL.Image.Image"],
vector_column: str,
str_query: Optional[str] = None,
fast_search: bool = False,
bypass_vector_index: Optional[bool] = None,
):
super().__init__(table)
self._query = query
@@ -574,6 +585,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
self._reranker = None
self._str_query = str_query
self._fast_search = fast_search
self._bypass_vector_index = bypass_vector_index
def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceVectorQueryBuilder:
"""Set the distance metric to use.
@@ -697,6 +709,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
with_row_id=self._with_row_id,
offset=self._offset,
fast_search=self._fast_search,
bypass_vector_index=self._bypass_vector_index,
ef=self._ef,
)
result_set = self._table._execute_query(query, batch_size)
@@ -728,7 +741,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
return self
def rerank(
self, reranker: Reranker, query_string: Optional[str] = None
self, reranker: Reranker, query_string: Optional[str] = None
) -> LanceVectorQueryBuilder:
"""Rerank the results using the specified reranker.
@@ -947,7 +960,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
def _validate_fts_index(self):
if self._table._get_fts_index_path() is None:
raise ValueError(
"Please create a full-text search index " "to perform hybrid search."
"Please create a full-text search index to perform hybrid search."
)
def _validate_query(self, query):

View File

@@ -47,6 +47,8 @@ class VectorQuery(BaseModel):
vector_column: str = VECTOR_COLUMN_NAME
bypass_vector_index: Optional[bool] = None
@attrs.define
class VectorQueryResult:

View File

@@ -234,8 +234,11 @@ class RemoteTable(Table):
query_type: str = "vector",
vector_column_name: Optional[str] = None,
fast_search: bool = False,
bypass_vector_index: Optional[bool] = None,
) -> LanceVectorQueryBuilder:
return self.search(query, query_type, vector_column_name, fast_search)
return self.search(
query, query_type, vector_column_name, fast_search, bypass_vector_index
)
def search(
self,
@@ -243,6 +246,7 @@ class RemoteTable(Table):
query_type: str = "vector",
vector_column_name: Optional[str] = None,
fast_search: bool = False,
bypass_vector_index: Optional[bool] = None,
) -> LanceVectorQueryBuilder:
"""Create a search query to find the nearest neighbors
of the given query vector. We currently support [vector search][search]
@@ -294,6 +298,15 @@ class RemoteTable(Table):
search performance but search results will not include unindexed data.
- *default False*.
bypass_vector_index: bool, optional
If True, the query will bypass the vector index and perform an exhaustive
(flat) search: the query vector is compared to every vector in the table.
At high scales this can be expensive, but it is often still useful. For
example, skipping the vector index gives you ground-truth results, which
you can use to calculate your recall and select an appropriate value for
nprobes.
Returns
-------
LanceQueryBuilder
@@ -316,6 +329,7 @@ class RemoteTable(Table):
query_type,
vector_column_name=vector_column_name,
fast_search=fast_search,
bypass_vector_index=bypass_vector_index,
)
def _execute_query(
@@ -377,9 +391,9 @@ class RemoteTable(Table):
params["on"] = merge._on[0]
params["when_matched_update_all"] = str(merge._when_matched_update_all).lower()
if merge._when_matched_update_all_condition is not None:
params[
"when_matched_update_all_filt"
] = merge._when_matched_update_all_condition
params["when_matched_update_all_filt"] = (
merge._when_matched_update_all_condition
)
params["when_not_matched_insert_all"] = str(
merge._when_not_matched_insert_all
).lower()
@@ -387,9 +401,9 @@ class RemoteTable(Table):
merge._when_not_matched_by_source_delete
).lower()
if merge._when_not_matched_by_source_condition is not None:
params[
"when_not_matched_by_source_delete_filt"
] = merge._when_not_matched_by_source_condition
params["when_not_matched_by_source_delete_filt"] = (
merge._when_not_matched_by_source_condition
)
self._conn._client.post(
f"/v1/table/{self._name}/merge_insert/",

View File

@@ -57,4 +57,23 @@ def test_fast_search_query_with_filter():
table = conn["test"]
table.schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), 2))])
print(table.query([0, 0], fast_search=True).select(["vector"]).where("foo == bar").to_arrow())
print(
table.query([0, 0], fast_search=True)
.select(["vector"])
.where("foo == bar")
.to_arrow()
)
def test_bypass_vector_query_with_filter():
    """Smoke-test a filtered vector query issued with ``bypass_vector_index=True``.

    The connection's client is swapped for ``FakeLanceDBClient`` so the query
    is served by the fake instead of a real remote endpoint. The resulting
    Arrow data is only printed; no assertions are made on its contents.
    """
    conn = lancedb.connect("db://client-will-be-injected", api_key="fake")
    # Inject the fake client in place of the real one.
    conn._client = FakeLanceDBClient()

    table = conn["test"]
    vector_field = pa.field("vector", pa.list_(pa.float32(), 2))
    table.schema = pa.schema([vector_field])

    # Build the query step by step: bypass the index, project, then filter.
    builder = table.query([0, 0], bypass_vector_index=True)
    builder = builder.select(["vector"])
    builder = builder.where("foo == bar")
    print(builder.to_arrow())