Compare commits

..

16 Commits

Author SHA1 Message Date
Lance Release
a2c2cf31d2 Bump version: 0.9.0-beta.9 → 0.9.0-beta.10 2025-05-28 19:23:35 +00:00
Lu Qiu
3bc6d0ee82 Support bypass_vector_index 2025-05-28 12:22:27 -07:00
Lance Release
6602a86dcb Bump version: 0.9.0-beta.8 → 0.9.0-beta.9 2025-05-28 16:53:20 +00:00
Lu Qiu
2025aefe80 Fix build issues 2025-05-28 09:50:42 -07:00
Lu Qiu
68724c5d57 add bypass_vector_column 2025-05-27 20:46:04 -07:00
Lance Release
1884fe8a3e Bump version: 0.9.0-beta.7 → 0.9.0-beta.8 2025-02-26 15:03:57 +00:00
Ryan Green
d8111b259c Merge remote-tracking branch 'origin/python-v0.9.4-patch' into python-v0.9.4-patch 2025-02-26 11:31:34 -03:30
Ryan Green
3c74bf5c7a Pin chrono version 2025-02-26 11:31:29 -03:30
Lance Release
b64bb75a82 Bump version: 0.9.0-beta.6 → 0.9.0-beta.7 2025-02-26 13:29:54 +00:00
Ryan Green
93e03ec702 revert workflow 2025-02-26 09:56:08 -03:30
Ryan Green
7a94a7e171 Merge remote-tracking branch 'origin/python-v0.9.4-patch' into python-v0.9.4-patch 2025-02-26 09:52:55 -03:30
Ryan Green
acae6522fb workaround "edition2024" issue 2025-02-26 09:52:48 -03:30
Lance Release
005d5b64ac Bump version: 0.5.2 → 0.5.2-final.1 2025-02-26 13:05:01 +00:00
Lance Release
1e89d07fe2 Bump version: 0.9.0-beta.5 → 0.9.0-beta.6 2025-02-26 13:04:48 +00:00
Ryan Green
1da55719e7 fix windows workflow 2025-02-26 09:33:42 -03:30
Ryan Green
9d0ca5a823 merge PyPI Publish workflow from main 2025-02-26 09:31:18 -03:30
21 changed files with 7240 additions and 45 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion] [tool.bumpversion]
current_version = "0.5.2" current_version = "0.5.2-final.1"
parse = """(?x) parse = """(?x)
(?P<major>0|[1-9]\\d*)\\. (?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\. (?P<minor>0|[1-9]\\d*)\\.

View File

@@ -28,7 +28,7 @@ runs:
args: ${{ inputs.args }} args: ${{ inputs.args }}
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/" docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
working-directory: python working-directory: python
- uses: actions/upload-artifact@v3 - uses: actions/upload-artifact@v4
with: with:
name: windows-wheels name: windows-wheels
path: python\target\wheels path: python\target\wheels

2
.gitignore vendored
View File

@@ -41,5 +41,3 @@ dist
target target
**/sccache.log **/sccache.log
Cargo.lock

7145
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -35,7 +35,7 @@ arrow-schema = "51.0"
arrow-arith = "51.0" arrow-arith = "51.0"
arrow-cast = "51.0" arrow-cast = "51.0"
async-trait = "0" async-trait = "0"
chrono = "0.4.35" chrono = "=0.4.39"
datafusion-physical-plan = "37.1" datafusion-physical-plan = "37.1"
half = { "version" = "=2.4.1", default-features = false, features = [ half = { "version" = "=2.4.1", default-features = false, features = [
"num-traits", "num-traits",
@@ -45,7 +45,8 @@ log = "0.4"
object_store = "0.9.0" object_store = "0.9.0"
pin-project = "1.0.7" pin-project = "1.0.7"
snafu = "0.7.4" snafu = "0.7.4"
url = "2" url = "=2.3.1"
num-traits = "0.2" num-traits = "0.2"
regex = "1.10" regex = "1.10"
lazy_static = "1" lazy_static = "1"
napi-build = "=2.1.2"

View File

@@ -1,6 +1,6 @@
{ {
"name": "vectordb", "name": "vectordb",
"version": "0.5.2", "version": "0.5.2-final.1",
"description": " Serverless, low-latency vector database for AI applications", "description": " Serverless, low-latency vector database for AI applications",
"main": "dist/index.js", "main": "dist/index.js",
"types": "dist/index.d.ts", "types": "dist/index.d.ts",

View File

@@ -15,14 +15,15 @@ crate-type = ["cdylib"]
arrow-ipc.workspace = true arrow-ipc.workspace = true
futures.workspace = true futures.workspace = true
lancedb = { path = "../rust/lancedb" } lancedb = { path = "../rust/lancedb" }
napi = { version = "2.15", default-features = false, features = [ napi = { version = "=2.16.1", default-features = false, features = [
"napi7", "napi7",
"async", "async",
] } ] }
napi-derive = "2" napi-derive = "=2.16.1"
napi-build = "=2.1.2"
# Prevent dynamic linking of lzma, which comes from datafusion # Prevent dynamic linking of lzma, which comes from datafusion
lzma-sys = { version = "*", features = ["static"] } lzma-sys = { version = "*", features = ["static"] }
[build-dependencies] [build-dependencies]
napi-build = "2.1" napi-build = "=2.1.2"

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-darwin-arm64", "name": "@lancedb/lancedb-darwin-arm64",
"version": "0.5.2", "version": "0.5.2-final.1",
"os": ["darwin"], "os": ["darwin"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node", "main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-darwin-x64", "name": "@lancedb/lancedb-darwin-x64",
"version": "0.5.2", "version": "0.5.2-final.1",
"os": ["darwin"], "os": ["darwin"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.darwin-x64.node", "main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-arm64-gnu", "name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.5.2", "version": "0.5.2-final.1",
"os": ["linux"], "os": ["linux"],
"cpu": ["arm64"], "cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node", "main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-linux-x64-gnu", "name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.5.2", "version": "0.5.2-final.1",
"os": ["linux"], "os": ["linux"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node", "main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{ {
"name": "@lancedb/lancedb-win32-x64-msvc", "name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.5.2", "version": "0.5.2-final.1",
"os": ["win32"], "os": ["win32"],
"cpu": ["x64"], "cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node", "main": "lancedb.win32-x64-msvc.node",

View File

@@ -10,7 +10,7 @@
"vector database", "vector database",
"ann" "ann"
], ],
"version": "0.5.2", "version": "0.5.2-final.1",
"main": "dist/index.js", "main": "dist/index.js",
"exports": { "exports": {
".": "./dist/index.js", ".": "./dist/index.js",

View File

@@ -1,5 +1,5 @@
[tool.bumpversion] [tool.bumpversion]
current_version = "0.9.0-beta.5" current_version = "0.9.0-beta.10"
parse = """(?x) parse = """(?x)
(?P<major>0|[1-9]\\d*)\\. (?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\. (?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb-python" name = "lancedb-python"
version = "0.9.0-beta.5" version = "0.9.0-beta.10"
edition.workspace = true edition.workspace = true
description = "Python bindings for LanceDB" description = "Python bindings for LanceDB"
license.workspace = true license.workspace = true
@@ -19,6 +19,8 @@ lancedb = { path = "../rust/lancedb" }
env_logger = "0.10" env_logger = "0.10"
pyo3 = { version = "0.20", features = ["extension-module", "abi3-py38"] } pyo3 = { version = "0.20", features = ["extension-module", "abi3-py38"] }
pyo3-asyncio = { version = "0.20", features = ["attributes", "tokio-runtime"] } pyo3-asyncio = { version = "0.20", features = ["attributes", "tokio-runtime"] }
base64ct = "=1.6.0" # workaround for https://github.com/RustCrypto/formats/issues/1684
chrono = "=0.4.39"
# Prevent dynamic linking of lzma, which comes from datafusion # Prevent dynamic linking of lzma, which comes from datafusion
lzma-sys = { version = "*", features = ["static"] } lzma-sys = { version = "*", features = ["static"] }

View File

@@ -119,6 +119,8 @@ class Query(pydantic.BaseModel):
fast_search: bool = False fast_search: bool = False
bypass_vector_index: Optional[bool] = None
class LanceQueryBuilder(ABC): class LanceQueryBuilder(ABC):
"""An abstract query builder. Subclasses are defined for vector search, """An abstract query builder. Subclasses are defined for vector search,
@@ -127,14 +129,15 @@ class LanceQueryBuilder(ABC):
@classmethod @classmethod
def create( def create(
cls, cls,
table: "Table", table: "Table",
query: Optional[Union[np.ndarray, str, "PIL.Image.Image", Tuple]], query: Optional[Union[np.ndarray, str, "PIL.Image.Image", Tuple]],
query_type: str, query_type: str,
vector_column_name: str, vector_column_name: str,
ordering_field_name: Optional[str] = None, ordering_field_name: Optional[str] = None,
fts_columns: Union[str, List[str]] = [], fts_columns: Union[str, List[str]] = [],
fast_search: bool = False, fast_search: bool = False,
bypass_vector_index: Optional[bool] = None,
) -> LanceQueryBuilder: ) -> LanceQueryBuilder:
""" """
Create a query builder based on the given query and query type. Create a query builder based on the given query and query type.
@@ -153,6 +156,8 @@ class LanceQueryBuilder(ABC):
The name of the vector column to use for vector search. The name of the vector column to use for vector search.
fast_search: bool fast_search: bool
Skip flat search of unindexed data. Skip flat search of unindexed data.
bypass_vector_index: Optional[bool]
Bypass the vector index and use a brute force search.
""" """
# Check hybrid search first as it supports empty query pattern # Check hybrid search first as it supports empty query pattern
if query_type == "hybrid": if query_type == "hybrid":
@@ -195,7 +200,12 @@ class LanceQueryBuilder(ABC):
raise TypeError(f"Unsupported query type: {type(query)}") raise TypeError(f"Unsupported query type: {type(query)}")
return LanceVectorQueryBuilder( return LanceVectorQueryBuilder(
table, query, vector_column_name, str_query, fast_search table,
query,
vector_column_name,
str_query,
fast_search,
bypass_vector_index,
) )
@classmethod @classmethod
@@ -557,12 +567,13 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
""" """
def __init__( def __init__(
self, self,
table: "Table", table: "Table",
query: Union[np.ndarray, list, "PIL.Image.Image"], query: Union[np.ndarray, list, "PIL.Image.Image"],
vector_column: str, vector_column: str,
str_query: Optional[str] = None, str_query: Optional[str] = None,
fast_search: bool = False, fast_search: bool = False,
bypass_vector_index: Optional[bool] = None,
): ):
super().__init__(table) super().__init__(table)
self._query = query self._query = query
@@ -574,6 +585,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
self._reranker = None self._reranker = None
self._str_query = str_query self._str_query = str_query
self._fast_search = fast_search self._fast_search = fast_search
self._bypass_vector_index = bypass_vector_index
def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceVectorQueryBuilder: def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceVectorQueryBuilder:
"""Set the distance metric to use. """Set the distance metric to use.
@@ -697,6 +709,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
with_row_id=self._with_row_id, with_row_id=self._with_row_id,
offset=self._offset, offset=self._offset,
fast_search=self._fast_search, fast_search=self._fast_search,
bypass_vector_index=self._bypass_vector_index,
ef=self._ef, ef=self._ef,
) )
result_set = self._table._execute_query(query, batch_size) result_set = self._table._execute_query(query, batch_size)
@@ -728,7 +741,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
return self return self
def rerank( def rerank(
self, reranker: Reranker, query_string: Optional[str] = None self, reranker: Reranker, query_string: Optional[str] = None
) -> LanceVectorQueryBuilder: ) -> LanceVectorQueryBuilder:
"""Rerank the results using the specified reranker. """Rerank the results using the specified reranker.
@@ -947,7 +960,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
def _validate_fts_index(self): def _validate_fts_index(self):
if self._table._get_fts_index_path() is None: if self._table._get_fts_index_path() is None:
raise ValueError( raise ValueError(
"Please create a full-text search index " "to perform hybrid search." "Please create a full-text search index to perform hybrid search."
) )
def _validate_query(self, query): def _validate_query(self, query):

View File

@@ -47,6 +47,8 @@ class VectorQuery(BaseModel):
vector_column: str = VECTOR_COLUMN_NAME vector_column: str = VECTOR_COLUMN_NAME
bypass_vector_index: Optional[bool] = None
@attrs.define @attrs.define
class VectorQueryResult: class VectorQueryResult:

View File

@@ -234,8 +234,11 @@ class RemoteTable(Table):
query_type: str = "vector", query_type: str = "vector",
vector_column_name: Optional[str] = None, vector_column_name: Optional[str] = None,
fast_search: bool = False, fast_search: bool = False,
bypass_vector_index: Optional[bool] = None,
) -> LanceVectorQueryBuilder: ) -> LanceVectorQueryBuilder:
return self.search(query, query_type, vector_column_name, fast_search) return self.search(
query, query_type, vector_column_name, fast_search, bypass_vector_index
)
def search( def search(
self, self,
@@ -243,6 +246,7 @@ class RemoteTable(Table):
query_type: str = "vector", query_type: str = "vector",
vector_column_name: Optional[str] = None, vector_column_name: Optional[str] = None,
fast_search: bool = False, fast_search: bool = False,
bypass_vector_index: Optional[bool] = None,
) -> LanceVectorQueryBuilder: ) -> LanceVectorQueryBuilder:
"""Create a search query to find the nearest neighbors """Create a search query to find the nearest neighbors
of the given query vector. We currently support [vector search][search] of the given query vector. We currently support [vector search][search]
@@ -294,6 +298,15 @@ class RemoteTable(Table):
search performance but search results will not include unindexed data. search performance but search results will not include unindexed data.
- *default False*. - *default False*.
bypass_vector_index: bool, optional
If True, the query will bypass the vector index and perform a full scan.
An exhaustive (flat) search will be performed. The query vector will
be compared to every vector in the table. At high scales this can be
expensive. However, this is often still useful. For example, skipping
the vector index can give you ground truth results which you can use to
calculate your recall to select an appropriate value for nprobes.
Returns Returns
------- -------
LanceQueryBuilder LanceQueryBuilder
@@ -316,6 +329,7 @@ class RemoteTable(Table):
query_type, query_type,
vector_column_name=vector_column_name, vector_column_name=vector_column_name,
fast_search=fast_search, fast_search=fast_search,
bypass_vector_index=bypass_vector_index,
) )
def _execute_query( def _execute_query(
@@ -377,9 +391,9 @@ class RemoteTable(Table):
params["on"] = merge._on[0] params["on"] = merge._on[0]
params["when_matched_update_all"] = str(merge._when_matched_update_all).lower() params["when_matched_update_all"] = str(merge._when_matched_update_all).lower()
if merge._when_matched_update_all_condition is not None: if merge._when_matched_update_all_condition is not None:
params[ params["when_matched_update_all_filt"] = (
"when_matched_update_all_filt" merge._when_matched_update_all_condition
] = merge._when_matched_update_all_condition )
params["when_not_matched_insert_all"] = str( params["when_not_matched_insert_all"] = str(
merge._when_not_matched_insert_all merge._when_not_matched_insert_all
).lower() ).lower()
@@ -387,9 +401,9 @@ class RemoteTable(Table):
merge._when_not_matched_by_source_delete merge._when_not_matched_by_source_delete
).lower() ).lower()
if merge._when_not_matched_by_source_condition is not None: if merge._when_not_matched_by_source_condition is not None:
params[ params["when_not_matched_by_source_delete_filt"] = (
"when_not_matched_by_source_delete_filt" merge._when_not_matched_by_source_condition
] = merge._when_not_matched_by_source_condition )
self._conn._client.post( self._conn._client.post(
f"/v1/table/{self._name}/merge_insert/", f"/v1/table/{self._name}/merge_insert/",

View File

@@ -57,4 +57,23 @@ def test_fast_search_query_with_filter():
table = conn["test"] table = conn["test"]
table.schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), 2))]) table.schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), 2))])
print(table.query([0, 0], fast_search=True).select(["vector"]).where("foo == bar").to_arrow()) print(
table.query([0, 0], fast_search=True)
.select(["vector"])
.where("foo == bar")
.to_arrow()
)
def test_bypass_vector_query_with_filter():
conn = lancedb.connect("db://client-will-be-injected", api_key="fake")
setattr(conn, "_client", FakeLanceDBClient())
table = conn["test"]
table.schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), 2))])
print(
table.query([0, 0], bypass_vector_index=True)
.select(["vector"])
.where("foo == bar")
.to_arrow()
)

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb-node" name = "lancedb-node"
version = "0.5.2" version = "0.5.2-final.1"
description = "Serverless, low-latency vector database for AI applications" description = "Serverless, low-latency vector database for AI applications"
license.workspace = true license.workspace = true
edition.workspace = true edition.workspace = true

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "lancedb" name = "lancedb"
version = "0.5.2" version = "0.5.2-final.1"
edition.workspace = true edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications" description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true license.workspace = true