Compare commits

...

13 Commits

Author SHA1 Message Date
Lance Release
a2c2cf31d2 Bump version: 0.9.0-beta.9 → 0.9.0-beta.10 2025-05-28 19:23:35 +00:00
Lu Qiu
3bc6d0ee82 Support bypass_vector_index 2025-05-28 12:22:27 -07:00
Lance Release
6602a86dcb Bump version: 0.9.0-beta.8 → 0.9.0-beta.9 2025-05-28 16:53:20 +00:00
Lu Qiu
2025aefe80 Fix build issues 2025-05-28 09:50:42 -07:00
Lu Qiu
68724c5d57 add bypass_vector_column 2025-05-27 20:46:04 -07:00
Lance Release
1884fe8a3e Bump version: 0.9.0-beta.7 → 0.9.0-beta.8 2025-02-26 15:03:57 +00:00
Ryan Green
d8111b259c Merge remote-tracking branch 'origin/python-v0.9.4-patch' into python-v0.9.4-patch 2025-02-26 11:31:34 -03:30
Ryan Green
3c74bf5c7a Pin chrono version 2025-02-26 11:31:29 -03:30
Lance Release
b64bb75a82 Bump version: 0.9.0-beta.6 → 0.9.0-beta.7 2025-02-26 13:29:54 +00:00
Ryan Green
93e03ec702 revert worfklow 2025-02-26 09:56:08 -03:30
Ryan Green
7a94a7e171 Merge remote-tracking branch 'origin/python-v0.9.4-patch' into python-v0.9.4-patch 2025-02-26 09:52:55 -03:30
Ryan Green
acae6522fb workaround "edition2024" issue 2025-02-26 09:52:48 -03:30
Lance Release
005d5b64ac Bump version: 0.5.2 → 0.5.2-final.1 2025-02-26 13:05:01 +00:00
21 changed files with 7245 additions and 56 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.5.2"
current_version = "0.5.2-final.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -15,21 +15,15 @@ jobs:
- platform: x86_64
manylinux: "2_17"
extra_args: ""
runner: ubuntu-22.04
- platform: x86_64
manylinux: "2_28"
extra_args: "--features fp16kernels"
runner: ubuntu-22.04
- platform: aarch64
manylinux: "2_17"
manylinux: "2_24"
extra_args: ""
# For successful fat LTO builds, we need a large runner to avoid OOM errors.
runner: ubuntu-2404-8x-arm64
- platform: aarch64
manylinux: "2_28"
extra_args: "--features fp16kernels"
runner: ubuntu-2404-8x-arm64
runs-on: ${{ matrix.config.runner }}
# We don't build fp16 kernels for aarch64, because it uses
# cross compilation image, which doesn't have a new enough compiler.
runs-on: "ubuntu-22.04"
steps:
- uses: actions/checkout@v4
with:
@@ -89,7 +83,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.12
python-version: 3.8
- uses: ./.github/workflows/build_windows_wheel
with:
python-minor-version: 8
@@ -163,4 +157,4 @@ jobs:
token: ${{ secrets.GITHUB_TOKEN }}
generate_release_notes: false
name: Python LanceDB v${{ steps.extract_version.outputs.version }}
body: ${{ steps.python_release_notes.outputs.changelog }}
body: ${{ steps.python_release_notes.outputs.changelog }}

2
.gitignore vendored
View File

@@ -41,5 +41,3 @@ dist
target
**/sccache.log
Cargo.lock

7145
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -35,7 +35,7 @@ arrow-schema = "51.0"
arrow-arith = "51.0"
arrow-cast = "51.0"
async-trait = "0"
chrono = "0.4.35"
chrono = "=0.4.39"
datafusion-physical-plan = "37.1"
half = { "version" = "=2.4.1", default-features = false, features = [
"num-traits",
@@ -45,7 +45,8 @@ log = "0.4"
object_store = "0.9.0"
pin-project = "1.0.7"
snafu = "0.7.4"
url = "2"
url = "=2.3.1"
num-traits = "0.2"
regex = "1.10"
lazy_static = "1"
napi-build = "=2.1.2"

View File

@@ -1,6 +1,6 @@
{
"name": "vectordb",
"version": "0.5.2",
"version": "0.5.2-final.1",
"description": " Serverless, low-latency vector database for AI applications",
"main": "dist/index.js",
"types": "dist/index.d.ts",

View File

@@ -15,14 +15,15 @@ crate-type = ["cdylib"]
arrow-ipc.workspace = true
futures.workspace = true
lancedb = { path = "../rust/lancedb" }
napi = { version = "2.15", default-features = false, features = [
napi = { version = "=2.16.1", default-features = false, features = [
"napi7",
"async",
] }
napi-derive = "2"
napi-derive = "=2.16.1"
napi-build = "=2.1.2"
# Prevent dynamic linking of lzma, which comes from datafusion
lzma-sys = { version = "*", features = ["static"] }
[build-dependencies]
napi-build = "2.1"
napi-build = "=2.1.2"

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.5.2",
"version": "0.5.2-final.1",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.5.2",
"version": "0.5.2-final.1",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.5.2",
"version": "0.5.2-final.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.5.2",
"version": "0.5.2-final.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.5.2",
"version": "0.5.2-final.1",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -10,7 +10,7 @@
"vector database",
"ann"
],
"version": "0.5.2",
"version": "0.5.2-final.1",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.9.0-beta.6"
current_version = "0.9.0-beta.10"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.9.0-beta.6"
version = "0.9.0-beta.10"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true
@@ -19,6 +19,8 @@ lancedb = { path = "../rust/lancedb" }
env_logger = "0.10"
pyo3 = { version = "0.20", features = ["extension-module", "abi3-py38"] }
pyo3-asyncio = { version = "0.20", features = ["attributes", "tokio-runtime"] }
base64ct = "=1.6.0" # workaround for https://github.com/RustCrypto/formats/issues/1684
chrono = "=0.4.39"
# Prevent dynamic linking of lzma, which comes from datafusion
lzma-sys = { version = "*", features = ["static"] }

View File

@@ -119,6 +119,8 @@ class Query(pydantic.BaseModel):
fast_search: bool = False
bypass_vector_index: Optional[bool] = None
class LanceQueryBuilder(ABC):
"""An abstract query builder. Subclasses are defined for vector search,
@@ -127,14 +129,15 @@ class LanceQueryBuilder(ABC):
@classmethod
def create(
cls,
table: "Table",
query: Optional[Union[np.ndarray, str, "PIL.Image.Image", Tuple]],
query_type: str,
vector_column_name: str,
ordering_field_name: Optional[str] = None,
fts_columns: Union[str, List[str]] = [],
fast_search: bool = False,
cls,
table: "Table",
query: Optional[Union[np.ndarray, str, "PIL.Image.Image", Tuple]],
query_type: str,
vector_column_name: str,
ordering_field_name: Optional[str] = None,
fts_columns: Union[str, List[str]] = [],
fast_search: bool = False,
bypass_vector_index: Optional[bool] = None,
) -> LanceQueryBuilder:
"""
Create a query builder based on the given query and query type.
@@ -153,6 +156,8 @@ class LanceQueryBuilder(ABC):
The name of the vector column to use for vector search.
fast_search: bool
Skip flat search of unindexed data.
bypass_vector_index: Optional[bool]
Bypass the vector index and use a brute force search.
"""
# Check hybrid search first as it supports empty query pattern
if query_type == "hybrid":
@@ -195,7 +200,12 @@ class LanceQueryBuilder(ABC):
raise TypeError(f"Unsupported query type: {type(query)}")
return LanceVectorQueryBuilder(
table, query, vector_column_name, str_query, fast_search
table,
query,
vector_column_name,
str_query,
fast_search,
bypass_vector_index,
)
@classmethod
@@ -557,12 +567,13 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
"""
def __init__(
self,
table: "Table",
query: Union[np.ndarray, list, "PIL.Image.Image"],
vector_column: str,
str_query: Optional[str] = None,
fast_search: bool = False,
self,
table: "Table",
query: Union[np.ndarray, list, "PIL.Image.Image"],
vector_column: str,
str_query: Optional[str] = None,
fast_search: bool = False,
bypass_vector_index: Optional[bool] = None,
):
super().__init__(table)
self._query = query
@@ -574,6 +585,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
self._reranker = None
self._str_query = str_query
self._fast_search = fast_search
self._bypass_vector_index = bypass_vector_index
def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceVectorQueryBuilder:
"""Set the distance metric to use.
@@ -697,6 +709,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
with_row_id=self._with_row_id,
offset=self._offset,
fast_search=self._fast_search,
bypass_vector_index=self._bypass_vector_index,
ef=self._ef,
)
result_set = self._table._execute_query(query, batch_size)
@@ -728,7 +741,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
return self
def rerank(
self, reranker: Reranker, query_string: Optional[str] = None
self, reranker: Reranker, query_string: Optional[str] = None
) -> LanceVectorQueryBuilder:
"""Rerank the results using the specified reranker.
@@ -947,7 +960,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
def _validate_fts_index(self):
if self._table._get_fts_index_path() is None:
raise ValueError(
"Please create a full-text search index " "to perform hybrid search."
"Please create a full-text search index to perform hybrid search."
)
def _validate_query(self, query):

View File

@@ -47,6 +47,8 @@ class VectorQuery(BaseModel):
vector_column: str = VECTOR_COLUMN_NAME
bypass_vector_index: Optional[bool] = None
@attrs.define
class VectorQueryResult:

View File

@@ -234,8 +234,11 @@ class RemoteTable(Table):
query_type: str = "vector",
vector_column_name: Optional[str] = None,
fast_search: bool = False,
bypass_vector_index: Optional[bool] = None,
) -> LanceVectorQueryBuilder:
return self.search(query, query_type, vector_column_name, fast_search)
return self.search(
query, query_type, vector_column_name, fast_search, bypass_vector_index
)
def search(
self,
@@ -243,6 +246,7 @@ class RemoteTable(Table):
query_type: str = "vector",
vector_column_name: Optional[str] = None,
fast_search: bool = False,
bypass_vector_index: Optional[bool] = None,
) -> LanceVectorQueryBuilder:
"""Create a search query to find the nearest neighbors
of the given query vector. We currently support [vector search][search]
@@ -294,6 +298,15 @@ class RemoteTable(Table):
search performance but search results will not include unindexed data.
- *default False*.
bypass_vector_index: bool, optional
If True, the query will bypass the vector index and perform a full scan.
An exhaustive (flat) search will be performed. The query vector will
be compared to every vector in the table. At high scales this can be
expensive. However, this is often still useful. For example, skipping
the vector index can give you ground truth results which you can use to
calculate your recall to select an appropriate value for nprobes.
Returns
-------
LanceQueryBuilder
@@ -316,6 +329,7 @@ class RemoteTable(Table):
query_type,
vector_column_name=vector_column_name,
fast_search=fast_search,
bypass_vector_index=bypass_vector_index,
)
def _execute_query(
@@ -377,9 +391,9 @@ class RemoteTable(Table):
params["on"] = merge._on[0]
params["when_matched_update_all"] = str(merge._when_matched_update_all).lower()
if merge._when_matched_update_all_condition is not None:
params[
"when_matched_update_all_filt"
] = merge._when_matched_update_all_condition
params["when_matched_update_all_filt"] = (
merge._when_matched_update_all_condition
)
params["when_not_matched_insert_all"] = str(
merge._when_not_matched_insert_all
).lower()
@@ -387,9 +401,9 @@ class RemoteTable(Table):
merge._when_not_matched_by_source_delete
).lower()
if merge._when_not_matched_by_source_condition is not None:
params[
"when_not_matched_by_source_delete_filt"
] = merge._when_not_matched_by_source_condition
params["when_not_matched_by_source_delete_filt"] = (
merge._when_not_matched_by_source_condition
)
self._conn._client.post(
f"/v1/table/{self._name}/merge_insert/",

View File

@@ -57,4 +57,23 @@ def test_fast_search_query_with_filter():
table = conn["test"]
table.schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), 2))])
print(table.query([0, 0], fast_search=True).select(["vector"]).where("foo == bar").to_arrow())
print(
table.query([0, 0], fast_search=True)
.select(["vector"])
.where("foo == bar")
.to_arrow()
)
def test_bypass_vector_query_with_filter():
conn = lancedb.connect("db://client-will-be-injected", api_key="fake")
setattr(conn, "_client", FakeLanceDBClient())
table = conn["test"]
table.schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), 2))])
print(
table.query([0, 0], bypass_vector_index=True)
.select(["vector"])
.where("foo == bar")
.to_arrow()
)

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-node"
version = "0.5.2"
version = "0.5.2-final.1"
description = "Serverless, low-latency vector database for AI applications"
license.workspace = true
edition.workspace = true

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.5.2"
version = "0.5.2-final.1"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true