mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-24 05:49:57 +00:00
Compare commits
13 Commits
python-v0.
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a2c2cf31d2 | ||
|
|
3bc6d0ee82 | ||
|
|
6602a86dcb | ||
|
|
2025aefe80 | ||
|
|
68724c5d57 | ||
|
|
1884fe8a3e | ||
|
|
d8111b259c | ||
|
|
3c74bf5c7a | ||
|
|
b64bb75a82 | ||
|
|
93e03ec702 | ||
|
|
7a94a7e171 | ||
|
|
acae6522fb | ||
|
|
005d5b64ac |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.5.2"
|
||||
current_version = "0.5.2-final.1"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
18
.github/workflows/pypi-publish.yml
vendored
18
.github/workflows/pypi-publish.yml
vendored
@@ -15,21 +15,15 @@ jobs:
|
||||
- platform: x86_64
|
||||
manylinux: "2_17"
|
||||
extra_args: ""
|
||||
runner: ubuntu-22.04
|
||||
- platform: x86_64
|
||||
manylinux: "2_28"
|
||||
extra_args: "--features fp16kernels"
|
||||
runner: ubuntu-22.04
|
||||
- platform: aarch64
|
||||
manylinux: "2_17"
|
||||
manylinux: "2_24"
|
||||
extra_args: ""
|
||||
# For successful fat LTO builds, we need a large runner to avoid OOM errors.
|
||||
runner: ubuntu-2404-8x-arm64
|
||||
- platform: aarch64
|
||||
manylinux: "2_28"
|
||||
extra_args: "--features fp16kernels"
|
||||
runner: ubuntu-2404-8x-arm64
|
||||
runs-on: ${{ matrix.config.runner }}
|
||||
# We don't build fp16 kernels for aarch64, because it uses
|
||||
# cross compilation image, which doesn't have a new enough compiler.
|
||||
runs-on: "ubuntu-22.04"
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
@@ -89,7 +83,7 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: 3.12
|
||||
python-version: 3.8
|
||||
- uses: ./.github/workflows/build_windows_wheel
|
||||
with:
|
||||
python-minor-version: 8
|
||||
@@ -163,4 +157,4 @@ jobs:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
generate_release_notes: false
|
||||
name: Python LanceDB v${{ steps.extract_version.outputs.version }}
|
||||
body: ${{ steps.python_release_notes.outputs.changelog }}
|
||||
body: ${{ steps.python_release_notes.outputs.changelog }}
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -41,5 +41,3 @@ dist
|
||||
target
|
||||
|
||||
**/sccache.log
|
||||
|
||||
Cargo.lock
|
||||
|
||||
7145
Cargo.lock
generated
Normal file
7145
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -35,7 +35,7 @@ arrow-schema = "51.0"
|
||||
arrow-arith = "51.0"
|
||||
arrow-cast = "51.0"
|
||||
async-trait = "0"
|
||||
chrono = "0.4.35"
|
||||
chrono = "=0.4.39"
|
||||
datafusion-physical-plan = "37.1"
|
||||
half = { "version" = "=2.4.1", default-features = false, features = [
|
||||
"num-traits",
|
||||
@@ -45,7 +45,8 @@ log = "0.4"
|
||||
object_store = "0.9.0"
|
||||
pin-project = "1.0.7"
|
||||
snafu = "0.7.4"
|
||||
url = "2"
|
||||
url = "=2.3.1"
|
||||
num-traits = "0.2"
|
||||
regex = "1.10"
|
||||
lazy_static = "1"
|
||||
napi-build = "=2.1.2"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.5.2",
|
||||
"version": "0.5.2-final.1",
|
||||
"description": " Serverless, low-latency vector database for AI applications",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
|
||||
@@ -15,14 +15,15 @@ crate-type = ["cdylib"]
|
||||
arrow-ipc.workspace = true
|
||||
futures.workspace = true
|
||||
lancedb = { path = "../rust/lancedb" }
|
||||
napi = { version = "2.15", default-features = false, features = [
|
||||
napi = { version = "=2.16.1", default-features = false, features = [
|
||||
"napi7",
|
||||
"async",
|
||||
] }
|
||||
napi-derive = "2"
|
||||
napi-derive = "=2.16.1"
|
||||
napi-build = "=2.1.2"
|
||||
|
||||
# Prevent dynamic linking of lzma, which comes from datafusion
|
||||
lzma-sys = { version = "*", features = ["static"] }
|
||||
|
||||
[build-dependencies]
|
||||
napi-build = "2.1"
|
||||
napi-build = "=2.1.2"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.5.2",
|
||||
"version": "0.5.2-final.1",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-x64",
|
||||
"version": "0.5.2",
|
||||
"version": "0.5.2-final.1",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.darwin-x64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.5.2",
|
||||
"version": "0.5.2-final.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.5.2",
|
||||
"version": "0.5.2-final.1",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.5.2",
|
||||
"version": "0.5.2-final.1",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
"vector database",
|
||||
"ann"
|
||||
],
|
||||
"version": "0.5.2",
|
||||
"version": "0.5.2-final.1",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.9.0-beta.6"
|
||||
current_version = "0.9.0-beta.10"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.9.0-beta.6"
|
||||
version = "0.9.0-beta.10"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
@@ -19,6 +19,8 @@ lancedb = { path = "../rust/lancedb" }
|
||||
env_logger = "0.10"
|
||||
pyo3 = { version = "0.20", features = ["extension-module", "abi3-py38"] }
|
||||
pyo3-asyncio = { version = "0.20", features = ["attributes", "tokio-runtime"] }
|
||||
base64ct = "=1.6.0" # workaround for https://github.com/RustCrypto/formats/issues/1684
|
||||
chrono = "=0.4.39"
|
||||
|
||||
# Prevent dynamic linking of lzma, which comes from datafusion
|
||||
lzma-sys = { version = "*", features = ["static"] }
|
||||
|
||||
@@ -119,6 +119,8 @@ class Query(pydantic.BaseModel):
|
||||
|
||||
fast_search: bool = False
|
||||
|
||||
bypass_vector_index: Optional[bool] = None
|
||||
|
||||
|
||||
class LanceQueryBuilder(ABC):
|
||||
"""An abstract query builder. Subclasses are defined for vector search,
|
||||
@@ -127,14 +129,15 @@ class LanceQueryBuilder(ABC):
|
||||
|
||||
@classmethod
|
||||
def create(
|
||||
cls,
|
||||
table: "Table",
|
||||
query: Optional[Union[np.ndarray, str, "PIL.Image.Image", Tuple]],
|
||||
query_type: str,
|
||||
vector_column_name: str,
|
||||
ordering_field_name: Optional[str] = None,
|
||||
fts_columns: Union[str, List[str]] = [],
|
||||
fast_search: bool = False,
|
||||
cls,
|
||||
table: "Table",
|
||||
query: Optional[Union[np.ndarray, str, "PIL.Image.Image", Tuple]],
|
||||
query_type: str,
|
||||
vector_column_name: str,
|
||||
ordering_field_name: Optional[str] = None,
|
||||
fts_columns: Union[str, List[str]] = [],
|
||||
fast_search: bool = False,
|
||||
bypass_vector_index: Optional[bool] = None,
|
||||
) -> LanceQueryBuilder:
|
||||
"""
|
||||
Create a query builder based on the given query and query type.
|
||||
@@ -153,6 +156,8 @@ class LanceQueryBuilder(ABC):
|
||||
The name of the vector column to use for vector search.
|
||||
fast_search: bool
|
||||
Skip flat search of unindexed data.
|
||||
bypass_vector_index: Optional[bool]
|
||||
Bypass the vector index and use a brute force search.
|
||||
"""
|
||||
# Check hybrid search first as it supports empty query pattern
|
||||
if query_type == "hybrid":
|
||||
@@ -195,7 +200,12 @@ class LanceQueryBuilder(ABC):
|
||||
raise TypeError(f"Unsupported query type: {type(query)}")
|
||||
|
||||
return LanceVectorQueryBuilder(
|
||||
table, query, vector_column_name, str_query, fast_search
|
||||
table,
|
||||
query,
|
||||
vector_column_name,
|
||||
str_query,
|
||||
fast_search,
|
||||
bypass_vector_index,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
@@ -557,12 +567,13 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
table: "Table",
|
||||
query: Union[np.ndarray, list, "PIL.Image.Image"],
|
||||
vector_column: str,
|
||||
str_query: Optional[str] = None,
|
||||
fast_search: bool = False,
|
||||
self,
|
||||
table: "Table",
|
||||
query: Union[np.ndarray, list, "PIL.Image.Image"],
|
||||
vector_column: str,
|
||||
str_query: Optional[str] = None,
|
||||
fast_search: bool = False,
|
||||
bypass_vector_index: Optional[bool] = None,
|
||||
):
|
||||
super().__init__(table)
|
||||
self._query = query
|
||||
@@ -574,6 +585,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
self._reranker = None
|
||||
self._str_query = str_query
|
||||
self._fast_search = fast_search
|
||||
self._bypass_vector_index = bypass_vector_index
|
||||
|
||||
def metric(self, metric: Literal["L2", "cosine", "dot"]) -> LanceVectorQueryBuilder:
|
||||
"""Set the distance metric to use.
|
||||
@@ -697,6 +709,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
with_row_id=self._with_row_id,
|
||||
offset=self._offset,
|
||||
fast_search=self._fast_search,
|
||||
bypass_vector_index=self._bypass_vector_index,
|
||||
ef=self._ef,
|
||||
)
|
||||
result_set = self._table._execute_query(query, batch_size)
|
||||
@@ -728,7 +741,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
return self
|
||||
|
||||
def rerank(
|
||||
self, reranker: Reranker, query_string: Optional[str] = None
|
||||
self, reranker: Reranker, query_string: Optional[str] = None
|
||||
) -> LanceVectorQueryBuilder:
|
||||
"""Rerank the results using the specified reranker.
|
||||
|
||||
@@ -947,7 +960,7 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
||||
def _validate_fts_index(self):
|
||||
if self._table._get_fts_index_path() is None:
|
||||
raise ValueError(
|
||||
"Please create a full-text search index " "to perform hybrid search."
|
||||
"Please create a full-text search index to perform hybrid search."
|
||||
)
|
||||
|
||||
def _validate_query(self, query):
|
||||
|
||||
@@ -47,6 +47,8 @@ class VectorQuery(BaseModel):
|
||||
|
||||
vector_column: str = VECTOR_COLUMN_NAME
|
||||
|
||||
bypass_vector_index: Optional[bool] = None
|
||||
|
||||
|
||||
@attrs.define
|
||||
class VectorQueryResult:
|
||||
|
||||
@@ -234,8 +234,11 @@ class RemoteTable(Table):
|
||||
query_type: str = "vector",
|
||||
vector_column_name: Optional[str] = None,
|
||||
fast_search: bool = False,
|
||||
bypass_vector_index: Optional[bool] = None,
|
||||
) -> LanceVectorQueryBuilder:
|
||||
return self.search(query, query_type, vector_column_name, fast_search)
|
||||
return self.search(
|
||||
query, query_type, vector_column_name, fast_search, bypass_vector_index
|
||||
)
|
||||
|
||||
def search(
|
||||
self,
|
||||
@@ -243,6 +246,7 @@ class RemoteTable(Table):
|
||||
query_type: str = "vector",
|
||||
vector_column_name: Optional[str] = None,
|
||||
fast_search: bool = False,
|
||||
bypass_vector_index: Optional[bool] = None,
|
||||
) -> LanceVectorQueryBuilder:
|
||||
"""Create a search query to find the nearest neighbors
|
||||
of the given query vector. We currently support [vector search][search]
|
||||
@@ -294,6 +298,15 @@ class RemoteTable(Table):
|
||||
search performance but search results will not include unindexed data.
|
||||
|
||||
- *default False*.
|
||||
|
||||
bypass_vector_index: bool, optional
|
||||
If True, the query will bypass the vector index and perform a full scan.
|
||||
An exhaustive (flat) search will be performed. The query vector will
|
||||
be compared to every vector in the table. At high scales this can be
|
||||
expensive. However, this is often still useful. For example, skipping
|
||||
the vector index can give you ground truth results which you can use to
|
||||
calculate your recall to select an appropriate value for nprobes.
|
||||
|
||||
Returns
|
||||
-------
|
||||
LanceQueryBuilder
|
||||
@@ -316,6 +329,7 @@ class RemoteTable(Table):
|
||||
query_type,
|
||||
vector_column_name=vector_column_name,
|
||||
fast_search=fast_search,
|
||||
bypass_vector_index=bypass_vector_index,
|
||||
)
|
||||
|
||||
def _execute_query(
|
||||
@@ -377,9 +391,9 @@ class RemoteTable(Table):
|
||||
params["on"] = merge._on[0]
|
||||
params["when_matched_update_all"] = str(merge._when_matched_update_all).lower()
|
||||
if merge._when_matched_update_all_condition is not None:
|
||||
params[
|
||||
"when_matched_update_all_filt"
|
||||
] = merge._when_matched_update_all_condition
|
||||
params["when_matched_update_all_filt"] = (
|
||||
merge._when_matched_update_all_condition
|
||||
)
|
||||
params["when_not_matched_insert_all"] = str(
|
||||
merge._when_not_matched_insert_all
|
||||
).lower()
|
||||
@@ -387,9 +401,9 @@ class RemoteTable(Table):
|
||||
merge._when_not_matched_by_source_delete
|
||||
).lower()
|
||||
if merge._when_not_matched_by_source_condition is not None:
|
||||
params[
|
||||
"when_not_matched_by_source_delete_filt"
|
||||
] = merge._when_not_matched_by_source_condition
|
||||
params["when_not_matched_by_source_delete_filt"] = (
|
||||
merge._when_not_matched_by_source_condition
|
||||
)
|
||||
|
||||
self._conn._client.post(
|
||||
f"/v1/table/{self._name}/merge_insert/",
|
||||
|
||||
@@ -57,4 +57,23 @@ def test_fast_search_query_with_filter():
|
||||
|
||||
table = conn["test"]
|
||||
table.schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), 2))])
|
||||
print(table.query([0, 0], fast_search=True).select(["vector"]).where("foo == bar").to_arrow())
|
||||
print(
|
||||
table.query([0, 0], fast_search=True)
|
||||
.select(["vector"])
|
||||
.where("foo == bar")
|
||||
.to_arrow()
|
||||
)
|
||||
|
||||
|
||||
def test_bypass_vector_query_with_filter():
|
||||
conn = lancedb.connect("db://client-will-be-injected", api_key="fake")
|
||||
setattr(conn, "_client", FakeLanceDBClient())
|
||||
|
||||
table = conn["test"]
|
||||
table.schema = pa.schema([pa.field("vector", pa.list_(pa.float32(), 2))])
|
||||
print(
|
||||
table.query([0, 0], bypass_vector_index=True)
|
||||
.select(["vector"])
|
||||
.where("foo == bar")
|
||||
.to_arrow()
|
||||
)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-node"
|
||||
version = "0.5.2"
|
||||
version = "0.5.2-final.1"
|
||||
description = "Serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
edition.workspace = true
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.5.2"
|
||||
version = "0.5.2-final.1"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
|
||||
Reference in New Issue
Block a user