Compare commits

..

5 Commits

Author SHA1 Message Date
Lance Release
065ffde443 Bump version: 0.4.1 → 0.4.2 2023-12-30 00:53:30 +00:00
Lance Release
c3059dc689 [python] Bump version: 0.4.2 → 0.4.3 2023-12-30 00:52:54 +00:00
Lei Xu
a9caa5f2d4 chore: bump pylance to 0.9.2 (#754) 2023-12-29 16:39:45 -08:00
Xin Hao
8411c36b96 docs: fix link (#752) 2023-12-29 15:33:24 -08:00
Chang She
7773bda7ee feat(python): first cut batch queries for remote api (#753)
issue separate requests under the hood and concatenate results
2023-12-29 15:33:03 -08:00
10 changed files with 56 additions and 52 deletions

View File

@@ -1,5 +1,5 @@
[bumpversion] [bumpversion]
current_version = 0.4.1 current_version = 0.4.2
commit = True commit = True
message = Bump version: {current_version} → {new_version} message = Bump version: {current_version} → {new_version}
tag = True tag = True

View File

@@ -5,10 +5,10 @@ exclude = ["python"]
resolver = "2" resolver = "2"
[workspace.dependencies] [workspace.dependencies]
lance = { "version" = "=0.9.1", "features" = ["dynamodb"] } lance = { "version" = "=0.9.2", "features" = ["dynamodb"] }
lance-index = { "version" = "=0.9.1" } lance-index = { "version" = "=0.9.2" }
lance-linalg = { "version" = "=0.9.1" } lance-linalg = { "version" = "=0.9.2" }
lance-testing = { "version" = "=0.9.1" } lance-testing = { "version" = "=0.9.2" }
# Note that this one does not include pyarrow # Note that this one does not include pyarrow
arrow = { version = "49.0.0", optional = false } arrow = { version = "49.0.0", optional = false }
arrow-array = "49.0" arrow-array = "49.0"

View File

@@ -7,7 +7,7 @@ LanceDB integrates with Pydantic for schema inference, data ingestion, and query
LanceDB supports to create Apache Arrow Schema from a LanceDB supports to create Apache Arrow Schema from a
[Pydantic BaseModel](https://docs.pydantic.dev/latest/api/main/#pydantic.main.BaseModel) [Pydantic BaseModel](https://docs.pydantic.dev/latest/api/main/#pydantic.main.BaseModel)
via [pydantic_to_schema()](python.md##lancedb.pydantic.pydantic_to_schema) method. via [pydantic_to_schema()](python.md#lancedb.pydantic.pydantic_to_schema) method.
::: lancedb.pydantic.pydantic_to_schema ::: lancedb.pydantic.pydantic_to_schema

View File

@@ -1,6 +1,6 @@
{ {
"name": "vectordb", "name": "vectordb",
"version": "0.4.1", "version": "0.4.2",
"description": " Serverless, low-latency vector database for AI applications", "description": " Serverless, low-latency vector database for AI applications",
"main": "dist/index.js", "main": "dist/index.js",
"types": "dist/index.d.ts", "types": "dist/index.d.ts",
@@ -81,10 +81,10 @@
} }
}, },
"optionalDependencies": { "optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.4.1", "@lancedb/vectordb-darwin-arm64": "0.4.2",
"@lancedb/vectordb-darwin-x64": "0.4.1", "@lancedb/vectordb-darwin-x64": "0.4.2",
"@lancedb/vectordb-linux-arm64-gnu": "0.4.1", "@lancedb/vectordb-linux-arm64-gnu": "0.4.2",
"@lancedb/vectordb-linux-x64-gnu": "0.4.1", "@lancedb/vectordb-linux-x64-gnu": "0.4.2",
"@lancedb/vectordb-win32-x64-msvc": "0.4.1" "@lancedb/vectordb-win32-x64-msvc": "0.4.2"
} }
} }

View File

@@ -1,5 +1,5 @@
[bumpversion] [bumpversion]
current_version = 0.4.2 current_version = 0.4.3
commit = True commit = True
message = [python] Bump version: {current_version} → {new_version} message = [python] Bump version: {current_version} → {new_version}
tag = True tag = True

View File

@@ -16,12 +16,6 @@ from __future__ import annotations
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, List, Literal, Optional, Type, Union from typing import TYPE_CHECKING, List, Literal, Optional, Type, Union
try:
# Python 3.11+
from typing import Self
except ImportError:
from typing_extensions import Self
import deprecation import deprecation
import numpy as np import numpy as np
import pyarrow as pa import pyarrow as pa
@@ -76,7 +70,7 @@ class Query(pydantic.BaseModel):
vector_column: str = VECTOR_COLUMN_NAME vector_column: str = VECTOR_COLUMN_NAME
# vector to search for # vector to search for
vector: List[float] vector: Union[List[float], List[List[float]]]
# sql filter to refine the query with # sql filter to refine the query with
filter: Optional[str] = None filter: Optional[str] = None
@@ -281,7 +275,7 @@ class LanceQueryBuilder(ABC):
self._limit = limit self._limit = limit
return self return self
def select(self, columns: list) -> Self: def select(self, columns: list) -> LanceQueryBuilder:
"""Set the columns to return. """Set the columns to return.
Parameters Parameters
@@ -297,7 +291,7 @@ class LanceQueryBuilder(ABC):
self._columns = columns self._columns = columns
return self return self
def where(self, where: str, prefilter: bool = False) -> Self: def where(self, where: str, prefilter: bool = False) -> LanceQueryBuilder:
"""Set the where clause. """Set the where clause.
Parameters Parameters
@@ -357,7 +351,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
self._vector_column = vector_column self._vector_column = vector_column
self._prefilter = False self._prefilter = False
def metric(self, metric: Literal["L2", "cosine"]) -> Self: def metric(self, metric: Literal["L2", "cosine"]) -> LanceVectorQueryBuilder:
"""Set the distance metric to use. """Set the distance metric to use.
Parameters Parameters
@@ -373,7 +367,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
self._metric = metric self._metric = metric
return self return self
def nprobes(self, nprobes: int) -> Self: def nprobes(self, nprobes: int) -> LanceVectorQueryBuilder:
"""Set the number of probes to use. """Set the number of probes to use.
Higher values will yield better recall (more likely to find vectors if Higher values will yield better recall (more likely to find vectors if
@@ -395,7 +389,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
self._nprobes = nprobes self._nprobes = nprobes
return self return self
def refine_factor(self, refine_factor: int) -> Self: def refine_factor(self, refine_factor: int) -> LanceVectorQueryBuilder:
"""Set the refine factor to use, increasing the number of vectors sampled. """Set the refine factor to use, increasing the number of vectors sampled.
As an example, a refine factor of 2 will sample 2x as many vectors as As an example, a refine factor of 2 will sample 2x as many vectors as
@@ -427,6 +421,8 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
vector and the returned vectors. vector and the returned vectors.
""" """
vector = self._query if isinstance(self._query, list) else self._query.tolist() vector = self._query if isinstance(self._query, list) else self._query.tolist()
if isinstance(vector[0], np.ndarray):
vector = [v.tolist() for v in vector]
query = Query( query = Query(
vector=vector, vector=vector,
filter=self._where, filter=self._where,
@@ -440,7 +436,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
) )
return self._table._execute_query(query) return self._table._execute_query(query)
def where(self, where: str, prefilter: bool = False) -> Self: def where(self, where: str, prefilter: bool = False) -> LanceVectorQueryBuilder:
"""Set the where clause. """Set the where clause.
Parameters Parameters

View File

@@ -11,6 +11,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import asyncio
import uuid import uuid
from functools import cached_property from functools import cached_property
from typing import Dict, Optional, Union from typing import Dict, Optional, Union
@@ -227,8 +228,24 @@ class RemoteTable(Table):
return LanceVectorQueryBuilder(self, query, vector_column_name) return LanceVectorQueryBuilder(self, query, vector_column_name)
def _execute_query(self, query: Query) -> pa.Table: def _execute_query(self, query: Query) -> pa.Table:
result = self._conn._client.query(self._name, query) if (
return self._conn._loop.run_until_complete(result).to_arrow() query.vector is not None
and len(query.vector) > 0
and not isinstance(query.vector[0], float)
):
futures = []
for v in query.vector:
v = list(v)
q = query.copy()
q.vector = v
futures.append(self._conn._client.query(self._name, q))
result = self._conn._loop.run_until_complete(asyncio.gather(*futures))
return pa.concat_tables(
[add_index(r.to_arrow(), i) for i, r in enumerate(result)]
)
else:
result = self._conn._client.query(self._name, query)
return self._conn._loop.run_until_complete(result).to_arrow()
def delete(self, predicate: str): def delete(self, predicate: str):
"""Delete rows from the table. """Delete rows from the table.
@@ -342,3 +359,11 @@ class RemoteTable(Table):
self._conn._loop.run_until_complete( self._conn._loop.run_until_complete(
self._conn._client.post(f"/v1/table/{self._name}/update/", data=payload) self._conn._client.post(f"/v1/table/{self._name}/update/", data=payload)
) )
def add_index(tbl: pa.Table, i: int) -> pa.Table:
    """Insert a ``query_index`` column at position 0 of ``tbl``.

    Every row of the added column holds the same value ``i``, stored as
    ``uint32``. The batch branch of ``RemoteTable._execute_query`` calls this
    on each per-vector result, passing the vector's position in the batch,
    before the results are concatenated.

    Parameters
    ----------
    tbl: pa.Table
        The result table to tag.
    i: int
        The value written to every row of the ``query_index`` column.

    Returns
    -------
    pa.Table
        The table returned by ``tbl.add_column`` with ``query_index`` as its
        first column.
    """
    index_type = pa.uint32()
    index_field = pa.field("query_index", index_type)
    # One copy of ``i`` per row so the new column matches the table length.
    index_values = pa.array([i] * len(tbl), index_type)
    return tbl.add_column(0, index_field, index_values)

View File

@@ -1,9 +1,9 @@
[project] [project]
name = "lancedb" name = "lancedb"
version = "0.4.2" version = "0.4.3"
dependencies = [ dependencies = [
"deprecation", "deprecation",
"pylance==0.9.1", "pylance==0.9.2",
"ratelimiter~=1.0", "ratelimiter~=1.0",
"retry>=0.9.2", "retry>=0.9.2",
"tqdm>=4.27.0", "tqdm>=4.27.0",
@@ -15,8 +15,7 @@ dependencies = [
"pyyaml>=6.0", "pyyaml>=6.0",
"click>=8.1.7", "click>=8.1.7",
"requests>=2.31.0", "requests>=2.31.0",
"overrides>=0.7", "overrides>=0.7"
"typing_extensions>=4.7",
] ]
description = "lancedb" description = "lancedb"
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }] authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
@@ -50,27 +49,11 @@ classifiers = [
repository = "https://github.com/lancedb/lancedb" repository = "https://github.com/lancedb/lancedb"
[project.optional-dependencies] [project.optional-dependencies]
tests = [ tests = ["pandas>=1.4", "pytest", "pytest-mock", "pytest-asyncio", "requests", "duckdb", "pytz"]
"pandas>=1.4",
"pytest",
"pytest-mock",
"pytest-asyncio",
"requests",
"duckdb",
"pytz"
]
dev = ["ruff", "pre-commit", "black"] dev = ["ruff", "pre-commit", "black"]
docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"] docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"]
clip = ["torch", "pillow", "open-clip"] clip = ["torch", "pillow", "open-clip"]
embeddings = [ embeddings = ["openai>=1.6.1", "sentence-transformers", "torch", "pillow", "open-clip-torch", "cohere", "InstructorEmbedding"]
"openai>=1.6.1",
"sentence-transformers",
"torch",
"pillow",
"open-clip-torch",
"cohere",
"InstructorEmbedding"
]
[project.scripts] [project.scripts]
lancedb = "lancedb.cli.cli:cli" lancedb = "lancedb.cli.cli:cli"

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "vectordb-node" name = "vectordb-node"
version = "0.4.1" version = "0.4.2"
description = "Serverless, low-latency vector database for AI applications" description = "Serverless, low-latency vector database for AI applications"
license = "Apache-2.0" license = "Apache-2.0"
edition = "2018" edition = "2018"

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "vectordb" name = "vectordb"
version = "0.4.1" version = "0.4.2"
edition = "2021" edition = "2021"
description = "LanceDB: A serverless, low-latency vector database for AI applications" description = "LanceDB: A serverless, low-latency vector database for AI applications"
license = "Apache-2.0" license = "Apache-2.0"