Compare commits

..

4 Commits

Author SHA1 Message Date
Lance Release
c3059dc689 [python] Bump version: 0.4.2 → 0.4.3 2023-12-30 00:52:54 +00:00
Lei Xu
a9caa5f2d4 chore: bump pylance to 0.9.2 (#754) 2023-12-29 16:39:45 -08:00
Xin Hao
8411c36b96 docs: fix link (#752) 2023-12-29 15:33:24 -08:00
Chang She
7773bda7ee feat(python): first cut batch queries for remote api (#753)
issue separate requests under the hood and concatenate results
2023-12-29 15:33:03 -08:00
6 changed files with 47 additions and 43 deletions

View File

@@ -5,10 +5,10 @@ exclude = ["python"]
resolver = "2"
[workspace.dependencies]
lance = { "version" = "=0.9.1", "features" = ["dynamodb"] }
lance-index = { "version" = "=0.9.1" }
lance-linalg = { "version" = "=0.9.1" }
lance-testing = { "version" = "=0.9.1" }
lance = { "version" = "=0.9.2", "features" = ["dynamodb"] }
lance-index = { "version" = "=0.9.2" }
lance-linalg = { "version" = "=0.9.2" }
lance-testing = { "version" = "=0.9.2" }
# Note that this one does not include pyarrow
arrow = { version = "49.0.0", optional = false }
arrow-array = "49.0"

View File

@@ -7,7 +7,7 @@ LanceDB integrates with Pydantic for schema inference, data ingestion, and query
LanceDB supports creating an Apache Arrow Schema from a
[Pydantic BaseModel](https://docs.pydantic.dev/latest/api/main/#pydantic.main.BaseModel)
via [pydantic_to_schema()](python.md##lancedb.pydantic.pydantic_to_schema) method.
via [pydantic_to_schema()](python.md#lancedb.pydantic.pydantic_to_schema) method.
::: lancedb.pydantic.pydantic_to_schema

View File

@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.4.2
current_version = 0.4.3
commit = True
message = [python] Bump version: {current_version} → {new_version}
tag = True

View File

@@ -16,12 +16,6 @@ from __future__ import annotations
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, List, Literal, Optional, Type, Union
try:
# Python 3.11+
from typing import Self
except ImportError:
from typing_extensions import Self
import deprecation
import numpy as np
import pyarrow as pa
@@ -76,7 +70,7 @@ class Query(pydantic.BaseModel):
vector_column: str = VECTOR_COLUMN_NAME
# vector to search for
vector: List[float]
vector: Union[List[float], List[List[float]]]
# sql filter to refine the query with
filter: Optional[str] = None
@@ -281,7 +275,7 @@ class LanceQueryBuilder(ABC):
self._limit = limit
return self
def select(self, columns: list) -> Self:
def select(self, columns: list) -> LanceQueryBuilder:
"""Set the columns to return.
Parameters
@@ -297,7 +291,7 @@ class LanceQueryBuilder(ABC):
self._columns = columns
return self
def where(self, where: str, prefilter: bool = False) -> Self:
def where(self, where: str, prefilter: bool = False) -> LanceQueryBuilder:
"""Set the where clause.
Parameters
@@ -357,7 +351,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
self._vector_column = vector_column
self._prefilter = False
def metric(self, metric: Literal["L2", "cosine"]) -> Self:
def metric(self, metric: Literal["L2", "cosine"]) -> LanceVectorQueryBuilder:
"""Set the distance metric to use.
Parameters
@@ -373,7 +367,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
self._metric = metric
return self
def nprobes(self, nprobes: int) -> Self:
def nprobes(self, nprobes: int) -> LanceVectorQueryBuilder:
"""Set the number of probes to use.
Higher values will yield better recall (more likely to find vectors if
@@ -395,7 +389,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
self._nprobes = nprobes
return self
def refine_factor(self, refine_factor: int) -> Self:
def refine_factor(self, refine_factor: int) -> LanceVectorQueryBuilder:
"""Set the refine factor to use, increasing the number of vectors sampled.
As an example, a refine factor of 2 will sample 2x as many vectors as
@@ -427,6 +421,8 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
vector and the returned vectors.
"""
vector = self._query if isinstance(self._query, list) else self._query.tolist()
if isinstance(vector[0], np.ndarray):
vector = [v.tolist() for v in vector]
query = Query(
vector=vector,
filter=self._where,
@@ -440,7 +436,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
)
return self._table._execute_query(query)
def where(self, where: str, prefilter: bool = False) -> Self:
def where(self, where: str, prefilter: bool = False) -> LanceVectorQueryBuilder:
"""Set the where clause.
Parameters

View File

@@ -11,6 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import asyncio
import uuid
from functools import cached_property
from typing import Dict, Optional, Union
@@ -227,8 +228,24 @@ class RemoteTable(Table):
return LanceVectorQueryBuilder(self, query, vector_column_name)
def _execute_query(self, query: Query) -> pa.Table:
# Run `query` against the remote table and return the result as an Arrow table.
# NOTE(review): the next two lines look like the pre-change implementation
# (the removed side of this hunk); the diff markers were lost in this view,
# so confirm before reading them as live code.
result = self._conn._client.query(self._name, query)
return self._conn._loop.run_until_complete(result).to_arrow()
# Batch path: taken when the query carries a non-empty vector whose first
# element is not a float, i.e. the vector is treated as a list of vectors.
if (
query.vector is not None
and len(query.vector) > 0
and not isinstance(query.vector[0], float)
):
# One pending request is collected per query vector.
futures = []
for v in query.vector:
v = list(v)
# Copy the query so each request carries a single vector; only the
# `vector` attribute is reassigned on the copy.
q = query.copy()
q.vector = v
futures.append(self._conn._client.query(self._name, q))
# Await all requests together on the connection's event loop.
result = self._conn._loop.run_until_complete(asyncio.gather(*futures))
# Tag each per-query result with its position in the batch via
# add_index, then concatenate everything into one table.
return pa.concat_tables(
[add_index(r.to_arrow(), i) for i, r in enumerate(result)]
)
else:
# Single-vector path: one request, converted straight to Arrow.
result = self._conn._client.query(self._name, query)
return self._conn._loop.run_until_complete(result).to_arrow()
def delete(self, predicate: str):
"""Delete rows from the table.
@@ -342,3 +359,11 @@ class RemoteTable(Table):
self._conn._loop.run_until_complete(
self._conn._client.post(f"/v1/table/{self._name}/update/", data=payload)
)
def add_index(tbl: pa.Table, i: int) -> pa.Table:
    """Return ``tbl`` with a leading ``query_index`` column filled with ``i``.

    Parameters
    ----------
    tbl: pa.Table
        The table to tag.
    i: int
        The value written to every row of the new column.

    Returns
    -------
    pa.Table
        The result of inserting a ``uint32`` column named ``query_index``
        at position 0 of ``tbl``.
    """
    index_field = pa.field("query_index", pa.uint32())
    # One copy of `i` per row, so the column length matches the table.
    index_values = pa.array([i for _ in range(len(tbl))], pa.uint32())
    return tbl.add_column(0, index_field, index_values)

View File

@@ -1,9 +1,9 @@
[project]
name = "lancedb"
version = "0.4.2"
version = "0.4.3"
dependencies = [
"deprecation",
"pylance==0.9.1",
"pylance==0.9.2",
"ratelimiter~=1.0",
"retry>=0.9.2",
"tqdm>=4.27.0",
@@ -15,8 +15,7 @@ dependencies = [
"pyyaml>=6.0",
"click>=8.1.7",
"requests>=2.31.0",
"overrides>=0.7",
"typing_extensions>=4.7",
"overrides>=0.7"
]
description = "lancedb"
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
@@ -50,27 +49,11 @@ classifiers = [
repository = "https://github.com/lancedb/lancedb"
[project.optional-dependencies]
tests = [
"pandas>=1.4",
"pytest",
"pytest-mock",
"pytest-asyncio",
"requests",
"duckdb",
"pytz"
]
tests = ["pandas>=1.4", "pytest", "pytest-mock", "pytest-asyncio", "requests", "duckdb", "pytz"]
dev = ["ruff", "pre-commit", "black"]
docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"]
clip = ["torch", "pillow", "open-clip"]
embeddings = [
"openai>=1.6.1",
"sentence-transformers",
"torch",
"pillow",
"open-clip-torch",
"cohere",
"InstructorEmbedding"
]
embeddings = ["openai>=1.6.1", "sentence-transformers", "torch", "pillow", "open-clip-torch", "cohere", "InstructorEmbedding"]
[project.scripts]
lancedb = "lancedb.cli.cli:cli"