Compare commits

..

1 Commits

Author SHA1 Message Date
Lei Xu
3dc8b3305e typing Self 2023-12-29 09:16:46 -08:00
6 changed files with 43 additions and 47 deletions

View File

@@ -5,10 +5,10 @@ exclude = ["python"]
resolver = "2"
[workspace.dependencies]
lance = { "version" = "=0.9.2", "features" = ["dynamodb"] }
lance-index = { "version" = "=0.9.2" }
lance-linalg = { "version" = "=0.9.2" }
lance-testing = { "version" = "=0.9.2" }
lance = { "version" = "=0.9.1", "features" = ["dynamodb"] }
lance-index = { "version" = "=0.9.1" }
lance-linalg = { "version" = "=0.9.1" }
lance-testing = { "version" = "=0.9.1" }
# Note that this one does not include pyarrow
arrow = { version = "49.0.0", optional = false }
arrow-array = "49.0"

View File

@@ -7,7 +7,7 @@ LanceDB integrates with Pydantic for schema inference, data ingestion, and query
LanceDB supports creating an Apache Arrow Schema from a
[Pydantic BaseModel](https://docs.pydantic.dev/latest/api/main/#pydantic.main.BaseModel)
via [pydantic_to_schema()](python.md#lancedb.pydantic.pydantic_to_schema) method.
via [pydantic_to_schema()](python.md##lancedb.pydantic.pydantic_to_schema) method.
::: lancedb.pydantic.pydantic_to_schema

View File

@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.4.3
current_version = 0.4.2
commit = True
message = [python] Bump version: {current_version} → {new_version}
tag = True

View File

@@ -16,6 +16,12 @@ from __future__ import annotations
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, List, Literal, Optional, Type, Union
try:
# Python 3.11+
from typing import Self
except ImportError:
from typing_extensions import Self
import deprecation
import numpy as np
import pyarrow as pa
@@ -70,7 +76,7 @@ class Query(pydantic.BaseModel):
vector_column: str = VECTOR_COLUMN_NAME
# vector to search for
vector: Union[List[float], List[List[float]]]
vector: List[float]
# sql filter to refine the query with
filter: Optional[str] = None
@@ -275,7 +281,7 @@ class LanceQueryBuilder(ABC):
self._limit = limit
return self
def select(self, columns: list) -> LanceQueryBuilder:
def select(self, columns: list) -> Self:
"""Set the columns to return.
Parameters
@@ -291,7 +297,7 @@ class LanceQueryBuilder(ABC):
self._columns = columns
return self
def where(self, where: str, prefilter: bool = False) -> LanceQueryBuilder:
def where(self, where: str, prefilter: bool = False) -> Self:
"""Set the where clause.
Parameters
@@ -351,7 +357,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
self._vector_column = vector_column
self._prefilter = False
def metric(self, metric: Literal["L2", "cosine"]) -> LanceVectorQueryBuilder:
def metric(self, metric: Literal["L2", "cosine"]) -> Self:
"""Set the distance metric to use.
Parameters
@@ -367,7 +373,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
self._metric = metric
return self
def nprobes(self, nprobes: int) -> LanceVectorQueryBuilder:
def nprobes(self, nprobes: int) -> Self:
"""Set the number of probes to use.
Higher values will yield better recall (more likely to find vectors if
@@ -389,7 +395,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
self._nprobes = nprobes
return self
def refine_factor(self, refine_factor: int) -> LanceVectorQueryBuilder:
def refine_factor(self, refine_factor: int) -> Self:
"""Set the refine factor to use, increasing the number of vectors sampled.
As an example, a refine factor of 2 will sample 2x as many vectors as
@@ -421,8 +427,6 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
vector and the returned vectors.
"""
vector = self._query if isinstance(self._query, list) else self._query.tolist()
if isinstance(vector[0], np.ndarray):
vector = [v.tolist() for v in vector]
query = Query(
vector=vector,
filter=self._where,
@@ -436,7 +440,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
)
return self._table._execute_query(query)
def where(self, where: str, prefilter: bool = False) -> LanceVectorQueryBuilder:
def where(self, where: str, prefilter: bool = False) -> Self:
"""Set the where clause.
Parameters

View File

@@ -11,7 +11,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import asyncio
import uuid
from functools import cached_property
from typing import Dict, Optional, Union
@@ -228,24 +227,8 @@ class RemoteTable(Table):
return LanceVectorQueryBuilder(self, query, vector_column_name)
def _execute_query(self, query: Query) -> pa.Table:
if (
query.vector is not None
and len(query.vector) > 0
and not isinstance(query.vector[0], float)
):
futures = []
for v in query.vector:
v = list(v)
q = query.copy()
q.vector = v
futures.append(self._conn._client.query(self._name, q))
result = self._conn._loop.run_until_complete(asyncio.gather(*futures))
return pa.concat_tables(
[add_index(r.to_arrow(), i) for i, r in enumerate(result)]
)
else:
result = self._conn._client.query(self._name, query)
return self._conn._loop.run_until_complete(result).to_arrow()
result = self._conn._client.query(self._name, query)
return self._conn._loop.run_until_complete(result).to_arrow()
def delete(self, predicate: str):
"""Delete rows from the table.
@@ -359,11 +342,3 @@ class RemoteTable(Table):
self._conn._loop.run_until_complete(
self._conn._client.post(f"/v1/table/{self._name}/update/", data=payload)
)
def add_index(tbl: pa.Table, i: int) -> pa.Table:
    """Return ``tbl`` with a ``query_index`` column inserted at position 0.

    Parameters
    ----------
    tbl: pa.Table
        The table to extend.
    i: int
        The value written to every row of the new column.

    Returns
    -------
    pa.Table
        The result of ``tbl.add_column`` with a ``uint32`` column named
        ``query_index`` placed first and filled with ``i``.
    """
    index_field = pa.field("query_index", pa.uint32())
    # One copy of ``i`` per row so the new column matches the table length.
    index_values = pa.array([i] * len(tbl), pa.uint32())
    return tbl.add_column(0, index_field, index_values)

View File

@@ -1,9 +1,9 @@
[project]
name = "lancedb"
version = "0.4.3"
version = "0.4.2"
dependencies = [
"deprecation",
"pylance==0.9.2",
"pylance==0.9.1",
"ratelimiter~=1.0",
"retry>=0.9.2",
"tqdm>=4.27.0",
@@ -15,7 +15,8 @@ dependencies = [
"pyyaml>=6.0",
"click>=8.1.7",
"requests>=2.31.0",
"overrides>=0.7"
"overrides>=0.7",
"typing_extensions>=4.7",
]
description = "lancedb"
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
@@ -49,11 +50,27 @@ classifiers = [
repository = "https://github.com/lancedb/lancedb"
[project.optional-dependencies]
tests = ["pandas>=1.4", "pytest", "pytest-mock", "pytest-asyncio", "requests", "duckdb", "pytz"]
tests = [
"pandas>=1.4",
"pytest",
"pytest-mock",
"pytest-asyncio",
"requests",
"duckdb",
"pytz"
]
dev = ["ruff", "pre-commit", "black"]
docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"]
clip = ["torch", "pillow", "open-clip"]
embeddings = ["openai>=1.6.1", "sentence-transformers", "torch", "pillow", "open-clip-torch", "cohere", "InstructorEmbedding"]
embeddings = [
"openai>=1.6.1",
"sentence-transformers",
"torch",
"pillow",
"open-clip-torch",
"cohere",
"InstructorEmbedding"
]
[project.scripts]
lancedb = "lancedb.cli.cli:cli"