mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-07 12:22:59 +00:00
Compare commits
5 Commits
lei/batch_
...
v0.4.2
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
065ffde443 | ||
|
|
c3059dc689 | ||
|
|
a9caa5f2d4 | ||
|
|
8411c36b96 | ||
|
|
7773bda7ee |
@@ -1,5 +1,5 @@
|
|||||||
[bumpversion]
|
[bumpversion]
|
||||||
current_version = 0.4.1
|
current_version = 0.4.2
|
||||||
commit = True
|
commit = True
|
||||||
message = Bump version: {current_version} → {new_version}
|
message = Bump version: {current_version} → {new_version}
|
||||||
tag = True
|
tag = True
|
||||||
|
|||||||
@@ -5,10 +5,10 @@ exclude = ["python"]
|
|||||||
resolver = "2"
|
resolver = "2"
|
||||||
|
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
lance = { "version" = "=0.9.1", "features" = ["dynamodb"] }
|
lance = { "version" = "=0.9.2", "features" = ["dynamodb"] }
|
||||||
lance-index = { "version" = "=0.9.1" }
|
lance-index = { "version" = "=0.9.2" }
|
||||||
lance-linalg = { "version" = "=0.9.1" }
|
lance-linalg = { "version" = "=0.9.2" }
|
||||||
lance-testing = { "version" = "=0.9.1" }
|
lance-testing = { "version" = "=0.9.2" }
|
||||||
# Note that this one does not include pyarrow
|
# Note that this one does not include pyarrow
|
||||||
arrow = { version = "49.0.0", optional = false }
|
arrow = { version = "49.0.0", optional = false }
|
||||||
arrow-array = "49.0"
|
arrow-array = "49.0"
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ LanceDB integrates with Pydantic for schema inference, data ingestion, and query
|
|||||||
|
|
||||||
LanceDB supports to create Apache Arrow Schema from a
|
LanceDB supports to create Apache Arrow Schema from a
|
||||||
[Pydantic BaseModel](https://docs.pydantic.dev/latest/api/main/#pydantic.main.BaseModel)
|
[Pydantic BaseModel](https://docs.pydantic.dev/latest/api/main/#pydantic.main.BaseModel)
|
||||||
via [pydantic_to_schema()](python.md##lancedb.pydantic.pydantic_to_schema) method.
|
via [pydantic_to_schema()](python.md#lancedb.pydantic.pydantic_to_schema) method.
|
||||||
|
|
||||||
::: lancedb.pydantic.pydantic_to_schema
|
::: lancedb.pydantic.pydantic_to_schema
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.4.1",
|
"version": "0.4.2",
|
||||||
"description": " Serverless, low-latency vector database for AI applications",
|
"description": " Serverless, low-latency vector database for AI applications",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"types": "dist/index.d.ts",
|
"types": "dist/index.d.ts",
|
||||||
@@ -81,10 +81,10 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.4.1",
|
"@lancedb/vectordb-darwin-arm64": "0.4.2",
|
||||||
"@lancedb/vectordb-darwin-x64": "0.4.1",
|
"@lancedb/vectordb-darwin-x64": "0.4.2",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.4.1",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.4.2",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.4.1",
|
"@lancedb/vectordb-linux-x64-gnu": "0.4.2",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.4.1"
|
"@lancedb/vectordb-win32-x64-msvc": "0.4.2"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[bumpversion]
|
[bumpversion]
|
||||||
current_version = 0.4.2
|
current_version = 0.4.3
|
||||||
commit = True
|
commit = True
|
||||||
message = [python] Bump version: {current_version} → {new_version}
|
message = [python] Bump version: {current_version} → {new_version}
|
||||||
tag = True
|
tag = True
|
||||||
|
|||||||
@@ -16,12 +16,6 @@ from __future__ import annotations
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import TYPE_CHECKING, List, Literal, Optional, Type, Union
|
from typing import TYPE_CHECKING, List, Literal, Optional, Type, Union
|
||||||
|
|
||||||
try:
|
|
||||||
# Python 3.11+
|
|
||||||
from typing import Self
|
|
||||||
except ImportError:
|
|
||||||
from typing_extensions import Self
|
|
||||||
|
|
||||||
import deprecation
|
import deprecation
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
@@ -76,7 +70,7 @@ class Query(pydantic.BaseModel):
|
|||||||
vector_column: str = VECTOR_COLUMN_NAME
|
vector_column: str = VECTOR_COLUMN_NAME
|
||||||
|
|
||||||
# vector to search for
|
# vector to search for
|
||||||
vector: List[float]
|
vector: Union[List[float], List[List[float]]]
|
||||||
|
|
||||||
# sql filter to refine the query with
|
# sql filter to refine the query with
|
||||||
filter: Optional[str] = None
|
filter: Optional[str] = None
|
||||||
@@ -281,7 +275,7 @@ class LanceQueryBuilder(ABC):
|
|||||||
self._limit = limit
|
self._limit = limit
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def select(self, columns: list) -> Self:
|
def select(self, columns: list) -> LanceQueryBuilder:
|
||||||
"""Set the columns to return.
|
"""Set the columns to return.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
@@ -297,7 +291,7 @@ class LanceQueryBuilder(ABC):
|
|||||||
self._columns = columns
|
self._columns = columns
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def where(self, where: str, prefilter: bool = False) -> Self:
|
def where(self, where: str, prefilter: bool = False) -> LanceQueryBuilder:
|
||||||
"""Set the where clause.
|
"""Set the where clause.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
@@ -357,7 +351,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
|||||||
self._vector_column = vector_column
|
self._vector_column = vector_column
|
||||||
self._prefilter = False
|
self._prefilter = False
|
||||||
|
|
||||||
def metric(self, metric: Literal["L2", "cosine"]) -> Self:
|
def metric(self, metric: Literal["L2", "cosine"]) -> LanceVectorQueryBuilder:
|
||||||
"""Set the distance metric to use.
|
"""Set the distance metric to use.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
@@ -373,7 +367,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
|||||||
self._metric = metric
|
self._metric = metric
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def nprobes(self, nprobes: int) -> Self:
|
def nprobes(self, nprobes: int) -> LanceVectorQueryBuilder:
|
||||||
"""Set the number of probes to use.
|
"""Set the number of probes to use.
|
||||||
|
|
||||||
Higher values will yield better recall (more likely to find vectors if
|
Higher values will yield better recall (more likely to find vectors if
|
||||||
@@ -395,7 +389,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
|||||||
self._nprobes = nprobes
|
self._nprobes = nprobes
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def refine_factor(self, refine_factor: int) -> Self:
|
def refine_factor(self, refine_factor: int) -> LanceVectorQueryBuilder:
|
||||||
"""Set the refine factor to use, increasing the number of vectors sampled.
|
"""Set the refine factor to use, increasing the number of vectors sampled.
|
||||||
|
|
||||||
As an example, a refine factor of 2 will sample 2x as many vectors as
|
As an example, a refine factor of 2 will sample 2x as many vectors as
|
||||||
@@ -427,6 +421,8 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
|||||||
vector and the returned vectors.
|
vector and the returned vectors.
|
||||||
"""
|
"""
|
||||||
vector = self._query if isinstance(self._query, list) else self._query.tolist()
|
vector = self._query if isinstance(self._query, list) else self._query.tolist()
|
||||||
|
if isinstance(vector[0], np.ndarray):
|
||||||
|
vector = [v.tolist() for v in vector]
|
||||||
query = Query(
|
query = Query(
|
||||||
vector=vector,
|
vector=vector,
|
||||||
filter=self._where,
|
filter=self._where,
|
||||||
@@ -440,7 +436,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
|||||||
)
|
)
|
||||||
return self._table._execute_query(query)
|
return self._table._execute_query(query)
|
||||||
|
|
||||||
def where(self, where: str, prefilter: bool = False) -> Self:
|
def where(self, where: str, prefilter: bool = False) -> LanceVectorQueryBuilder:
|
||||||
"""Set the where clause.
|
"""Set the where clause.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
|
|||||||
@@ -11,6 +11,7 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import uuid
|
import uuid
|
||||||
from functools import cached_property
|
from functools import cached_property
|
||||||
from typing import Dict, Optional, Union
|
from typing import Dict, Optional, Union
|
||||||
@@ -227,8 +228,24 @@ class RemoteTable(Table):
|
|||||||
return LanceVectorQueryBuilder(self, query, vector_column_name)
|
return LanceVectorQueryBuilder(self, query, vector_column_name)
|
||||||
|
|
||||||
def _execute_query(self, query: Query) -> pa.Table:
|
def _execute_query(self, query: Query) -> pa.Table:
|
||||||
result = self._conn._client.query(self._name, query)
|
if (
|
||||||
return self._conn._loop.run_until_complete(result).to_arrow()
|
query.vector is not None
|
||||||
|
and len(query.vector) > 0
|
||||||
|
and not isinstance(query.vector[0], float)
|
||||||
|
):
|
||||||
|
futures = []
|
||||||
|
for v in query.vector:
|
||||||
|
v = list(v)
|
||||||
|
q = query.copy()
|
||||||
|
q.vector = v
|
||||||
|
futures.append(self._conn._client.query(self._name, q))
|
||||||
|
result = self._conn._loop.run_until_complete(asyncio.gather(*futures))
|
||||||
|
return pa.concat_tables(
|
||||||
|
[add_index(r.to_arrow(), i) for i, r in enumerate(result)]
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
result = self._conn._client.query(self._name, query)
|
||||||
|
return self._conn._loop.run_until_complete(result).to_arrow()
|
||||||
|
|
||||||
def delete(self, predicate: str):
|
def delete(self, predicate: str):
|
||||||
"""Delete rows from the table.
|
"""Delete rows from the table.
|
||||||
@@ -342,3 +359,11 @@ class RemoteTable(Table):
|
|||||||
self._conn._loop.run_until_complete(
|
self._conn._loop.run_until_complete(
|
||||||
self._conn._client.post(f"/v1/table/{self._name}/update/", data=payload)
|
self._conn._client.post(f"/v1/table/{self._name}/update/", data=payload)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def add_index(tbl: pa.Table, i: int) -> pa.Table:
|
||||||
|
return tbl.add_column(
|
||||||
|
0,
|
||||||
|
pa.field("query_index", pa.uint32()),
|
||||||
|
pa.array([i] * len(tbl), pa.uint32()),
|
||||||
|
)
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "lancedb"
|
name = "lancedb"
|
||||||
version = "0.4.2"
|
version = "0.4.3"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"deprecation",
|
"deprecation",
|
||||||
"pylance==0.9.1",
|
"pylance==0.9.2",
|
||||||
"ratelimiter~=1.0",
|
"ratelimiter~=1.0",
|
||||||
"retry>=0.9.2",
|
"retry>=0.9.2",
|
||||||
"tqdm>=4.27.0",
|
"tqdm>=4.27.0",
|
||||||
@@ -15,8 +15,7 @@ dependencies = [
|
|||||||
"pyyaml>=6.0",
|
"pyyaml>=6.0",
|
||||||
"click>=8.1.7",
|
"click>=8.1.7",
|
||||||
"requests>=2.31.0",
|
"requests>=2.31.0",
|
||||||
"overrides>=0.7",
|
"overrides>=0.7"
|
||||||
"typing_extensions>=4.7",
|
|
||||||
]
|
]
|
||||||
description = "lancedb"
|
description = "lancedb"
|
||||||
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
|
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
|
||||||
@@ -50,27 +49,11 @@ classifiers = [
|
|||||||
repository = "https://github.com/lancedb/lancedb"
|
repository = "https://github.com/lancedb/lancedb"
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
tests = [
|
tests = ["pandas>=1.4", "pytest", "pytest-mock", "pytest-asyncio", "requests", "duckdb", "pytz"]
|
||||||
"pandas>=1.4",
|
|
||||||
"pytest",
|
|
||||||
"pytest-mock",
|
|
||||||
"pytest-asyncio",
|
|
||||||
"requests",
|
|
||||||
"duckdb",
|
|
||||||
"pytz"
|
|
||||||
]
|
|
||||||
dev = ["ruff", "pre-commit", "black"]
|
dev = ["ruff", "pre-commit", "black"]
|
||||||
docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"]
|
docs = ["mkdocs", "mkdocs-jupyter", "mkdocs-material", "mkdocstrings[python]"]
|
||||||
clip = ["torch", "pillow", "open-clip"]
|
clip = ["torch", "pillow", "open-clip"]
|
||||||
embeddings = [
|
embeddings = ["openai>=1.6.1", "sentence-transformers", "torch", "pillow", "open-clip-torch", "cohere", "InstructorEmbedding"]
|
||||||
"openai>=1.6.1",
|
|
||||||
"sentence-transformers",
|
|
||||||
"torch",
|
|
||||||
"pillow",
|
|
||||||
"open-clip-torch",
|
|
||||||
"cohere",
|
|
||||||
"InstructorEmbedding"
|
|
||||||
]
|
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
lancedb = "lancedb.cli.cli:cli"
|
lancedb = "lancedb.cli.cli:cli"
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "vectordb-node"
|
name = "vectordb-node"
|
||||||
version = "0.4.1"
|
version = "0.4.2"
|
||||||
description = "Serverless, low-latency vector database for AI applications"
|
description = "Serverless, low-latency vector database for AI applications"
|
||||||
license = "Apache-2.0"
|
license = "Apache-2.0"
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "vectordb"
|
name = "vectordb"
|
||||||
version = "0.4.1"
|
version = "0.4.2"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||||
license = "Apache-2.0"
|
license = "Apache-2.0"
|
||||||
|
|||||||
Reference in New Issue
Block a user