mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-08 21:02:58 +00:00
Compare commits
12 Commits
python-v0.
...
lei/better
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
09585390f7 | ||
|
|
8391ffee84 | ||
|
|
fe8848efb9 | ||
|
|
213c313b99 | ||
|
|
157e995a43 | ||
|
|
ab97e5d632 | ||
|
|
87e9a0250f | ||
|
|
e587a17a64 | ||
|
|
2f1f9f6338 | ||
|
|
a34fa4df26 | ||
|
|
e20979b335 | ||
|
|
08689c345d |
@@ -1,5 +1,5 @@
|
|||||||
[bumpversion]
|
[bumpversion]
|
||||||
current_version = 0.2.3
|
current_version = 0.2.4
|
||||||
commit = True
|
commit = True
|
||||||
message = Bump version: {current_version} → {new_version}
|
message = Bump version: {current_version} → {new_version}
|
||||||
tag = True
|
tag = True
|
||||||
|
|||||||
74
node/package-lock.json
generated
74
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.2.3",
|
"version": "0.2.4",
|
||||||
"lockfileVersion": 2,
|
"lockfileVersion": 2,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.2.3",
|
"version": "0.2.4",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
@@ -51,11 +51,11 @@
|
|||||||
"typescript": "*"
|
"typescript": "*"
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.2.3",
|
"@lancedb/vectordb-darwin-arm64": "0.2.4",
|
||||||
"@lancedb/vectordb-darwin-x64": "0.2.3",
|
"@lancedb/vectordb-darwin-x64": "0.2.4",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.2.3",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.2.4",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.2.3",
|
"@lancedb/vectordb-linux-x64-gnu": "0.2.4",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.2.3"
|
"@lancedb/vectordb-win32-x64-msvc": "0.2.4"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@apache-arrow/ts": {
|
"node_modules/@apache-arrow/ts": {
|
||||||
@@ -315,9 +315,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
||||||
"version": "0.2.3",
|
"version": "0.2.4",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.2.3.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.2.4.tgz",
|
||||||
"integrity": "sha512-/9dRCXrV/UsZv3fqAC/Q+D2FPKXMRprcb+a77tt4I0Iy5iGT55UDRfpaXvmJeKquhTJkZ0AuyoK5BmOh7cY41w==",
|
"integrity": "sha512-MqiZXamHYEOfguPsHWLBQ56IabIN6Az8u2Hx8LCyXcxW9gcyJZMSAfJc+CcA4KYHKotv0KsVBhgxZ3kaZQQyiw==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"arm64"
|
"arm64"
|
||||||
],
|
],
|
||||||
@@ -327,9 +327,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-darwin-x64": {
|
"node_modules/@lancedb/vectordb-darwin-x64": {
|
||||||
"version": "0.2.3",
|
"version": "0.2.4",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.2.3.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.2.4.tgz",
|
||||||
"integrity": "sha512-p06WkjmdVwDxkH8ghIWh59SCgUhjXBpy1gQISgktouymqfoFbBHz7vmeI6VO1oBA5ji6vSgGZxqjmeLRKM6blA==",
|
"integrity": "sha512-DzL+mw5WhKDwXdEFlPh8M9zSDhGnfks7NvEh6ZqKbU6znH206YB7g3OA4WfFyV579IIEQ8jd4v/XDthNzQKuSA==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
@@ -339,9 +339,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
||||||
"version": "0.2.3",
|
"version": "0.2.4",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.2.3.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.2.4.tgz",
|
||||||
"integrity": "sha512-cSDcJgfbnRmCXZ3AoRWpCAa07PMdB/k8m1LjmxnhpOnP1ohg1eUl99jwPCgd+5GK+iZmezRqbyO+YXlgsCp7GQ==",
|
"integrity": "sha512-LP1nNfIpFxCgcCMlIQdseDX9dZU27TNhCL41xar8euqcetY5uKvi0YqhiVlpNO85Ss1FRQBgQ/GtnOM6Bo7oBQ==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"arm64"
|
"arm64"
|
||||||
],
|
],
|
||||||
@@ -351,9 +351,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
||||||
"version": "0.2.3",
|
"version": "0.2.4",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.2.3.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.2.4.tgz",
|
||||||
"integrity": "sha512-AFA3J4hBYapGC37iXheiN6tGruitx5bmoWXkUcDv/qAaE4tizVZHB9cgx9ThTB0RDsvZEOZ5zCy7BOzPH+oCOg==",
|
"integrity": "sha512-m4RhOI5JJWPU9Ip2LlRIzXu4mwIv9M//OyAuTLiLKRm8726jQHhYi5VFUEtNzqY0o0p6pS0b3XbifYQ+cyJn3Q==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
@@ -363,9 +363,9 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
||||||
"version": "0.2.3",
|
"version": "0.2.4",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.2.3.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.2.4.tgz",
|
||||||
"integrity": "sha512-LI1mz1HdcpNXTM7HbcLdXz0qvUU4LxSqRC7/kMU918VlOeWy/PnryRrjHnCjcgciGzu1rVlvCqRPh7fVwaG6Kg==",
|
"integrity": "sha512-lMF/2e3YkKWnTYv0R7cUCfjMkAqepNaHSc/dvJzCNsFVEhfDsFdScQFLToARs5GGxnq4fOf+MKpaHg/W6QTxiA==",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64"
|
"x64"
|
||||||
],
|
],
|
||||||
@@ -4852,33 +4852,33 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"@lancedb/vectordb-darwin-arm64": {
|
"@lancedb/vectordb-darwin-arm64": {
|
||||||
"version": "0.2.3",
|
"version": "0.2.4",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.2.3.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.2.4.tgz",
|
||||||
"integrity": "sha512-/9dRCXrV/UsZv3fqAC/Q+D2FPKXMRprcb+a77tt4I0Iy5iGT55UDRfpaXvmJeKquhTJkZ0AuyoK5BmOh7cY41w==",
|
"integrity": "sha512-MqiZXamHYEOfguPsHWLBQ56IabIN6Az8u2Hx8LCyXcxW9gcyJZMSAfJc+CcA4KYHKotv0KsVBhgxZ3kaZQQyiw==",
|
||||||
"optional": true
|
"optional": true
|
||||||
},
|
},
|
||||||
"@lancedb/vectordb-darwin-x64": {
|
"@lancedb/vectordb-darwin-x64": {
|
||||||
"version": "0.2.3",
|
"version": "0.2.4",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.2.3.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.2.4.tgz",
|
||||||
"integrity": "sha512-p06WkjmdVwDxkH8ghIWh59SCgUhjXBpy1gQISgktouymqfoFbBHz7vmeI6VO1oBA5ji6vSgGZxqjmeLRKM6blA==",
|
"integrity": "sha512-DzL+mw5WhKDwXdEFlPh8M9zSDhGnfks7NvEh6ZqKbU6znH206YB7g3OA4WfFyV579IIEQ8jd4v/XDthNzQKuSA==",
|
||||||
"optional": true
|
"optional": true
|
||||||
},
|
},
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": {
|
"@lancedb/vectordb-linux-arm64-gnu": {
|
||||||
"version": "0.2.3",
|
"version": "0.2.4",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.2.3.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.2.4.tgz",
|
||||||
"integrity": "sha512-cSDcJgfbnRmCXZ3AoRWpCAa07PMdB/k8m1LjmxnhpOnP1ohg1eUl99jwPCgd+5GK+iZmezRqbyO+YXlgsCp7GQ==",
|
"integrity": "sha512-LP1nNfIpFxCgcCMlIQdseDX9dZU27TNhCL41xar8euqcetY5uKvi0YqhiVlpNO85Ss1FRQBgQ/GtnOM6Bo7oBQ==",
|
||||||
"optional": true
|
"optional": true
|
||||||
},
|
},
|
||||||
"@lancedb/vectordb-linux-x64-gnu": {
|
"@lancedb/vectordb-linux-x64-gnu": {
|
||||||
"version": "0.2.3",
|
"version": "0.2.4",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.2.3.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.2.4.tgz",
|
||||||
"integrity": "sha512-AFA3J4hBYapGC37iXheiN6tGruitx5bmoWXkUcDv/qAaE4tizVZHB9cgx9ThTB0RDsvZEOZ5zCy7BOzPH+oCOg==",
|
"integrity": "sha512-m4RhOI5JJWPU9Ip2LlRIzXu4mwIv9M//OyAuTLiLKRm8726jQHhYi5VFUEtNzqY0o0p6pS0b3XbifYQ+cyJn3Q==",
|
||||||
"optional": true
|
"optional": true
|
||||||
},
|
},
|
||||||
"@lancedb/vectordb-win32-x64-msvc": {
|
"@lancedb/vectordb-win32-x64-msvc": {
|
||||||
"version": "0.2.3",
|
"version": "0.2.4",
|
||||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.2.3.tgz",
|
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.2.4.tgz",
|
||||||
"integrity": "sha512-LI1mz1HdcpNXTM7HbcLdXz0qvUU4LxSqRC7/kMU918VlOeWy/PnryRrjHnCjcgciGzu1rVlvCqRPh7fVwaG6Kg==",
|
"integrity": "sha512-lMF/2e3YkKWnTYv0R7cUCfjMkAqepNaHSc/dvJzCNsFVEhfDsFdScQFLToARs5GGxnq4fOf+MKpaHg/W6QTxiA==",
|
||||||
"optional": true
|
"optional": true
|
||||||
},
|
},
|
||||||
"@neon-rs/cli": {
|
"@neon-rs/cli": {
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "vectordb",
|
"name": "vectordb",
|
||||||
"version": "0.2.3",
|
"version": "0.2.4",
|
||||||
"description": " Serverless, low-latency vector database for AI applications",
|
"description": " Serverless, low-latency vector database for AI applications",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"types": "dist/index.d.ts",
|
"types": "dist/index.d.ts",
|
||||||
@@ -78,10 +78,10 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"optionalDependencies": {
|
"optionalDependencies": {
|
||||||
"@lancedb/vectordb-darwin-arm64": "0.2.3",
|
"@lancedb/vectordb-darwin-arm64": "0.2.4",
|
||||||
"@lancedb/vectordb-darwin-x64": "0.2.3",
|
"@lancedb/vectordb-darwin-x64": "0.2.4",
|
||||||
"@lancedb/vectordb-linux-arm64-gnu": "0.2.3",
|
"@lancedb/vectordb-linux-arm64-gnu": "0.2.4",
|
||||||
"@lancedb/vectordb-linux-x64-gnu": "0.2.3",
|
"@lancedb/vectordb-linux-x64-gnu": "0.2.4",
|
||||||
"@lancedb/vectordb-win32-x64-msvc": "0.2.3"
|
"@lancedb/vectordb-win32-x64-msvc": "0.2.4"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[bumpversion]
|
[bumpversion]
|
||||||
current_version = 0.2.1
|
current_version = 0.2.2
|
||||||
commit = True
|
commit = True
|
||||||
message = [python] Bump version: {current_version} → {new_version}
|
message = [python] Bump version: {current_version} → {new_version}
|
||||||
tag = True
|
tag = True
|
||||||
|
|||||||
@@ -17,13 +17,14 @@ import inspect
|
|||||||
import os
|
import os
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from functools import cached_property
|
from functools import cached_property
|
||||||
from typing import Iterable, List, Union
|
from typing import Iterable, List, Optional, Union
|
||||||
|
|
||||||
import lance
|
import lance
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
import pyarrow.compute as pc
|
import pyarrow.compute as pc
|
||||||
from lance import LanceDataset
|
from lance import LanceDataset
|
||||||
|
from lance.dataset import ReaderLike
|
||||||
from lance.vector import vec_to_table
|
from lance.vector import vec_to_table
|
||||||
|
|
||||||
from .common import DATA, VEC, VECTOR_COLUMN_NAME
|
from .common import DATA, VEC, VECTOR_COLUMN_NAME
|
||||||
@@ -311,7 +312,7 @@ class LanceTable(Table):
|
|||||||
|
|
||||||
This allows viewing previous versions of the table. If you wish to
|
This allows viewing previous versions of the table. If you wish to
|
||||||
keep writing to the dataset starting from an old version, then use
|
keep writing to the dataset starting from an old version, then use
|
||||||
the `restore` function instead.
|
the `restore` function.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
@@ -341,16 +342,18 @@ class LanceTable(Table):
|
|||||||
raise ValueError(f"Invalid version {version}")
|
raise ValueError(f"Invalid version {version}")
|
||||||
self._reset_dataset(version=version)
|
self._reset_dataset(version=version)
|
||||||
|
|
||||||
def restore(self, version: int):
|
def restore(self, version: int = None):
|
||||||
"""Restore a version of the table. This is an in-place operation.
|
"""Restore a version of the table. This is an in-place operation.
|
||||||
|
|
||||||
This creates a new version where the data is equivalent to the
|
This creates a new version where the data is equivalent to the
|
||||||
specified previous version. Note that this creates a new snapshot.
|
specified previous version. Data is not copied (as of python-v0.2.1).
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
version : int
|
version : int, default None
|
||||||
The version to restore.
|
The version to restore. If unspecified then restores the currently
|
||||||
|
checked out version. If the currently checked out version is the
|
||||||
|
latest version then this is a no-op.
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
@@ -373,15 +376,18 @@ class LanceTable(Table):
|
|||||||
3
|
3
|
||||||
"""
|
"""
|
||||||
max_ver = max([v["version"] for v in self._dataset.versions()])
|
max_ver = max([v["version"] for v in self._dataset.versions()])
|
||||||
if version < 1 or version >= max_ver:
|
if version is None:
|
||||||
|
version = self.version
|
||||||
|
elif version < 1 or version > max_ver:
|
||||||
raise ValueError(f"Invalid version {version}")
|
raise ValueError(f"Invalid version {version}")
|
||||||
if version == max_ver:
|
else:
|
||||||
self._reset_dataset()
|
|
||||||
return
|
|
||||||
self.checkout(version)
|
self.checkout(version)
|
||||||
data = self.to_arrow()
|
|
||||||
self.checkout(max_ver)
|
if version == max_ver:
|
||||||
self.add(data, mode="overwrite")
|
# no-op if restoring the latest version
|
||||||
|
return
|
||||||
|
|
||||||
|
self._dataset.restore()
|
||||||
self._reset_dataset()
|
self._reset_dataset()
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
@@ -500,6 +506,69 @@ class LanceTable(Table):
|
|||||||
lance.write_dataset(data, self._dataset_uri, schema=self.schema, mode=mode)
|
lance.write_dataset(data, self._dataset_uri, schema=self.schema, mode=mode)
|
||||||
self._reset_dataset()
|
self._reset_dataset()
|
||||||
|
|
||||||
|
def merge(
|
||||||
|
self,
|
||||||
|
other_table: Union[LanceTable, ReaderLike],
|
||||||
|
left_on: str,
|
||||||
|
right_on: Optional[str] = None,
|
||||||
|
schema: Optional[pa.Schema, LanceModel] = None,
|
||||||
|
):
|
||||||
|
"""Merge another table into this table.
|
||||||
|
|
||||||
|
Performs a left join, where the dataset is the left side and other_table
|
||||||
|
is the right side. Rows existing in the dataset but not on the right will
|
||||||
|
be filled with null values, unless Lance doesn't support null values for
|
||||||
|
some types, in which case an error will be raised. The only overlapping
|
||||||
|
column allowed is the join column. If other overlapping columns exist,
|
||||||
|
an error will be raised.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
other_table: LanceTable or Reader-like
|
||||||
|
The data to be merged. Acceptable types are:
|
||||||
|
- Pandas DataFrame, Pyarrow Table, Dataset, Scanner,
|
||||||
|
Iterator[RecordBatch], or RecordBatchReader
|
||||||
|
- LanceTable
|
||||||
|
left_on: str
|
||||||
|
The name of the column in the dataset to join on.
|
||||||
|
right_on: str or None
|
||||||
|
The name of the column in other_table to join on. If None, defaults to
|
||||||
|
left_on.
|
||||||
|
schema: pa.Schema or LanceModel, optional
|
||||||
|
The schema of the other_table.
|
||||||
|
If not provided, the schema is inferred from the data.
|
||||||
|
|
||||||
|
Examples
|
||||||
|
--------
|
||||||
|
>>> import lancedb
|
||||||
|
>>> import pyarrow as pa
|
||||||
|
>>> df = pa.table({'x': [1, 2, 3], 'y': ['a', 'b', 'c']})
|
||||||
|
>>> db = lancedb.connect("./.lancedb")
|
||||||
|
>>> table = db.create_table("dataset", df)
|
||||||
|
>>> table.to_pandas()
|
||||||
|
x y
|
||||||
|
0 1 a
|
||||||
|
1 2 b
|
||||||
|
2 3 c
|
||||||
|
>>> new_df = pa.table({'x': [1, 2, 3], 'z': ['d', 'e', 'f']})
|
||||||
|
>>> table.merge(new_df, 'x')
|
||||||
|
>>> table.to_pandas()
|
||||||
|
x y z
|
||||||
|
0 1 a d
|
||||||
|
1 2 b e
|
||||||
|
2 3 c f
|
||||||
|
"""
|
||||||
|
if isinstance(schema, LanceModel):
|
||||||
|
schema = schema.to_arrow_schema()
|
||||||
|
if isinstance(other_table, LanceTable):
|
||||||
|
other_table = other_table.to_lance()
|
||||||
|
if isinstance(other_table, LanceDataset):
|
||||||
|
other_table = other_table.to_table()
|
||||||
|
self._dataset.merge(
|
||||||
|
other_table, left_on=left_on, right_on=right_on, schema=schema
|
||||||
|
)
|
||||||
|
self._reset_dataset()
|
||||||
|
|
||||||
def search(
|
def search(
|
||||||
self, query: Union[VEC, str], vector_column_name=VECTOR_COLUMN_NAME
|
self, query: Union[VEC, str], vector_column_name=VECTOR_COLUMN_NAME
|
||||||
) -> LanceQueryBuilder:
|
) -> LanceQueryBuilder:
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "lancedb"
|
name = "lancedb"
|
||||||
version = "0.2.1"
|
version = "0.2.2"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"pylance==0.6.5",
|
"pylance==0.6.5",
|
||||||
"ratelimiter",
|
"ratelimiter",
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ from pathlib import Path
|
|||||||
from typing import List
|
from typing import List
|
||||||
from unittest.mock import PropertyMock, patch
|
from unittest.mock import PropertyMock, patch
|
||||||
|
|
||||||
|
import lance
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
@@ -280,3 +281,38 @@ def test_restore(db):
|
|||||||
table.restore(1)
|
table.restore(1)
|
||||||
assert len(table.list_versions()) == 3
|
assert len(table.list_versions()) == 3
|
||||||
assert len(table) == 1
|
assert len(table) == 1
|
||||||
|
|
||||||
|
expected = table.to_arrow()
|
||||||
|
table.checkout(1)
|
||||||
|
table.restore()
|
||||||
|
assert len(table.list_versions()) == 4
|
||||||
|
assert table.to_arrow() == expected
|
||||||
|
|
||||||
|
table.restore(4) # latest version should be no-op
|
||||||
|
assert len(table.list_versions()) == 4
|
||||||
|
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
table.restore(5)
|
||||||
|
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
table.restore(0)
|
||||||
|
|
||||||
|
|
||||||
|
def test_merge(db, tmp_path):
|
||||||
|
table = LanceTable.create(
|
||||||
|
db,
|
||||||
|
"my_table",
|
||||||
|
data=[{"vector": [1.1, 0.9], "id": 0}, {"vector": [1.2, 1.9], "id": 1}],
|
||||||
|
)
|
||||||
|
other_table = pa.table({"document": ["foo", "bar"], "id": [0, 1]})
|
||||||
|
table.merge(other_table, left_on="id")
|
||||||
|
assert len(table.list_versions()) == 2
|
||||||
|
expected = pa.table(
|
||||||
|
{"vector": [[1.1, 0.9], [1.2, 1.9]], "id": [0, 1], "document": ["foo", "bar"]},
|
||||||
|
schema=table.schema,
|
||||||
|
)
|
||||||
|
assert table.to_arrow() == expected
|
||||||
|
|
||||||
|
other_dataset = lance.write_dataset(other_table, tmp_path / "other_table.lance")
|
||||||
|
table.restore(1)
|
||||||
|
table.merge(other_dataset, left_on="id")
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "vectordb-node"
|
name = "vectordb-node"
|
||||||
version = "0.2.3"
|
version = "0.2.4"
|
||||||
description = "Serverless, low-latency vector database for AI applications"
|
description = "Serverless, low-latency vector database for AI applications"
|
||||||
license = "Apache-2.0"
|
license = "Apache-2.0"
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
|
|||||||
@@ -1,10 +1,12 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "vectordb"
|
name = "vectordb"
|
||||||
version = "0.2.3"
|
version = "0.2.4"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
description = "Serverless, low-latency vector database for AI applications"
|
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||||
license = "Apache-2.0"
|
license = "Apache-2.0"
|
||||||
repository = "https://github.com/lancedb/lancedb"
|
repository = "https://github.com/lancedb/lancedb"
|
||||||
|
keywords = ["lancedb", "lance", "database", "search"]
|
||||||
|
categories = ["database-implementations"]
|
||||||
|
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|||||||
3
rust/vectordb/README.md
Normal file
3
rust/vectordb/README.md
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
# LanceDB Rust
|
||||||
|
|
||||||
|
Rust client for LanceDB, a serverless vector database. Read more at: https://lancedb.com/
|
||||||
@@ -32,8 +32,42 @@ pub enum Error {
|
|||||||
Store { message: String },
|
Store { message: String },
|
||||||
#[snafu(display("LanceDBError: {message}"))]
|
#[snafu(display("LanceDBError: {message}"))]
|
||||||
Lance { message: String },
|
Lance { message: String },
|
||||||
|
#[snafu(display("Bad query: {message}"))]
|
||||||
|
InvalidQuery { message: String },
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Error {
|
||||||
|
pub fn invalid_table_name(name: &str) -> Self {
|
||||||
|
Self::InvalidTableName {
|
||||||
|
name: name.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn table_not_found(name: &str) -> Self {
|
||||||
|
Self::TableNotFound {
|
||||||
|
name: name.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn table_already_exists(name: &str) -> Self {
|
||||||
|
Self::TableAlreadyExists {
|
||||||
|
name: name.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn invalid_query(message: &str) -> Self {
|
||||||
|
Self::InvalidQuery {
|
||||||
|
message: message.to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn create_dir(path: &str, source: std::io::Error) -> Self {
|
||||||
|
Self::CreateDir {
|
||||||
|
path: path.to_string(),
|
||||||
|
source,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
pub type Result<T> = std::result::Result<T, Error>;
|
pub type Result<T> = std::result::Result<T, Error>;
|
||||||
|
|
||||||
impl From<lance::Error> for Error {
|
impl From<lance::Error> for Error {
|
||||||
|
|||||||
@@ -14,12 +14,73 @@
|
|||||||
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use arrow_array::Float32Array;
|
use arrow_array::{Array, Float32Array};
|
||||||
use lance::dataset::scanner::{DatasetRecordBatchStream, Scanner};
|
use arrow_schema::DataType;
|
||||||
use lance::dataset::Dataset;
|
use lance::dataset::{
|
||||||
|
scanner::{DatasetRecordBatchStream, Scanner},
|
||||||
|
Dataset,
|
||||||
|
};
|
||||||
|
use lance::datatypes::Schema;
|
||||||
use lance::index::vector::MetricType;
|
use lance::index::vector::MetricType;
|
||||||
|
|
||||||
use crate::error::Result;
|
use crate::error::{Error, Result};
|
||||||
|
|
||||||
|
struct VectorQuery<T: Array> {
|
||||||
|
query: T,
|
||||||
|
column: String,
|
||||||
|
nprobs: usize,
|
||||||
|
refine_factor: Option<u32>,
|
||||||
|
metric_type: Option<MetricType>,
|
||||||
|
use_index: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Best effort to find potential vector columns in a [Schema]: fixed-size list columns of
|
||||||
|
/// floating-point values whose list size is equal to the vector dimension.
|
||||||
|
///
|
||||||
|
fn find_vector_columns(schema: &Schema, dim: i32) -> Vec<String> {
|
||||||
|
schema
|
||||||
|
.fields
|
||||||
|
.iter()
|
||||||
|
.filter(|f| match &f.data_type() {
|
||||||
|
DataType::FixedSizeList(field, list_size) => {
|
||||||
|
*list_size == dim && field.data_type().is_floating()
|
||||||
|
}
|
||||||
|
_ => false,
|
||||||
|
})
|
||||||
|
.map(|f| f.name.to_string())
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Array> VectorQuery<T> {
|
||||||
|
fn try_new(dataset: &Dataset, query: T) -> Result<Self> {
|
||||||
|
let schema = dataset.schema();
|
||||||
|
let dim: i32 = query.len() as i32;
|
||||||
|
let vector_columns = find_vector_columns(schema, dim);
|
||||||
|
|
||||||
|
if vector_columns.is_empty() {
|
||||||
|
return Err(Error::InvalidQuery {
|
||||||
|
message: format!("Unable to find a vector column with dimension {}", dim),
|
||||||
|
});
|
||||||
|
};
|
||||||
|
if vector_columns.len() != 1 {
|
||||||
|
return Err(Error::invalid_query(
|
||||||
|
"Vector query can be applied to more than one vector columns, please specify the column to use"));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Self::with_column(query, &vector_columns[0]))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn with_column(query: T, column: &str) -> Self {
|
||||||
|
VectorQuery {
|
||||||
|
query,
|
||||||
|
column: column.to_string(),
|
||||||
|
nprobs: 20,
|
||||||
|
refine_factor: None,
|
||||||
|
metric_type: None,
|
||||||
|
use_index: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A builder for nearest neighbor queries for LanceDB.
|
/// A builder for nearest neighbor queries for LanceDB.
|
||||||
pub struct Query {
|
pub struct Query {
|
||||||
@@ -32,6 +93,7 @@ pub struct Query {
|
|||||||
pub refine_factor: Option<u32>,
|
pub refine_factor: Option<u32>,
|
||||||
pub metric_type: Option<MetricType>,
|
pub metric_type: Option<MetricType>,
|
||||||
pub use_index: bool,
|
pub use_index: bool,
|
||||||
|
vector_query: Option<VectorQuery<Float32Array>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Query {
|
impl Query {
|
||||||
@@ -48,6 +110,7 @@ impl Query {
|
|||||||
pub(crate) fn new(dataset: Arc<Dataset>, vector: Float32Array) -> Self {
|
pub(crate) fn new(dataset: Arc<Dataset>, vector: Float32Array) -> Self {
|
||||||
Query {
|
Query {
|
||||||
dataset,
|
dataset,
|
||||||
|
vector_query: None,
|
||||||
query_vector: vector,
|
query_vector: vector,
|
||||||
limit: 10,
|
limit: 10,
|
||||||
nprobes: 20,
|
nprobes: 20,
|
||||||
@@ -101,6 +164,25 @@ impl Query {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn vector_search(mut self, query: Float32Array) -> Result<Query> {
|
||||||
|
let dim = query.len();
|
||||||
|
|
||||||
|
self.query_vector = query;
|
||||||
|
Ok(self)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Vector search on a given column.
|
||||||
|
pub fn vector_search_on(mut self, query: Float32Array, column: &str) -> Result<Query> {
|
||||||
|
if self.vector_query.is_some() {
|
||||||
|
return Err(Error::invalid_query("Vector search is already set"));
|
||||||
|
};
|
||||||
|
|
||||||
|
let dim = query.len();
|
||||||
|
|
||||||
|
self.query_vector = query;
|
||||||
|
Ok(self)
|
||||||
|
}
|
||||||
|
|
||||||
/// Set the number of probes to use.
|
/// Set the number of probes to use.
|
||||||
///
|
///
|
||||||
/// # Arguments
|
/// # Arguments
|
||||||
@@ -162,9 +244,11 @@ impl Query {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use arrow_array::{Float32Array, RecordBatch, RecordBatchIterator, RecordBatchReader};
|
use arrow_array::{RecordBatch, RecordBatchIterator, RecordBatchReader};
|
||||||
use arrow_schema::{DataType, Field as ArrowField, Schema as ArrowSchema};
|
use arrow_schema::{DataType, Field as ArrowField, Schema as ArrowSchema};
|
||||||
use lance::dataset::Dataset;
|
use lance::dataset::Dataset;
|
||||||
use lance::index::vector::MetricType;
|
use lance::index::vector::MetricType;
|
||||||
|
|||||||
Reference in New Issue
Block a user