mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-23 05:19:58 +00:00
Compare commits
10 Commits
python-v0.
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fe8848efb9 | ||
|
|
213c313b99 | ||
|
|
157e995a43 | ||
|
|
ab97e5d632 | ||
|
|
87e9a0250f | ||
|
|
e587a17a64 | ||
|
|
2f1f9f6338 | ||
|
|
a34fa4df26 | ||
|
|
e20979b335 | ||
|
|
08689c345d |
@@ -1,5 +1,5 @@
|
||||
[bumpversion]
|
||||
current_version = 0.2.3
|
||||
current_version = 0.2.4
|
||||
commit = True
|
||||
message = Bump version: {current_version} → {new_version}
|
||||
tag = True
|
||||
|
||||
74
node/package-lock.json
generated
74
node/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.2.3",
|
||||
"version": "0.2.4",
|
||||
"lockfileVersion": 2,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "vectordb",
|
||||
"version": "0.2.3",
|
||||
"version": "0.2.4",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
@@ -51,11 +51,11 @@
|
||||
"typescript": "*"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-arm64": "0.2.3",
|
||||
"@lancedb/vectordb-darwin-x64": "0.2.3",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.2.3",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.2.3",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.2.3"
|
||||
"@lancedb/vectordb-darwin-arm64": "0.2.4",
|
||||
"@lancedb/vectordb-darwin-x64": "0.2.4",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.2.4",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.2.4",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.2.4"
|
||||
}
|
||||
},
|
||||
"node_modules/@apache-arrow/ts": {
|
||||
@@ -315,9 +315,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-darwin-arm64": {
|
||||
"version": "0.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.2.3.tgz",
|
||||
"integrity": "sha512-/9dRCXrV/UsZv3fqAC/Q+D2FPKXMRprcb+a77tt4I0Iy5iGT55UDRfpaXvmJeKquhTJkZ0AuyoK5BmOh7cY41w==",
|
||||
"version": "0.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.2.4.tgz",
|
||||
"integrity": "sha512-MqiZXamHYEOfguPsHWLBQ56IabIN6Az8u2Hx8LCyXcxW9gcyJZMSAfJc+CcA4KYHKotv0KsVBhgxZ3kaZQQyiw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -327,9 +327,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-darwin-x64": {
|
||||
"version": "0.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.2.3.tgz",
|
||||
"integrity": "sha512-p06WkjmdVwDxkH8ghIWh59SCgUhjXBpy1gQISgktouymqfoFbBHz7vmeI6VO1oBA5ji6vSgGZxqjmeLRKM6blA==",
|
||||
"version": "0.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.2.4.tgz",
|
||||
"integrity": "sha512-DzL+mw5WhKDwXdEFlPh8M9zSDhGnfks7NvEh6ZqKbU6znH206YB7g3OA4WfFyV579IIEQ8jd4v/XDthNzQKuSA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -339,9 +339,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-arm64-gnu": {
|
||||
"version": "0.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.2.3.tgz",
|
||||
"integrity": "sha512-cSDcJgfbnRmCXZ3AoRWpCAa07PMdB/k8m1LjmxnhpOnP1ohg1eUl99jwPCgd+5GK+iZmezRqbyO+YXlgsCp7GQ==",
|
||||
"version": "0.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.2.4.tgz",
|
||||
"integrity": "sha512-LP1nNfIpFxCgcCMlIQdseDX9dZU27TNhCL41xar8euqcetY5uKvi0YqhiVlpNO85Ss1FRQBgQ/GtnOM6Bo7oBQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@@ -351,9 +351,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-linux-x64-gnu": {
|
||||
"version": "0.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.2.3.tgz",
|
||||
"integrity": "sha512-AFA3J4hBYapGC37iXheiN6tGruitx5bmoWXkUcDv/qAaE4tizVZHB9cgx9ThTB0RDsvZEOZ5zCy7BOzPH+oCOg==",
|
||||
"version": "0.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.2.4.tgz",
|
||||
"integrity": "sha512-m4RhOI5JJWPU9Ip2LlRIzXu4mwIv9M//OyAuTLiLKRm8726jQHhYi5VFUEtNzqY0o0p6pS0b3XbifYQ+cyJn3Q==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -363,9 +363,9 @@
|
||||
]
|
||||
},
|
||||
"node_modules/@lancedb/vectordb-win32-x64-msvc": {
|
||||
"version": "0.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.2.3.tgz",
|
||||
"integrity": "sha512-LI1mz1HdcpNXTM7HbcLdXz0qvUU4LxSqRC7/kMU918VlOeWy/PnryRrjHnCjcgciGzu1rVlvCqRPh7fVwaG6Kg==",
|
||||
"version": "0.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.2.4.tgz",
|
||||
"integrity": "sha512-lMF/2e3YkKWnTYv0R7cUCfjMkAqepNaHSc/dvJzCNsFVEhfDsFdScQFLToARs5GGxnq4fOf+MKpaHg/W6QTxiA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@@ -4852,33 +4852,33 @@
|
||||
}
|
||||
},
|
||||
"@lancedb/vectordb-darwin-arm64": {
|
||||
"version": "0.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.2.3.tgz",
|
||||
"integrity": "sha512-/9dRCXrV/UsZv3fqAC/Q+D2FPKXMRprcb+a77tt4I0Iy5iGT55UDRfpaXvmJeKquhTJkZ0AuyoK5BmOh7cY41w==",
|
||||
"version": "0.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.2.4.tgz",
|
||||
"integrity": "sha512-MqiZXamHYEOfguPsHWLBQ56IabIN6Az8u2Hx8LCyXcxW9gcyJZMSAfJc+CcA4KYHKotv0KsVBhgxZ3kaZQQyiw==",
|
||||
"optional": true
|
||||
},
|
||||
"@lancedb/vectordb-darwin-x64": {
|
||||
"version": "0.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.2.3.tgz",
|
||||
"integrity": "sha512-p06WkjmdVwDxkH8ghIWh59SCgUhjXBpy1gQISgktouymqfoFbBHz7vmeI6VO1oBA5ji6vSgGZxqjmeLRKM6blA==",
|
||||
"version": "0.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-darwin-x64/-/vectordb-darwin-x64-0.2.4.tgz",
|
||||
"integrity": "sha512-DzL+mw5WhKDwXdEFlPh8M9zSDhGnfks7NvEh6ZqKbU6znH206YB7g3OA4WfFyV579IIEQ8jd4v/XDthNzQKuSA==",
|
||||
"optional": true
|
||||
},
|
||||
"@lancedb/vectordb-linux-arm64-gnu": {
|
||||
"version": "0.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.2.3.tgz",
|
||||
"integrity": "sha512-cSDcJgfbnRmCXZ3AoRWpCAa07PMdB/k8m1LjmxnhpOnP1ohg1eUl99jwPCgd+5GK+iZmezRqbyO+YXlgsCp7GQ==",
|
||||
"version": "0.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-arm64-gnu/-/vectordb-linux-arm64-gnu-0.2.4.tgz",
|
||||
"integrity": "sha512-LP1nNfIpFxCgcCMlIQdseDX9dZU27TNhCL41xar8euqcetY5uKvi0YqhiVlpNO85Ss1FRQBgQ/GtnOM6Bo7oBQ==",
|
||||
"optional": true
|
||||
},
|
||||
"@lancedb/vectordb-linux-x64-gnu": {
|
||||
"version": "0.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.2.3.tgz",
|
||||
"integrity": "sha512-AFA3J4hBYapGC37iXheiN6tGruitx5bmoWXkUcDv/qAaE4tizVZHB9cgx9ThTB0RDsvZEOZ5zCy7BOzPH+oCOg==",
|
||||
"version": "0.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-linux-x64-gnu/-/vectordb-linux-x64-gnu-0.2.4.tgz",
|
||||
"integrity": "sha512-m4RhOI5JJWPU9Ip2LlRIzXu4mwIv9M//OyAuTLiLKRm8726jQHhYi5VFUEtNzqY0o0p6pS0b3XbifYQ+cyJn3Q==",
|
||||
"optional": true
|
||||
},
|
||||
"@lancedb/vectordb-win32-x64-msvc": {
|
||||
"version": "0.2.3",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.2.3.tgz",
|
||||
"integrity": "sha512-LI1mz1HdcpNXTM7HbcLdXz0qvUU4LxSqRC7/kMU918VlOeWy/PnryRrjHnCjcgciGzu1rVlvCqRPh7fVwaG6Kg==",
|
||||
"version": "0.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@lancedb/vectordb-win32-x64-msvc/-/vectordb-win32-x64-msvc-0.2.4.tgz",
|
||||
"integrity": "sha512-lMF/2e3YkKWnTYv0R7cUCfjMkAqepNaHSc/dvJzCNsFVEhfDsFdScQFLToARs5GGxnq4fOf+MKpaHg/W6QTxiA==",
|
||||
"optional": true
|
||||
},
|
||||
"@neon-rs/cli": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "vectordb",
|
||||
"version": "0.2.3",
|
||||
"version": "0.2.4",
|
||||
"description": " Serverless, low-latency vector database for AI applications",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
@@ -78,10 +78,10 @@
|
||||
}
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@lancedb/vectordb-darwin-arm64": "0.2.3",
|
||||
"@lancedb/vectordb-darwin-x64": "0.2.3",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.2.3",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.2.3",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.2.3"
|
||||
"@lancedb/vectordb-darwin-arm64": "0.2.4",
|
||||
"@lancedb/vectordb-darwin-x64": "0.2.4",
|
||||
"@lancedb/vectordb-linux-arm64-gnu": "0.2.4",
|
||||
"@lancedb/vectordb-linux-x64-gnu": "0.2.4",
|
||||
"@lancedb/vectordb-win32-x64-msvc": "0.2.4"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[bumpversion]
|
||||
current_version = 0.2.1
|
||||
current_version = 0.2.2
|
||||
commit = True
|
||||
message = [python] Bump version: {current_version} → {new_version}
|
||||
tag = True
|
||||
|
||||
@@ -17,13 +17,14 @@ import inspect
|
||||
import os
|
||||
from abc import ABC, abstractmethod
|
||||
from functools import cached_property
|
||||
from typing import Iterable, List, Union
|
||||
from typing import Iterable, List, Optional, Union
|
||||
|
||||
import lance
|
||||
import numpy as np
|
||||
import pyarrow as pa
|
||||
import pyarrow.compute as pc
|
||||
from lance import LanceDataset
|
||||
from lance.dataset import ReaderLike
|
||||
from lance.vector import vec_to_table
|
||||
|
||||
from .common import DATA, VEC, VECTOR_COLUMN_NAME
|
||||
@@ -311,7 +312,7 @@ class LanceTable(Table):
|
||||
|
||||
This allows viewing previous versions of the table. If you wish to
|
||||
keep writing to the dataset starting from an old version, then use
|
||||
the `restore` function instead.
|
||||
the `restore` function.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
@@ -341,16 +342,18 @@ class LanceTable(Table):
|
||||
raise ValueError(f"Invalid version {version}")
|
||||
self._reset_dataset(version=version)
|
||||
|
||||
def restore(self, version: int):
|
||||
def restore(self, version: int = None):
|
||||
"""Restore a version of the table. This is an in-place operation.
|
||||
|
||||
This creates a new version where the data is equivalent to the
|
||||
specified previous version. Note that this creates a new snapshot.
|
||||
specified previous version. Data is not copied (as of python-v0.2.1).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
version : int
|
||||
The version to restore.
|
||||
version : int, default None
|
||||
The version to restore. If unspecified then restores the currently
|
||||
checked out version. If the currently checked out version is the
|
||||
latest version then this is a no-op.
|
||||
|
||||
Examples
|
||||
--------
|
||||
@@ -373,15 +376,18 @@ class LanceTable(Table):
|
||||
3
|
||||
"""
|
||||
max_ver = max([v["version"] for v in self._dataset.versions()])
|
||||
if version < 1 or version >= max_ver:
|
||||
if version is None:
|
||||
version = self.version
|
||||
elif version < 1 or version > max_ver:
|
||||
raise ValueError(f"Invalid version {version}")
|
||||
else:
|
||||
self.checkout(version)
|
||||
|
||||
if version == max_ver:
|
||||
self._reset_dataset()
|
||||
# no-op if restoring the latest version
|
||||
return
|
||||
self.checkout(version)
|
||||
data = self.to_arrow()
|
||||
self.checkout(max_ver)
|
||||
self.add(data, mode="overwrite")
|
||||
|
||||
self._dataset.restore()
|
||||
self._reset_dataset()
|
||||
|
||||
def __len__(self):
|
||||
@@ -500,6 +506,69 @@ class LanceTable(Table):
|
||||
lance.write_dataset(data, self._dataset_uri, schema=self.schema, mode=mode)
|
||||
self._reset_dataset()
|
||||
|
||||
def merge(
    self,
    other_table: Union[LanceTable, ReaderLike],
    left_on: str,
    right_on: Optional[str] = None,
    schema: Optional[Union[pa.Schema, LanceModel]] = None,
):
    """Merge another table into this table.

    Performs a left join, where this table is the left side and other_table
    is the right side. Rows of this table that have no match in other_table
    get null values in the merged-in columns, unless Lance doesn't support
    null values for some types, in which case an error will be raised. The
    only overlapping column allowed is the join column. If other overlapping
    columns exist, an error will be raised.

    Parameters
    ----------
    other_table: LanceTable or Reader-like
        The data to be merged. Acceptable types are:
        - Pandas DataFrame, Pyarrow Table, Dataset, Scanner,
          Iterator[RecordBatch], or RecordBatchReader
        - LanceTable
    left_on: str
        The name of the column in this table to join on.
    right_on: str or None
        The name of the column in other_table to join on. If None, defaults
        to left_on.
    schema: pa.Schema or LanceModel, optional
        The schema of the other_table.
        If not provided, the schema is inferred from the data.

    Examples
    --------
    >>> import lancedb
    >>> import pyarrow as pa
    >>> df = pa.table({'x': [1, 2, 3], 'y': ['a', 'b', 'c']})
    >>> db = lancedb.connect("./.lancedb")
    >>> table = db.create_table("dataset", df)
    >>> table.to_pandas()
       x  y
    0  1  a
    1  2  b
    2  3  c
    >>> new_df = pa.table({'x': [1, 2, 3], 'z': ['d', 'e', 'f']})
    >>> table.merge(new_df, 'x')
    >>> table.to_pandas()
       x  y  z
    0  1  a  d
    1  2  b  e
    2  3  c  f
    """
    # A LanceModel may be handed over either as an instance or as the model
    # class itself; both are converted to a pyarrow schema.
    # NOTE(review): assumes to_arrow_schema() is callable on the class
    # (classmethod) -- confirm against the LanceModel definition.
    is_model_class = isinstance(schema, type) and issubclass(schema, LanceModel)
    if is_model_class or isinstance(schema, LanceModel):
        schema = schema.to_arrow_schema()

    # Unwrap a LanceTable to its underlying Lance dataset ...
    if isinstance(other_table, LanceTable):
        other_table = other_table.to_lance()
    # ... and turn any Lance dataset into a table before handing it to merge.
    if isinstance(other_table, LanceDataset):
        other_table = other_table.to_table()

    self._dataset.merge(
        other_table, left_on=left_on, right_on=right_on, schema=schema
    )
    # Re-open the dataset so this table reflects the result of the merge.
    self._reset_dataset()
|
||||
|
||||
def search(
|
||||
self, query: Union[VEC, str], vector_column_name=VECTOR_COLUMN_NAME
|
||||
) -> LanceQueryBuilder:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "lancedb"
|
||||
version = "0.2.1"
|
||||
version = "0.2.2"
|
||||
dependencies = [
|
||||
"pylance==0.6.5",
|
||||
"ratelimiter",
|
||||
|
||||
@@ -16,6 +16,7 @@ from pathlib import Path
|
||||
from typing import List
|
||||
from unittest.mock import PropertyMock, patch
|
||||
|
||||
import lance
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
@@ -280,3 +281,38 @@ def test_restore(db):
|
||||
table.restore(1)
|
||||
assert len(table.list_versions()) == 3
|
||||
assert len(table) == 1
|
||||
|
||||
expected = table.to_arrow()
|
||||
table.checkout(1)
|
||||
table.restore()
|
||||
assert len(table.list_versions()) == 4
|
||||
assert table.to_arrow() == expected
|
||||
|
||||
table.restore(4) # latest version should be no-op
|
||||
assert len(table.list_versions()) == 4
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
table.restore(5)
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
table.restore(0)
|
||||
|
||||
|
||||
def test_merge(db, tmp_path):
    """Merging extra columns works from a pyarrow table and a Lance dataset."""
    rows = [{"vector": [1.1, 0.9], "id": 0}, {"vector": [1.2, 1.9], "id": 1}]
    table = LanceTable.create(db, "my_table", data=rows)

    # First pass: merge a new "document" column from an in-memory table.
    other_table = pa.table({"document": ["foo", "bar"], "id": [0, 1]})
    table.merge(other_table, left_on="id")
    versions = table.list_versions()
    assert len(versions) == 2

    merged_columns = {
        "vector": [[1.1, 0.9], [1.2, 1.9]],
        "id": [0, 1],
        "document": ["foo", "bar"],
    }
    expected = pa.table(merged_columns, schema=table.schema)
    assert table.to_arrow() == expected

    # Second pass: roll back to the first version and merge the same data
    # again, this time supplied as a Lance dataset written to disk.
    dataset_path = tmp_path / "other_table.lance"
    other_dataset = lance.write_dataset(other_table, dataset_path)
    table.restore(1)
    table.merge(other_dataset, left_on="id")
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "vectordb-node"
|
||||
version = "0.2.3"
|
||||
version = "0.2.4"
|
||||
description = "Serverless, low-latency vector database for AI applications"
|
||||
license = "Apache-2.0"
|
||||
edition = "2018"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "vectordb"
|
||||
version = "0.2.3"
|
||||
version = "0.2.4"
|
||||
edition = "2021"
|
||||
description = "Serverless, low-latency vector database for AI applications"
|
||||
license = "Apache-2.0"
|
||||
|
||||
Reference in New Issue
Block a user