mirror of
https://github.com/lancedb/lancedb.git
synced 2026-06-10 07:40:42 +00:00
Compare commits
16 Commits
python-v0.
...
codex/upda
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9c488d13c9 | ||
|
|
9c12fb6437 | ||
|
|
f260d3bf12 | ||
|
|
d9018067b3 | ||
|
|
53517b3aaa | ||
|
|
3e25f584eb | ||
|
|
59fbfd4158 | ||
|
|
f37e698e2f | ||
|
|
09b1bbc12a | ||
|
|
c484b24e51 | ||
|
|
3868965413 | ||
|
|
c13ebc6796 | ||
|
|
4b287fd9c4 | ||
|
|
64194ea8ad | ||
|
|
e6c5de1a58 | ||
|
|
39a9f3e1e9 |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.30.1-beta.1"
|
||||
current_version = "0.30.1-beta.2"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
561
Cargo.lock
generated
561
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
28
Cargo.toml
28
Cargo.toml
@@ -13,20 +13,20 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.91.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=7.2.0-beta.3", default-features = false, "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=7.2.0-beta.3", "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=7.2.0-beta.3", "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=7.2.0-beta.3", "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=7.2.0-beta.3", default-features = false, "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=7.2.0-beta.3", "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=7.2.0-beta.3", "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=7.2.0-beta.3", "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=7.2.0-beta.3", default-features = false, "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=7.2.0-beta.3", "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=7.2.0-beta.3", "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=7.2.0-beta.3", "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=7.2.0-beta.3", "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=7.2.0-beta.3", "tag" = "v7.2.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance = { "version" = "=8.0.0-beta.9", default-features = false, "tag" = "v8.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=8.0.0-beta.9", "tag" = "v8.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=8.0.0-beta.9", "tag" = "v8.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=8.0.0-beta.9", "tag" = "v8.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=8.0.0-beta.9", default-features = false, "tag" = "v8.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=8.0.0-beta.9", "tag" = "v8.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=8.0.0-beta.9", "tag" = "v8.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=8.0.0-beta.9", "tag" = "v8.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=8.0.0-beta.9", default-features = false, "tag" = "v8.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=8.0.0-beta.9", "tag" = "v8.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=8.0.0-beta.9", "tag" = "v8.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=8.0.0-beta.9", "tag" = "v8.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=8.0.0-beta.9", "tag" = "v8.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=8.0.0-beta.9", "tag" = "v8.0.0-beta.9", "git" = "https://github.com/lance-format/lance.git" }
|
||||
ahash = "0.8"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "58.0.0", optional = false }
|
||||
|
||||
@@ -147,6 +147,14 @@ allow = [
|
||||
"CDLA-Permissive-2.0",
|
||||
]
|
||||
confidence-threshold = 0.8
|
||||
# Per-crate license exceptions: allow a license for a specific crate only,
|
||||
# rather than globally via the `allow` list above.
|
||||
exceptions = [
|
||||
# CDDL-1.0 (copyleft) is pulled in only as a dev/profiling dependency via
|
||||
# `inferno` -> `pprof` -> `lance-testing`; it is a test dependency that we
|
||||
# do not distribute, so scope the allowance to `inferno` alone.
|
||||
{ allow = ["CDDL-1.0"], crate = "inferno" },
|
||||
]
|
||||
# Crates whose license cannot be determined from Cargo metadata but whose
|
||||
# license we've manually confirmed from upstream. Keep this list minimal.
|
||||
[[licenses.clarify]]
|
||||
|
||||
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
|
||||
<dependency>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-core</artifactId>
|
||||
<version>0.30.1-beta.1</version>
|
||||
<version>0.30.1-beta.2</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
|
||||
43
docs/src/js/classes/BranchContents.md
Normal file
43
docs/src/js/classes/BranchContents.md
Normal file
@@ -0,0 +1,43 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / BranchContents
|
||||
|
||||
# Class: BranchContents
|
||||
|
||||
## Constructors
|
||||
|
||||
### new BranchContents()
|
||||
|
||||
```ts
|
||||
new BranchContents(): BranchContents
|
||||
```
|
||||
|
||||
#### Returns
|
||||
|
||||
[`BranchContents`](BranchContents.md)
|
||||
|
||||
## Properties
|
||||
|
||||
### manifestSize
|
||||
|
||||
```ts
|
||||
manifestSize: number;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### parentBranch?
|
||||
|
||||
```ts
|
||||
optional parentBranch: string;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### parentVersion
|
||||
|
||||
```ts
|
||||
parentVersion: number;
|
||||
```
|
||||
96
docs/src/js/classes/Branches.md
Normal file
96
docs/src/js/classes/Branches.md
Normal file
@@ -0,0 +1,96 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / Branches
|
||||
|
||||
# Class: Branches
|
||||
|
||||
Branch manager for a [Table](Table.md).
|
||||
|
||||
Unlike tags, `create` and `checkout` return a new [Table](Table.md) handle scoped
|
||||
to the branch; writes on it do not affect `main`.
|
||||
|
||||
## Methods
|
||||
|
||||
### checkout()
|
||||
|
||||
```ts
|
||||
checkout(name, version?): Promise<Table>
|
||||
```
|
||||
|
||||
Check out an existing branch and return a handle scoped to it.
|
||||
|
||||
With `version` set, the returned handle is pinned to that version of the
|
||||
branch (a read-only, detached view); otherwise it tracks the branch's
|
||||
latest and stays writable.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **name**: `string`
|
||||
|
||||
* **version?**: `number`
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Table`](Table.md)>
|
||||
|
||||
***
|
||||
|
||||
### create()
|
||||
|
||||
```ts
|
||||
create(
|
||||
name,
|
||||
fromRef?,
|
||||
fromVersion?): Promise<Table>
|
||||
```
|
||||
|
||||
Create a branch and return a handle scoped to it.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **name**: `string`
|
||||
Name of the new branch.
|
||||
|
||||
* **fromRef?**: `string`
|
||||
Source branch to fork from. Defaults to `main`.
|
||||
|
||||
* **fromVersion?**: `number`
|
||||
A specific version on `fromRef`. Defaults to latest.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Table`](Table.md)>
|
||||
|
||||
***
|
||||
|
||||
### delete()
|
||||
|
||||
```ts
|
||||
delete(name): Promise<void>
|
||||
```
|
||||
|
||||
Delete a branch.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **name**: `string`
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`void`>
|
||||
|
||||
***
|
||||
|
||||
### list()
|
||||
|
||||
```ts
|
||||
list(): Promise<Record<string, BranchContents>>
|
||||
```
|
||||
|
||||
List all branches, mapping name to branch metadata.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`Record`<`string`, [`BranchContents`](BranchContents.md)>>
|
||||
@@ -110,6 +110,23 @@ containing the new version number of the table after altering the columns.
|
||||
|
||||
***
|
||||
|
||||
### branches()
|
||||
|
||||
```ts
|
||||
abstract branches(): Promise<Branches>
|
||||
```
|
||||
|
||||
Get the branch manager for this table.
|
||||
|
||||
Branches are isolated, writable lines of history forked from another
|
||||
branch (or version). Writes on a branch do not affect `main`.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Branches`](Branches.md)>
|
||||
|
||||
***
|
||||
|
||||
### checkout()
|
||||
|
||||
```ts
|
||||
|
||||
@@ -19,6 +19,8 @@
|
||||
|
||||
- [BooleanQuery](classes/BooleanQuery.md)
|
||||
- [BoostQuery](classes/BoostQuery.md)
|
||||
- [BranchContents](classes/BranchContents.md)
|
||||
- [Branches](classes/Branches.md)
|
||||
- [Connection](classes/Connection.md)
|
||||
- [HeaderProvider](classes/HeaderProvider.md)
|
||||
- [Index](classes/Index.md)
|
||||
|
||||
@@ -30,17 +30,6 @@ The type of the index
|
||||
|
||||
***
|
||||
|
||||
### loss?
|
||||
|
||||
```ts
|
||||
optional loss: number;
|
||||
```
|
||||
|
||||
The KMeans loss value of the index,
|
||||
it is only present for vector indices.
|
||||
|
||||
***
|
||||
|
||||
### numIndexedRows
|
||||
|
||||
```ts
|
||||
|
||||
@@ -8,6 +8,18 @@
|
||||
|
||||
## Properties
|
||||
|
||||
### branch?
|
||||
|
||||
```ts
|
||||
optional branch: string;
|
||||
```
|
||||
|
||||
Open the table scoped to this branch instead of the default branch.
|
||||
|
||||
Reads and writes on the returned table operate in the branch's context.
|
||||
|
||||
***
|
||||
|
||||
### ~~indexCacheSize?~~
|
||||
|
||||
```ts
|
||||
@@ -43,3 +55,17 @@ Options already set on the connection will be inherited by the table,
|
||||
but can be overridden here.
|
||||
|
||||
The available options are described at https://docs.lancedb.com/storage/
|
||||
|
||||
***
|
||||
|
||||
### version?
|
||||
|
||||
```ts
|
||||
optional version: number;
|
||||
```
|
||||
|
||||
Open the table pinned to this version, producing a read-only view.
|
||||
|
||||
Composes with [OpenTableOptions.branch](OpenTableOptions.md#branch): when both are set, opens
|
||||
that branch at the version; otherwise opens `main` at the version. Call
|
||||
`checkoutLatest` to return to a writable state.
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.30.1-beta.1</version>
|
||||
<version>0.30.1-beta.2</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.30.1-beta.1</version>
|
||||
<version>0.30.1-beta.2</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>LanceDB Java SDK Parent POM</description>
|
||||
@@ -28,7 +28,7 @@
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<arrow.version>15.0.0</arrow.version>
|
||||
<lance-core.version>7.2.0-beta.1</lance-core.version>
|
||||
<lance-core.version>8.0.0-beta.9</lance-core.version>
|
||||
<spotless.skip>false</spotless.skip>
|
||||
<spotless.version>2.30.0</spotless.version>
|
||||
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.30.1-beta.1"
|
||||
version = "0.30.1-beta.2"
|
||||
publish = false
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
|
||||
@@ -191,6 +191,34 @@ describe("remote connection", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("allows version on remote but rejects a non-main branch", async () => {
|
||||
await withMockDatabase(
|
||||
(_req, res) => {
|
||||
// describe (table open + version validation) always succeeds
|
||||
const body = JSON.stringify({
|
||||
name: "t",
|
||||
version: 2,
|
||||
schema: { fields: [] },
|
||||
});
|
||||
res.writeHead(200, { "Content-Type": "application/json" }).end(body);
|
||||
},
|
||||
async (db) => {
|
||||
// version-only (and "main" + version) is allowed: remote supports
|
||||
// version time-travel even though it has no branches
|
||||
await db.openTable("t", undefined, { version: 2 });
|
||||
await db.openTable("t", undefined, { branch: "main", version: 2 });
|
||||
|
||||
// a non-main branch is rejected, with or without a version
|
||||
await expect(
|
||||
db.openTable("t", undefined, { branch: "exp" }),
|
||||
).rejects.toThrow(/branching/);
|
||||
await expect(
|
||||
db.openTable("t", undefined, { branch: "exp", version: 2 }),
|
||||
).rejects.toThrow(/branching/);
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
describe("TlsConfig", () => {
|
||||
it("should create TlsConfig with all fields", () => {
|
||||
const tlsConfig: TlsConfig = {
|
||||
|
||||
@@ -85,6 +85,136 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
await expect(table.countRows()).resolves.toBe(3);
|
||||
});
|
||||
|
||||
it("should support branches", async () => {
|
||||
await table.add([{ id: 1 }]);
|
||||
expect(await table.countRows()).toBe(1);
|
||||
|
||||
// fork an isolated, writable branch from main
|
||||
const branch = await (await table.branches()).create("exp");
|
||||
expect(await branch.countRows()).toBe(1);
|
||||
await branch.add([{ id: 2 }]);
|
||||
expect(await branch.countRows()).toBe(2);
|
||||
// main is untouched by branch writes
|
||||
expect(await table.countRows()).toBe(1);
|
||||
|
||||
// listed, with main (null) as the parent
|
||||
const list = await (await table.branches()).list();
|
||||
expect(Object.keys(list)).toContain("exp");
|
||||
expect(list["exp"].parentBranch).toBeNull();
|
||||
|
||||
// fromRef="main" is equivalent to the default
|
||||
await (await table.branches()).create("exp2", "main");
|
||||
const list2 = await (await table.branches()).list();
|
||||
expect(list2["exp2"].parentBranch).toBeNull();
|
||||
|
||||
// checkout returns a handle scoped to the branch's latest
|
||||
const checkedOut = await (await table.branches()).checkout("exp");
|
||||
expect(await checkedOut.countRows()).toBe(2);
|
||||
|
||||
// delete removes it
|
||||
await (await table.branches()).delete("exp");
|
||||
await (await table.branches()).delete("exp2");
|
||||
const after = await (await table.branches()).list();
|
||||
expect(Object.keys(after)).not.toContain("exp");
|
||||
});
|
||||
|
||||
it("should open a branch via open_table", async () => {
|
||||
const db = await connect(tmpDir.name);
|
||||
await table.add([{ id: 1 }]);
|
||||
const branch = await (await table.branches()).create("exp");
|
||||
await branch.add([{ id: 2 }]);
|
||||
|
||||
// open_table(..., { branch }) returns a handle scoped to the branch
|
||||
const opened = await db.openTable("some_table", undefined, {
|
||||
branch: "exp",
|
||||
});
|
||||
expect(await opened.countRows()).toBe(2);
|
||||
// opening without branch still tracks main
|
||||
expect(await (await db.openTable("some_table")).countRows()).toBe(1);
|
||||
});
|
||||
|
||||
it("should open a branch at a version isolated from main and HEAD", async () => {
|
||||
const db = await connect(tmpDir.name);
|
||||
// main: a single fork-point row
|
||||
const t = await db.createTable("bv_table", [{ id: 0 }]);
|
||||
const mainV1 = await t.version();
|
||||
|
||||
// fork "exp", then advance exp AND main independently past the fork so
|
||||
// they diverge while sharing version numbers
|
||||
const exp = await (await t.branches()).create("exp");
|
||||
await exp.add([{ id: 1 }]); // exp: {0, 1}
|
||||
const expV2 = await exp.version();
|
||||
await exp.add([{ id: 2 }]); // exp HEAD: {0, 1, 2}
|
||||
await t.add([{ id: 100 }, { id: 101 }, { id: 102 }]); // main HEAD: {0,100,101,102}
|
||||
expect(await t.version()).toBe(expV2);
|
||||
|
||||
// open exp at the shared version: the data must be exp's, not main's.
|
||||
// count alone cannot prove this (main@v2 also exists), so assert
|
||||
// provenance by content.
|
||||
const pinned = await db.openTable("bv_table", undefined, {
|
||||
branch: "exp",
|
||||
version: expV2,
|
||||
});
|
||||
expect(await pinned.countRows()).toBe(2); // not exp HEAD (3), not main@v2 (4)
|
||||
expect(await pinned.countRows("id = 1")).toBe(1); // exp's post-fork row
|
||||
expect(await pinned.countRows("id = 100")).toBe(0); // main's rows invisible
|
||||
|
||||
// the same coordinate is reachable directly via branches().checkout(name, version)
|
||||
const pinnedDirect = await (await t.branches()).checkout("exp", expV2);
|
||||
expect(await pinnedDirect.countRows()).toBe(2);
|
||||
|
||||
// the HEADs are unaffected
|
||||
expect(
|
||||
await (
|
||||
await db.openTable("bv_table", undefined, { branch: "exp" })
|
||||
).countRows(),
|
||||
).toBe(3);
|
||||
expect(await (await db.openTable("bv_table")).countRows()).toBe(4);
|
||||
|
||||
// version-only (no branch) time-travels main itself: its fork-point
|
||||
// version holds only main's first row, and the shared version number
|
||||
// resolves to main's data, not the branch's ("opens main at the version")
|
||||
const oldMain = await db.openTable("bv_table", undefined, {
|
||||
version: mainV1,
|
||||
});
|
||||
expect(await oldMain.countRows()).toBe(1);
|
||||
const sharedOnMain = await db.openTable("bv_table", undefined, {
|
||||
version: expV2,
|
||||
});
|
||||
expect(await sharedOnMain.countRows()).toBe(4); // main@v2, not exp@v2 (2)
|
||||
|
||||
// detached head: writing to a pinned version is rejected
|
||||
await expect(pinned.add([{ id: 9 }])).rejects.toThrow(
|
||||
/cannot be modified/,
|
||||
);
|
||||
|
||||
// a nonexistent version is rejected -- on main, and on a branch (a
|
||||
// distinct resolution path, on the branch's manifests)
|
||||
await expect(
|
||||
db.openTable("bv_table", undefined, { version: 9999 }),
|
||||
).rejects.toThrow();
|
||||
await expect(
|
||||
db.openTable("bv_table", undefined, { branch: "exp", version: 9999 }),
|
||||
).rejects.toThrow();
|
||||
|
||||
// checkoutLatest re-attaches the pinned handle to the BRANCH's HEAD
|
||||
// (writable again), not main's HEAD (4), and not staying pinned (2)
|
||||
await pinned.checkoutLatest();
|
||||
expect(await pinned.countRows()).toBe(3); // exp HEAD
|
||||
await pinned.add([{ id: 3 }]);
|
||||
expect(await pinned.countRows()).toBe(4); // writable again
|
||||
});
|
||||
|
||||
it("rejects invalid branch inputs", async () => {
|
||||
const branches = await table.branches();
|
||||
await expect(branches.create("")).rejects.toThrow("non-empty");
|
||||
await expect(branches.checkout("")).rejects.toThrow("non-empty");
|
||||
await expect(branches.delete("")).rejects.toThrow("non-empty");
|
||||
await expect(branches.create("bad", "main", -1)).rejects.toThrow(
|
||||
"non-negative",
|
||||
);
|
||||
});
|
||||
|
||||
it("should show table stats", async () => {
|
||||
await table.add([{ id: 1 }, { id: 2 }]);
|
||||
await table.add([{ id: 1 }]);
|
||||
@@ -721,7 +851,7 @@ describe("When creating an index", () => {
|
||||
columns: ["vec"],
|
||||
});
|
||||
const stats = await tbl.indexStats("vec_idx");
|
||||
expect(stats?.loss).toBeDefined();
|
||||
expect(stats).toBeDefined();
|
||||
|
||||
// Search without specifying the column
|
||||
let rst = await tbl
|
||||
@@ -1150,7 +1280,6 @@ describe("When creating an index", () => {
|
||||
expect(stats?.distanceType).toBeUndefined();
|
||||
expect(stats?.indexType).toEqual("BTREE");
|
||||
expect(stats?.numIndices).toEqual(1);
|
||||
expect(stats?.loss).toBeUndefined();
|
||||
});
|
||||
|
||||
test("when getting stats on non-existent index", async () => {
|
||||
|
||||
@@ -84,6 +84,20 @@ export interface CreateTableOptions {
|
||||
}
|
||||
|
||||
export interface OpenTableOptions {
|
||||
/**
|
||||
* Open the table scoped to this branch instead of the default branch.
|
||||
*
|
||||
* Reads and writes on the returned table operate in the branch's context.
|
||||
*/
|
||||
branch?: string;
|
||||
/**
|
||||
* Open the table pinned to this version, producing a read-only view.
|
||||
*
|
||||
* Composes with {@link OpenTableOptions.branch}: when both are set, opens
|
||||
* that branch at the version; otherwise opens `main` at the version. Call
|
||||
* `checkoutLatest` to return to a writable state.
|
||||
*/
|
||||
version?: number;
|
||||
/**
|
||||
* Configuration for object storage.
|
||||
*
|
||||
@@ -483,7 +497,20 @@ export class LocalConnection extends Connection {
|
||||
options?.indexCacheSize,
|
||||
);
|
||||
|
||||
return new LocalTable(innerTable);
|
||||
let table: Table = new LocalTable(innerTable);
|
||||
// "main" is the default branch, so treat it as no branch. On a real branch,
|
||||
// scope and pin in one step (yielding "version V of branch B"); otherwise
|
||||
// pin the version, if any, against main.
|
||||
const branch =
|
||||
options?.branch != null && options.branch !== "main"
|
||||
? options.branch
|
||||
: undefined;
|
||||
if (branch != null) {
|
||||
table = await (await table.branches()).checkout(branch, options?.version);
|
||||
} else if (options?.version != null) {
|
||||
await table.checkout(options.version);
|
||||
}
|
||||
return table;
|
||||
}
|
||||
|
||||
async cloneTable(
|
||||
|
||||
@@ -38,6 +38,7 @@ export {
|
||||
FragmentSummaryStats,
|
||||
Tags,
|
||||
TagContents,
|
||||
BranchContents,
|
||||
MergeResult,
|
||||
AddResult,
|
||||
AddColumnsResult,
|
||||
@@ -111,6 +112,7 @@ export {
|
||||
|
||||
export {
|
||||
Table,
|
||||
Branches,
|
||||
AddDataOptions,
|
||||
UpdateOptions,
|
||||
OptimizeOptions,
|
||||
|
||||
@@ -25,10 +25,12 @@ import {
|
||||
AddColumnsSql,
|
||||
AddResult,
|
||||
AlterColumnsResult,
|
||||
BranchContents,
|
||||
DeleteResult,
|
||||
DropColumnsResult,
|
||||
IndexConfig,
|
||||
IndexStatistics,
|
||||
Branches as NativeBranches,
|
||||
OptimizeStats,
|
||||
TableStatistics,
|
||||
Tags,
|
||||
@@ -653,6 +655,14 @@ export abstract class Table {
|
||||
*/
|
||||
abstract tags(): Promise<Tags>;
|
||||
|
||||
/**
|
||||
* Get the branch manager for this table.
|
||||
*
|
||||
* Branches are isolated, writable lines of history forked from another
|
||||
* branch (or version). Writes on a branch do not affect `main`.
|
||||
*/
|
||||
abstract branches(): Promise<Branches>;
|
||||
|
||||
/**
|
||||
* Restore the table to the currently checked out version
|
||||
*
|
||||
@@ -1108,6 +1118,10 @@ export class LocalTable extends Table {
|
||||
return await this.inner.tags();
|
||||
}
|
||||
|
||||
async branches(): Promise<Branches> {
|
||||
return new Branches(await this.inner.branches());
|
||||
}
|
||||
|
||||
async optimize(options?: Partial<OptimizeOptions>): Promise<OptimizeStats> {
|
||||
let cleanupOlderThanMs;
|
||||
if (
|
||||
@@ -1238,3 +1252,57 @@ export interface FieldMetadataUpdate {
|
||||
/** If true, replace the field's entire metadata map instead of merging. */
|
||||
replace?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Branch manager for a {@link Table}.
|
||||
*
|
||||
* Unlike tags, `create` and `checkout` return a new {@link Table} handle scoped
|
||||
* to the branch; writes on it do not affect `main`.
|
||||
*/
|
||||
export class Branches {
|
||||
#inner: NativeBranches;
|
||||
|
||||
/**
|
||||
* Construct a Branches manager. Internal use only.
|
||||
* @hidden
|
||||
*/
|
||||
constructor(inner: NativeBranches) {
|
||||
this.#inner = inner;
|
||||
}
|
||||
|
||||
/** List all branches, mapping name to branch metadata. */
|
||||
async list(): Promise<Record<string, BranchContents>> {
|
||||
return await this.#inner.list();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a branch and return a handle scoped to it.
|
||||
*
|
||||
* @param name Name of the new branch.
|
||||
* @param fromRef Source branch to fork from. Defaults to `main`.
|
||||
* @param fromVersion A specific version on `fromRef`. Defaults to latest.
|
||||
*/
|
||||
async create(
|
||||
name: string,
|
||||
fromRef?: string,
|
||||
fromVersion?: number,
|
||||
): Promise<Table> {
|
||||
return new LocalTable(await this.#inner.create(name, fromRef, fromVersion));
|
||||
}
|
||||
|
||||
/**
|
||||
* Check out an existing branch and return a handle scoped to it.
|
||||
*
|
||||
* With `version` set, the returned handle is pinned to that version of the
|
||||
* branch (a read-only, detached view); otherwise it tracks the branch's
|
||||
* latest and stays writable.
|
||||
*/
|
||||
async checkout(name: string, version?: number): Promise<Table> {
|
||||
return new LocalTable(await this.#inner.checkout(name, version));
|
||||
}
|
||||
|
||||
/** Delete a branch. */
|
||||
async delete(name: string): Promise<void> {
|
||||
return await this.#inner.delete(name);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.30.1-beta.1",
|
||||
"version": "0.30.1-beta.2",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.30.1-beta.1",
|
||||
"version": "0.30.1-beta.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.30.1-beta.1",
|
||||
"version": "0.30.1-beta.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.30.1-beta.1",
|
||||
"version": "0.30.1-beta.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.30.1-beta.1",
|
||||
"version": "0.30.1-beta.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.30.1-beta.1",
|
||||
"version": "0.30.1-beta.2",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.30.1-beta.1",
|
||||
"version": "0.30.1-beta.2",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
14
nodejs/package-lock.json
generated
14
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.30.1-beta.1",
|
||||
"version": "0.30.1-beta.2",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.30.1-beta.1",
|
||||
"version": "0.30.1-beta.2",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
@@ -26,7 +26,7 @@
|
||||
"@aws-sdk/client-s3": "3.1003.0",
|
||||
"@biomejs/biome": "^1.7.3",
|
||||
"@jest/globals": "^29.7.0",
|
||||
"@napi-rs/cli": "3.5.1",
|
||||
"@napi-rs/cli": "3.7.0",
|
||||
"@types/axios": "^0.14.0",
|
||||
"@types/jest": "^29.1.2",
|
||||
"@types/node": "22.7.4",
|
||||
@@ -2942,9 +2942,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@napi-rs/cli": {
|
||||
"version": "3.5.1",
|
||||
"resolved": "https://registry.npmjs.org/@napi-rs/cli/-/cli-3.5.1.tgz",
|
||||
"integrity": "sha512-XBfLQRDcB3qhu6bazdMJsecWW55kR85l5/k0af9BIBELXQSsCFU0fzug7PX8eQp6vVdm7W/U3z6uP5WmITB2Gw==",
|
||||
"version": "3.7.0",
|
||||
"resolved": "https://registry.npmjs.org/@napi-rs/cli/-/cli-3.7.0.tgz",
|
||||
"integrity": "sha512-3d3+rmxlOIV/G1zPWeX4PCxuYnhcCQM2BvY9rtimC8RO0dFR9gtYP+Grov+WoduZtfWRj5N1XvytWeRxxCk5zw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
@@ -2954,7 +2954,7 @@
|
||||
"@octokit/rest": "^22.0.1",
|
||||
"clipanion": "^4.0.0-rc.4",
|
||||
"colorette": "^2.0.20",
|
||||
"emnapi": "^1.7.1",
|
||||
"emnapi": "^1.10.0",
|
||||
"es-toolkit": "^1.41.0",
|
||||
"js-yaml": "^4.1.0",
|
||||
"obug": "^2.0.0",
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"ann"
|
||||
],
|
||||
"private": false,
|
||||
"version": "0.30.1-beta.1",
|
||||
"version": "0.30.1-beta.2",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
@@ -43,7 +43,7 @@
|
||||
"@aws-sdk/client-s3": "3.1003.0",
|
||||
"@biomejs/biome": "^1.7.3",
|
||||
"@jest/globals": "^29.7.0",
|
||||
"@napi-rs/cli": "3.5.1",
|
||||
"@napi-rs/cli": "3.7.0",
|
||||
"@types/axios": "^0.14.0",
|
||||
"@types/jest": "^29.1.2",
|
||||
"@types/node": "22.7.4",
|
||||
|
||||
10
nodejs/pnpm-lock.yaml
generated
10
nodejs/pnpm-lock.yaml
generated
@@ -31,8 +31,8 @@ importers:
|
||||
specifier: ^29.7.0
|
||||
version: 29.7.0
|
||||
'@napi-rs/cli':
|
||||
specifier: 3.5.1
|
||||
version: 3.5.1(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0)(@types/node@22.7.4)
|
||||
specifier: 3.7.0
|
||||
version: 3.7.0(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0)(@types/node@22.7.4)
|
||||
'@types/axios':
|
||||
specifier: ^0.14.0
|
||||
version: 0.14.4
|
||||
@@ -887,8 +887,8 @@ packages:
|
||||
'@jridgewell/trace-mapping@0.3.31':
|
||||
resolution: {integrity: sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==}
|
||||
|
||||
'@napi-rs/cli@3.5.1':
|
||||
resolution: {integrity: sha512-XBfLQRDcB3qhu6bazdMJsecWW55kR85l5/k0af9BIBELXQSsCFU0fzug7PX8eQp6vVdm7W/U3z6uP5WmITB2Gw==}
|
||||
'@napi-rs/cli@3.7.0':
|
||||
resolution: {integrity: sha512-3d3+rmxlOIV/G1zPWeX4PCxuYnhcCQM2BvY9rtimC8RO0dFR9gtYP+Grov+WoduZtfWRj5N1XvytWeRxxCk5zw==}
|
||||
engines: {node: '>= 16'}
|
||||
hasBin: true
|
||||
peerDependencies:
|
||||
@@ -4582,7 +4582,7 @@ snapshots:
|
||||
'@jridgewell/resolve-uri': 3.1.2
|
||||
'@jridgewell/sourcemap-codec': 1.5.5
|
||||
|
||||
'@napi-rs/cli@3.5.1(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0)(@types/node@22.7.4)':
|
||||
'@napi-rs/cli@3.7.0(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0)(@types/node@22.7.4)':
|
||||
dependencies:
|
||||
'@inquirer/prompts': 8.4.3(@types/node@22.7.4)
|
||||
'@napi-rs/cross-toolchain': 1.0.3(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0)
|
||||
|
||||
@@ -7,7 +7,7 @@ use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema};
|
||||
use lancedb::table::{
|
||||
AddDataMode, ColumnAlteration as LanceColumnAlteration, Duration,
|
||||
FieldMetadataUpdate as LanceFieldMetadataUpdate, NewColumnTransform, OptimizeAction,
|
||||
OptimizeOptions, Table as LanceDbTable,
|
||||
OptimizeOptions, Ref, Table as LanceDbTable,
|
||||
};
|
||||
use napi::bindgen_prelude::*;
|
||||
use napi::threadsafe_function::{ThreadsafeFunction, ThreadsafeFunctionCallMode};
|
||||
@@ -478,6 +478,13 @@ impl Table {
|
||||
})
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn branches(&self) -> napi::Result<Branches> {
|
||||
Ok(Branches {
|
||||
inner: self.inner_ref()?.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn optimize(
|
||||
&self,
|
||||
@@ -838,9 +845,6 @@ pub struct IndexStatistics {
|
||||
pub distance_type: Option<String>,
|
||||
/// The number of parts this index is split into.
|
||||
pub num_indices: Option<u32>,
|
||||
/// The KMeans loss value of the index,
|
||||
/// it is only present for vector indices.
|
||||
pub loss: Option<f64>,
|
||||
}
|
||||
impl From<lancedb::index::IndexStatistics> for IndexStatistics {
|
||||
fn from(value: lancedb::index::IndexStatistics) -> Self {
|
||||
@@ -850,7 +854,6 @@ impl From<lancedb::index::IndexStatistics> for IndexStatistics {
|
||||
index_type: value.index_type.to_string(),
|
||||
distance_type: value.distance_type.map(|d| d.to_string()),
|
||||
num_indices: value.num_indices,
|
||||
loss: value.loss,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1060,6 +1063,13 @@ pub struct TagContents {
|
||||
pub manifest_size: i64,
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub struct BranchContents {
|
||||
pub parent_branch: Option<String>,
|
||||
pub parent_version: i64,
|
||||
pub manifest_size: i64,
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub struct Tags {
|
||||
inner: LanceDbTable,
|
||||
@@ -1128,3 +1138,75 @@ impl Tags {
|
||||
.default_error()
|
||||
}
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub struct Branches {
|
||||
inner: LanceDbTable,
|
||||
}
|
||||
|
||||
#[napi]
|
||||
impl Branches {
|
||||
#[napi]
|
||||
pub async fn list(&self) -> napi::Result<HashMap<String, BranchContents>> {
|
||||
let branches = self.inner.list_branches().await.default_error()?;
|
||||
let result = branches
|
||||
.into_iter()
|
||||
.map(|(k, v)| {
|
||||
(
|
||||
k,
|
||||
BranchContents {
|
||||
parent_branch: v.parent_branch,
|
||||
parent_version: v.parent_version as i64,
|
||||
manifest_size: v.manifest_size as i64,
|
||||
},
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub async fn create(
|
||||
&self,
|
||||
name: String,
|
||||
from_ref: Option<String>,
|
||||
from_version: Option<i64>,
|
||||
) -> napi::Result<Table> {
|
||||
let from_ref = from_ref.filter(|b| b != "main");
|
||||
let from_version = from_version
|
||||
.map(|v| {
|
||||
u64::try_from(v).map_err(|_| {
|
||||
napi::Error::from_reason("from_version must be a non-negative integer")
|
||||
})
|
||||
})
|
||||
.transpose()?;
|
||||
let from = Ref::Version(from_ref, from_version);
|
||||
let table = self
|
||||
.inner
|
||||
.create_branch(&name, from)
|
||||
.await
|
||||
.default_error()?;
|
||||
Ok(Table::new(table))
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub async fn checkout(&self, name: String, version: Option<i64>) -> napi::Result<Table> {
|
||||
let version = version
|
||||
.map(|v| {
|
||||
u64::try_from(v)
|
||||
.map_err(|_| napi::Error::from_reason("version must be a non-negative integer"))
|
||||
})
|
||||
.transpose()?;
|
||||
let table = self
|
||||
.inner
|
||||
.checkout_branch(&name, version)
|
||||
.await
|
||||
.default_error()?;
|
||||
Ok(Table::new(table))
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub async fn delete(&self, name: String) -> napi::Result<()> {
|
||||
self.inner.delete_branch(&name).await.default_error()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -226,6 +226,9 @@ class Table:
|
||||
async def close_lsm_writers(self) -> None: ...
|
||||
@property
|
||||
def tags(self) -> Tags: ...
|
||||
@property
|
||||
def branches(self) -> Branches: ...
|
||||
def current_branch(self) -> Optional[str]: ...
|
||||
def query(self) -> Query: ...
|
||||
def take_offsets(self, offsets: list[int]) -> TakeQuery: ...
|
||||
def take_row_ids(self, row_ids: list[int]) -> TakeQuery: ...
|
||||
@@ -238,6 +241,17 @@ class Tags:
|
||||
async def delete(self, tag: str): ...
|
||||
async def update(self, tag: str, version: int): ...
|
||||
|
||||
class Branches:
|
||||
async def list(self) -> Dict[str, Any]: ...
|
||||
async def create(
|
||||
self,
|
||||
name: str,
|
||||
from_ref: Optional[str] = None,
|
||||
from_version: Optional[int] = None,
|
||||
) -> Table: ...
|
||||
async def checkout(self, name: str, version: Optional[int] = None) -> Table: ...
|
||||
async def delete(self, name: str) -> None: ...
|
||||
|
||||
class IndexConfig:
|
||||
name: str
|
||||
index_type: str
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
import asyncio
|
||||
import concurrent.futures
|
||||
import os
|
||||
import threading
|
||||
import warnings
|
||||
@@ -37,6 +38,24 @@ class BackgroundEventLoop:
|
||||
|
||||
LOOP = BackgroundEventLoop()
|
||||
|
||||
|
||||
def _new_embedding_executor() -> concurrent.futures.ThreadPoolExecutor:
|
||||
return concurrent.futures.ThreadPoolExecutor(thread_name_prefix="lancedb-embedding")
|
||||
|
||||
|
||||
# Embedding functions can block for a long time -- a heavy local model or an
|
||||
# HTTP request to a remote embeddings API. Running them on asyncio's default
|
||||
# executor lets them starve the unrelated blocking I/O that shares that pool,
|
||||
# so they get a dedicated one. See
|
||||
# https://github.com/lancedb/lancedb/issues/3310.
|
||||
_EMBEDDING_EXECUTOR = _new_embedding_executor()
|
||||
|
||||
|
||||
def embedding_executor() -> concurrent.futures.ThreadPoolExecutor:
|
||||
"""Return the executor dedicated to running blocking embedding calls."""
|
||||
return _EMBEDDING_EXECUTOR
|
||||
|
||||
|
||||
_FORK_WARNED = False
|
||||
|
||||
|
||||
@@ -47,6 +66,12 @@ def _reset_after_fork():
|
||||
# the new state. The Rust-side tokio runtime is reset analogously by a
|
||||
# pthread_atfork hook installed in the _lancedb extension.
|
||||
LOOP._start()
|
||||
# The embedding executor's worker threads are dead in the child as well.
|
||||
# Replace it with a fresh pool (threads are spawned lazily, so this is
|
||||
# cheap); we don't shut down the old one, since joining its dead workers
|
||||
# could hang.
|
||||
global _EMBEDDING_EXECUTOR
|
||||
_EMBEDDING_EXECUTOR = _new_embedding_executor()
|
||||
global _FORK_WARNED
|
||||
if not _FORK_WARNED:
|
||||
_FORK_WARNED = True
|
||||
|
||||
@@ -416,6 +416,8 @@ class DBConnection(EnforceOverrides):
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
branch: Optional[str] = None,
|
||||
version: Optional[int] = None,
|
||||
) -> Table:
|
||||
"""Open a Lance Table in the database.
|
||||
|
||||
@@ -444,6 +446,14 @@ class DBConnection(EnforceOverrides):
|
||||
connection will be inherited by the table, but can be overridden here.
|
||||
See available options at
|
||||
<https://docs.lancedb.com/storage/>
|
||||
branch: str, optional
|
||||
If provided, open a handle scoped to this branch instead of the
|
||||
default branch. Reads and writes operate in the branch's context.
|
||||
version: int, optional
|
||||
If provided, open the table pinned to this version, producing a
|
||||
read-only handle. Composes with ``branch``: when both are given,
|
||||
opens that branch at the version; otherwise opens ``main`` at the
|
||||
version. Call ``checkout_latest`` to return to a writable state.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -958,6 +968,8 @@ class LanceDBConnection(DBConnection):
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
branch: Optional[str] = None,
|
||||
version: Optional[int] = None,
|
||||
) -> LanceTable:
|
||||
"""Open a table in the database.
|
||||
|
||||
@@ -968,6 +980,14 @@ class LanceDBConnection(DBConnection):
|
||||
namespace_path: List[str], optional
|
||||
The namespace to open the table from. When non-empty, the
|
||||
table is resolved through the directory namespace client.
|
||||
branch: str, optional
|
||||
If provided, open a handle scoped to this branch instead of the
|
||||
default branch. Reads and writes operate in the branch's context.
|
||||
version: int, optional
|
||||
If provided, open the table pinned to this version, producing a
|
||||
read-only handle. Composes with ``branch``: when both are given,
|
||||
opens that branch at the version; otherwise opens ``main`` at the
|
||||
version. Call ``checkout_latest`` to return to a writable state.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -987,20 +1007,26 @@ class LanceDBConnection(DBConnection):
|
||||
)
|
||||
|
||||
if namespace_path:
|
||||
return self._namespace_conn().open_table(
|
||||
tbl = self._namespace_conn().open_table(
|
||||
name,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
index_cache_size=index_cache_size,
|
||||
)
|
||||
else:
|
||||
tbl = LanceTable.open(
|
||||
self,
|
||||
name,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
index_cache_size=index_cache_size,
|
||||
)
|
||||
|
||||
return LanceTable.open(
|
||||
self,
|
||||
name,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
index_cache_size=index_cache_size,
|
||||
)
|
||||
if branch is not None:
|
||||
tbl = tbl.branches.checkout(branch, version)
|
||||
elif version is not None:
|
||||
tbl.checkout(version)
|
||||
return tbl
|
||||
|
||||
def clone_table(
|
||||
self,
|
||||
@@ -1641,6 +1667,8 @@ class AsyncConnection(object):
|
||||
location: Optional[str] = None,
|
||||
namespace_client: Optional[Any] = None,
|
||||
managed_versioning: Optional[bool] = None,
|
||||
branch: Optional[str] = None,
|
||||
version: Optional[int] = None,
|
||||
) -> AsyncTable:
|
||||
"""Open a Lance Table in the database.
|
||||
|
||||
@@ -1676,6 +1704,14 @@ class AsyncConnection(object):
|
||||
managed_versioning: bool, optional
|
||||
Whether managed versioning is enabled for this table. If provided,
|
||||
avoids a redundant describe_table call when namespace_client is set.
|
||||
branch: str, optional
|
||||
If provided, open a handle scoped to this branch instead of the
|
||||
default branch. Reads and writes operate in the branch's context.
|
||||
version: int, optional
|
||||
If provided, open the table pinned to this version, producing a
|
||||
read-only handle. Composes with ``branch``: when both are given,
|
||||
opens that branch at the version; otherwise opens ``main`` at the
|
||||
version. Call ``checkout_latest`` to return to a writable state.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -1692,7 +1728,14 @@ class AsyncConnection(object):
|
||||
namespace_client=namespace_client,
|
||||
managed_versioning=managed_versioning,
|
||||
)
|
||||
return AsyncTable(table)
|
||||
tbl = AsyncTable(table)
|
||||
# "main" is the default branch, so treat it as no branch: remote rejects
|
||||
# every branch checkout (even "main"), and the version still applies.
|
||||
if branch is not None and branch != "main":
|
||||
tbl = await tbl.branches.checkout(branch, version)
|
||||
elif version is not None:
|
||||
await tbl.checkout(version)
|
||||
return tbl
|
||||
|
||||
async def clone_table(
|
||||
self,
|
||||
|
||||
@@ -144,7 +144,12 @@ def _query_to_namespace_request(
|
||||
if query.postfilter is not None:
|
||||
prefilter = not query.postfilter
|
||||
|
||||
k = query.limit if query.limit is not None else 10
|
||||
if query.limit is not None:
|
||||
k = query.limit
|
||||
elif query.vector is None and query.full_text_query is None:
|
||||
k = sys.maxsize
|
||||
else:
|
||||
k = 10
|
||||
|
||||
# Build request kwargs, only including non-None values for optional fields
|
||||
# that Pydantic doesn't accept as None
|
||||
@@ -544,6 +549,8 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
branch: Optional[str] = None,
|
||||
version: Optional[int] = None,
|
||||
) -> Table:
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
@@ -562,7 +569,7 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
raise TableNotFoundError(f"Table not found: {'$'.join(table_id)}")
|
||||
raise
|
||||
|
||||
return LanceTable(
|
||||
tbl = LanceTable(
|
||||
self,
|
||||
name,
|
||||
namespace_path=namespace_path,
|
||||
@@ -570,6 +577,11 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
pushdown_operations=self._namespace_client_pushdown_operations,
|
||||
_async=async_table,
|
||||
)
|
||||
if branch is not None:
|
||||
tbl = tbl.branches.checkout(branch, version)
|
||||
elif version is not None:
|
||||
tbl.checkout(version)
|
||||
return tbl
|
||||
|
||||
@override
|
||||
def drop_table(self, name: str, namespace_path: Optional[List[str]] = None):
|
||||
@@ -954,7 +966,7 @@ class AsyncLanceNamespaceDBConnection:
|
||||
if mode.lower() not in ["create", "overwrite"]:
|
||||
raise ValueError("mode must be either 'create' or 'overwrite'")
|
||||
validate_table_name(name)
|
||||
return await self._inner.create_table(
|
||||
table = await self._inner.create_table(
|
||||
name,
|
||||
data,
|
||||
schema=schema,
|
||||
@@ -966,6 +978,11 @@ class AsyncLanceNamespaceDBConnection:
|
||||
embedding_functions=embedding_functions,
|
||||
storage_options=storage_options,
|
||||
)
|
||||
return table._set_namespace_context(
|
||||
namespace_path=namespace_path,
|
||||
namespace_client=self._namespace_client,
|
||||
pushdown_operations=self._namespace_client_pushdown_operations,
|
||||
)
|
||||
|
||||
async def open_table(
|
||||
self,
|
||||
@@ -974,12 +991,14 @@ class AsyncLanceNamespaceDBConnection:
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
branch: Optional[str] = None,
|
||||
version: Optional[int] = None,
|
||||
) -> AsyncTable:
|
||||
"""Open an existing table from the namespace."""
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
try:
|
||||
return await self._inner.open_table(
|
||||
table = await self._inner.open_table(
|
||||
name,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
@@ -990,6 +1009,17 @@ class AsyncLanceNamespaceDBConnection:
|
||||
table_id = namespace_path + [name]
|
||||
raise TableNotFoundError(f"Table not found: {'$'.join(table_id)}")
|
||||
raise
|
||||
# "main" is the default branch, so treat it as no branch (mirrors the
|
||||
# sync remote path); the version still applies.
|
||||
if branch is not None and branch != "main":
|
||||
table = await table.branches.checkout(branch, version)
|
||||
elif version is not None:
|
||||
await table.checkout(version)
|
||||
return table._set_namespace_context(
|
||||
namespace_path=namespace_path,
|
||||
namespace_client=self._namespace_client,
|
||||
pushdown_operations=self._namespace_client_pushdown_operations,
|
||||
)
|
||||
|
||||
async def drop_table(self, name: str, namespace_path: Optional[List[str]] = None):
|
||||
"""Drop a table from the namespace."""
|
||||
|
||||
@@ -383,6 +383,8 @@ class RemoteDBConnection(DBConnection):
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
branch: Optional[str] = None,
|
||||
version: Optional[int] = None,
|
||||
) -> Table:
|
||||
"""Open a Lance Table in the database.
|
||||
|
||||
@@ -393,6 +395,14 @@ class RemoteDBConnection(DBConnection):
|
||||
namespace_path: List[str], optional
|
||||
The namespace to open the table from.
|
||||
None or empty list represents root namespace.
|
||||
branch: str, optional
|
||||
Branching is not yet supported on remote tables, so only the
|
||||
default branch is accepted (``None`` or ``"main"``); any other
|
||||
value raises ``NotImplementedError``.
|
||||
version: int, optional
|
||||
If provided, open the table pinned to this version, producing a
|
||||
read-only handle. Call ``checkout_latest`` to return to a writable
|
||||
state.
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -400,6 +410,11 @@ class RemoteDBConnection(DBConnection):
|
||||
"""
|
||||
from .table import RemoteTable
|
||||
|
||||
# Remote supports version time-travel but not branches: reject a non-main
|
||||
# branch, but allow a version-only open (or "main").
|
||||
if branch is not None and branch != "main":
|
||||
raise NotImplementedError("branching is not yet supported on remote tables")
|
||||
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
if index_cache_size is not None:
|
||||
@@ -409,12 +424,15 @@ class RemoteDBConnection(DBConnection):
|
||||
)
|
||||
|
||||
table = LOOP.run(self._conn.open_table(name, namespace_path=namespace_path))
|
||||
return RemoteTable(
|
||||
tbl = RemoteTable(
|
||||
table,
|
||||
self.db_name,
|
||||
connection_state=self.serialize,
|
||||
namespace_path=namespace_path,
|
||||
)
|
||||
if version is not None:
|
||||
tbl.checkout(version)
|
||||
return tbl
|
||||
|
||||
def clone_table(
|
||||
self,
|
||||
|
||||
@@ -27,6 +27,9 @@ class LanceDBClientError(RuntimeError):
|
||||
self.request_id = request_id
|
||||
self.status_code = status_code
|
||||
|
||||
def __reduce__(self) -> tuple[type, tuple]:
|
||||
return (self.__class__, (str(self), self.request_id, self.status_code))
|
||||
|
||||
|
||||
class HttpError(LanceDBClientError):
|
||||
"""An error that occurred during an HTTP request.
|
||||
@@ -101,3 +104,19 @@ class RetryError(LanceDBClientError):
|
||||
self.max_request_failures = max_request_failures
|
||||
self.max_connect_failures = max_connect_failures
|
||||
self.max_read_failures = max_read_failures
|
||||
|
||||
def __reduce__(self) -> tuple[type, tuple]:
|
||||
return (
|
||||
self.__class__,
|
||||
(
|
||||
str(self),
|
||||
self.request_id,
|
||||
self.request_failures,
|
||||
self.connect_failures,
|
||||
self.read_failures,
|
||||
self.max_request_failures,
|
||||
self.max_connect_failures,
|
||||
self.max_read_failures,
|
||||
self.status_code,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -30,7 +30,7 @@ from lancedb.scannable import _register_optional_converters, to_scannable
|
||||
|
||||
from . import __version__
|
||||
from lancedb.arrow import peek_reader
|
||||
from lancedb.background_loop import LOOP
|
||||
from lancedb.background_loop import LOOP, embedding_executor
|
||||
from .dependencies import (
|
||||
_check_for_hugging_face,
|
||||
_check_for_lance,
|
||||
@@ -92,6 +92,12 @@ BlobMode = Literal["lazy", "bytes", "descriptions"]
|
||||
_VALID_BLOB_MODES = ("lazy", "bytes", "descriptions")
|
||||
|
||||
|
||||
def _should_push_down_query_table(
|
||||
namespace_client: Optional[Any], pushdown_operations: set
|
||||
) -> bool:
|
||||
return namespace_client is not None and "QueryTable" in pushdown_operations
|
||||
|
||||
|
||||
def _validate_blob_mode(blob_mode: BlobMode) -> None:
|
||||
if blob_mode not in _VALID_BLOB_MODES:
|
||||
modes = ", ".join(repr(mode) for mode in _VALID_BLOB_MODES)
|
||||
@@ -778,6 +784,15 @@ class Table(ABC):
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@property
|
||||
def branches(self) -> "Branches":
|
||||
"""Branch management for the table.
|
||||
|
||||
Branches are isolated, writable lines of history forked from another
|
||||
branch (or version). Writes on a branch do not affect ``main``.
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def __len__(self) -> int:
|
||||
"""The number of rows in this Table"""
|
||||
return self.count_rows(None)
|
||||
@@ -2106,22 +2121,27 @@ class LanceTable(Table):
|
||||
"Please install with `pip install pylance`."
|
||||
)
|
||||
|
||||
branch = self.current_branch()
|
||||
version = None if branch is not None else self.version
|
||||
if self._namespace_client is not None:
|
||||
table_id = self._namespace_path + [self.name]
|
||||
return lance.dataset(
|
||||
version=self.version,
|
||||
ds = lance.dataset(
|
||||
version=version,
|
||||
storage_options=self._conn.storage_options,
|
||||
namespace_client=self._namespace_client,
|
||||
table_id=table_id,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
return lance.dataset(
|
||||
self._dataset_path,
|
||||
version=self.version,
|
||||
storage_options=self._conn.storage_options,
|
||||
**kwargs,
|
||||
)
|
||||
else:
|
||||
ds = lance.dataset(
|
||||
self._dataset_path,
|
||||
version=version,
|
||||
storage_options=self._conn.storage_options,
|
||||
**kwargs,
|
||||
)
|
||||
if branch is not None:
|
||||
ds = ds.checkout_version((branch, self.version))
|
||||
return ds
|
||||
|
||||
@property
|
||||
def schema(self) -> pa.Schema:
|
||||
@@ -2187,6 +2207,19 @@ class LanceTable(Table):
|
||||
"""
|
||||
return Tags(self._table)
|
||||
|
||||
@property
|
||||
def branches(self) -> "Branches":
|
||||
"""Branch management for the table.
|
||||
|
||||
``create``/``checkout`` return a new table handle scoped to the branch;
|
||||
writes on it do not affect ``main``.
|
||||
"""
|
||||
return Branches(self)
|
||||
|
||||
def current_branch(self) -> Optional[str]:
|
||||
"""The branch this table handle is scoped to, or ``None`` for ``main``."""
|
||||
return self._table.current_branch()
|
||||
|
||||
def checkout(self, version: Union[int, str]):
|
||||
"""Checkout a version of the table. This is an in-place operation.
|
||||
|
||||
@@ -2333,6 +2366,11 @@ class LanceTable(Table):
|
||||
Returns
|
||||
-------
|
||||
pa.Table"""
|
||||
if _should_push_down_query_table(
|
||||
self._namespace_client, self._pushdown_operations
|
||||
):
|
||||
return self._execute_query(Query()).read_all()
|
||||
|
||||
return LOOP.run(self._table.to_arrow())
|
||||
|
||||
def to_polars(self, batch_size=None) -> "pl.LazyFrame":
|
||||
@@ -3446,9 +3484,14 @@ class LanceTable(Table):
|
||||
batch_size: Optional[int] = None,
|
||||
timeout: Optional[timedelta] = None,
|
||||
) -> pa.RecordBatchReader:
|
||||
# Branch queries run locally: the server-side query protocol can't
|
||||
# carry a branch yet.
|
||||
# TODO: push down server-side once it can (with remote table support).
|
||||
if (
|
||||
"QueryTable" in self._pushdown_operations
|
||||
and self._namespace_client is not None
|
||||
_should_push_down_query_table(
|
||||
self._namespace_client, self._pushdown_operations
|
||||
)
|
||||
and self.current_branch() is None
|
||||
):
|
||||
from lancedb.namespace import _execute_server_side_query
|
||||
|
||||
@@ -4182,7 +4225,14 @@ class AsyncTable:
|
||||
[AsyncTable.create_index][lancedb.table.AsyncTable.create_index].
|
||||
"""
|
||||
|
||||
def __init__(self, table: LanceDBTable):
|
||||
def __init__(
|
||||
self,
|
||||
table: LanceDBTable,
|
||||
*,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
namespace_client: Optional[Any] = None,
|
||||
pushdown_operations: Optional[set] = None,
|
||||
):
|
||||
"""Create a new AsyncTable object.
|
||||
|
||||
You should not create AsyncTable objects directly.
|
||||
@@ -4191,6 +4241,21 @@ class AsyncTable:
|
||||
[AsyncConnection.open_table][lancedb.AsyncConnection.open_table] to obtain
|
||||
Table objects."""
|
||||
self._inner = table
|
||||
self._namespace_path = namespace_path or []
|
||||
self._namespace_client = namespace_client
|
||||
self._pushdown_operations = pushdown_operations or set()
|
||||
|
||||
def _set_namespace_context(
|
||||
self,
|
||||
*,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
namespace_client: Optional[Any] = None,
|
||||
pushdown_operations: Optional[set] = None,
|
||||
) -> "AsyncTable":
|
||||
self._namespace_path = namespace_path or []
|
||||
self._namespace_client = namespace_client
|
||||
self._pushdown_operations = pushdown_operations or set()
|
||||
return self
|
||||
|
||||
def __repr__(self):
|
||||
return self._inner.__repr__()
|
||||
@@ -4353,12 +4418,20 @@ class AsyncTable:
|
||||
"Please install with `pip install pylance`."
|
||||
)
|
||||
|
||||
return lance.dataset(
|
||||
# lance.dataset() can't open a branch directly, so open the base table
|
||||
# and check out the branch ref (a None branch resolves to main).
|
||||
branch = self.current_branch()
|
||||
table_version = await self.version()
|
||||
version = None if branch is not None else table_version
|
||||
ds = lance.dataset(
|
||||
await self.uri(),
|
||||
version=await self.version(),
|
||||
version=version,
|
||||
storage_options=await self.latest_storage_options(),
|
||||
**kwargs,
|
||||
)
|
||||
if branch is not None:
|
||||
ds = ds.checkout_version((branch, table_version))
|
||||
return ds
|
||||
|
||||
async def to_pandas(self, blob_mode: BlobMode = "lazy", **kwargs) -> "pd.DataFrame":
|
||||
"""Return the table as a pandas DataFrame.
|
||||
@@ -4391,6 +4464,11 @@ class AsyncTable:
|
||||
-------
|
||||
pa.Table
|
||||
"""
|
||||
if _should_push_down_query_table(
|
||||
self._namespace_client, self._pushdown_operations
|
||||
):
|
||||
return (await self._execute_query(Query())).read_all()
|
||||
|
||||
return await self.query().to_arrow()
|
||||
|
||||
async def create_index(
|
||||
@@ -4908,10 +4986,13 @@ class AsyncTable:
|
||||
if embedding is not None:
|
||||
loop = asyncio.get_running_loop()
|
||||
# This function is likely to block, since it either calls an expensive
|
||||
# function or makes an HTTP request to an embeddings REST API.
|
||||
# function or makes an HTTP request to an embeddings REST API. Run it
|
||||
# on a dedicated executor so it can't starve the default executor that
|
||||
# other blocking I/O shares. See
|
||||
# https://github.com/lancedb/lancedb/issues/3310.
|
||||
return (
|
||||
await loop.run_in_executor(
|
||||
None,
|
||||
embedding_executor(),
|
||||
embedding.function.compute_query_embeddings_with_retry,
|
||||
query,
|
||||
)
|
||||
@@ -5065,6 +5146,14 @@ class AsyncTable:
|
||||
batch_size: Optional[int] = None,
|
||||
timeout: Optional[timedelta] = None,
|
||||
) -> pa.RecordBatchReader:
|
||||
if _should_push_down_query_table(
|
||||
self._namespace_client, self._pushdown_operations
|
||||
):
|
||||
from lancedb.namespace import _execute_server_side_query
|
||||
|
||||
table_id = self._namespace_path + [self.name]
|
||||
return _execute_server_side_query(self._namespace_client, table_id, query)
|
||||
|
||||
# The sync table calls into this method, so we need to map the
|
||||
# query to the async version of the query and run that here. This is only
|
||||
# used for that code path right now.
|
||||
@@ -5473,6 +5562,19 @@ class AsyncTable:
|
||||
"""
|
||||
return AsyncTags(self._inner)
|
||||
|
||||
@property
|
||||
def branches(self) -> AsyncBranches:
|
||||
"""Branch management for the table.
|
||||
|
||||
Branches are isolated, writable lines of history forked from another
|
||||
branch (or version). Writes on a branch do not affect ``main``.
|
||||
"""
|
||||
return AsyncBranches(self._inner)
|
||||
|
||||
def current_branch(self) -> Optional[str]:
|
||||
"""The branch this table handle is scoped to, or ``None`` for ``main``."""
|
||||
return self._inner.current_branch()
|
||||
|
||||
async def optimize(
|
||||
self,
|
||||
*,
|
||||
@@ -5634,8 +5736,6 @@ class IndexStatistics:
|
||||
The distance type used by the index.
|
||||
num_indices: Optional[int]
|
||||
The number of parts the index is split into.
|
||||
loss: Optional[float]
|
||||
The KMeans loss for the index, for only vector indices.
|
||||
"""
|
||||
|
||||
num_indexed_rows: int
|
||||
@@ -5655,7 +5755,6 @@ class IndexStatistics:
|
||||
]
|
||||
distance_type: Optional[Literal["l2", "cosine", "dot"]] = None
|
||||
num_indices: Optional[int] = None
|
||||
loss: Optional[float] = None
|
||||
|
||||
# This exists for backwards compatibility with an older API, which returned
|
||||
# a dictionary instead of a class.
|
||||
@@ -5808,6 +5907,75 @@ class Tags:
|
||||
LOOP.run(self._table.tags.update(tag, version))
|
||||
|
||||
|
||||
class Branches:
|
||||
"""
|
||||
Table branch manager.
|
||||
"""
|
||||
|
||||
def __init__(self, parent: "LanceTable"):
|
||||
self._parent = parent
|
||||
self._table = parent._table
|
||||
|
||||
def list(self) -> Dict[str, Any]:
|
||||
"""List all branches, mapping name to branch metadata."""
|
||||
return LOOP.run(self._table.branches.list())
|
||||
|
||||
def create(
|
||||
self,
|
||||
name: str,
|
||||
from_ref: Optional[str] = None,
|
||||
from_version: Optional[int] = None,
|
||||
) -> "LanceTable":
|
||||
"""Create a branch and return a handle scoped to it.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str
|
||||
Name of the new branch.
|
||||
from_ref: str, optional
|
||||
Source branch to fork from. Defaults to ``main``.
|
||||
from_version: int, optional
|
||||
A specific version on ``from_ref`` to fork from. Defaults to latest.
|
||||
"""
|
||||
async_table = LOOP.run(
|
||||
self._table.branches.create(name, from_ref, from_version)
|
||||
)
|
||||
return self._wrap(async_table)
|
||||
|
||||
def checkout(self, name: str, version: Optional[int] = None) -> "LanceTable":
|
||||
"""Check out an existing branch and return a handle scoped to it.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str
|
||||
Name of the branch to check out.
|
||||
version: int, optional
|
||||
A specific version on the branch to pin. When set, the returned
|
||||
handle is a read-only view of that version; when omitted it tracks
|
||||
the branch's latest and stays writable.
|
||||
"""
|
||||
async_table = LOOP.run(self._table.branches.checkout(name, version))
|
||||
return self._wrap(async_table)
|
||||
|
||||
def delete(self, name: str) -> None:
|
||||
"""Delete a branch."""
|
||||
LOOP.run(self._table.branches.delete(name))
|
||||
|
||||
def _wrap(self, async_table: "AsyncTable") -> "LanceTable":
|
||||
# Reuse the parent's connection + namespace context; from_inner would drop
|
||||
# it and break identity/query routing for namespace-backed tables.
|
||||
parent = self._parent
|
||||
return LanceTable(
|
||||
parent._conn,
|
||||
async_table.name,
|
||||
namespace_path=parent._namespace_path,
|
||||
namespace_client=parent._namespace_client,
|
||||
pushdown_operations=parent._pushdown_operations,
|
||||
location=parent._location,
|
||||
_async=async_table,
|
||||
)
|
||||
|
||||
|
||||
class AsyncTags:
|
||||
"""
|
||||
Async table tag manager.
|
||||
@@ -5875,3 +6043,56 @@ class AsyncTags:
|
||||
The new table version to tag.
|
||||
"""
|
||||
await self._table.tags.update(tag, version)
|
||||
|
||||
|
||||
class AsyncBranches:
|
||||
"""Async table branch manager."""
|
||||
|
||||
def __init__(self, table):
|
||||
self._table = table
|
||||
|
||||
async def list(self) -> Dict[str, Any]:
|
||||
"""List all branches, mapping name to branch metadata."""
|
||||
return await self._table.branches.list()
|
||||
|
||||
async def create(
|
||||
self,
|
||||
name: str,
|
||||
from_ref: Optional[str] = None,
|
||||
from_version: Optional[int] = None,
|
||||
) -> "AsyncTable":
|
||||
"""Create a branch and return a handle scoped to it.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str
|
||||
Name of the new branch.
|
||||
from_ref: str, optional
|
||||
Source branch to fork from. Defaults to ``main``.
|
||||
from_version: int, optional
|
||||
A specific version on ``from_ref`` to fork from. Defaults to latest.
|
||||
"""
|
||||
# "main" and None are two spellings of the root branch in lance; normalize
|
||||
# so from_ref="main" behaves identically to the default.
|
||||
if from_ref == "main":
|
||||
from_ref = None
|
||||
inner = await self._table.branches.create(name, from_ref, from_version)
|
||||
return AsyncTable(inner)
|
||||
|
||||
async def checkout(self, name: str, version: Optional[int] = None) -> "AsyncTable":
|
||||
"""Check out an existing branch and return a handle scoped to it.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str
|
||||
Name of the branch to check out.
|
||||
version: int, optional
|
||||
A specific version on the branch to pin. When set, the returned
|
||||
handle is a read-only view of that version; when omitted it tracks
|
||||
the branch's latest and stays writable.
|
||||
"""
|
||||
return AsyncTable(await self._table.branches.checkout(name, version))
|
||||
|
||||
async def delete(self, name: str) -> None:
|
||||
"""Delete a branch."""
|
||||
await self._table.branches.delete(name)
|
||||
|
||||
@@ -385,6 +385,21 @@ def _(value: np.ndarray):
|
||||
return value_to_sql(value.tolist())
|
||||
|
||||
|
||||
@value_to_sql.register(np.bool_)
|
||||
def _(value: np.bool_):
|
||||
return value_to_sql(bool(value))
|
||||
|
||||
|
||||
@value_to_sql.register(np.integer)
|
||||
def _(value: np.integer):
|
||||
return value_to_sql(int(value))
|
||||
|
||||
|
||||
@value_to_sql.register(np.floating)
|
||||
def _(value: np.floating):
|
||||
return value_to_sql(float(value))
|
||||
|
||||
|
||||
def deprecated(func):
|
||||
"""This is a decorator which can be used to mark functions
|
||||
as deprecated. It will result in a warning being emitted
|
||||
|
||||
56
python/python/tests/test_errors.py
Normal file
56
python/python/tests/test_errors.py
Normal file
@@ -0,0 +1,56 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
import pickle
|
||||
|
||||
from lancedb.remote.errors import HttpError, LanceDBClientError, RetryError
|
||||
|
||||
|
||||
def test_pickle_lancedb_client_error():
|
||||
err = LanceDBClientError("something went wrong", "req-123", 400)
|
||||
restored = pickle.loads(pickle.dumps(err))
|
||||
assert str(restored) == "something went wrong"
|
||||
assert restored.request_id == "req-123"
|
||||
assert restored.status_code == 400
|
||||
|
||||
|
||||
def test_pickle_lancedb_client_error_no_status_code():
|
||||
err = LanceDBClientError("fail", "req-456")
|
||||
restored = pickle.loads(pickle.dumps(err))
|
||||
assert str(restored) == "fail"
|
||||
assert restored.request_id == "req-456"
|
||||
assert restored.status_code is None
|
||||
|
||||
|
||||
def test_pickle_http_error():
|
||||
err = HttpError("not found", "req-789", 404)
|
||||
restored = pickle.loads(pickle.dumps(err))
|
||||
assert isinstance(restored, HttpError)
|
||||
assert str(restored) == "not found"
|
||||
assert restored.request_id == "req-789"
|
||||
assert restored.status_code == 404
|
||||
|
||||
|
||||
def test_pickle_retry_error():
|
||||
err = RetryError(
|
||||
"max retries exceeded",
|
||||
"req-abc",
|
||||
request_failures=3,
|
||||
connect_failures=1,
|
||||
read_failures=2,
|
||||
max_request_failures=5,
|
||||
max_connect_failures=3,
|
||||
max_read_failures=3,
|
||||
status_code=503,
|
||||
)
|
||||
restored = pickle.loads(pickle.dumps(err))
|
||||
assert isinstance(restored, RetryError)
|
||||
assert str(restored) == "max retries exceeded"
|
||||
assert restored.request_id == "req-abc"
|
||||
assert restored.request_failures == 3
|
||||
assert restored.connect_failures == 1
|
||||
assert restored.read_failures == 2
|
||||
assert restored.max_request_failures == 5
|
||||
assert restored.max_connect_failures == 3
|
||||
assert restored.max_read_failures == 3
|
||||
assert restored.status_code == 503
|
||||
@@ -226,7 +226,6 @@ async def test_create_vector_index(some_table: AsyncTable):
|
||||
assert stats.num_indexed_rows == await some_table.count_rows()
|
||||
assert stats.num_unindexed_rows == 0
|
||||
assert stats.num_indices == 1
|
||||
assert stats.loss >= 0.0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -250,7 +249,6 @@ async def test_create_4bit_ivfpq_index(some_table: AsyncTable):
|
||||
assert stats.num_indexed_rows == await some_table.count_rows()
|
||||
assert stats.num_unindexed_rows == 0
|
||||
assert stats.num_indices == 1
|
||||
assert stats.loss >= 0.0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
@@ -5,10 +5,67 @@
|
||||
|
||||
import tempfile
|
||||
import shutil
|
||||
import sys
|
||||
import pytest
|
||||
import pyarrow as pa
|
||||
import lancedb
|
||||
from lance_namespace.errors import NamespaceNotEmptyError, TableNotFoundError
|
||||
from lancedb.table import AsyncTable, LanceTable
|
||||
|
||||
|
||||
PUSHDOWN_DATA = pa.table(
|
||||
{"id": list(range(12)), "text": [f"row-{idx}" for idx in range(12)]}
|
||||
)
|
||||
|
||||
|
||||
def _ipc_file(table: pa.Table = PUSHDOWN_DATA) -> bytes:
|
||||
sink = pa.BufferOutputStream()
|
||||
with pa.ipc.new_file(sink, table.schema) as writer:
|
||||
writer.write_table(table)
|
||||
return sink.getvalue().to_pybytes()
|
||||
|
||||
|
||||
class _FailingSyncInner:
|
||||
name = "hist"
|
||||
|
||||
def current_branch(self):
|
||||
# The pushdown gate only routes server-side when on the default branch.
|
||||
return None
|
||||
|
||||
async def schema(self):
|
||||
return PUSHDOWN_DATA.schema
|
||||
|
||||
async def to_arrow(self):
|
||||
raise RuntimeError("direct table to_arrow should not be used")
|
||||
|
||||
|
||||
class _FailingAsyncInner:
|
||||
def name(self):
|
||||
return "hist"
|
||||
|
||||
async def schema(self):
|
||||
return PUSHDOWN_DATA.schema
|
||||
|
||||
def query(self):
|
||||
raise AssertionError("direct async query should not be used")
|
||||
|
||||
|
||||
class _NamespaceClient:
|
||||
def __init__(self):
|
||||
self.requests = []
|
||||
|
||||
def query_table(self, request):
|
||||
self.requests.append(request)
|
||||
return _ipc_file()
|
||||
|
||||
|
||||
def _namespace_lance_table(namespace_client: _NamespaceClient) -> LanceTable:
|
||||
table = LanceTable.__new__(LanceTable)
|
||||
table._table = _FailingSyncInner()
|
||||
table._namespace_path = ["geneva"]
|
||||
table._namespace_client = namespace_client
|
||||
table._pushdown_operations = {"QueryTable"}
|
||||
return table
|
||||
|
||||
|
||||
class TestNamespaceConnection:
|
||||
@@ -736,6 +793,22 @@ class TestPushdownOperations:
|
||||
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
|
||||
assert len(db._namespace_client_pushdown_operations) == 0
|
||||
|
||||
def test_lance_table_to_arrow_uses_query_pushdown(self):
|
||||
namespace_client = _NamespaceClient()
|
||||
table = _namespace_lance_table(namespace_client)
|
||||
|
||||
assert table.to_arrow().equals(PUSHDOWN_DATA)
|
||||
assert table.to_pandas()["id"].tolist() == list(range(12))
|
||||
assert len(namespace_client.requests) == 2
|
||||
assert [request.id for request in namespace_client.requests] == [
|
||||
["geneva", "hist"],
|
||||
["geneva", "hist"],
|
||||
]
|
||||
assert [request.k for request in namespace_client.requests] == [
|
||||
sys.maxsize,
|
||||
sys.maxsize,
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
class TestAsyncPushdownOperations:
|
||||
@@ -771,3 +844,39 @@ class TestAsyncPushdownOperations:
|
||||
"""Test that pushdown operations default to empty on async connection."""
|
||||
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
|
||||
assert len(db._namespace_client_pushdown_operations) == 0
|
||||
|
||||
async def test_async_table_to_arrow_uses_query_pushdown(self):
|
||||
namespace_client = _NamespaceClient()
|
||||
|
||||
table = AsyncTable(
|
||||
_FailingAsyncInner(),
|
||||
namespace_path=["geneva"],
|
||||
namespace_client=namespace_client,
|
||||
pushdown_operations={"QueryTable"},
|
||||
)
|
||||
|
||||
assert (await table.to_arrow()).equals(PUSHDOWN_DATA)
|
||||
assert (await table.to_pandas())["id"].tolist() == list(range(12))
|
||||
assert len(namespace_client.requests) == 2
|
||||
assert [request.id for request in namespace_client.requests] == [
|
||||
["geneva", "hist"],
|
||||
["geneva", "hist"],
|
||||
]
|
||||
assert [request.k for request in namespace_client.requests] == [
|
||||
sys.maxsize,
|
||||
sys.maxsize,
|
||||
]
|
||||
|
||||
|
||||
def test_local_table_to_arrow_and_to_pandas_are_unchanged(tmp_path):
|
||||
db = lancedb.connect(str(tmp_path / "db"))
|
||||
table = db.create_table(
|
||||
"local",
|
||||
data=[
|
||||
{"id": 1, "vector": [1.0, 2.0]},
|
||||
{"id": 2, "vector": [3.0, 4.0]},
|
||||
],
|
||||
)
|
||||
|
||||
assert table.to_arrow().column("id").to_pylist() == [1, 2]
|
||||
assert table.to_pandas()["id"].tolist() == [1, 2]
|
||||
|
||||
@@ -154,6 +154,52 @@ async def test_async_checkout():
|
||||
assert await table.count_rows() == 300
|
||||
|
||||
|
||||
def test_remote_open_table_branch_and_version():
|
||||
def handler(request):
|
||||
# describe (table open + version validation) always succeeds
|
||||
request.send_response(200)
|
||||
request.send_header("Content-Type", "application/json")
|
||||
request.end_headers()
|
||||
request.wfile.write(
|
||||
json.dumps({"version": 2, "schema": {"fields": []}}).encode()
|
||||
)
|
||||
|
||||
with mock_lancedb_connection(handler) as db:
|
||||
# version-only (and "main" + version) is allowed: remote supports
|
||||
# version time-travel even though it has no branches
|
||||
assert db.open_table("test", version=2) is not None
|
||||
assert db.open_table("test", branch="main", version=2) is not None
|
||||
|
||||
# a non-main branch is rejected, with or without a version
|
||||
with pytest.raises(NotImplementedError, match="branching"):
|
||||
db.open_table("test", branch="exp")
|
||||
with pytest.raises(NotImplementedError, match="branching"):
|
||||
db.open_table("test", branch="exp", version=2)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_remote_open_table_branch_and_version():
|
||||
def handler(request):
|
||||
request.send_response(200)
|
||||
request.send_header("Content-Type", "application/json")
|
||||
request.end_headers()
|
||||
request.wfile.write(
|
||||
json.dumps({"version": 2, "schema": {"fields": []}}).encode()
|
||||
)
|
||||
|
||||
async with mock_lancedb_connection_async(handler) as db:
|
||||
# version-only (and "main" + version) is allowed: "main" is the default
|
||||
# branch, so it must not hit the unsupported remote branch path
|
||||
assert await db.open_table("test", version=2) is not None
|
||||
assert await db.open_table("test", branch="main", version=2) is not None
|
||||
|
||||
# a non-main branch is rejected, with or without a version
|
||||
with pytest.raises(NotImplementedError, match="branching"):
|
||||
await db.open_table("test", branch="exp")
|
||||
with pytest.raises(NotImplementedError, match="branching"):
|
||||
await db.open_table("test", branch="exp", version=2)
|
||||
|
||||
|
||||
def test_table_len_sync():
|
||||
def handler(request):
|
||||
if request.path == "/v1/table/test/create/?mode=create":
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
import warnings
|
||||
from datetime import date, datetime, timedelta
|
||||
from time import sleep
|
||||
@@ -927,6 +928,346 @@ async def test_async_tags(mem_db_async: AsyncConnection):
|
||||
)
|
||||
|
||||
|
||||
def test_branches(tmp_path):
|
||||
db = lancedb.connect(tmp_path, read_consistency_interval=timedelta(0))
|
||||
table = db.create_table(
|
||||
"test",
|
||||
data=[
|
||||
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
|
||||
],
|
||||
)
|
||||
assert table.count_rows() == 2
|
||||
|
||||
# fork an isolated, writable branch from main
|
||||
branch = table.branches.create("exp")
|
||||
assert branch.count_rows() == 2
|
||||
branch.add(data=[{"vector": [10.0, 11.0], "item": "baz", "price": 30.0}])
|
||||
|
||||
# writes on the branch do not touch main
|
||||
assert branch.count_rows() == 3
|
||||
assert table.count_rows() == 2
|
||||
|
||||
# the branch is listed, with main (None) as its parent
|
||||
branches = table.branches.list()
|
||||
assert "exp" in branches
|
||||
assert branches["exp"]["parent_branch"] is None
|
||||
|
||||
# from_ref="main" is equivalent to the default
|
||||
table.branches.create("exp2", from_ref="main")
|
||||
assert table.branches.list()["exp2"]["parent_branch"] is None
|
||||
|
||||
# checkout returns a handle scoped to the branch's latest
|
||||
checked_out = table.branches.checkout("exp")
|
||||
assert checked_out.count_rows() == 3
|
||||
|
||||
# delete removes it
|
||||
table.branches.delete("exp")
|
||||
table.branches.delete("exp2")
|
||||
assert "exp" not in table.branches.list()
|
||||
|
||||
|
||||
def test_branch_handle_tracks_concurrent_writes(tmp_path):
|
||||
db = lancedb.connect(tmp_path, read_consistency_interval=timedelta(0))
|
||||
table = db.create_table("t", [{"id": 1}])
|
||||
|
||||
# two independent handles on the same branch
|
||||
writer = table.branches.create("exp")
|
||||
reader = db.open_table("t", branch="exp")
|
||||
assert reader.count_rows() == 1
|
||||
|
||||
# a concurrent write on the branch is visible to the other handle
|
||||
writer.add([{"id": 2}])
|
||||
assert reader.count_rows() == 2
|
||||
# main is unaffected
|
||||
assert table.count_rows() == 1
|
||||
|
||||
|
||||
def test_branch_name_validation(tmp_path):
|
||||
db = lancedb.connect(tmp_path)
|
||||
table = db.create_table("t", [{"id": 1}])
|
||||
|
||||
with pytest.raises(ValueError, match="non-empty"):
|
||||
table.branches.create("")
|
||||
with pytest.raises(ValueError, match="non-empty"):
|
||||
table.branches.checkout("")
|
||||
with pytest.raises(ValueError, match="non-empty"):
|
||||
table.branches.delete("")
|
||||
|
||||
|
||||
def test_branches_preserve_namespace(tmp_path):
|
||||
pytest.importorskip(
|
||||
"lance"
|
||||
) # namespace_path routes through lance's DirectoryNamespace
|
||||
db = lancedb.connect(tmp_path)
|
||||
table = db.create_table("t", [{"id": 1}], namespace_path=["ns1"])
|
||||
assert table.namespace == ["ns1"]
|
||||
|
||||
branch = table.branches.create("exp")
|
||||
assert branch.namespace == ["ns1"]
|
||||
assert branch.id == table.id
|
||||
|
||||
# opening the branch directly also preserves namespace identity
|
||||
opened = db.open_table("t", namespace_path=["ns1"], branch="exp")
|
||||
assert opened.namespace == ["ns1"]
|
||||
|
||||
|
||||
def test_open_table_with_branch(tmp_path):
|
||||
db = lancedb.connect(tmp_path)
|
||||
table = db.create_table("t", [{"i": 1}])
|
||||
table.branches.create("exp").add([{"i": 2}])
|
||||
|
||||
# open_table(branch=...) returns a handle scoped to the branch
|
||||
assert db.open_table("t", branch="exp").count_rows() == 2
|
||||
# opening without branch still tracks main
|
||||
assert db.open_table("t").count_rows() == 1
|
||||
|
||||
|
||||
def test_open_table_with_branch_version(tmp_path):
|
||||
db = lancedb.connect(tmp_path, read_consistency_interval=timedelta(0))
|
||||
|
||||
# main: a single fork-point row
|
||||
t = db.create_table("t", [{"i": 0}])
|
||||
main_v1 = t.version
|
||||
|
||||
# fork "exp", then advance exp AND main independently past the fork so they
|
||||
# diverge while sharing version numbers
|
||||
exp = t.branches.create("exp")
|
||||
exp.add([{"i": 1}]) # exp: {0, 1}
|
||||
exp_v2 = exp.version
|
||||
exp.add([{"i": 2}]) # exp HEAD: {0, 1, 2}
|
||||
t.add([{"i": 100}, {"i": 101}, {"i": 102}]) # main HEAD: {0, 100, 101, 102}
|
||||
assert exp_v2 == t.version, "branch and main must share the version number"
|
||||
|
||||
# open exp at the shared version: the data must be exp's, not main's. count
|
||||
# alone cannot prove this (main@v2 also exists), so assert provenance by
|
||||
# content.
|
||||
pinned = db.open_table("t", branch="exp", version=exp_v2)
|
||||
assert pinned.current_branch() == "exp"
|
||||
assert pinned.count_rows() == 2 # not exp HEAD (3), not main@v2 (4)
|
||||
assert pinned.count_rows("i = 1") == 1 # exp's post-fork row is visible
|
||||
assert pinned.count_rows("i = 100") == 0 # main's divergent rows are invisible
|
||||
|
||||
# the same coordinate is reachable directly via branches.checkout(name, version)
|
||||
pinned_direct = t.branches.checkout("exp", exp_v2)
|
||||
assert pinned_direct.current_branch() == "exp"
|
||||
assert pinned_direct.count_rows() == 2
|
||||
|
||||
# the HEADs are unaffected
|
||||
assert db.open_table("t", branch="exp").count_rows() == 3
|
||||
assert db.open_table("t").count_rows() == 4
|
||||
|
||||
# version-only (no branch) time-travels main itself: its fork-point version
|
||||
# holds only main's first row, and the shared version number resolves to
|
||||
# main's data, not the branch's ("opens main at the version")
|
||||
old_main = db.open_table("t", version=main_v1)
|
||||
assert old_main.current_branch() is None
|
||||
assert old_main.count_rows() == 1
|
||||
shared_on_main = db.open_table("t", version=exp_v2)
|
||||
assert shared_on_main.current_branch() is None
|
||||
assert shared_on_main.count_rows() == 4
|
||||
|
||||
# detached head: writing to a pinned version is rejected
|
||||
with pytest.raises((ValueError, RuntimeError), match="cannot be modified"):
|
||||
pinned.add([{"i": 9}])
|
||||
|
||||
# a nonexistent version is rejected -- on main, and on a branch (a distinct
|
||||
# resolution path, on the branch's manifests)
|
||||
with pytest.raises((ValueError, RuntimeError)):
|
||||
db.open_table("t", version=9999)
|
||||
with pytest.raises((ValueError, RuntimeError)):
|
||||
db.open_table("t", branch="exp", version=9999)
|
||||
|
||||
# checkout_latest re-attaches the pinned handle to the BRANCH's HEAD
|
||||
# (writable again), not main's HEAD, and not staying pinned
|
||||
pinned.checkout_latest()
|
||||
assert pinned.current_branch() == "exp"
|
||||
assert pinned.count_rows() == 3 # exp HEAD, not main's 4
|
||||
pinned.add([{"i": 3}])
|
||||
assert pinned.count_rows() == 4 # writable again
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_namespace_open_table_with_branch(tmp_path):
|
||||
pytest.importorskip("lance") # "dir" impl is lance.namespace.DirectoryNamespace
|
||||
db = lancedb.connect_namespace_async("dir", {"root": str(tmp_path)})
|
||||
await db.create_namespace(["ns1"])
|
||||
table = await db.create_table("t", [{"id": 1}], namespace_path=["ns1"])
|
||||
branch = await table.branches.create("exp")
|
||||
await branch.add([{"id": 2}])
|
||||
|
||||
# open_table(branch=...) on the async namespace connection must work
|
||||
opened = await db.open_table("t", namespace_path=["ns1"], branch="exp")
|
||||
assert await opened.count_rows() == 2
|
||||
|
||||
|
||||
def test_namespace_open_table_with_branch_version(tmp_path):
|
||||
pytest.importorskip("lance") # "dir" impl is lance.namespace.DirectoryNamespace
|
||||
db = lancedb.connect_namespace("dir", {"root": str(tmp_path)})
|
||||
db.create_namespace(["ns1"])
|
||||
t = db.create_table("t", [{"i": 0}], namespace_path=["ns1"])
|
||||
|
||||
# fork "exp", then advance exp AND main past the fork so they diverge while
|
||||
# sharing version numbers
|
||||
exp = t.branches.create("exp")
|
||||
exp.add([{"i": 1}])
|
||||
exp_v2 = exp.version
|
||||
exp.add([{"i": 2}])
|
||||
t.add([{"i": 100}, {"i": 101}, {"i": 102}])
|
||||
assert exp_v2 == t.version, "branch and main must share the version number"
|
||||
|
||||
# open_table(branch=, version=) on the namespace connection reads the
|
||||
# branch's data at that version, not main's
|
||||
pinned = db.open_table("t", namespace_path=["ns1"], branch="exp", version=exp_v2)
|
||||
assert pinned.current_branch() == "exp"
|
||||
assert pinned.count_rows() == 2 # not exp HEAD (3), not main@v2 (4)
|
||||
assert pinned.count_rows("i = 1") == 1 # exp's post-fork row is visible
|
||||
assert pinned.count_rows("i = 100") == 0 # main's divergent rows are invisible
|
||||
assert db.open_table("t", namespace_path=["ns1"], branch="exp").count_rows() == 3
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_namespace_open_table_with_branch_version(tmp_path):
|
||||
pytest.importorskip("lance") # "dir" impl is lance.namespace.DirectoryNamespace
|
||||
db = lancedb.connect_namespace_async("dir", {"root": str(tmp_path)})
|
||||
await db.create_namespace(["ns1"])
|
||||
t = await db.create_table("t", [{"i": 0}], namespace_path=["ns1"])
|
||||
|
||||
# fork "exp", then advance exp AND main past the fork so they diverge while
|
||||
# sharing version numbers
|
||||
exp = await t.branches.create("exp")
|
||||
await exp.add([{"i": 1}])
|
||||
exp_v2 = await exp.version()
|
||||
await exp.add([{"i": 2}])
|
||||
await t.add([{"i": 100}, {"i": 101}, {"i": 102}])
|
||||
assert exp_v2 == await t.version(), "branch and main must share the version number"
|
||||
|
||||
# open_table(branch=, version=) on the async namespace connection reads the
|
||||
# branch's data at that version, not main's
|
||||
pinned = await db.open_table(
|
||||
"t", namespace_path=["ns1"], branch="exp", version=exp_v2
|
||||
)
|
||||
assert pinned.current_branch() == "exp"
|
||||
assert await pinned.count_rows() == 2 # not exp HEAD (3), not main@v2 (4)
|
||||
assert await pinned.count_rows("i = 1") == 1 # exp's post-fork row is visible
|
||||
assert await pinned.count_rows("i = 100") == 0 # main's rows are invisible
|
||||
assert (
|
||||
await (
|
||||
await db.open_table("t", namespace_path=["ns1"], branch="exp")
|
||||
).count_rows()
|
||||
== 3
|
||||
)
|
||||
|
||||
|
||||
def test_branch_to_lance_targets_branch(tmp_path):
|
||||
pytest.importorskip("lance")
|
||||
db = lancedb.connect(tmp_path)
|
||||
table = db.create_table("t", [{"i": 1}])
|
||||
branch = table.branches.create("exp")
|
||||
branch.add([{"i": 2}]) # branch: 2 rows, main: 1 row
|
||||
|
||||
assert branch.to_lance().count_rows() == 2
|
||||
assert table.to_lance().count_rows() == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_branches(tmp_path):
|
||||
db = await lancedb.connect_async(tmp_path)
|
||||
table = await db.create_table(
|
||||
"test",
|
||||
data=[
|
||||
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
|
||||
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0},
|
||||
],
|
||||
)
|
||||
assert await table.count_rows() == 2
|
||||
|
||||
branch = await table.branches.create("exp")
|
||||
assert await branch.count_rows() == 2
|
||||
await branch.add(data=[{"vector": [10.0, 11.0], "item": "baz", "price": 30.0}])
|
||||
|
||||
assert await branch.count_rows() == 3
|
||||
assert await table.count_rows() == 2
|
||||
|
||||
branches = await table.branches.list()
|
||||
assert "exp" in branches
|
||||
assert branches["exp"]["parent_branch"] is None
|
||||
|
||||
await table.branches.create("exp2", from_ref="main")
|
||||
assert (await table.branches.list())["exp2"]["parent_branch"] is None
|
||||
|
||||
checked_out = await table.branches.checkout("exp")
|
||||
assert await checked_out.count_rows() == 3
|
||||
|
||||
await table.branches.delete("exp")
|
||||
await table.branches.delete("exp2")
|
||||
assert "exp" not in await table.branches.list()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_open_table_with_branch_version(tmp_path):
|
||||
db = await lancedb.connect_async(tmp_path, read_consistency_interval=timedelta(0))
|
||||
|
||||
# main: a single fork-point row
|
||||
t = await db.create_table("t", [{"i": 0}])
|
||||
main_v1 = await t.version()
|
||||
|
||||
# fork "exp", then advance exp AND main independently past the fork so they
|
||||
# diverge while sharing version numbers
|
||||
exp = await t.branches.create("exp")
|
||||
await exp.add([{"i": 1}]) # exp: {0, 1}
|
||||
exp_v2 = await exp.version()
|
||||
await exp.add([{"i": 2}]) # exp HEAD: {0, 1, 2}
|
||||
await t.add([{"i": 100}, {"i": 101}, {"i": 102}]) # main HEAD: {0, 100, 101, 102}
|
||||
assert exp_v2 == await t.version(), "branch and main must share the version number"
|
||||
|
||||
# open exp at the shared version: the data must be exp's, not main's. count
|
||||
# alone cannot prove this (main@v2 also exists), so assert provenance by
|
||||
# content.
|
||||
pinned = await db.open_table("t", branch="exp", version=exp_v2)
|
||||
assert pinned.current_branch() == "exp"
|
||||
assert await pinned.count_rows() == 2 # not exp HEAD (3), not main@v2 (4)
|
||||
assert await pinned.count_rows("i = 1") == 1 # exp's post-fork row is visible
|
||||
assert await pinned.count_rows("i = 100") == 0 # main's rows are invisible
|
||||
|
||||
# the same coordinate is reachable directly via branches.checkout(name, version)
|
||||
pinned_direct = await t.branches.checkout("exp", exp_v2)
|
||||
assert pinned_direct.current_branch() == "exp"
|
||||
assert await pinned_direct.count_rows() == 2
|
||||
|
||||
# the HEADs are unaffected
|
||||
assert await (await db.open_table("t", branch="exp")).count_rows() == 3
|
||||
assert await (await db.open_table("t")).count_rows() == 4
|
||||
|
||||
# version-only (no branch) time-travels main itself: its fork-point version
|
||||
# holds only main's first row, and the shared version number resolves to
|
||||
# main's data, not the branch's ("opens main at the version")
|
||||
old_main = await db.open_table("t", version=main_v1)
|
||||
assert old_main.current_branch() is None
|
||||
assert await old_main.count_rows() == 1
|
||||
shared_on_main = await db.open_table("t", version=exp_v2)
|
||||
assert shared_on_main.current_branch() is None
|
||||
assert await shared_on_main.count_rows() == 4
|
||||
|
||||
# detached head: writing to a pinned version is rejected
|
||||
with pytest.raises((ValueError, RuntimeError), match="cannot be modified"):
|
||||
await pinned.add([{"i": 9}])
|
||||
|
||||
# a nonexistent version is rejected -- on main, and on a branch
|
||||
with pytest.raises((ValueError, RuntimeError)):
|
||||
await db.open_table("t", version=9999)
|
||||
with pytest.raises((ValueError, RuntimeError)):
|
||||
await db.open_table("t", branch="exp", version=9999)
|
||||
|
||||
# checkout_latest re-attaches the pinned handle to the BRANCH's HEAD
|
||||
# (writable again), not main's HEAD, and not staying pinned
|
||||
await pinned.checkout_latest()
|
||||
assert pinned.current_branch() == "exp"
|
||||
assert await pinned.count_rows() == 3 # exp HEAD, not main's 4
|
||||
await pinned.add([{"i": 3}])
|
||||
assert await pinned.count_rows() == 4 # writable again
|
||||
|
||||
|
||||
@patch("lancedb.table.AsyncTable.create_index")
|
||||
def test_create_index_method(mock_create_index, mem_db: DBConnection):
|
||||
table = mem_db.create_table(
|
||||
@@ -2837,3 +3178,38 @@ def test_sanitize_data_metadata_not_stripped():
|
||||
assert result_schema.metadata is not None
|
||||
assert result_schema.metadata[b"existing_key"] == b"existing_value"
|
||||
assert result_schema.metadata[b"new_key"] == b"new_value"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_search_runs_embedding_on_dedicated_executor(
|
||||
mem_db_async: AsyncConnection,
|
||||
):
|
||||
# Regression test for #3310: AsyncTable.search() must run the (potentially
|
||||
# blocking) query-embedding call on the dedicated embedding executor, not
|
||||
# asyncio's default executor -- which is shared with other blocking I/O and
|
||||
# can be starved by a slow embedding call under concurrent load.
|
||||
func = MockTextEmbeddingFunction.create()
|
||||
|
||||
class Schema(LanceModel):
|
||||
text: str = func.SourceField()
|
||||
vector: Vector(func.ndims()) = func.VectorField()
|
||||
|
||||
table = await mem_db_async.create_table("embed_executor", schema=Schema)
|
||||
await table.add([{"text": "hello world"}])
|
||||
|
||||
captured_threads: List[str] = []
|
||||
original = MockTextEmbeddingFunction.generate_embeddings
|
||||
|
||||
def record_thread(self, texts):
|
||||
captured_threads.append(threading.current_thread().name)
|
||||
return original(self, texts)
|
||||
|
||||
# Patch only around the search so we capture the query-embedding call, not
|
||||
# the add-time source-embedding call.
|
||||
with patch.object(MockTextEmbeddingFunction, "generate_embeddings", record_thread):
|
||||
await (await table.search("a query string")).limit(1).to_list()
|
||||
|
||||
assert captured_threads, "search did not invoke the embedding function"
|
||||
assert all(name.startswith("lancedb-embedding") for name in captured_threads), (
|
||||
f"embedding ran off the dedicated executor: {captured_threads}"
|
||||
)
|
||||
|
||||
@@ -149,6 +149,21 @@ def test_value_to_sql_dict():
|
||||
assert value_to_sql({}) == "named_struct()"
|
||||
|
||||
|
||||
def test_value_to_sql_numpy_scalars():
|
||||
# numpy scalars (e.g. pulled from an ndarray or a pandas column) must
|
||||
# convert the same way as their native Python counterparts. np.float64
|
||||
# already worked by virtue of subclassing float, but the integer / bool
|
||||
# / float32 scalars previously raised NotImplementedError.
|
||||
import numpy as np
|
||||
|
||||
assert value_to_sql(np.int32(5)) == "5"
|
||||
assert value_to_sql(np.int64(5)) == "5"
|
||||
assert value_to_sql(np.float32(1.5)) == "1.5"
|
||||
assert value_to_sql(np.float64(1.5)) == "1.5"
|
||||
assert value_to_sql(np.bool_(True)) == "TRUE"
|
||||
assert value_to_sql(np.bool_(False)) == "FALSE"
|
||||
|
||||
|
||||
def test_append_vector_columns():
|
||||
registry = EmbeddingFunctionRegistry.get_instance()
|
||||
registry.register("test")(MockTextEmbeddingFunction)
|
||||
|
||||
@@ -17,7 +17,7 @@ use arrow::{
|
||||
};
|
||||
use lancedb::table::{
|
||||
AddDataMode, ColumnAlteration, Duration, FieldMetadataUpdate, NewColumnTransform,
|
||||
OptimizeAction, OptimizeOptions, Table as LanceDbTable,
|
||||
OptimizeAction, OptimizeOptions, Ref, Table as LanceDbTable,
|
||||
};
|
||||
use pyo3::{
|
||||
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
|
||||
@@ -711,10 +711,6 @@ impl Table {
|
||||
dict.set_item("num_indices", num_indices)?;
|
||||
}
|
||||
|
||||
if let Some(loss) = stats.loss {
|
||||
dict.set_item("loss", loss)?;
|
||||
}
|
||||
|
||||
Ok(Some(dict.unbind()))
|
||||
})
|
||||
} else {
|
||||
@@ -864,6 +860,15 @@ impl Table {
|
||||
Ok(Tags::new(self.inner_ref()?.clone()))
|
||||
}
|
||||
|
||||
pub fn current_branch(&self) -> PyResult<Option<String>> {
|
||||
Ok(self.inner_ref()?.current_branch())
|
||||
}
|
||||
|
||||
#[getter]
|
||||
pub fn branches(&self) -> PyResult<Branches> {
|
||||
Ok(Branches::new(self.inner_ref()?.clone()))
|
||||
}
|
||||
|
||||
#[pyo3(signature = (offsets))]
|
||||
pub fn take_offsets(self_: PyRef<'_, Self>, offsets: Vec<u64>) -> PyResult<TakeQuery> {
|
||||
Ok(TakeQuery::new(
|
||||
@@ -1265,3 +1270,71 @@ impl Tags {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass]
|
||||
pub struct Branches {
|
||||
inner: LanceDbTable,
|
||||
}
|
||||
|
||||
impl Branches {
|
||||
pub fn new(table: LanceDbTable) -> Self {
|
||||
Self { inner: table }
|
||||
}
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl Branches {
|
||||
pub fn list(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.inner.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let res = inner.list_branches().await.infer_error()?;
|
||||
Python::attach(|py| {
|
||||
let py_dict = PyDict::new(py);
|
||||
for (name, contents) in res {
|
||||
let value = PyDict::new(py);
|
||||
value.set_item("parent_branch", contents.parent_branch)?;
|
||||
value.set_item("parent_version", contents.parent_version)?;
|
||||
value.set_item("manifest_size", contents.manifest_size)?;
|
||||
py_dict.set_item(name, value)?;
|
||||
}
|
||||
Ok(py_dict.unbind())
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
#[pyo3(signature = (name, from_ref=None, from_version=None))]
|
||||
pub fn create(
|
||||
self_: PyRef<'_, Self>,
|
||||
name: String,
|
||||
from_ref: Option<String>,
|
||||
from_version: Option<u64>,
|
||||
) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.inner.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let from = Ref::Version(from_ref, from_version);
|
||||
let table = inner.create_branch(&name, from).await.infer_error()?;
|
||||
Ok(Table::new(table))
|
||||
})
|
||||
}
|
||||
|
||||
#[pyo3(signature = (name, version=None))]
|
||||
pub fn checkout(
|
||||
self_: PyRef<'_, Self>,
|
||||
name: String,
|
||||
version: Option<u64>,
|
||||
) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.inner.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let table = inner.checkout_branch(&name, version).await.infer_error()?;
|
||||
Ok(Table::new(table))
|
||||
})
|
||||
}
|
||||
|
||||
pub fn delete(self_: PyRef<'_, Self>, name: String) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.inner.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
inner.delete_branch(&name).await.infer_error()?;
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.30.1-beta.1"
|
||||
version = "0.30.1-beta.2"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
|
||||
@@ -9,6 +9,7 @@ use std::sync::Arc;
|
||||
use arrow_array::RecordBatch;
|
||||
use arrow_schema::SchemaRef;
|
||||
use lance::dataset::ReadParams;
|
||||
use lance::dataset::refs::MAIN_BRANCH;
|
||||
use lance_namespace::models::{
|
||||
CreateNamespaceRequest, CreateNamespaceResponse, DescribeNamespaceRequest,
|
||||
DescribeNamespaceResponse, DropNamespaceRequest, DropNamespaceResponse, ListNamespacesRequest,
|
||||
@@ -119,6 +120,8 @@ pub struct OpenTableBuilder {
|
||||
parent: Arc<dyn Database>,
|
||||
request: OpenTableRequest,
|
||||
embedding_registry: Arc<dyn EmbeddingRegistry>,
|
||||
branch: Option<String>,
|
||||
version: Option<u64>,
|
||||
}
|
||||
|
||||
impl OpenTableBuilder {
|
||||
@@ -139,6 +142,8 @@ impl OpenTableBuilder {
|
||||
managed_versioning: None,
|
||||
},
|
||||
embedding_registry,
|
||||
branch: None,
|
||||
version: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -259,14 +264,48 @@ impl OpenTableBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Open the table scoped to the given branch instead of the default branch.
|
||||
///
|
||||
/// Reads and writes on the returned table operate in the branch's context.
|
||||
pub fn branch(mut self, branch: impl Into<String>) -> Self {
|
||||
self.branch = Some(branch.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Open the table pinned to a specific version, producing a read-only "view".
|
||||
///
|
||||
/// Composes with [`Self::branch`]: when a branch is also set, this opens that
|
||||
/// branch at the given version; otherwise it opens `main` at that version.
|
||||
/// The returned table is a detached head, so operations that modify the table
|
||||
/// will fail until [`Table::checkout_latest`] is called.
|
||||
///
|
||||
/// ```
|
||||
/// # use lancedb::Connection;
|
||||
/// # async fn f(conn: &Connection) -> Result<(), Box<dyn std::error::Error>> {
|
||||
/// let table = conn.open_table("t").branch("exp").version(3).execute().await?;
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
pub fn version(mut self, version: u64) -> Self {
|
||||
self.version = Some(version);
|
||||
self
|
||||
}
|
||||
|
||||
/// Open the table
|
||||
pub async fn execute(self) -> Result<Table> {
|
||||
let table = self.parent.open_table(self.request).await?;
|
||||
Ok(Table::new_with_embedding_registry(
|
||||
table,
|
||||
self.parent,
|
||||
self.embedding_registry,
|
||||
))
|
||||
let table = Table::new_with_embedding_registry(table, self.parent, self.embedding_registry);
|
||||
// "main" is the default branch, so treat it as no branch.
|
||||
let branch = self.branch.filter(|b| b.as_str() != MAIN_BRANCH);
|
||||
match branch {
|
||||
Some(branch) => table.checkout_branch(&branch, self.version).await,
|
||||
None => {
|
||||
if let Some(version) = self.version {
|
||||
table.checkout(version).await?;
|
||||
}
|
||||
Ok(table)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -740,6 +740,64 @@ mod tests {
|
||||
assert!(table_names.contains(&"test_table".to_string()));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_namespace_branch_query_under_pushdown_stays_local() {
|
||||
// With QueryTable pushdown enabled, a query on the main branch routes to
|
||||
// the namespace server, but a branch handle must run locally: the
|
||||
// server-side request carries no branch and would return main's rows.
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let root_path = tmp_dir.path().to_str().unwrap().to_string();
|
||||
|
||||
let mut properties = HashMap::new();
|
||||
properties.insert("root".to_string(), root_path);
|
||||
|
||||
let conn = connect_namespace("dir", properties)
|
||||
.pushdown_operation(NamespaceClientPushdownOperation::QueryTable)
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to connect to namespace");
|
||||
|
||||
conn.create_namespace(CreateNamespaceRequest {
|
||||
id: Some(vec!["test_ns".into()]),
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.expect("Failed to create namespace");
|
||||
|
||||
// main has 5 rows
|
||||
let table = conn
|
||||
.create_table("ref_test", create_test_data())
|
||||
.namespace(vec!["test_ns".into()])
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to create table");
|
||||
let main_version = table.version().await.unwrap();
|
||||
|
||||
// fork a branch off main, then add 5 more rows so it differs from main
|
||||
let branch = table
|
||||
.create_branch("exp", main_version)
|
||||
.await
|
||||
.expect("Failed to create branch");
|
||||
branch
|
||||
.add(create_test_data())
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to append to branch");
|
||||
|
||||
// the branch query must run locally and see the branch's 10 rows --
|
||||
// not get routed to the server (which carries no branch) and see main's 5
|
||||
let results = branch
|
||||
.query()
|
||||
.execute()
|
||||
.await
|
||||
.expect("Failed to query branch")
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.expect("Failed to collect results");
|
||||
let count: usize = results.iter().map(|b| b.num_rows()).sum();
|
||||
assert_eq!(count, 10);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_namespace_describe_table() {
|
||||
// Setup: Create a temporary directory for the namespace
|
||||
|
||||
@@ -203,11 +203,11 @@ impl Shuffler {
|
||||
|
||||
// Finish writing files
|
||||
for (file_idx, mut writer) in file_writers.into_iter().enumerate() {
|
||||
let num_written = writer.finish().await?;
|
||||
let write_summary = writer.finish().await?;
|
||||
log::debug!(
|
||||
"Shuffle job {}: wrote {} rows to file {}",
|
||||
self.id,
|
||||
num_written,
|
||||
write_summary.num_rows,
|
||||
file_idx
|
||||
);
|
||||
}
|
||||
|
||||
@@ -372,7 +372,6 @@ pub(crate) struct IndexMetadata {
|
||||
pub metric_type: Option<DistanceType>,
|
||||
// Sometimes the index type is provided at this level.
|
||||
pub index_type: Option<IndexType>,
|
||||
pub loss: Option<f64>,
|
||||
}
|
||||
|
||||
// This struct is used to deserialize the JSON data returned from the Lance API
|
||||
@@ -404,6 +403,4 @@ pub struct IndexStatistics {
|
||||
pub distance_type: Option<DistanceType>,
|
||||
/// The number of parts this index is split into.
|
||||
pub num_indices: Option<u32>,
|
||||
/// The loss value used by the index.
|
||||
pub loss: Option<f64>,
|
||||
}
|
||||
|
||||
@@ -983,6 +983,49 @@ mod tests {
|
||||
assert_eq!(table.name(), "table1");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_open_table_branch_and_version() {
|
||||
// Remote supports version time-travel but not branches. A version-only
|
||||
// open (or one on the default "main" branch) must succeed; a non-main
|
||||
// branch must be rejected, with or without a version.
|
||||
let conn = Connection::new_with_handler(|request| {
|
||||
assert_eq!(request.url().path(), "/v1/table/t/describe/");
|
||||
http::Response::builder()
|
||||
.status(200)
|
||||
.body(
|
||||
r#"{"table": "t", "version": 2, "schema": {"fields": [
|
||||
{"name": "a", "type": { "type": "int32" }, "nullable": false}
|
||||
]}}"#,
|
||||
)
|
||||
.unwrap()
|
||||
});
|
||||
|
||||
// version-only: allowed (open + checkout(version) both round-trip)
|
||||
conn.open_table("t").version(2).execute().await.unwrap();
|
||||
|
||||
// "main" is the default branch, so it counts as no branch
|
||||
conn.open_table("t")
|
||||
.branch("main")
|
||||
.version(2)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// a non-main branch is rejected, with or without a version
|
||||
assert!(matches!(
|
||||
conn.open_table("t").branch("exp").execute().await,
|
||||
Err(Error::NotSupported { .. })
|
||||
));
|
||||
assert!(matches!(
|
||||
conn.open_table("t")
|
||||
.branch("exp")
|
||||
.version(2)
|
||||
.execute()
|
||||
.await,
|
||||
Err(Error::NotSupported { .. })
|
||||
));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_open_table_not_found() {
|
||||
let conn = Connection::new_with_handler(|_| {
|
||||
|
||||
@@ -1384,6 +1384,38 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
.map_err(unwrap_shared_error)
|
||||
}
|
||||
|
||||
async fn create_branch(
|
||||
&self,
|
||||
_name: &str,
|
||||
_from: lance::dataset::refs::Ref,
|
||||
) -> Result<Arc<dyn BaseTable>> {
|
||||
Err(Error::NotSupported {
|
||||
message: "branching is not yet supported on remote tables".into(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn checkout_branch(&self, _name: &str) -> Result<Arc<dyn BaseTable>> {
|
||||
Err(Error::NotSupported {
|
||||
message: "branching is not yet supported on remote tables".into(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn list_branches(&self) -> Result<HashMap<String, lance::dataset::refs::BranchContents>> {
|
||||
Err(Error::NotSupported {
|
||||
message: "branching is not yet supported on remote tables".into(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn delete_branch(&self, _name: &str) -> Result<()> {
|
||||
Err(Error::NotSupported {
|
||||
message: "branching is not yet supported on remote tables".into(),
|
||||
})
|
||||
}
|
||||
|
||||
fn current_branch(&self) -> Option<String> {
|
||||
None
|
||||
}
|
||||
|
||||
async fn count_rows(&self, filter: Option<Filter>) -> Result<usize> {
|
||||
let mut request = self.post_read(&format!("/v1/table/{}/count_rows/", self.identifier));
|
||||
|
||||
@@ -1827,16 +1859,57 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
})
|
||||
}
|
||||
|
||||
async fn set_lsm_write_spec(&self, _spec: crate::table::LsmWriteSpec) -> Result<()> {
|
||||
Err(Error::NotSupported {
|
||||
message: "set_lsm_write_spec is not supported on LanceDB cloud.".into(),
|
||||
})
|
||||
async fn set_lsm_write_spec(&self, spec: crate::table::LsmWriteSpec) -> Result<()> {
|
||||
use crate::table::LsmWriteSpec;
|
||||
self.check_mutable().await?;
|
||||
|
||||
// Map the spec onto the server's request DTO. `sharding` is internally
|
||||
// tagged on `mode` to mirror sophon's `Sharding` enum; `maintained_indexes`
|
||||
// and `writer_config_defaults` are sent verbatim (an empty list means "no
|
||||
// maintained indexes", not "default to all").
|
||||
let sharding = match &spec {
|
||||
LsmWriteSpec::Bucket {
|
||||
column,
|
||||
num_buckets,
|
||||
..
|
||||
} => serde_json::json!({
|
||||
"mode": "bucket",
|
||||
"column": column,
|
||||
"num_buckets": num_buckets,
|
||||
}),
|
||||
LsmWriteSpec::Identity { column, .. } => serde_json::json!({
|
||||
"mode": "identity",
|
||||
"column": column,
|
||||
}),
|
||||
LsmWriteSpec::Unsharded { .. } => serde_json::json!({ "mode": "unsharded" }),
|
||||
};
|
||||
let body = serde_json::json!({
|
||||
"sharding": sharding,
|
||||
"maintained_indexes": spec.maintained_indexes(),
|
||||
"writer_config_defaults": spec.writer_config_defaults(),
|
||||
});
|
||||
|
||||
let request = self
|
||||
.client
|
||||
.post(&format!(
|
||||
"/v1/table/{}/set_lsm_write_spec/",
|
||||
self.identifier
|
||||
))
|
||||
.json(&body);
|
||||
let (request_id, response) = self.send(request, true).await?;
|
||||
self.check_table_response(&request_id, response).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn unset_lsm_write_spec(&self) -> Result<()> {
|
||||
Err(Error::NotSupported {
|
||||
message: "unset_lsm_write_spec is not supported on LanceDB cloud.".into(),
|
||||
})
|
||||
self.check_mutable().await?;
|
||||
let request = self.client.post(&format!(
|
||||
"/v1/table/{}/unset_lsm_write_spec/",
|
||||
self.identifier
|
||||
));
|
||||
let (request_id, response) = self.send(request, true).await?;
|
||||
self.check_table_response(&request_id, response).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn tags(&self) -> Result<Box<dyn Tags + '_>> {
|
||||
@@ -4080,7 +4153,6 @@ mod tests {
|
||||
index_type: IndexType::IvfPq,
|
||||
distance_type: Some(DistanceType::L2),
|
||||
num_indices: None,
|
||||
loss: None,
|
||||
};
|
||||
assert_eq!(indices, expected);
|
||||
|
||||
@@ -4428,6 +4500,91 @@ mod tests {
|
||||
assert!(matches!(e, Error::IndexNotFound { .. }));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_set_lsm_write_spec_unsharded() {
|
||||
let table = Table::new_with_handler("my_table", |request| {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(
|
||||
request.url().path(),
|
||||
"/v1/table/my_table/set_lsm_write_spec/"
|
||||
);
|
||||
let body = request.body().unwrap().as_bytes().unwrap();
|
||||
let body: serde_json::Value = serde_json::from_slice(body).unwrap();
|
||||
assert_eq!(body["sharding"], serde_json::json!({ "mode": "unsharded" }));
|
||||
assert_eq!(body["maintained_indexes"], serde_json::json!(["id_idx"]));
|
||||
assert_eq!(
|
||||
body["writer_config_defaults"],
|
||||
serde_json::json!({ "max_memtable_rows": "1000" })
|
||||
);
|
||||
http::Response::builder()
|
||||
.status(200)
|
||||
.body(r#"{"maintained_indexes":["id_idx"]}"#)
|
||||
.unwrap()
|
||||
});
|
||||
let spec = crate::table::LsmWriteSpec::unsharded()
|
||||
.with_maintained_indexes(["id_idx"])
|
||||
.with_writer_config_defaults([("max_memtable_rows", "1000")]);
|
||||
table.set_lsm_write_spec(spec).await.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_set_lsm_write_spec_bucket() {
|
||||
let table = Table::new_with_handler("my_table", |request| {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(
|
||||
request.url().path(),
|
||||
"/v1/table/my_table/set_lsm_write_spec/"
|
||||
);
|
||||
let body = request.body().unwrap().as_bytes().unwrap();
|
||||
let body: serde_json::Value = serde_json::from_slice(body).unwrap();
|
||||
assert_eq!(
|
||||
body["sharding"],
|
||||
serde_json::json!({ "mode": "bucket", "column": "id", "num_buckets": 16 })
|
||||
);
|
||||
assert_eq!(body["maintained_indexes"], serde_json::json!([]));
|
||||
http::Response::builder().status(200).body("{}").unwrap()
|
||||
});
|
||||
table
|
||||
.set_lsm_write_spec(crate::table::LsmWriteSpec::bucket("id", 16))
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_set_lsm_write_spec_identity() {
|
||||
let table = Table::new_with_handler("my_table", |request| {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(
|
||||
request.url().path(),
|
||||
"/v1/table/my_table/set_lsm_write_spec/"
|
||||
);
|
||||
let body = request.body().unwrap().as_bytes().unwrap();
|
||||
let body: serde_json::Value = serde_json::from_slice(body).unwrap();
|
||||
assert_eq!(
|
||||
body["sharding"],
|
||||
serde_json::json!({ "mode": "identity", "column": "tenant" })
|
||||
);
|
||||
http::Response::builder().status(200).body("{}").unwrap()
|
||||
});
|
||||
table
|
||||
.set_lsm_write_spec(crate::table::LsmWriteSpec::identity("tenant"))
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_unset_lsm_write_spec() {
|
||||
let table = Table::new_with_handler("my_table", |request| {
|
||||
assert_eq!(request.method(), "POST");
|
||||
assert_eq!(
|
||||
request.url().path(),
|
||||
"/v1/table/my_table/unset_lsm_write_spec/"
|
||||
);
|
||||
http::Response::builder().status(200).body("{}").unwrap()
|
||||
});
|
||||
table.unset_lsm_write_spec().await.unwrap();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_wait_for_index() {
|
||||
let table = _make_table_with_indices(0);
|
||||
|
||||
@@ -86,7 +86,7 @@ pub use add_data::{AddDataBuilder, AddDataMode, AddResult, NaNVectorBehavior};
|
||||
pub use chrono::Duration;
|
||||
pub use delete::DeleteResult;
|
||||
use futures::future::join_all;
|
||||
pub use lance::dataset::refs::{TagContents, Tags as LanceTags};
|
||||
pub use lance::dataset::refs::{BranchContents, Ref, TagContents, Tags as LanceTags};
|
||||
pub use lance::dataset::scanner::DatasetRecordBatchStream;
|
||||
use lance::dataset::statistics::DatasetStatisticsExt;
|
||||
pub use lance_index::optimize::OptimizeOptions;
|
||||
@@ -625,6 +625,37 @@ pub trait BaseTable: std::fmt::Display + std::fmt::Debug + Send + Sync {
|
||||
async fn restore(&self) -> Result<()>;
|
||||
/// List the versions of the table.
|
||||
async fn list_versions(&self) -> Result<Vec<Version>>;
|
||||
/// Create a new branch from `from` and return a handle scoped to it.
|
||||
async fn create_branch(
|
||||
&self,
|
||||
name: &str,
|
||||
from: lance::dataset::refs::Ref,
|
||||
) -> Result<Arc<dyn BaseTable>>;
|
||||
/// Check out an existing branch and return a handle scoped to it.
|
||||
async fn checkout_branch(&self, name: &str) -> Result<Arc<dyn BaseTable>>;
|
||||
/// Check out an existing branch at an optional version, returning a handle.
|
||||
///
|
||||
/// `None` tracks the branch's latest; `Some(v)` pins it to that version
|
||||
/// (read-only). The default implementation composes [`Self::checkout_branch`]
|
||||
/// and [`Self::checkout`]; implementations may override it to resolve the
|
||||
/// `(branch, version)` coordinate in a single manifest read.
|
||||
async fn checkout_branch_version(
|
||||
&self,
|
||||
name: &str,
|
||||
version: Option<u64>,
|
||||
) -> Result<Arc<dyn BaseTable>> {
|
||||
let branch = self.checkout_branch(name).await?;
|
||||
if let Some(version) = version {
|
||||
branch.checkout(version).await?;
|
||||
}
|
||||
Ok(branch)
|
||||
}
|
||||
/// List the branches of the table.
|
||||
async fn list_branches(&self) -> Result<HashMap<String, BranchContents>>;
|
||||
/// Delete a branch.
|
||||
async fn delete_branch(&self, name: &str) -> Result<()>;
|
||||
/// The branch this handle is scoped to, or `None` for `main`.
|
||||
fn current_branch(&self) -> Option<String>;
|
||||
/// Get the table definition.
|
||||
async fn table_definition(&self) -> Result<TableDefinition>;
|
||||
/// Get the table URI (storage location)
|
||||
@@ -1625,6 +1656,57 @@ impl Table {
|
||||
self.inner.tags().await
|
||||
}
|
||||
|
||||
/// Create a new branch from `from` (a version, tag, or branch)
|
||||
pub async fn create_branch(
|
||||
&self,
|
||||
name: &str,
|
||||
from: impl Into<lance::dataset::refs::Ref>,
|
||||
) -> Result<Self> {
|
||||
let inner = self.inner.create_branch(name, from.into()).await?;
|
||||
Ok(Self {
|
||||
inner,
|
||||
database: self.database.clone(),
|
||||
embedding_registry: self.embedding_registry.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Check out an existing branch and return a handle scoped to it.
|
||||
///
|
||||
/// With `version` set, the returned handle is pinned to that version of the
|
||||
/// branch: a read-only, detached view (as with [`Self::checkout`]). With
|
||||
/// `version` as `None` it tracks the branch's latest and stays writable.
|
||||
///
|
||||
/// ```
|
||||
/// # use lancedb::Table;
|
||||
/// # async fn f(table: &Table) -> Result<(), Box<dyn std::error::Error>> {
|
||||
/// let exp_at_v3 = table.checkout_branch("exp", Some(3)).await?;
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
pub async fn checkout_branch(&self, name: &str, version: Option<u64>) -> Result<Self> {
|
||||
let inner = self.inner.checkout_branch_version(name, version).await?;
|
||||
Ok(Self {
|
||||
inner,
|
||||
database: self.database.clone(),
|
||||
embedding_registry: self.embedding_registry.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
/// List the branches of the table.
|
||||
pub async fn list_branches(&self) -> Result<HashMap<String, BranchContents>> {
|
||||
self.inner.list_branches().await
|
||||
}
|
||||
|
||||
/// Delete a branch.
|
||||
pub async fn delete_branch(&self, name: &str) -> Result<()> {
|
||||
self.inner.delete_branch(name).await
|
||||
}
|
||||
|
||||
/// The branch this handle is scoped to, or `None` for `main`.
|
||||
pub fn current_branch(&self) -> Option<String> {
|
||||
self.inner.current_branch()
|
||||
}
|
||||
|
||||
/// Retrieve statistics on the table
|
||||
pub async fn stats(&self) -> Result<TableStatistics> {
|
||||
self.inner.stats().await
|
||||
@@ -1861,6 +1943,30 @@ impl NativeTable {
|
||||
self
|
||||
}
|
||||
|
||||
/// Build a sibling `NativeTable` with the same identity but a different
|
||||
/// (independent) dataset wrapper — used to hand out branch-scoped handles.
|
||||
fn with_dataset(&self, dataset: dataset::DatasetConsistencyWrapper) -> Self {
|
||||
Self {
|
||||
name: self.name.clone(),
|
||||
namespace: self.namespace.clone(),
|
||||
id: self.id.clone(),
|
||||
uri: self.uri.clone(),
|
||||
dataset,
|
||||
read_consistency_interval: self.read_consistency_interval,
|
||||
namespace_client: self.namespace_client.clone(),
|
||||
pushdown_operations: self.pushdown_operations.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
fn validate_branch_name(name: &str, field: &str) -> Result<()> {
|
||||
if name.is_empty() {
|
||||
return Err(Error::InvalidInput {
|
||||
message: format!("{field} must be a non-empty string"),
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Opens an existing Table using a namespace client.
|
||||
///
|
||||
/// This method uses `DatasetBuilder::from_namespace` to open the table, which
|
||||
@@ -2652,6 +2758,72 @@ impl BaseTable for NativeTable {
|
||||
self.dataset.reload().await
|
||||
}
|
||||
|
||||
async fn create_branch(
|
||||
&self,
|
||||
name: &str,
|
||||
from: lance::dataset::refs::Ref,
|
||||
) -> Result<Arc<dyn BaseTable>> {
|
||||
Self::validate_branch_name(name, "branch name")?;
|
||||
if let lance::dataset::refs::Ref::Version(Some(from_branch), _) = &from {
|
||||
Self::validate_branch_name(from_branch, "from_ref")?;
|
||||
}
|
||||
let mut ds = (*self.dataset.get().await?).clone();
|
||||
let branch_ds = ds.create_branch(name, from, None).await?;
|
||||
let dataset = dataset::DatasetConsistencyWrapper::new_latest(
|
||||
branch_ds,
|
||||
self.read_consistency_interval,
|
||||
);
|
||||
Ok(Arc::new(self.with_dataset(dataset)))
|
||||
}
|
||||
|
||||
async fn checkout_branch(&self, name: &str) -> Result<Arc<dyn BaseTable>> {
|
||||
Self::validate_branch_name(name, "branch name")?;
|
||||
let branch_ds = self.dataset.get().await?.checkout_branch(name).await?;
|
||||
let dataset = dataset::DatasetConsistencyWrapper::new_latest(
|
||||
branch_ds,
|
||||
self.read_consistency_interval,
|
||||
);
|
||||
Ok(Arc::new(self.with_dataset(dataset)))
|
||||
}
|
||||
|
||||
async fn checkout_branch_version(
|
||||
&self,
|
||||
name: &str,
|
||||
version: Option<u64>,
|
||||
) -> Result<Arc<dyn BaseTable>> {
|
||||
let Some(version) = version else {
|
||||
return self.checkout_branch(name).await;
|
||||
};
|
||||
Self::validate_branch_name(name, "branch name")?;
|
||||
// Resolve (branch, version) in a single manifest read.
|
||||
let branch_ds = self
|
||||
.dataset
|
||||
.get()
|
||||
.await?
|
||||
.checkout_version((name, version))
|
||||
.await?;
|
||||
let dataset = dataset::DatasetConsistencyWrapper::new_time_travel(
|
||||
branch_ds,
|
||||
self.read_consistency_interval,
|
||||
);
|
||||
Ok(Arc::new(self.with_dataset(dataset)))
|
||||
}
|
||||
|
||||
async fn list_branches(&self) -> Result<HashMap<String, BranchContents>> {
|
||||
Ok(self.dataset.get().await?.list_branches().await?)
|
||||
}
|
||||
|
||||
async fn delete_branch(&self, name: &str) -> Result<()> {
|
||||
Self::validate_branch_name(name, "branch name")?;
|
||||
let mut ds = (*self.dataset.get().await?).clone();
|
||||
ds.delete_branch(name).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn current_branch(&self) -> Option<String> {
|
||||
self.dataset.current_branch()
|
||||
}
|
||||
|
||||
async fn list_versions(&self) -> Result<Vec<Version>> {
|
||||
Ok(self.dataset.get().await?.versions().await?)
|
||||
}
|
||||
@@ -3019,20 +3191,12 @@ impl BaseTable for NativeTable {
|
||||
.ok_or_else(|| Error::InvalidInput {
|
||||
message: "index statistics was missing index type".to_string(),
|
||||
})?;
|
||||
let loss = stats
|
||||
.indices
|
||||
.iter()
|
||||
.map(|index| index.loss.unwrap_or_default())
|
||||
.sum::<f64>();
|
||||
|
||||
let loss = first_index.loss.map(|first_loss| first_loss + loss);
|
||||
Ok(Some(IndexStatistics {
|
||||
num_indexed_rows: stats.num_indexed_rows,
|
||||
num_unindexed_rows: stats.num_unindexed_rows,
|
||||
index_type,
|
||||
distance_type: first_index.metric_type,
|
||||
num_indices: stats.num_indices,
|
||||
loss,
|
||||
}))
|
||||
}
|
||||
|
||||
@@ -3378,6 +3542,351 @@ mod tests {
|
||||
assert_eq!(table.version().await.unwrap(), 4);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_branches() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
|
||||
let conn = ConnectBuilder::new(uri)
|
||||
.read_consistency_interval(Duration::from_secs(0))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// main: one row at v1
|
||||
let table = conn
|
||||
.create_table("my_table", some_sample_data())
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(table.count_rows(None).await.unwrap(), 1);
|
||||
assert_eq!(table.current_branch(), None);
|
||||
let main_version = table.version().await.unwrap();
|
||||
|
||||
// branch off main's current version; it starts with main's data
|
||||
let branch = table.create_branch("exp", main_version).await.unwrap();
|
||||
assert_eq!(branch.current_branch().as_deref(), Some("exp"));
|
||||
assert_eq!(branch.count_rows(None).await.unwrap(), 1);
|
||||
|
||||
// writes on the branch are isolated from main
|
||||
branch.add(some_sample_data()).execute().await.unwrap();
|
||||
assert_eq!(branch.count_rows(None).await.unwrap(), 2);
|
||||
assert_eq!(
|
||||
table.count_rows(None).await.unwrap(),
|
||||
1,
|
||||
"main must be untouched by branch writes"
|
||||
);
|
||||
|
||||
// the branch shows up in the listing
|
||||
let branches = table.list_branches().await.unwrap();
|
||||
assert!(branches.contains_key("exp"));
|
||||
|
||||
// checking out the branch from the main handle sees the branch's latest data
|
||||
let checked_out = table.checkout_branch("exp", None).await.unwrap();
|
||||
assert_eq!(checked_out.current_branch().as_deref(), Some("exp"));
|
||||
assert_eq!(checked_out.count_rows(None).await.unwrap(), 2);
|
||||
|
||||
// open_table(...).branch(...) opens directly onto the branch
|
||||
let opened = conn
|
||||
.open_table("my_table")
|
||||
.branch("exp")
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(opened.current_branch().as_deref(), Some("exp"));
|
||||
assert_eq!(opened.count_rows(None).await.unwrap(), 2);
|
||||
|
||||
// delete removes it from the listing
|
||||
table.delete_branch("exp").await.unwrap();
|
||||
let branches = table.list_branches().await.unwrap();
|
||||
assert!(!branches.contains_key("exp"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_branch_version_checkout() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
|
||||
let conn = ConnectBuilder::new(uri)
|
||||
.read_consistency_interval(Duration::from_secs(0))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// main: a single fork-point row (i = 0)
|
||||
let table = conn
|
||||
.create_table("my_table", sample_rows(vec![0]))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
let fork_point = table.version().await.unwrap();
|
||||
|
||||
// Fork "exp", then advance exp AND main independently past the fork so
|
||||
// they diverge while sharing version numbers.
|
||||
let branch = table.create_branch("exp", fork_point).await.unwrap();
|
||||
let exp_fork = branch.version().await.unwrap(); // exp's shallow-clone version
|
||||
branch.add(sample_rows(vec![1])).execute().await.unwrap(); // exp: {0, 1}
|
||||
let exp_v2 = branch.version().await.unwrap();
|
||||
branch.add(sample_rows(vec![2])).execute().await.unwrap(); // exp HEAD: {0, 1, 2}
|
||||
|
||||
// main's own commit reaches the SAME version number with different data
|
||||
table
|
||||
.add(sample_rows(vec![100, 101, 102]))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap(); // main HEAD: {0, 100, 101, 102}
|
||||
let main_v2 = table.version().await.unwrap();
|
||||
assert_eq!(
|
||||
exp_v2, main_v2,
|
||||
"branch and main must share the version number for this test to mean anything"
|
||||
);
|
||||
|
||||
// Open exp at the shared version. The data must be exp's, not main's:
|
||||
// count alone cannot prove this (main@v2 differs), so assert provenance
|
||||
// by content.
|
||||
let pinned = conn
|
||||
.open_table("my_table")
|
||||
.branch("exp")
|
||||
.version(exp_v2)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(pinned.current_branch().as_deref(), Some("exp"));
|
||||
// isolated from exp's HEAD (3 rows) and from main@v2 (4 rows)
|
||||
assert_eq!(pinned.count_rows(None).await.unwrap(), 2);
|
||||
// exp's post-fork row is visible; main's divergent rows are not
|
||||
assert_eq!(
|
||||
pinned.count_rows(Some("i = 1".to_string())).await.unwrap(),
|
||||
1
|
||||
);
|
||||
assert_eq!(
|
||||
pinned
|
||||
.count_rows(Some("i = 100".to_string()))
|
||||
.await
|
||||
.unwrap(),
|
||||
0
|
||||
);
|
||||
|
||||
// the same coordinate is reachable directly via checkout_branch(name, version)
|
||||
let pinned_direct = table.checkout_branch("exp", Some(exp_v2)).await.unwrap();
|
||||
assert_eq!(pinned_direct.current_branch().as_deref(), Some("exp"));
|
||||
assert_eq!(pinned_direct.count_rows(None).await.unwrap(), 2);
|
||||
|
||||
// the HEADs are unaffected
|
||||
let head = conn
|
||||
.open_table("my_table")
|
||||
.branch("exp")
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(head.count_rows(None).await.unwrap(), 3);
|
||||
assert_eq!(table.count_rows(None).await.unwrap(), 4);
|
||||
|
||||
// a pinned version is a detached head: writes are rejected
|
||||
assert!(pinned.add(sample_rows(vec![9])).execute().await.is_err());
|
||||
|
||||
// version-only (no branch) time-travels main itself: its fork-point
|
||||
// version holds only main's first row, and the shared version number
|
||||
// resolves to main's data, not the branch's ("opens main at the version")
|
||||
let old_main = conn
|
||||
.open_table("my_table")
|
||||
.version(fork_point)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(old_main.current_branch(), None);
|
||||
assert_eq!(old_main.count_rows(None).await.unwrap(), 1);
|
||||
let shared_on_main = conn
|
||||
.open_table("my_table")
|
||||
.version(exp_v2)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(shared_on_main.current_branch(), None);
|
||||
assert_eq!(shared_on_main.count_rows(None).await.unwrap(), 4);
|
||||
|
||||
// a nonexistent version is rejected
|
||||
assert!(
|
||||
conn.open_table("my_table")
|
||||
.version(9999)
|
||||
.execute()
|
||||
.await
|
||||
.is_err()
|
||||
);
|
||||
|
||||
// a nonexistent version on a branch is rejected too: this resolves on
|
||||
// the branch's path, a distinct miss from the main lookup above
|
||||
assert!(
|
||||
conn.open_table("my_table")
|
||||
.branch("exp")
|
||||
.version(9999)
|
||||
.execute()
|
||||
.await
|
||||
.is_err()
|
||||
);
|
||||
|
||||
// opening the branch at its fork point (the shallow-clone manifest)
|
||||
// shows just the cloned state: main's fork-point row
|
||||
let exp_at_fork = conn
|
||||
.open_table("my_table")
|
||||
.branch("exp")
|
||||
.version(exp_fork)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(exp_at_fork.current_branch().as_deref(), Some("exp"));
|
||||
assert_eq!(exp_at_fork.count_rows(None).await.unwrap(), 1);
|
||||
|
||||
// checkout_latest re-attaches the pinned handle to the BRANCH's HEAD
|
||||
// (writable again), not main's HEAD, and not staying pinned
|
||||
pinned.checkout_latest().await.unwrap();
|
||||
assert_eq!(pinned.current_branch().as_deref(), Some("exp"));
|
||||
assert_eq!(pinned.count_rows(None).await.unwrap(), 3); // exp HEAD, not main's 4
|
||||
pinned.add(sample_rows(vec![3])).execute().await.unwrap();
|
||||
assert_eq!(pinned.count_rows(None).await.unwrap(), 4); // writable again
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_branch_version_two_branches() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
let conn = ConnectBuilder::new(uri)
|
||||
.read_consistency_interval(Duration::from_secs(0))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let table = conn
|
||||
.create_table("my_table", sample_rows(vec![0]))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
let fork_point = table.version().await.unwrap();
|
||||
|
||||
// two branches off the same point, each advanced once so they reach the
|
||||
// SAME version number with divergent data
|
||||
let exp1 = table.create_branch("exp1", fork_point).await.unwrap();
|
||||
let exp2 = table.create_branch("exp2", fork_point).await.unwrap();
|
||||
exp1.add(sample_rows(vec![10])).execute().await.unwrap();
|
||||
exp2.add(sample_rows(vec![20])).execute().await.unwrap();
|
||||
let v1 = exp1.version().await.unwrap();
|
||||
let v2 = exp2.version().await.unwrap();
|
||||
assert_eq!(v1, v2, "both branches must reach the same version number");
|
||||
|
||||
// that shared version number resolves to each branch's own data
|
||||
let at1 = table.checkout_branch("exp1", Some(v1)).await.unwrap();
|
||||
assert_eq!(at1.count_rows(Some("i = 10".to_string())).await.unwrap(), 1);
|
||||
assert_eq!(at1.count_rows(Some("i = 20".to_string())).await.unwrap(), 0);
|
||||
let at2 = table.checkout_branch("exp2", Some(v2)).await.unwrap();
|
||||
assert_eq!(at2.count_rows(Some("i = 20".to_string())).await.unwrap(), 1);
|
||||
assert_eq!(at2.count_rows(Some("i = 10".to_string())).await.unwrap(), 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_branch_name_validation() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
let conn = ConnectBuilder::new(uri).execute().await.unwrap();
|
||||
let table = conn
|
||||
.create_table("my_table", some_sample_data())
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// every entry point rejects an empty name instead of passing it down
|
||||
assert!(matches!(
|
||||
table.create_branch("", 1u64).await,
|
||||
Err(Error::InvalidInput { .. })
|
||||
));
|
||||
assert!(matches!(
|
||||
table.checkout_branch("", None).await,
|
||||
Err(Error::InvalidInput { .. })
|
||||
));
|
||||
assert!(matches!(
|
||||
table.delete_branch("").await,
|
||||
Err(Error::InvalidInput { .. })
|
||||
));
|
||||
// an empty source branch is rejected too
|
||||
assert!(matches!(
|
||||
table
|
||||
.create_branch(
|
||||
"ok",
|
||||
lance::dataset::refs::Ref::Version(Some(String::new()), None)
|
||||
)
|
||||
.await,
|
||||
Err(Error::InvalidInput { .. })
|
||||
));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_branch_handle_tracks_concurrent_writes() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
|
||||
// interval = 0 so every read checks storage for new commits
|
||||
let conn = ConnectBuilder::new(uri)
|
||||
.read_consistency_interval(Duration::from_secs(0))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
let table = conn
|
||||
.create_table("my_table", some_sample_data())
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
let v1 = table.version().await.unwrap();
|
||||
|
||||
// two independent handles on the same branch
|
||||
let writer = table.create_branch("exp", v1).await.unwrap();
|
||||
let reader = conn
|
||||
.open_table("my_table")
|
||||
.branch("exp")
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(reader.count_rows(None).await.unwrap(), 1);
|
||||
|
||||
// a concurrent write on the branch is visible to the other handle, which
|
||||
// tracks the branch's HEAD (not main's)
|
||||
writer.add(some_sample_data()).execute().await.unwrap();
|
||||
assert_eq!(reader.count_rows(None).await.unwrap(), 2);
|
||||
// main is untouched
|
||||
assert_eq!(table.count_rows(None).await.unwrap(), 1);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_branch_handle_without_consistency_interval_is_pinned() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
|
||||
// default interval (None): handles do not auto-refresh
|
||||
let conn = ConnectBuilder::new(uri).execute().await.unwrap();
|
||||
let table = conn
|
||||
.create_table("my_table", some_sample_data())
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
let v1 = table.version().await.unwrap();
|
||||
|
||||
let writer = table.create_branch("exp", v1).await.unwrap();
|
||||
let reader = conn
|
||||
.open_table("my_table")
|
||||
.branch("exp")
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(reader.count_rows(None).await.unwrap(), 1);
|
||||
|
||||
// without a consistency interval the reader stays on the version it
|
||||
// opened, exactly like a main-branch handle...
|
||||
writer.add(some_sample_data()).execute().await.unwrap();
|
||||
assert_eq!(reader.count_rows(None).await.unwrap(), 1);
|
||||
|
||||
// ...until it explicitly refreshes
|
||||
reader.checkout_latest().await.unwrap();
|
||||
assert_eq!(reader.count_rows(None).await.unwrap(), 2);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_index() {
|
||||
use arrow_array::RecordBatch;
|
||||
@@ -3435,7 +3944,6 @@ mod tests {
|
||||
assert_eq!(stats.num_unindexed_rows, 0);
|
||||
assert_eq!(stats.index_type, crate::index::IndexType::IvfPq);
|
||||
assert_eq!(stats.distance_type, Some(crate::DistanceType::L2));
|
||||
assert!(stats.loss.is_some());
|
||||
|
||||
table.drop_index(index_name).await.unwrap();
|
||||
assert_eq!(table.list_indices().await.unwrap().len(), 0);
|
||||
@@ -3513,7 +4021,7 @@ mod tests {
|
||||
use lance_index::vector::VectorIndex as LanceVectorIndex;
|
||||
|
||||
let indices = native_table.load_indices().await.unwrap();
|
||||
let index_uuid = indices[0].index_uuid.clone();
|
||||
let index_uuid = uuid::Uuid::parse_str(&indices[0].index_uuid).unwrap();
|
||||
|
||||
let dataset_guard = native_table.dataset.get().await.unwrap();
|
||||
let dataset = (*dataset_guard).clone();
|
||||
@@ -3729,6 +4237,19 @@ mod tests {
|
||||
Box::new(RecordBatchIterator::new(vec![batch], schema))
|
||||
}
|
||||
|
||||
/// A single-batch reader holding the given `i` (Int32) values. Lets a test
|
||||
/// write distinguishable rows so it can assert data provenance, not row count.
|
||||
fn sample_rows(values: Vec<i32>) -> Box<dyn arrow_array::RecordBatchReader + Send> {
|
||||
let batch = RecordBatch::try_new(
|
||||
Arc::new(Schema::new(vec![Field::new("i", DataType::Int32, false)])),
|
||||
vec![Arc::new(Int32Array::from(values))],
|
||||
)
|
||||
.unwrap();
|
||||
let schema = batch.schema().clone();
|
||||
|
||||
Box::new(RecordBatchIterator::new(vec![Ok(batch)], schema))
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_scalar_index() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
|
||||
@@ -76,6 +76,23 @@ impl DatasetConsistencyWrapper {
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new wrapper pinned to the dataset's current version.
|
||||
///
|
||||
/// `dataset` must already be checked out at the desired version; this pins
|
||||
/// to `dataset.version()` without re-resolving. The wrapper is read-only
|
||||
/// (time-travel) until [`as_latest`](Self::as_latest) re-attaches it to the
|
||||
/// latest version.
|
||||
pub fn new_time_travel(dataset: Dataset, read_consistency_interval: Option<Duration>) -> Self {
|
||||
let version = dataset.version().version;
|
||||
let wrapper = Self::new_latest(dataset, read_consistency_interval);
|
||||
wrapper
|
||||
.state
|
||||
.lock()
|
||||
.unwrap_or_else(|e| e.into_inner())
|
||||
.pinned_version = Some(version);
|
||||
wrapper
|
||||
}
|
||||
|
||||
/// The MemWAL `ShardWriter` cache co-located with this dataset.
|
||||
pub(crate) fn shard_writer(&self) -> &Arc<ShardWriterCache> {
|
||||
&self.shard_writer
|
||||
@@ -144,8 +161,19 @@ impl DatasetConsistencyWrapper {
|
||||
}
|
||||
|
||||
/// Checkout a branch and track its HEAD for new versions.
|
||||
pub async fn as_branch(&self, _branch: impl Into<String>) -> Result<()> {
|
||||
todo!("Branch support not yet implemented")
|
||||
pub async fn as_branch(&self, branch: impl Into<String>) -> Result<()> {
|
||||
let branch = branch.into();
|
||||
let dataset = { self.state.lock()?.dataset.clone() };
|
||||
let new_dataset = dataset.checkout_branch(&branch).await?;
|
||||
|
||||
let mut state = self.state.lock()?;
|
||||
state.dataset = Arc::new(new_dataset);
|
||||
state.pinned_version = None;
|
||||
drop(state);
|
||||
if let ConsistencyMode::Eventual(bg_cache) = &self.consistency {
|
||||
bg_cache.invalidate();
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check that the dataset is in a mutable mode (Latest).
|
||||
@@ -161,6 +189,17 @@ impl DatasetConsistencyWrapper {
|
||||
}
|
||||
}
|
||||
|
||||
/// The branch this wrapper is currently tracking, or `None` for `main`.
|
||||
pub fn current_branch(&self) -> Option<String> {
|
||||
self.state
|
||||
.lock()
|
||||
.unwrap_or_else(|e| e.into_inner())
|
||||
.dataset
|
||||
.manifest()
|
||||
.branch
|
||||
.clone()
|
||||
}
|
||||
|
||||
/// Returns the version, if in time travel mode, or None otherwise.
|
||||
pub fn time_travel_version(&self) -> Option<u64> {
|
||||
self.state
|
||||
@@ -737,4 +776,31 @@ mod tests {
|
||||
let result = wrapper.reload().await;
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_as_branch_is_writable_and_tracked() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let uri = dir.path().to_str().unwrap();
|
||||
|
||||
// v1 on main, then shallow-clone a branch off it
|
||||
let mut ds = create_test_dataset(uri).await;
|
||||
let v1 = ds.version().version;
|
||||
ds.create_branch("exp", v1, None).await.unwrap();
|
||||
|
||||
// wrapper starts on main: latest, writable, no branch
|
||||
let wrapper = DatasetConsistencyWrapper::new_latest(ds, None);
|
||||
assert_eq!(wrapper.current_branch(), None);
|
||||
|
||||
// switch to the branch
|
||||
wrapper.as_branch("exp").await.unwrap();
|
||||
assert_eq!(wrapper.current_branch().as_deref(), Some("exp"));
|
||||
|
||||
// a branch is writable (unlike a pinned/time-travel checkout)
|
||||
wrapper.ensure_mutable().unwrap();
|
||||
assert_eq!(wrapper.time_travel_version(), None);
|
||||
|
||||
// get() returns the branch dataset
|
||||
let on_branch = wrapper.get().await.unwrap();
|
||||
assert_eq!(on_branch.manifest().branch.as_deref(), Some("exp"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -41,11 +41,14 @@ pub async fn execute_query(
|
||||
query: &AnyQuery,
|
||||
options: QueryExecutionOptions,
|
||||
) -> Result<DatasetRecordBatchStream> {
|
||||
// If QueryTable pushdown is enabled and namespace client is configured, use server-side query execution
|
||||
// QueryTable pushdown runs the query server-side, but only on the main
|
||||
// branch: the namespace request carries no branch yet, so a branch handle
|
||||
// must fall through to local execution.
|
||||
if table
|
||||
.pushdown_operations
|
||||
.contains(&NamespaceClientPushdownOperation::QueryTable)
|
||||
&& let Some(ref namespace_client) = table.namespace_client
|
||||
&& table.dataset.current_branch().is_none()
|
||||
{
|
||||
return execute_namespace_query(table, namespace_client.clone(), query, options).await;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user