mirror of
https://github.com/lancedb/lancedb.git
synced 2026-03-29 20:10:39 +00:00
Compare commits
25 Commits
python-v0.
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6de8f42dcd | ||
|
|
5c3bd68e58 | ||
|
|
4be85444f0 | ||
|
|
68c07f333f | ||
|
|
814a379e08 | ||
|
|
f31561c5bb | ||
|
|
e0c5ceac03 | ||
|
|
e93bb3355a | ||
|
|
b75991eb07 | ||
|
|
97ca9bb943 | ||
|
|
fa1b04f341 | ||
|
|
367abe99d2 | ||
|
|
52ce2c995c | ||
|
|
e71a00998c | ||
|
|
39a2ac0a1c | ||
|
|
bc7b344fa4 | ||
|
|
f91d2f5fec | ||
|
|
cf81b6419f | ||
|
|
0498ac1f2f | ||
|
|
aeb1c3ee6a | ||
|
|
f9ae46c0e7 | ||
|
|
84bf022fb1 | ||
|
|
310967eceb | ||
|
|
154dbeee2a | ||
|
|
c9c08ac8b9 |
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.27.0-beta.1"
|
current_version = "0.27.0-beta.4"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ runs:
|
|||||||
if: ${{ inputs.arm-build == 'false' }}
|
if: ${{ inputs.arm-build == 'false' }}
|
||||||
uses: PyO3/maturin-action@v1
|
uses: PyO3/maturin-action@v1
|
||||||
with:
|
with:
|
||||||
|
maturin-version: "1.12.4"
|
||||||
command: build
|
command: build
|
||||||
working-directory: python
|
working-directory: python
|
||||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||||
@@ -44,6 +45,7 @@ runs:
|
|||||||
if: ${{ inputs.arm-build == 'true' }}
|
if: ${{ inputs.arm-build == 'true' }}
|
||||||
uses: PyO3/maturin-action@v1
|
uses: PyO3/maturin-action@v1
|
||||||
with:
|
with:
|
||||||
|
maturin-version: "1.12.4"
|
||||||
command: build
|
command: build
|
||||||
working-directory: python
|
working-directory: python
|
||||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||||
|
|||||||
1
.github/workflows/build_mac_wheel/action.yml
vendored
1
.github/workflows/build_mac_wheel/action.yml
vendored
@@ -20,6 +20,7 @@ runs:
|
|||||||
uses: PyO3/maturin-action@v1
|
uses: PyO3/maturin-action@v1
|
||||||
with:
|
with:
|
||||||
command: build
|
command: build
|
||||||
|
maturin-version: "1.12.4"
|
||||||
# TODO: pass through interpreter
|
# TODO: pass through interpreter
|
||||||
args: ${{ inputs.args }}
|
args: ${{ inputs.args }}
|
||||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ runs:
|
|||||||
uses: PyO3/maturin-action@v1
|
uses: PyO3/maturin-action@v1
|
||||||
with:
|
with:
|
||||||
command: build
|
command: build
|
||||||
|
maturin-version: "1.12.4"
|
||||||
args: ${{ inputs.args }}
|
args: ${{ inputs.args }}
|
||||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||||
working-directory: python
|
working-directory: python
|
||||||
|
|||||||
3
.github/workflows/npm-publish.yml
vendored
3
.github/workflows/npm-publish.yml
vendored
@@ -356,7 +356,8 @@ jobs:
|
|||||||
if [[ $DRY_RUN == "true" ]]; then
|
if [[ $DRY_RUN == "true" ]]; then
|
||||||
ARGS="$ARGS --dry-run"
|
ARGS="$ARGS --dry-run"
|
||||||
fi
|
fi
|
||||||
if [[ $GITHUB_REF =~ refs/tags/v(.*)-beta.* ]]; then
|
VERSION=$(node -p "require('./package.json').version")
|
||||||
|
if [[ $VERSION == *-* ]]; then
|
||||||
ARGS="$ARGS --tag preview"
|
ARGS="$ARGS --tag preview"
|
||||||
fi
|
fi
|
||||||
npm publish $ARGS
|
npm publish $ARGS
|
||||||
|
|||||||
4
.github/workflows/python.yml
vendored
4
.github/workflows/python.yml
vendored
@@ -10,6 +10,10 @@ on:
|
|||||||
- python/**
|
- python/**
|
||||||
- rust/**
|
- rust/**
|
||||||
- .github/workflows/python.yml
|
- .github/workflows/python.yml
|
||||||
|
- .github/workflows/build_linux_wheel/**
|
||||||
|
- .github/workflows/build_mac_wheel/**
|
||||||
|
- .github/workflows/build_windows_wheel/**
|
||||||
|
- .github/workflows/run_tests/**
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||||
|
|||||||
4
.github/workflows/rust.yml
vendored
4
.github/workflows/rust.yml
vendored
@@ -100,7 +100,9 @@ jobs:
|
|||||||
lfs: true
|
lfs: true
|
||||||
- uses: Swatinem/rust-cache@v2
|
- uses: Swatinem/rust-cache@v2
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: sudo apt install -y protobuf-compiler libssl-dev
|
run: |
|
||||||
|
sudo apt update
|
||||||
|
sudo apt install -y protobuf-compiler libssl-dev
|
||||||
- uses: rui314/setup-mold@v1
|
- uses: rui314/setup-mold@v1
|
||||||
- name: Make Swap
|
- name: Make Swap
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
525
Cargo.lock
generated
525
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
46
Cargo.toml
46
Cargo.toml
@@ -5,7 +5,7 @@ exclude = ["python"]
|
|||||||
resolver = "2"
|
resolver = "2"
|
||||||
|
|
||||||
[workspace.package]
|
[workspace.package]
|
||||||
edition = "2021"
|
edition = "2024"
|
||||||
authors = ["LanceDB Devs <dev@lancedb.com>"]
|
authors = ["LanceDB Devs <dev@lancedb.com>"]
|
||||||
license = "Apache-2.0"
|
license = "Apache-2.0"
|
||||||
repository = "https://github.com/lancedb/lancedb"
|
repository = "https://github.com/lancedb/lancedb"
|
||||||
@@ -15,20 +15,20 @@ categories = ["database-implementations"]
|
|||||||
rust-version = "1.91.0"
|
rust-version = "1.91.0"
|
||||||
|
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
lance = { "version" = "=3.1.0-beta.2", default-features = false, "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance = { "version" = "=3.0.0-rc.3", default-features = false, "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-core = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-core = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-datagen = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-datagen = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-file = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-file = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-io = { "version" = "=3.1.0-beta.2", default-features = false, "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-io = { "version" = "=3.0.0-rc.3", default-features = false, "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-index = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-index = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-linalg = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-linalg = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-namespace = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-namespace = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-namespace-impls = { "version" = "=3.1.0-beta.2", default-features = false, "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-namespace-impls = { "version" = "=3.0.0-rc.3", default-features = false, "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-table = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-table = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-testing = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-testing = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-datafusion = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-datafusion = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-encoding = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-encoding = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-arrow = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
|
lance-arrow = { "version" = "=3.0.0-rc.3", "tag" = "v3.0.0-rc.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
ahash = "0.8"
|
ahash = "0.8"
|
||||||
# Note that this one does not include pyarrow
|
# Note that this one does not include pyarrow
|
||||||
arrow = { version = "57.2", optional = false }
|
arrow = { version = "57.2", optional = false }
|
||||||
@@ -40,13 +40,15 @@ arrow-schema = "57.2"
|
|||||||
arrow-select = "57.2"
|
arrow-select = "57.2"
|
||||||
arrow-cast = "57.2"
|
arrow-cast = "57.2"
|
||||||
async-trait = "0"
|
async-trait = "0"
|
||||||
datafusion = { version = "51.0", default-features = false }
|
datafusion = { version = "52.1", default-features = false }
|
||||||
datafusion-catalog = "51.0"
|
datafusion-catalog = "52.1"
|
||||||
datafusion-common = { version = "51.0", default-features = false }
|
datafusion-common = { version = "52.1", default-features = false }
|
||||||
datafusion-execution = "51.0"
|
datafusion-execution = "52.1"
|
||||||
datafusion-expr = "51.0"
|
datafusion-expr = "52.1"
|
||||||
datafusion-physical-plan = "51.0"
|
datafusion-functions = "52.1"
|
||||||
datafusion-physical-expr = "51.0"
|
datafusion-physical-plan = "52.1"
|
||||||
|
datafusion-physical-expr = "52.1"
|
||||||
|
datafusion-sql = "52.1"
|
||||||
env_logger = "0.11"
|
env_logger = "0.11"
|
||||||
half = { "version" = "2.7.1", default-features = false, features = [
|
half = { "version" = "2.7.1", default-features = false, features = [
|
||||||
"num-traits",
|
"num-traits",
|
||||||
|
|||||||
@@ -52,14 +52,21 @@ plugins:
|
|||||||
options:
|
options:
|
||||||
docstring_style: numpy
|
docstring_style: numpy
|
||||||
heading_level: 3
|
heading_level: 3
|
||||||
show_source: true
|
|
||||||
show_symbol_type_in_heading: true
|
|
||||||
show_signature_annotations: true
|
show_signature_annotations: true
|
||||||
show_root_heading: true
|
show_root_heading: true
|
||||||
|
show_docstring_examples: true
|
||||||
|
show_docstring_attributes: false
|
||||||
|
show_docstring_other_parameters: true
|
||||||
|
show_symbol_type_heading: true
|
||||||
|
show_labels: false
|
||||||
|
show_if_no_docstring: true
|
||||||
|
show_source: false
|
||||||
members_order: source
|
members_order: source
|
||||||
docstring_section_style: list
|
docstring_section_style: list
|
||||||
signature_crossrefs: true
|
signature_crossrefs: true
|
||||||
separate_signature: true
|
separate_signature: true
|
||||||
|
filters:
|
||||||
|
- "!^_"
|
||||||
import:
|
import:
|
||||||
# for cross references
|
# for cross references
|
||||||
- https://arrow.apache.org/docs/objects.inv
|
- https://arrow.apache.org/docs/objects.inv
|
||||||
@@ -113,7 +120,7 @@ markdown_extensions:
|
|||||||
emoji_index: !!python/name:material.extensions.emoji.twemoji
|
emoji_index: !!python/name:material.extensions.emoji.twemoji
|
||||||
emoji_generator: !!python/name:material.extensions.emoji.to_svg
|
emoji_generator: !!python/name:material.extensions.emoji.to_svg
|
||||||
- markdown.extensions.toc:
|
- markdown.extensions.toc:
|
||||||
toc_depth: 3
|
toc_depth: 4
|
||||||
permalink: true
|
permalink: true
|
||||||
permalink_title: Anchor link to this section
|
permalink_title: Anchor link to this section
|
||||||
|
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-core</artifactId>
|
<artifactId>lancedb-core</artifactId>
|
||||||
<version>0.27.0-beta.1</version>
|
<version>0.27.0-beta.4</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -8,6 +8,14 @@
|
|||||||
|
|
||||||
## Properties
|
## Properties
|
||||||
|
|
||||||
|
### numDeletedRows
|
||||||
|
|
||||||
|
```ts
|
||||||
|
numDeletedRows: number;
|
||||||
|
```
|
||||||
|
|
||||||
|
***
|
||||||
|
|
||||||
### version
|
### version
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
# LanceDB Java SDK
|
# LanceDB Java Enterprise Client
|
||||||
|
|
||||||
## Configuration and Initialization
|
## Configuration and Initialization
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.27.0-beta.1</version>
|
<version>0.27.0-beta.4</version>
|
||||||
<relativePath>../pom.xml</relativePath>
|
<relativePath>../pom.xml</relativePath>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
<groupId>com.lancedb</groupId>
|
<groupId>com.lancedb</groupId>
|
||||||
<artifactId>lancedb-parent</artifactId>
|
<artifactId>lancedb-parent</artifactId>
|
||||||
<version>0.27.0-beta.1</version>
|
<version>0.27.0-beta.4</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
<name>${project.artifactId}</name>
|
<name>${project.artifactId}</name>
|
||||||
<description>LanceDB Java SDK Parent POM</description>
|
<description>LanceDB Java SDK Parent POM</description>
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-nodejs"
|
name = "lancedb-nodejs"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
version = "0.27.0-beta.1"
|
version = "0.27.0-beta.4"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
description.workspace = true
|
description.workspace = true
|
||||||
repository.workspace = true
|
repository.workspace = true
|
||||||
|
|||||||
@@ -63,6 +63,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
|||||||
tableFromIPC,
|
tableFromIPC,
|
||||||
DataType,
|
DataType,
|
||||||
Dictionary,
|
Dictionary,
|
||||||
|
Uint8: ArrowUint8,
|
||||||
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
// biome-ignore lint/suspicious/noExplicitAny: <explanation>
|
||||||
} = <any>arrow;
|
} = <any>arrow;
|
||||||
type Schema = ApacheArrow["Schema"];
|
type Schema = ApacheArrow["Schema"];
|
||||||
@@ -362,6 +363,38 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
|||||||
).toEqual(new Float64().toString());
|
).toEqual(new Float64().toString());
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("will infer FixedSizeList<Float32> from Float32Array values", async function () {
|
||||||
|
const table = makeArrowTable([
|
||||||
|
{ id: "a", vector: new Float32Array([0.1, 0.2, 0.3]) },
|
||||||
|
{ id: "b", vector: new Float32Array([0.4, 0.5, 0.6]) },
|
||||||
|
]);
|
||||||
|
|
||||||
|
expect(DataType.isFixedSizeList(table.getChild("vector")?.type)).toBe(
|
||||||
|
true,
|
||||||
|
);
|
||||||
|
const vectorType = table.getChild("vector")?.type;
|
||||||
|
expect(vectorType.listSize).toBe(3);
|
||||||
|
expect(vectorType.children[0].type.toString()).toEqual(
|
||||||
|
new Float32().toString(),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("will infer FixedSizeList<Uint8> from Uint8Array values", async function () {
|
||||||
|
const table = makeArrowTable([
|
||||||
|
{ id: "a", vector: new Uint8Array([1, 2, 3]) },
|
||||||
|
{ id: "b", vector: new Uint8Array([4, 5, 6]) },
|
||||||
|
]);
|
||||||
|
|
||||||
|
expect(DataType.isFixedSizeList(table.getChild("vector")?.type)).toBe(
|
||||||
|
true,
|
||||||
|
);
|
||||||
|
const vectorType = table.getChild("vector")?.type;
|
||||||
|
expect(vectorType.listSize).toBe(3);
|
||||||
|
expect(vectorType.children[0].type.toString()).toEqual(
|
||||||
|
new ArrowUint8().toString(),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
it("will use dictionary encoded strings if asked", async function () {
|
it("will use dictionary encoded strings if asked", async function () {
|
||||||
const table = makeArrowTable([{ str: "hello" }]);
|
const table = makeArrowTable([{ str: "hello" }]);
|
||||||
expect(DataType.isUtf8(table.getChild("str")?.type)).toBe(true);
|
expect(DataType.isUtf8(table.getChild("str")?.type)).toBe(true);
|
||||||
|
|||||||
@@ -1697,6 +1697,65 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
|||||||
expect(results2[0].text).toBe(data[1].text);
|
expect(results2[0].text).toBe(data[1].text);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("full text search fast search", async () => {
|
||||||
|
const db = await connect(tmpDir.name);
|
||||||
|
const data = [{ text: "hello world", vector: [0.1, 0.2, 0.3], id: 1 }];
|
||||||
|
const table = await db.createTable("test", data);
|
||||||
|
await table.createIndex("text", {
|
||||||
|
config: Index.fts(),
|
||||||
|
});
|
||||||
|
|
||||||
|
// Insert unindexed data after creating the index.
|
||||||
|
await table.add([{ text: "xyz", vector: [0.4, 0.5, 0.6], id: 2 }]);
|
||||||
|
|
||||||
|
const withFlatSearch = await table
|
||||||
|
.search("xyz", "fts")
|
||||||
|
.limit(10)
|
||||||
|
.toArray();
|
||||||
|
expect(withFlatSearch.length).toBeGreaterThan(0);
|
||||||
|
|
||||||
|
const fastSearchResults = await table
|
||||||
|
.search("xyz", "fts")
|
||||||
|
.fastSearch()
|
||||||
|
.limit(10)
|
||||||
|
.toArray();
|
||||||
|
expect(fastSearchResults.length).toBe(0);
|
||||||
|
|
||||||
|
const nearestToTextFastSearch = await table
|
||||||
|
.query()
|
||||||
|
.nearestToText("xyz")
|
||||||
|
.fastSearch()
|
||||||
|
.limit(10)
|
||||||
|
.toArray();
|
||||||
|
expect(nearestToTextFastSearch.length).toBe(0);
|
||||||
|
|
||||||
|
// fastSearch should be chainable with other methods.
|
||||||
|
const chainedFastSearch = await table
|
||||||
|
.search("xyz", "fts")
|
||||||
|
.fastSearch()
|
||||||
|
.select(["text"])
|
||||||
|
.limit(5)
|
||||||
|
.toArray();
|
||||||
|
expect(chainedFastSearch.length).toBe(0);
|
||||||
|
|
||||||
|
await table.optimize();
|
||||||
|
|
||||||
|
const indexedFastSearch = await table
|
||||||
|
.search("xyz", "fts")
|
||||||
|
.fastSearch()
|
||||||
|
.limit(10)
|
||||||
|
.toArray();
|
||||||
|
expect(indexedFastSearch.length).toBeGreaterThan(0);
|
||||||
|
|
||||||
|
const indexedNearestToTextFastSearch = await table
|
||||||
|
.query()
|
||||||
|
.nearestToText("xyz")
|
||||||
|
.fastSearch()
|
||||||
|
.limit(10)
|
||||||
|
.toArray();
|
||||||
|
expect(indexedNearestToTextFastSearch.length).toBeGreaterThan(0);
|
||||||
|
});
|
||||||
|
|
||||||
test("prewarm full text search index", async () => {
|
test("prewarm full text search index", async () => {
|
||||||
const db = await connect(tmpDir.name);
|
const db = await connect(tmpDir.name);
|
||||||
const data = [
|
const data = [
|
||||||
@@ -2145,3 +2204,36 @@ describe("when creating an empty table", () => {
|
|||||||
expect((actualSchema.fields[1].type as Float64).precision).toBe(2);
|
expect((actualSchema.fields[1].type as Float64).precision).toBe(2);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Ensure we can create float32 arrays without using Arrow
|
||||||
|
// by utilizing native JS TypedArray support
|
||||||
|
//
|
||||||
|
// https://github.com/lancedb/lancedb/issues/3115
|
||||||
|
describe("when creating a table with Float32Array vectors", () => {
|
||||||
|
let tmpDir: tmp.DirResult;
|
||||||
|
beforeEach(() => {
|
||||||
|
tmpDir = tmp.dirSync({ unsafeCleanup: true });
|
||||||
|
});
|
||||||
|
afterEach(() => {
|
||||||
|
tmpDir.removeCallback();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should persist Float32Array as FixedSizeList<Float32> in the LanceDB schema", async () => {
|
||||||
|
const db = await connect(tmpDir.name);
|
||||||
|
const table = await db.createTable("test", [
|
||||||
|
{ id: "a", vector: new Float32Array([0.1, 0.2, 0.3]) },
|
||||||
|
{ id: "b", vector: new Float32Array([0.4, 0.5, 0.6]) },
|
||||||
|
]);
|
||||||
|
|
||||||
|
const schema = await table.schema();
|
||||||
|
const vectorField = schema.fields.find((f) => f.name === "vector");
|
||||||
|
expect(vectorField).toBeDefined();
|
||||||
|
expect(vectorField!.type).toBeInstanceOf(FixedSizeList);
|
||||||
|
|
||||||
|
const fsl = vectorField!.type as FixedSizeList;
|
||||||
|
expect(fsl.listSize).toBe(3);
|
||||||
|
expect(fsl.children[0].type.typeId).toBe(Type.Float);
|
||||||
|
// precision: HALF=0, SINGLE=1, DOUBLE=2
|
||||||
|
expect((fsl.children[0].type as Float32).precision).toBe(1);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|||||||
@@ -20,6 +20,8 @@ import {
|
|||||||
Float32,
|
Float32,
|
||||||
Float64,
|
Float64,
|
||||||
Int,
|
Int,
|
||||||
|
Int8,
|
||||||
|
Int16,
|
||||||
Int32,
|
Int32,
|
||||||
Int64,
|
Int64,
|
||||||
LargeBinary,
|
LargeBinary,
|
||||||
@@ -35,6 +37,8 @@ import {
|
|||||||
Timestamp,
|
Timestamp,
|
||||||
Type,
|
Type,
|
||||||
Uint8,
|
Uint8,
|
||||||
|
Uint16,
|
||||||
|
Uint32,
|
||||||
Utf8,
|
Utf8,
|
||||||
Vector,
|
Vector,
|
||||||
makeVector as arrowMakeVector,
|
makeVector as arrowMakeVector,
|
||||||
@@ -529,7 +533,8 @@ function isObject(value: unknown): value is Record<string, unknown> {
|
|||||||
!(value instanceof Date) &&
|
!(value instanceof Date) &&
|
||||||
!(value instanceof Set) &&
|
!(value instanceof Set) &&
|
||||||
!(value instanceof Map) &&
|
!(value instanceof Map) &&
|
||||||
!(value instanceof Buffer)
|
!(value instanceof Buffer) &&
|
||||||
|
!ArrayBuffer.isView(value)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -588,6 +593,13 @@ function inferType(
|
|||||||
return new Bool();
|
return new Bool();
|
||||||
} else if (value instanceof Buffer) {
|
} else if (value instanceof Buffer) {
|
||||||
return new Binary();
|
return new Binary();
|
||||||
|
} else if (ArrayBuffer.isView(value) && !(value instanceof DataView)) {
|
||||||
|
const info = typedArrayToArrowType(value);
|
||||||
|
if (info !== undefined) {
|
||||||
|
const child = new Field("item", info.elementType, true);
|
||||||
|
return new FixedSizeList(info.length, child);
|
||||||
|
}
|
||||||
|
return undefined;
|
||||||
} else if (Array.isArray(value)) {
|
} else if (Array.isArray(value)) {
|
||||||
if (value.length === 0) {
|
if (value.length === 0) {
|
||||||
return undefined; // Without any values we can't infer the type
|
return undefined; // Without any values we can't infer the type
|
||||||
@@ -746,6 +758,32 @@ function makeListVector(lists: unknown[][]): Vector<unknown> {
|
|||||||
return listBuilder.finish().toVector();
|
return listBuilder.finish().toVector();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Map a JS TypedArray instance to the corresponding Arrow element DataType
|
||||||
|
* and its length. Returns undefined if the value is not a recognized TypedArray.
|
||||||
|
*/
|
||||||
|
function typedArrayToArrowType(
|
||||||
|
value: ArrayBufferView,
|
||||||
|
): { elementType: DataType; length: number } | undefined {
|
||||||
|
if (value instanceof Float32Array)
|
||||||
|
return { elementType: new Float32(), length: value.length };
|
||||||
|
if (value instanceof Float64Array)
|
||||||
|
return { elementType: new Float64(), length: value.length };
|
||||||
|
if (value instanceof Uint8Array)
|
||||||
|
return { elementType: new Uint8(), length: value.length };
|
||||||
|
if (value instanceof Uint16Array)
|
||||||
|
return { elementType: new Uint16(), length: value.length };
|
||||||
|
if (value instanceof Uint32Array)
|
||||||
|
return { elementType: new Uint32(), length: value.length };
|
||||||
|
if (value instanceof Int8Array)
|
||||||
|
return { elementType: new Int8(), length: value.length };
|
||||||
|
if (value instanceof Int16Array)
|
||||||
|
return { elementType: new Int16(), length: value.length };
|
||||||
|
if (value instanceof Int32Array)
|
||||||
|
return { elementType: new Int32(), length: value.length };
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
|
||||||
/** Helper function to convert an Array of JS values to an Arrow Vector */
|
/** Helper function to convert an Array of JS values to an Arrow Vector */
|
||||||
function makeVector(
|
function makeVector(
|
||||||
values: unknown[],
|
values: unknown[],
|
||||||
@@ -814,6 +852,16 @@ function makeVector(
|
|||||||
"makeVector cannot infer the type if all values are null or undefined",
|
"makeVector cannot infer the type if all values are null or undefined",
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
if (ArrayBuffer.isView(sampleValue) && !(sampleValue instanceof DataView)) {
|
||||||
|
const info = typedArrayToArrowType(sampleValue);
|
||||||
|
if (info !== undefined) {
|
||||||
|
const fslType = new FixedSizeList(
|
||||||
|
info.length,
|
||||||
|
new Field("item", info.elementType, true),
|
||||||
|
);
|
||||||
|
return vectorFromArray(values, fslType);
|
||||||
|
}
|
||||||
|
}
|
||||||
if (Array.isArray(sampleValue)) {
|
if (Array.isArray(sampleValue)) {
|
||||||
// Default Arrow inference doesn't handle list types
|
// Default Arrow inference doesn't handle list types
|
||||||
return makeListVector(values as unknown[][]);
|
return makeListVector(values as unknown[][]);
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-darwin-arm64",
|
"name": "@lancedb/lancedb-darwin-arm64",
|
||||||
"version": "0.27.0-beta.1",
|
"version": "0.27.0-beta.4",
|
||||||
"os": ["darwin"],
|
"os": ["darwin"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.darwin-arm64.node",
|
"main": "lancedb.darwin-arm64.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||||
"version": "0.27.0-beta.1",
|
"version": "0.27.0-beta.4",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-gnu.node",
|
"main": "lancedb.linux-arm64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||||
"version": "0.27.0-beta.1",
|
"version": "0.27.0-beta.4",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["arm64"],
|
"cpu": ["arm64"],
|
||||||
"main": "lancedb.linux-arm64-musl.node",
|
"main": "lancedb.linux-arm64-musl.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||||
"version": "0.27.0-beta.1",
|
"version": "0.27.0-beta.4",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-gnu.node",
|
"main": "lancedb.linux-x64-gnu.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||||
"version": "0.27.0-beta.1",
|
"version": "0.27.0-beta.4",
|
||||||
"os": ["linux"],
|
"os": ["linux"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.linux-x64-musl.node",
|
"main": "lancedb.linux-x64-musl.node",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||||
"version": "0.27.0-beta.1",
|
"version": "0.27.0-beta.4",
|
||||||
"os": [
|
"os": [
|
||||||
"win32"
|
"win32"
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||||
"version": "0.27.0-beta.1",
|
"version": "0.27.0-beta.4",
|
||||||
"os": ["win32"],
|
"os": ["win32"],
|
||||||
"cpu": ["x64"],
|
"cpu": ["x64"],
|
||||||
"main": "lancedb.win32-x64-msvc.node",
|
"main": "lancedb.win32-x64-msvc.node",
|
||||||
|
|||||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
|||||||
{
|
{
|
||||||
"name": "@lancedb/lancedb",
|
"name": "@lancedb/lancedb",
|
||||||
"version": "0.27.0-beta.1",
|
"version": "0.27.0-beta.4",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "@lancedb/lancedb",
|
"name": "@lancedb/lancedb",
|
||||||
"version": "0.27.0-beta.1",
|
"version": "0.27.0-beta.4",
|
||||||
"cpu": [
|
"cpu": [
|
||||||
"x64",
|
"x64",
|
||||||
"arm64"
|
"arm64"
|
||||||
|
|||||||
@@ -11,7 +11,7 @@
|
|||||||
"ann"
|
"ann"
|
||||||
],
|
],
|
||||||
"private": false,
|
"private": false,
|
||||||
"version": "0.27.0-beta.1",
|
"version": "0.27.0-beta.4",
|
||||||
"main": "dist/index.js",
|
"main": "dist/index.js",
|
||||||
"exports": {
|
"exports": {
|
||||||
".": "./dist/index.js",
|
".": "./dist/index.js",
|
||||||
|
|||||||
@@ -8,10 +8,10 @@ use lancedb::database::{CreateTableMode, Database};
|
|||||||
use napi::bindgen_prelude::*;
|
use napi::bindgen_prelude::*;
|
||||||
use napi_derive::*;
|
use napi_derive::*;
|
||||||
|
|
||||||
|
use crate::ConnectionOptions;
|
||||||
use crate::error::NapiErrorExt;
|
use crate::error::NapiErrorExt;
|
||||||
use crate::header::JsHeaderProvider;
|
use crate::header::JsHeaderProvider;
|
||||||
use crate::table::Table;
|
use crate::table::Table;
|
||||||
use crate::ConnectionOptions;
|
|
||||||
use lancedb::connection::{ConnectBuilder, Connection as LanceDBConnection};
|
use lancedb::connection::{ConnectBuilder, Connection as LanceDBConnection};
|
||||||
|
|
||||||
use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema};
|
use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema};
|
||||||
|
|||||||
@@ -3,12 +3,12 @@
|
|||||||
|
|
||||||
use std::sync::Mutex;
|
use std::sync::Mutex;
|
||||||
|
|
||||||
|
use lancedb::index::Index as LanceDbIndex;
|
||||||
use lancedb::index::scalar::{BTreeIndexBuilder, FtsIndexBuilder};
|
use lancedb::index::scalar::{BTreeIndexBuilder, FtsIndexBuilder};
|
||||||
use lancedb::index::vector::{
|
use lancedb::index::vector::{
|
||||||
IvfFlatIndexBuilder, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder,
|
IvfFlatIndexBuilder, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder,
|
||||||
IvfRqIndexBuilder,
|
IvfRqIndexBuilder,
|
||||||
};
|
};
|
||||||
use lancedb::index::Index as LanceDbIndex;
|
|
||||||
use napi_derive::napi;
|
use napi_derive::napi;
|
||||||
|
|
||||||
use crate::util::parse_distance_type;
|
use crate::util::parse_distance_type;
|
||||||
|
|||||||
@@ -17,8 +17,8 @@ use lancedb::query::VectorQuery as LanceDbVectorQuery;
|
|||||||
use napi::bindgen_prelude::*;
|
use napi::bindgen_prelude::*;
|
||||||
use napi_derive::napi;
|
use napi_derive::napi;
|
||||||
|
|
||||||
use crate::error::convert_error;
|
|
||||||
use crate::error::NapiErrorExt;
|
use crate::error::NapiErrorExt;
|
||||||
|
use crate::error::convert_error;
|
||||||
use crate::iterator::RecordBatchIterator;
|
use crate::iterator::RecordBatchIterator;
|
||||||
use crate::rerankers::RerankHybridCallbackArgs;
|
use crate::rerankers::RerankHybridCallbackArgs;
|
||||||
use crate::rerankers::Reranker;
|
use crate::rerankers::Reranker;
|
||||||
@@ -551,15 +551,12 @@ fn parse_fts_query(query: Object) -> napi::Result<FullTextSearchQuery> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
let mut query = FullTextSearchQuery::new_query(query);
|
let mut query = FullTextSearchQuery::new_query(query);
|
||||||
if let Some(cols) = columns {
|
if let Some(cols) = columns
|
||||||
if !cols.is_empty() {
|
&& !cols.is_empty()
|
||||||
query = query.with_columns(&cols).map_err(|e| {
|
{
|
||||||
napi::Error::from_reason(format!(
|
query = query.with_columns(&cols).map_err(|e| {
|
||||||
"Failed to set full text search columns: {}",
|
napi::Error::from_reason(format!("Failed to set full text search columns: {}", e))
|
||||||
e
|
})?;
|
||||||
))
|
|
||||||
})?;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Ok(query)
|
Ok(query)
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -95,7 +95,7 @@ impl napi::bindgen_prelude::FromNapiValue for Session {
|
|||||||
napi_val: napi::sys::napi_value,
|
napi_val: napi::sys::napi_value,
|
||||||
) -> napi::Result<Self> {
|
) -> napi::Result<Self> {
|
||||||
let object: napi::bindgen_prelude::ClassInstance<Self> =
|
let object: napi::bindgen_prelude::ClassInstance<Self> =
|
||||||
napi::bindgen_prelude::ClassInstance::from_napi_value(env, napi_val)?;
|
unsafe { napi::bindgen_prelude::ClassInstance::from_napi_value(env, napi_val)? };
|
||||||
Ok((*object).clone())
|
Ok((*object).clone())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -753,12 +753,14 @@ impl From<lancedb::table::AddResult> for AddResult {
|
|||||||
|
|
||||||
#[napi(object)]
|
#[napi(object)]
|
||||||
pub struct DeleteResult {
|
pub struct DeleteResult {
|
||||||
|
pub num_deleted_rows: i64,
|
||||||
pub version: i64,
|
pub version: i64,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<lancedb::table::DeleteResult> for DeleteResult {
|
impl From<lancedb::table::DeleteResult> for DeleteResult {
|
||||||
fn from(value: lancedb::table::DeleteResult) -> Self {
|
fn from(value: lancedb::table::DeleteResult) -> Self {
|
||||||
Self {
|
Self {
|
||||||
|
num_deleted_rows: value.num_deleted_rows as i64,
|
||||||
version: value.version as i64,
|
version: value.version as i64,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.30.0-beta.2"
|
current_version = "0.30.0-beta.5"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-python"
|
name = "lancedb-python"
|
||||||
version = "0.30.0-beta.2"
|
version = "0.30.0-beta.5"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "Python bindings for LanceDB"
|
description = "Python bindings for LanceDB"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
@@ -16,9 +16,11 @@ crate-type = ["cdylib"]
|
|||||||
[dependencies]
|
[dependencies]
|
||||||
arrow = { version = "57.2", features = ["pyarrow"] }
|
arrow = { version = "57.2", features = ["pyarrow"] }
|
||||||
async-trait = "0.1"
|
async-trait = "0.1"
|
||||||
|
bytes = "1"
|
||||||
lancedb = { path = "../rust/lancedb", default-features = false }
|
lancedb = { path = "../rust/lancedb", default-features = false }
|
||||||
lance-core.workspace = true
|
lance-core.workspace = true
|
||||||
lance-namespace.workspace = true
|
lance-namespace.workspace = true
|
||||||
|
lance-namespace-impls.workspace = true
|
||||||
lance-io.workspace = true
|
lance-io.workspace = true
|
||||||
env_logger.workspace = true
|
env_logger.workspace = true
|
||||||
pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] }
|
pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] }
|
||||||
@@ -28,6 +30,8 @@ pyo3-async-runtimes = { version = "0.26", features = [
|
|||||||
] }
|
] }
|
||||||
pin-project = "1.1.5"
|
pin-project = "1.1.5"
|
||||||
futures.workspace = true
|
futures.workspace = true
|
||||||
|
serde = "1"
|
||||||
|
serde_json = "1"
|
||||||
snafu.workspace = true
|
snafu.workspace = true
|
||||||
tokio = { version = "1.40", features = ["sync"] }
|
tokio = { version = "1.40", features = ["sync"] }
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
# LanceDB
|
# LanceDB Python SDK
|
||||||
|
|
||||||
A Python library for [LanceDB](https://github.com/lancedb/lancedb).
|
A Python library for [LanceDB](https://github.com/lancedb/lancedb).
|
||||||
|
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ repository = "https://github.com/lancedb/lancedb"
|
|||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
pylance = [
|
pylance = [
|
||||||
"pylance>=1.0.0b14",
|
"pylance>=4.0.0b7",
|
||||||
]
|
]
|
||||||
tests = [
|
tests = [
|
||||||
"aiohttp",
|
"aiohttp",
|
||||||
@@ -59,9 +59,9 @@ tests = [
|
|||||||
"polars>=0.19, <=1.3.0",
|
"polars>=0.19, <=1.3.0",
|
||||||
"tantivy",
|
"tantivy",
|
||||||
"pyarrow-stubs",
|
"pyarrow-stubs",
|
||||||
"pylance>=1.0.0b14",
|
"pylance>=4.0.0b7",
|
||||||
"requests",
|
"requests",
|
||||||
"datafusion<52",
|
"datafusion>=52,<53",
|
||||||
]
|
]
|
||||||
dev = [
|
dev = [
|
||||||
"ruff",
|
"ruff",
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ from abc import abstractmethod
|
|||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import sys
|
import sys
|
||||||
from typing import TYPE_CHECKING, Dict, Iterable, List, Literal, Optional, Union
|
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Literal, Optional, Union
|
||||||
|
|
||||||
if sys.version_info >= (3, 12):
|
if sys.version_info >= (3, 12):
|
||||||
from typing import override
|
from typing import override
|
||||||
@@ -1541,6 +1541,8 @@ class AsyncConnection(object):
|
|||||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
||||||
index_cache_size: Optional[int] = None,
|
index_cache_size: Optional[int] = None,
|
||||||
location: Optional[str] = None,
|
location: Optional[str] = None,
|
||||||
|
namespace_client: Optional[Any] = None,
|
||||||
|
managed_versioning: Optional[bool] = None,
|
||||||
) -> AsyncTable:
|
) -> AsyncTable:
|
||||||
"""Open a Lance Table in the database.
|
"""Open a Lance Table in the database.
|
||||||
|
|
||||||
@@ -1573,6 +1575,9 @@ class AsyncConnection(object):
|
|||||||
The explicit location (URI) of the table. If provided, the table will be
|
The explicit location (URI) of the table. If provided, the table will be
|
||||||
opened from this location instead of deriving it from the database URI
|
opened from this location instead of deriving it from the database URI
|
||||||
and table name.
|
and table name.
|
||||||
|
managed_versioning: bool, optional
|
||||||
|
Whether managed versioning is enabled for this table. If provided,
|
||||||
|
avoids a redundant describe_table call when namespace_client is set.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
@@ -1587,6 +1592,8 @@ class AsyncConnection(object):
|
|||||||
storage_options_provider=storage_options_provider,
|
storage_options_provider=storage_options_provider,
|
||||||
index_cache_size=index_cache_size,
|
index_cache_size=index_cache_size,
|
||||||
location=location,
|
location=location,
|
||||||
|
namespace_client=namespace_client,
|
||||||
|
managed_versioning=managed_versioning,
|
||||||
)
|
)
|
||||||
return AsyncTable(table)
|
return AsyncTable(table)
|
||||||
|
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import sys
|
import sys
|
||||||
from typing import Dict, Iterable, List, Optional, Union
|
from typing import Any, Dict, Iterable, List, Optional, Union
|
||||||
|
|
||||||
if sys.version_info >= (3, 12):
|
if sys.version_info >= (3, 12):
|
||||||
from typing import override
|
from typing import override
|
||||||
@@ -240,7 +240,7 @@ class LanceNamespaceDBConnection(DBConnection):
|
|||||||
session : Optional[Session]
|
session : Optional[Session]
|
||||||
A session to use for this connection
|
A session to use for this connection
|
||||||
"""
|
"""
|
||||||
self._ns = namespace
|
self._namespace_client = namespace
|
||||||
self.read_consistency_interval = read_consistency_interval
|
self.read_consistency_interval = read_consistency_interval
|
||||||
self.storage_options = storage_options or {}
|
self.storage_options = storage_options or {}
|
||||||
self.session = session
|
self.session = session
|
||||||
@@ -269,7 +269,7 @@ class LanceNamespaceDBConnection(DBConnection):
|
|||||||
if namespace is None:
|
if namespace is None:
|
||||||
namespace = []
|
namespace = []
|
||||||
request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
|
request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
|
||||||
response = self._ns.list_tables(request)
|
response = self._namespace_client.list_tables(request)
|
||||||
return response.tables if response.tables else []
|
return response.tables if response.tables else []
|
||||||
|
|
||||||
@override
|
@override
|
||||||
@@ -309,7 +309,9 @@ class LanceNamespaceDBConnection(DBConnection):
|
|||||||
# Try to describe the table first to see if it exists
|
# Try to describe the table first to see if it exists
|
||||||
try:
|
try:
|
||||||
describe_request = DescribeTableRequest(id=table_id)
|
describe_request = DescribeTableRequest(id=table_id)
|
||||||
describe_response = self._ns.describe_table(describe_request)
|
describe_response = self._namespace_client.describe_table(
|
||||||
|
describe_request
|
||||||
|
)
|
||||||
location = describe_response.location
|
location = describe_response.location
|
||||||
namespace_storage_options = describe_response.storage_options
|
namespace_storage_options = describe_response.storage_options
|
||||||
except Exception:
|
except Exception:
|
||||||
@@ -323,7 +325,7 @@ class LanceNamespaceDBConnection(DBConnection):
|
|||||||
location=None,
|
location=None,
|
||||||
properties=self.storage_options if self.storage_options else None,
|
properties=self.storage_options if self.storage_options else None,
|
||||||
)
|
)
|
||||||
declare_response = self._ns.declare_table(declare_request)
|
declare_response = self._namespace_client.declare_table(declare_request)
|
||||||
|
|
||||||
if not declare_response.location:
|
if not declare_response.location:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
@@ -353,7 +355,7 @@ class LanceNamespaceDBConnection(DBConnection):
|
|||||||
# Only create if namespace returned storage_options (not None)
|
# Only create if namespace returned storage_options (not None)
|
||||||
if storage_options_provider is None and namespace_storage_options is not None:
|
if storage_options_provider is None and namespace_storage_options is not None:
|
||||||
storage_options_provider = LanceNamespaceStorageOptionsProvider(
|
storage_options_provider = LanceNamespaceStorageOptionsProvider(
|
||||||
namespace=self._ns,
|
namespace=self._namespace_client,
|
||||||
table_id=table_id,
|
table_id=table_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -371,6 +373,7 @@ class LanceNamespaceDBConnection(DBConnection):
|
|||||||
storage_options=merged_storage_options,
|
storage_options=merged_storage_options,
|
||||||
storage_options_provider=storage_options_provider,
|
storage_options_provider=storage_options_provider,
|
||||||
location=location,
|
location=location,
|
||||||
|
namespace_client=self._namespace_client,
|
||||||
)
|
)
|
||||||
|
|
||||||
return tbl
|
return tbl
|
||||||
@@ -389,7 +392,7 @@ class LanceNamespaceDBConnection(DBConnection):
|
|||||||
namespace = []
|
namespace = []
|
||||||
table_id = namespace + [name]
|
table_id = namespace + [name]
|
||||||
request = DescribeTableRequest(id=table_id)
|
request = DescribeTableRequest(id=table_id)
|
||||||
response = self._ns.describe_table(request)
|
response = self._namespace_client.describe_table(request)
|
||||||
|
|
||||||
# Merge storage options: self.storage_options < user options < namespace options
|
# Merge storage options: self.storage_options < user options < namespace options
|
||||||
merged_storage_options = dict(self.storage_options)
|
merged_storage_options = dict(self.storage_options)
|
||||||
@@ -402,10 +405,14 @@ class LanceNamespaceDBConnection(DBConnection):
|
|||||||
# Only create if namespace returned storage_options (not None)
|
# Only create if namespace returned storage_options (not None)
|
||||||
if storage_options_provider is None and response.storage_options is not None:
|
if storage_options_provider is None and response.storage_options is not None:
|
||||||
storage_options_provider = LanceNamespaceStorageOptionsProvider(
|
storage_options_provider = LanceNamespaceStorageOptionsProvider(
|
||||||
namespace=self._ns,
|
namespace=self._namespace_client,
|
||||||
table_id=table_id,
|
table_id=table_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Pass managed_versioning to avoid redundant describe_table call in Rust.
|
||||||
|
# Convert None to False since we already have the answer from describe_table.
|
||||||
|
managed_versioning = response.managed_versioning is True
|
||||||
|
|
||||||
return self._lance_table_from_uri(
|
return self._lance_table_from_uri(
|
||||||
name,
|
name,
|
||||||
response.location,
|
response.location,
|
||||||
@@ -413,6 +420,8 @@ class LanceNamespaceDBConnection(DBConnection):
|
|||||||
storage_options=merged_storage_options,
|
storage_options=merged_storage_options,
|
||||||
storage_options_provider=storage_options_provider,
|
storage_options_provider=storage_options_provider,
|
||||||
index_cache_size=index_cache_size,
|
index_cache_size=index_cache_size,
|
||||||
|
namespace_client=self._namespace_client,
|
||||||
|
managed_versioning=managed_versioning,
|
||||||
)
|
)
|
||||||
|
|
||||||
@override
|
@override
|
||||||
@@ -422,7 +431,7 @@ class LanceNamespaceDBConnection(DBConnection):
|
|||||||
namespace = []
|
namespace = []
|
||||||
table_id = namespace + [name]
|
table_id = namespace + [name]
|
||||||
request = DropTableRequest(id=table_id)
|
request = DropTableRequest(id=table_id)
|
||||||
self._ns.drop_table(request)
|
self._namespace_client.drop_table(request)
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def rename_table(
|
def rename_table(
|
||||||
@@ -484,7 +493,7 @@ class LanceNamespaceDBConnection(DBConnection):
|
|||||||
request = ListNamespacesRequest(
|
request = ListNamespacesRequest(
|
||||||
id=namespace, page_token=page_token, limit=limit
|
id=namespace, page_token=page_token, limit=limit
|
||||||
)
|
)
|
||||||
response = self._ns.list_namespaces(request)
|
response = self._namespace_client.list_namespaces(request)
|
||||||
return ListNamespacesResponse(
|
return ListNamespacesResponse(
|
||||||
namespaces=response.namespaces if response.namespaces else [],
|
namespaces=response.namespaces if response.namespaces else [],
|
||||||
page_token=response.page_token,
|
page_token=response.page_token,
|
||||||
@@ -520,7 +529,7 @@ class LanceNamespaceDBConnection(DBConnection):
|
|||||||
mode=_normalize_create_namespace_mode(mode),
|
mode=_normalize_create_namespace_mode(mode),
|
||||||
properties=properties,
|
properties=properties,
|
||||||
)
|
)
|
||||||
response = self._ns.create_namespace(request)
|
response = self._namespace_client.create_namespace(request)
|
||||||
return CreateNamespaceResponse(
|
return CreateNamespaceResponse(
|
||||||
properties=response.properties if hasattr(response, "properties") else None
|
properties=response.properties if hasattr(response, "properties") else None
|
||||||
)
|
)
|
||||||
@@ -555,7 +564,7 @@ class LanceNamespaceDBConnection(DBConnection):
|
|||||||
mode=_normalize_drop_namespace_mode(mode),
|
mode=_normalize_drop_namespace_mode(mode),
|
||||||
behavior=_normalize_drop_namespace_behavior(behavior),
|
behavior=_normalize_drop_namespace_behavior(behavior),
|
||||||
)
|
)
|
||||||
response = self._ns.drop_namespace(request)
|
response = self._namespace_client.drop_namespace(request)
|
||||||
return DropNamespaceResponse(
|
return DropNamespaceResponse(
|
||||||
properties=(
|
properties=(
|
||||||
response.properties if hasattr(response, "properties") else None
|
response.properties if hasattr(response, "properties") else None
|
||||||
@@ -581,7 +590,7 @@ class LanceNamespaceDBConnection(DBConnection):
|
|||||||
Response containing the namespace properties.
|
Response containing the namespace properties.
|
||||||
"""
|
"""
|
||||||
request = DescribeNamespaceRequest(id=namespace)
|
request = DescribeNamespaceRequest(id=namespace)
|
||||||
response = self._ns.describe_namespace(request)
|
response = self._namespace_client.describe_namespace(request)
|
||||||
return DescribeNamespaceResponse(
|
return DescribeNamespaceResponse(
|
||||||
properties=response.properties if hasattr(response, "properties") else None
|
properties=response.properties if hasattr(response, "properties") else None
|
||||||
)
|
)
|
||||||
@@ -615,7 +624,7 @@ class LanceNamespaceDBConnection(DBConnection):
|
|||||||
if namespace is None:
|
if namespace is None:
|
||||||
namespace = []
|
namespace = []
|
||||||
request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
|
request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
|
||||||
response = self._ns.list_tables(request)
|
response = self._namespace_client.list_tables(request)
|
||||||
return ListTablesResponse(
|
return ListTablesResponse(
|
||||||
tables=response.tables if response.tables else [],
|
tables=response.tables if response.tables else [],
|
||||||
page_token=response.page_token,
|
page_token=response.page_token,
|
||||||
@@ -630,6 +639,8 @@ class LanceNamespaceDBConnection(DBConnection):
|
|||||||
storage_options: Optional[Dict[str, str]] = None,
|
storage_options: Optional[Dict[str, str]] = None,
|
||||||
storage_options_provider: Optional[StorageOptionsProvider] = None,
|
storage_options_provider: Optional[StorageOptionsProvider] = None,
|
||||||
index_cache_size: Optional[int] = None,
|
index_cache_size: Optional[int] = None,
|
||||||
|
namespace_client: Optional[Any] = None,
|
||||||
|
managed_versioning: Optional[bool] = None,
|
||||||
) -> LanceTable:
|
) -> LanceTable:
|
||||||
# Open a table directly from a URI using the location parameter
|
# Open a table directly from a URI using the location parameter
|
||||||
# Note: storage_options should already be merged by the caller
|
# Note: storage_options should already be merged by the caller
|
||||||
@@ -643,6 +654,8 @@ class LanceNamespaceDBConnection(DBConnection):
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Open the table using the temporary connection with the location parameter
|
# Open the table using the temporary connection with the location parameter
|
||||||
|
# Pass namespace_client to enable managed versioning support
|
||||||
|
# Pass managed_versioning to avoid redundant describe_table call
|
||||||
return LanceTable.open(
|
return LanceTable.open(
|
||||||
temp_conn,
|
temp_conn,
|
||||||
name,
|
name,
|
||||||
@@ -651,6 +664,8 @@ class LanceNamespaceDBConnection(DBConnection):
|
|||||||
storage_options_provider=storage_options_provider,
|
storage_options_provider=storage_options_provider,
|
||||||
index_cache_size=index_cache_size,
|
index_cache_size=index_cache_size,
|
||||||
location=table_uri,
|
location=table_uri,
|
||||||
|
namespace_client=namespace_client,
|
||||||
|
managed_versioning=managed_versioning,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -685,7 +700,7 @@ class AsyncLanceNamespaceDBConnection:
|
|||||||
session : Optional[Session]
|
session : Optional[Session]
|
||||||
A session to use for this connection
|
A session to use for this connection
|
||||||
"""
|
"""
|
||||||
self._ns = namespace
|
self._namespace_client = namespace
|
||||||
self.read_consistency_interval = read_consistency_interval
|
self.read_consistency_interval = read_consistency_interval
|
||||||
self.storage_options = storage_options or {}
|
self.storage_options = storage_options or {}
|
||||||
self.session = session
|
self.session = session
|
||||||
@@ -713,7 +728,7 @@ class AsyncLanceNamespaceDBConnection:
|
|||||||
if namespace is None:
|
if namespace is None:
|
||||||
namespace = []
|
namespace = []
|
||||||
request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
|
request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
|
||||||
response = self._ns.list_tables(request)
|
response = self._namespace_client.list_tables(request)
|
||||||
return response.tables if response.tables else []
|
return response.tables if response.tables else []
|
||||||
|
|
||||||
async def create_table(
|
async def create_table(
|
||||||
@@ -750,7 +765,9 @@ class AsyncLanceNamespaceDBConnection:
|
|||||||
# Try to describe the table first to see if it exists
|
# Try to describe the table first to see if it exists
|
||||||
try:
|
try:
|
||||||
describe_request = DescribeTableRequest(id=table_id)
|
describe_request = DescribeTableRequest(id=table_id)
|
||||||
describe_response = self._ns.describe_table(describe_request)
|
describe_response = self._namespace_client.describe_table(
|
||||||
|
describe_request
|
||||||
|
)
|
||||||
location = describe_response.location
|
location = describe_response.location
|
||||||
namespace_storage_options = describe_response.storage_options
|
namespace_storage_options = describe_response.storage_options
|
||||||
except Exception:
|
except Exception:
|
||||||
@@ -764,7 +781,7 @@ class AsyncLanceNamespaceDBConnection:
|
|||||||
location=None,
|
location=None,
|
||||||
properties=self.storage_options if self.storage_options else None,
|
properties=self.storage_options if self.storage_options else None,
|
||||||
)
|
)
|
||||||
declare_response = self._ns.declare_table(declare_request)
|
declare_response = self._namespace_client.declare_table(declare_request)
|
||||||
|
|
||||||
if not declare_response.location:
|
if not declare_response.location:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
@@ -797,7 +814,7 @@ class AsyncLanceNamespaceDBConnection:
|
|||||||
and namespace_storage_options is not None
|
and namespace_storage_options is not None
|
||||||
):
|
):
|
||||||
provider = LanceNamespaceStorageOptionsProvider(
|
provider = LanceNamespaceStorageOptionsProvider(
|
||||||
namespace=self._ns,
|
namespace=self._namespace_client,
|
||||||
table_id=table_id,
|
table_id=table_id,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
@@ -817,6 +834,7 @@ class AsyncLanceNamespaceDBConnection:
|
|||||||
storage_options=merged_storage_options,
|
storage_options=merged_storage_options,
|
||||||
storage_options_provider=provider,
|
storage_options_provider=provider,
|
||||||
location=location,
|
location=location,
|
||||||
|
namespace_client=self._namespace_client,
|
||||||
)
|
)
|
||||||
|
|
||||||
lance_table = await asyncio.to_thread(_create_table)
|
lance_table = await asyncio.to_thread(_create_table)
|
||||||
@@ -837,7 +855,7 @@ class AsyncLanceNamespaceDBConnection:
|
|||||||
namespace = []
|
namespace = []
|
||||||
table_id = namespace + [name]
|
table_id = namespace + [name]
|
||||||
request = DescribeTableRequest(id=table_id)
|
request = DescribeTableRequest(id=table_id)
|
||||||
response = self._ns.describe_table(request)
|
response = self._namespace_client.describe_table(request)
|
||||||
|
|
||||||
# Merge storage options: self.storage_options < user options < namespace options
|
# Merge storage options: self.storage_options < user options < namespace options
|
||||||
merged_storage_options = dict(self.storage_options)
|
merged_storage_options = dict(self.storage_options)
|
||||||
@@ -849,10 +867,14 @@ class AsyncLanceNamespaceDBConnection:
|
|||||||
# Create a storage options provider if not provided by user
|
# Create a storage options provider if not provided by user
|
||||||
if storage_options_provider is None and response.storage_options is not None:
|
if storage_options_provider is None and response.storage_options is not None:
|
||||||
storage_options_provider = LanceNamespaceStorageOptionsProvider(
|
storage_options_provider = LanceNamespaceStorageOptionsProvider(
|
||||||
namespace=self._ns,
|
namespace=self._namespace_client,
|
||||||
table_id=table_id,
|
table_id=table_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Capture managed_versioning from describe response.
|
||||||
|
# Convert None to False since we already have the answer from describe_table.
|
||||||
|
managed_versioning = response.managed_versioning is True
|
||||||
|
|
||||||
# Open table in a thread
|
# Open table in a thread
|
||||||
def _open_table():
|
def _open_table():
|
||||||
temp_conn = LanceDBConnection(
|
temp_conn = LanceDBConnection(
|
||||||
@@ -870,6 +892,8 @@ class AsyncLanceNamespaceDBConnection:
|
|||||||
storage_options_provider=storage_options_provider,
|
storage_options_provider=storage_options_provider,
|
||||||
index_cache_size=index_cache_size,
|
index_cache_size=index_cache_size,
|
||||||
location=response.location,
|
location=response.location,
|
||||||
|
namespace_client=self._namespace_client,
|
||||||
|
managed_versioning=managed_versioning,
|
||||||
)
|
)
|
||||||
|
|
||||||
lance_table = await asyncio.to_thread(_open_table)
|
lance_table = await asyncio.to_thread(_open_table)
|
||||||
@@ -881,7 +905,7 @@ class AsyncLanceNamespaceDBConnection:
|
|||||||
namespace = []
|
namespace = []
|
||||||
table_id = namespace + [name]
|
table_id = namespace + [name]
|
||||||
request = DropTableRequest(id=table_id)
|
request = DropTableRequest(id=table_id)
|
||||||
self._ns.drop_table(request)
|
self._namespace_client.drop_table(request)
|
||||||
|
|
||||||
async def rename_table(
|
async def rename_table(
|
||||||
self,
|
self,
|
||||||
@@ -943,7 +967,7 @@ class AsyncLanceNamespaceDBConnection:
|
|||||||
request = ListNamespacesRequest(
|
request = ListNamespacesRequest(
|
||||||
id=namespace, page_token=page_token, limit=limit
|
id=namespace, page_token=page_token, limit=limit
|
||||||
)
|
)
|
||||||
response = self._ns.list_namespaces(request)
|
response = self._namespace_client.list_namespaces(request)
|
||||||
return ListNamespacesResponse(
|
return ListNamespacesResponse(
|
||||||
namespaces=response.namespaces if response.namespaces else [],
|
namespaces=response.namespaces if response.namespaces else [],
|
||||||
page_token=response.page_token,
|
page_token=response.page_token,
|
||||||
@@ -978,7 +1002,7 @@ class AsyncLanceNamespaceDBConnection:
|
|||||||
mode=_normalize_create_namespace_mode(mode),
|
mode=_normalize_create_namespace_mode(mode),
|
||||||
properties=properties,
|
properties=properties,
|
||||||
)
|
)
|
||||||
response = self._ns.create_namespace(request)
|
response = self._namespace_client.create_namespace(request)
|
||||||
return CreateNamespaceResponse(
|
return CreateNamespaceResponse(
|
||||||
properties=response.properties if hasattr(response, "properties") else None
|
properties=response.properties if hasattr(response, "properties") else None
|
||||||
)
|
)
|
||||||
@@ -1012,7 +1036,7 @@ class AsyncLanceNamespaceDBConnection:
|
|||||||
mode=_normalize_drop_namespace_mode(mode),
|
mode=_normalize_drop_namespace_mode(mode),
|
||||||
behavior=_normalize_drop_namespace_behavior(behavior),
|
behavior=_normalize_drop_namespace_behavior(behavior),
|
||||||
)
|
)
|
||||||
response = self._ns.drop_namespace(request)
|
response = self._namespace_client.drop_namespace(request)
|
||||||
return DropNamespaceResponse(
|
return DropNamespaceResponse(
|
||||||
properties=(
|
properties=(
|
||||||
response.properties if hasattr(response, "properties") else None
|
response.properties if hasattr(response, "properties") else None
|
||||||
@@ -1039,7 +1063,7 @@ class AsyncLanceNamespaceDBConnection:
|
|||||||
Response containing the namespace properties.
|
Response containing the namespace properties.
|
||||||
"""
|
"""
|
||||||
request = DescribeNamespaceRequest(id=namespace)
|
request = DescribeNamespaceRequest(id=namespace)
|
||||||
response = self._ns.describe_namespace(request)
|
response = self._namespace_client.describe_namespace(request)
|
||||||
return DescribeNamespaceResponse(
|
return DescribeNamespaceResponse(
|
||||||
properties=response.properties if hasattr(response, "properties") else None
|
properties=response.properties if hasattr(response, "properties") else None
|
||||||
)
|
)
|
||||||
@@ -1072,7 +1096,7 @@ class AsyncLanceNamespaceDBConnection:
|
|||||||
if namespace is None:
|
if namespace is None:
|
||||||
namespace = []
|
namespace = []
|
||||||
request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
|
request = ListTablesRequest(id=namespace, page_token=page_token, limit=limit)
|
||||||
response = self._ns.list_tables(request)
|
response = self._namespace_client.list_tables(request)
|
||||||
return ListTablesResponse(
|
return ListTablesResponse(
|
||||||
tables=response.tables if response.tables else [],
|
tables=response.tables if response.tables else [],
|
||||||
page_token=response.page_token,
|
page_token=response.page_token,
|
||||||
|
|||||||
@@ -606,6 +606,7 @@ class LanceQueryBuilder(ABC):
|
|||||||
query,
|
query,
|
||||||
ordering_field_name=ordering_field_name,
|
ordering_field_name=ordering_field_name,
|
||||||
fts_columns=fts_columns,
|
fts_columns=fts_columns,
|
||||||
|
fast_search=fast_search,
|
||||||
)
|
)
|
||||||
|
|
||||||
if isinstance(query, list):
|
if isinstance(query, list):
|
||||||
@@ -1456,12 +1457,14 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
|||||||
query: str | FullTextQuery,
|
query: str | FullTextQuery,
|
||||||
ordering_field_name: Optional[str] = None,
|
ordering_field_name: Optional[str] = None,
|
||||||
fts_columns: Optional[Union[str, List[str]]] = None,
|
fts_columns: Optional[Union[str, List[str]]] = None,
|
||||||
|
fast_search: bool = None,
|
||||||
):
|
):
|
||||||
super().__init__(table)
|
super().__init__(table)
|
||||||
self._query = query
|
self._query = query
|
||||||
self._phrase_query = False
|
self._phrase_query = False
|
||||||
self.ordering_field_name = ordering_field_name
|
self.ordering_field_name = ordering_field_name
|
||||||
self._reranker = None
|
self._reranker = None
|
||||||
|
self._fast_search = fast_search
|
||||||
if isinstance(fts_columns, str):
|
if isinstance(fts_columns, str):
|
||||||
fts_columns = [fts_columns]
|
fts_columns = [fts_columns]
|
||||||
self._fts_columns = fts_columns
|
self._fts_columns = fts_columns
|
||||||
@@ -1483,6 +1486,19 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
|||||||
self._phrase_query = phrase_query
|
self._phrase_query = phrase_query
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
def fast_search(self) -> LanceFtsQueryBuilder:
|
||||||
|
"""
|
||||||
|
Skip a flat search of unindexed data. This will improve
|
||||||
|
search performance but search results will not include unindexed data.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
LanceFtsQueryBuilder
|
||||||
|
The LanceFtsQueryBuilder object.
|
||||||
|
"""
|
||||||
|
self._fast_search = True
|
||||||
|
return self
|
||||||
|
|
||||||
def to_query_object(self) -> Query:
|
def to_query_object(self) -> Query:
|
||||||
return Query(
|
return Query(
|
||||||
columns=self._columns,
|
columns=self._columns,
|
||||||
@@ -1494,6 +1510,7 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
|||||||
query=self._query, columns=self._fts_columns
|
query=self._query, columns=self._fts_columns
|
||||||
),
|
),
|
||||||
offset=self._offset,
|
offset=self._offset,
|
||||||
|
fast_search=self._fast_search,
|
||||||
)
|
)
|
||||||
|
|
||||||
def output_schema(self) -> pa.Schema:
|
def output_schema(self) -> pa.Schema:
|
||||||
|
|||||||
@@ -218,8 +218,6 @@ class RemoteTable(Table):
|
|||||||
train: bool = True,
|
train: bool = True,
|
||||||
):
|
):
|
||||||
"""Create an index on the table.
|
"""Create an index on the table.
|
||||||
Currently, the only parameters that matter are
|
|
||||||
the metric and the vector column name.
|
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
@@ -250,11 +248,6 @@ class RemoteTable(Table):
|
|||||||
>>> table.create_index("l2", "vector") # doctest: +SKIP
|
>>> table.create_index("l2", "vector") # doctest: +SKIP
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if num_sub_vectors is not None:
|
|
||||||
logging.warning(
|
|
||||||
"num_sub_vectors is not supported on LanceDB cloud."
|
|
||||||
"This parameter will be tuned automatically."
|
|
||||||
)
|
|
||||||
if accelerator is not None:
|
if accelerator is not None:
|
||||||
logging.warning(
|
logging.warning(
|
||||||
"GPU accelerator is not yet supported on LanceDB cloud."
|
"GPU accelerator is not yet supported on LanceDB cloud."
|
||||||
|
|||||||
@@ -1331,7 +1331,7 @@ class Table(ABC):
|
|||||||
1 2 [3.0, 4.0]
|
1 2 [3.0, 4.0]
|
||||||
2 3 [5.0, 6.0]
|
2 3 [5.0, 6.0]
|
||||||
>>> table.delete("x = 2")
|
>>> table.delete("x = 2")
|
||||||
DeleteResult(version=2)
|
DeleteResult(num_deleted_rows=1, version=2)
|
||||||
>>> table.to_pandas()
|
>>> table.to_pandas()
|
||||||
x vector
|
x vector
|
||||||
0 1 [1.0, 2.0]
|
0 1 [1.0, 2.0]
|
||||||
@@ -1345,7 +1345,7 @@ class Table(ABC):
|
|||||||
>>> to_remove
|
>>> to_remove
|
||||||
'1, 5'
|
'1, 5'
|
||||||
>>> table.delete(f"x IN ({to_remove})")
|
>>> table.delete(f"x IN ({to_remove})")
|
||||||
DeleteResult(version=3)
|
DeleteResult(num_deleted_rows=1, version=3)
|
||||||
>>> table.to_pandas()
|
>>> table.to_pandas()
|
||||||
x vector
|
x vector
|
||||||
0 3 [5.0, 6.0]
|
0 3 [5.0, 6.0]
|
||||||
@@ -1746,6 +1746,8 @@ class LanceTable(Table):
|
|||||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
||||||
index_cache_size: Optional[int] = None,
|
index_cache_size: Optional[int] = None,
|
||||||
location: Optional[str] = None,
|
location: Optional[str] = None,
|
||||||
|
namespace_client: Optional[Any] = None,
|
||||||
|
managed_versioning: Optional[bool] = None,
|
||||||
_async: AsyncTable = None,
|
_async: AsyncTable = None,
|
||||||
):
|
):
|
||||||
if namespace is None:
|
if namespace is None:
|
||||||
@@ -1753,6 +1755,7 @@ class LanceTable(Table):
|
|||||||
self._conn = connection
|
self._conn = connection
|
||||||
self._namespace = namespace
|
self._namespace = namespace
|
||||||
self._location = location # Store location for use in _dataset_path
|
self._location = location # Store location for use in _dataset_path
|
||||||
|
self._namespace_client = namespace_client
|
||||||
if _async is not None:
|
if _async is not None:
|
||||||
self._table = _async
|
self._table = _async
|
||||||
else:
|
else:
|
||||||
@@ -1764,6 +1767,8 @@ class LanceTable(Table):
|
|||||||
storage_options_provider=storage_options_provider,
|
storage_options_provider=storage_options_provider,
|
||||||
index_cache_size=index_cache_size,
|
index_cache_size=index_cache_size,
|
||||||
location=location,
|
location=location,
|
||||||
|
namespace_client=namespace_client,
|
||||||
|
managed_versioning=managed_versioning,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -1806,6 +1811,8 @@ class LanceTable(Table):
|
|||||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
||||||
index_cache_size: Optional[int] = None,
|
index_cache_size: Optional[int] = None,
|
||||||
location: Optional[str] = None,
|
location: Optional[str] = None,
|
||||||
|
namespace_client: Optional[Any] = None,
|
||||||
|
managed_versioning: Optional[bool] = None,
|
||||||
):
|
):
|
||||||
if namespace is None:
|
if namespace is None:
|
||||||
namespace = []
|
namespace = []
|
||||||
@@ -1817,6 +1824,8 @@ class LanceTable(Table):
|
|||||||
storage_options_provider=storage_options_provider,
|
storage_options_provider=storage_options_provider,
|
||||||
index_cache_size=index_cache_size,
|
index_cache_size=index_cache_size,
|
||||||
location=location,
|
location=location,
|
||||||
|
namespace_client=namespace_client,
|
||||||
|
managed_versioning=managed_versioning,
|
||||||
)
|
)
|
||||||
|
|
||||||
# check the dataset exists
|
# check the dataset exists
|
||||||
@@ -1848,6 +1857,16 @@ class LanceTable(Table):
|
|||||||
"Please install with `pip install pylance`."
|
"Please install with `pip install pylance`."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if self._namespace_client is not None:
|
||||||
|
table_id = self._namespace + [self.name]
|
||||||
|
return lance.dataset(
|
||||||
|
version=self.version,
|
||||||
|
storage_options=self._conn.storage_options,
|
||||||
|
namespace=self._namespace_client,
|
||||||
|
table_id=table_id,
|
||||||
|
**kwargs,
|
||||||
|
)
|
||||||
|
|
||||||
return lance.dataset(
|
return lance.dataset(
|
||||||
self._dataset_path,
|
self._dataset_path,
|
||||||
version=self.version,
|
version=self.version,
|
||||||
@@ -2713,6 +2732,7 @@ class LanceTable(Table):
|
|||||||
data_storage_version: Optional[str] = None,
|
data_storage_version: Optional[str] = None,
|
||||||
enable_v2_manifest_paths: Optional[bool] = None,
|
enable_v2_manifest_paths: Optional[bool] = None,
|
||||||
location: Optional[str] = None,
|
location: Optional[str] = None,
|
||||||
|
namespace_client: Optional[Any] = None,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Create a new table.
|
Create a new table.
|
||||||
@@ -2773,6 +2793,7 @@ class LanceTable(Table):
|
|||||||
self._conn = db
|
self._conn = db
|
||||||
self._namespace = namespace
|
self._namespace = namespace
|
||||||
self._location = location
|
self._location = location
|
||||||
|
self._namespace_client = namespace_client
|
||||||
|
|
||||||
if data_storage_version is not None:
|
if data_storage_version is not None:
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
@@ -4215,7 +4236,7 @@ class AsyncTable:
|
|||||||
1 2 [3.0, 4.0]
|
1 2 [3.0, 4.0]
|
||||||
2 3 [5.0, 6.0]
|
2 3 [5.0, 6.0]
|
||||||
>>> table.delete("x = 2")
|
>>> table.delete("x = 2")
|
||||||
DeleteResult(version=2)
|
DeleteResult(num_deleted_rows=1, version=2)
|
||||||
>>> table.to_pandas()
|
>>> table.to_pandas()
|
||||||
x vector
|
x vector
|
||||||
0 1 [1.0, 2.0]
|
0 1 [1.0, 2.0]
|
||||||
@@ -4229,7 +4250,7 @@ class AsyncTable:
|
|||||||
>>> to_remove
|
>>> to_remove
|
||||||
'1, 5'
|
'1, 5'
|
||||||
>>> table.delete(f"x IN ({to_remove})")
|
>>> table.delete(f"x IN ({to_remove})")
|
||||||
DeleteResult(version=3)
|
DeleteResult(num_deleted_rows=1, version=3)
|
||||||
>>> table.to_pandas()
|
>>> table.to_pandas()
|
||||||
x vector
|
x vector
|
||||||
0 3 [5.0, 6.0]
|
0 3 [5.0, 6.0]
|
||||||
|
|||||||
@@ -324,6 +324,16 @@ def _(value: list):
|
|||||||
return "[" + ", ".join(map(value_to_sql, value)) + "]"
|
return "[" + ", ".join(map(value_to_sql, value)) + "]"
|
||||||
|
|
||||||
|
|
||||||
|
@value_to_sql.register(dict)
|
||||||
|
def _(value: dict):
|
||||||
|
# https://datafusion.apache.org/user-guide/sql/scalar_functions.html#named-struct
|
||||||
|
return (
|
||||||
|
"named_struct("
|
||||||
|
+ ", ".join(f"'{k}', {value_to_sql(v)}" for k, v in value.items())
|
||||||
|
+ ")"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@value_to_sql.register(np.ndarray)
|
@value_to_sql.register(np.ndarray)
|
||||||
def _(value: np.ndarray):
|
def _(value: np.ndarray):
|
||||||
return value_to_sql(value.tolist())
|
return value_to_sql(value.tolist())
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ from lancedb.query import (
|
|||||||
PhraseQuery,
|
PhraseQuery,
|
||||||
BooleanQuery,
|
BooleanQuery,
|
||||||
Occur,
|
Occur,
|
||||||
|
LanceFtsQueryBuilder,
|
||||||
)
|
)
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pyarrow as pa
|
import pyarrow as pa
|
||||||
@@ -882,3 +883,109 @@ def test_fts_query_to_json():
|
|||||||
'"must_not":[]}}'
|
'"must_not":[]}}'
|
||||||
)
|
)
|
||||||
assert json_str == expected
|
assert json_str == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_fts_fast_search(table):
|
||||||
|
table.create_fts_index("text", use_tantivy=False)
|
||||||
|
|
||||||
|
# Insert some unindexed data
|
||||||
|
table.add(
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"text": "xyz",
|
||||||
|
"vector": [0 for _ in range(128)],
|
||||||
|
"id": 101,
|
||||||
|
"text2": "xyz",
|
||||||
|
"nested": {"text": "xyz"},
|
||||||
|
"count": 10,
|
||||||
|
}
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Without fast_search, the query object should not have fast_search set
|
||||||
|
builder = table.search("xyz", query_type="fts").limit(10)
|
||||||
|
query = builder.to_query_object()
|
||||||
|
assert query.fast_search is None
|
||||||
|
|
||||||
|
# With fast_search, the query object should have fast_search=True
|
||||||
|
builder = table.search("xyz", query_type="fts").fast_search().limit(10)
|
||||||
|
query = builder.to_query_object()
|
||||||
|
assert query.fast_search is True
|
||||||
|
|
||||||
|
# fast_search should be chainable with other methods
|
||||||
|
builder = (
|
||||||
|
table.search("xyz", query_type="fts").fast_search().select(["text"]).limit(5)
|
||||||
|
)
|
||||||
|
query = builder.to_query_object()
|
||||||
|
assert query.fast_search is True
|
||||||
|
assert query.limit == 5
|
||||||
|
assert query.columns == ["text"]
|
||||||
|
|
||||||
|
# fast_search should be enabled by keyword argument too
|
||||||
|
query = LanceFtsQueryBuilder(table, "xyz", fast_search=True).to_query_object()
|
||||||
|
assert query.fast_search is True
|
||||||
|
|
||||||
|
# Verify it executes without error and skips unindexed data
|
||||||
|
results = table.search("xyz", query_type="fts").fast_search().limit(5).to_list()
|
||||||
|
assert len(results) == 0
|
||||||
|
|
||||||
|
# Update index and verify it returns results
|
||||||
|
table.optimize()
|
||||||
|
results = table.search("xyz", query_type="fts").fast_search().limit(5).to_list()
|
||||||
|
assert len(results) > 0
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_fts_fast_search_async(async_table):
|
||||||
|
await async_table.create_index("text", config=FTS())
|
||||||
|
|
||||||
|
# Insert some unindexed data
|
||||||
|
await async_table.add(
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"text": "xyz",
|
||||||
|
"vector": [0 for _ in range(128)],
|
||||||
|
"id": 101,
|
||||||
|
"text2": "xyz",
|
||||||
|
"nested": {"text": "xyz"},
|
||||||
|
"count": 10,
|
||||||
|
}
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
# Without fast_search, should return results
|
||||||
|
results = await async_table.query().nearest_to_text("xyz").limit(5).to_list()
|
||||||
|
assert len(results) > 0
|
||||||
|
|
||||||
|
# With fast_search, should return no results data unindexed
|
||||||
|
fast_results = (
|
||||||
|
await async_table.query()
|
||||||
|
.nearest_to_text("xyz")
|
||||||
|
.fast_search()
|
||||||
|
.limit(5)
|
||||||
|
.to_list()
|
||||||
|
)
|
||||||
|
assert len(fast_results) == 0
|
||||||
|
|
||||||
|
# Update index and verify it returns results
|
||||||
|
await async_table.optimize()
|
||||||
|
|
||||||
|
fast_results = (
|
||||||
|
await async_table.query()
|
||||||
|
.nearest_to_text("xyz")
|
||||||
|
.fast_search()
|
||||||
|
.limit(5)
|
||||||
|
.to_list()
|
||||||
|
)
|
||||||
|
assert len(fast_results) > 0
|
||||||
|
|
||||||
|
# fast_search should be chainable with other methods
|
||||||
|
results = (
|
||||||
|
await async_table.query()
|
||||||
|
.nearest_to_text("xyz")
|
||||||
|
.fast_search()
|
||||||
|
.select(["text"])
|
||||||
|
.limit(5)
|
||||||
|
.to_list()
|
||||||
|
)
|
||||||
|
assert len(results) > 0
|
||||||
|
|||||||
@@ -326,6 +326,24 @@ def test_add_struct(mem_db: DBConnection):
|
|||||||
table = mem_db.create_table("test2", schema=schema)
|
table = mem_db.create_table("test2", schema=schema)
|
||||||
table.add(data)
|
table.add(data)
|
||||||
|
|
||||||
|
struct_type = pa.struct(
|
||||||
|
[
|
||||||
|
("b", pa.int64()),
|
||||||
|
("a", pa.int64()),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
expected = pa.table(
|
||||||
|
{
|
||||||
|
"s_list": [
|
||||||
|
[
|
||||||
|
pa.scalar({"b": 1, "a": 2}, type=struct_type),
|
||||||
|
pa.scalar({"b": 4, "a": None}, type=struct_type),
|
||||||
|
]
|
||||||
|
],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
assert table.to_arrow() == expected
|
||||||
|
|
||||||
|
|
||||||
def test_add_subschema(mem_db: DBConnection):
|
def test_add_subschema(mem_db: DBConnection):
|
||||||
schema = pa.schema(
|
schema = pa.schema(
|
||||||
|
|||||||
@@ -121,6 +121,32 @@ def test_value_to_sql_string(tmp_path):
|
|||||||
assert table.to_pandas().query("search == @value")["replace"].item() == value
|
assert table.to_pandas().query("search == @value")["replace"].item() == value
|
||||||
|
|
||||||
|
|
||||||
|
def test_value_to_sql_dict():
|
||||||
|
# Simple flat struct
|
||||||
|
assert value_to_sql({"a": 1, "b": "hello"}) == "named_struct('a', 1, 'b', 'hello')"
|
||||||
|
|
||||||
|
# Nested struct
|
||||||
|
assert (
|
||||||
|
value_to_sql({"outer": {"inner": 1}})
|
||||||
|
== "named_struct('outer', named_struct('inner', 1))"
|
||||||
|
)
|
||||||
|
|
||||||
|
# List inside struct
|
||||||
|
assert value_to_sql({"a": [1, 2]}) == "named_struct('a', [1, 2])"
|
||||||
|
|
||||||
|
# Mixed types
|
||||||
|
assert (
|
||||||
|
value_to_sql({"name": "test", "count": 42, "rate": 3.14, "active": True})
|
||||||
|
== "named_struct('name', 'test', 'count', 42, 'rate', 3.14, 'active', TRUE)"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Null value inside struct
|
||||||
|
assert value_to_sql({"a": None}) == "named_struct('a', NULL)"
|
||||||
|
|
||||||
|
# Empty dict
|
||||||
|
assert value_to_sql({}) == "named_struct()"
|
||||||
|
|
||||||
|
|
||||||
def test_append_vector_columns():
|
def test_append_vector_columns():
|
||||||
registry = EmbeddingFunctionRegistry.get_instance()
|
registry = EmbeddingFunctionRegistry.get_instance()
|
||||||
registry.register("test")(MockTextEmbeddingFunction)
|
registry.register("test")(MockTextEmbeddingFunction)
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ use arrow::{
|
|||||||
use futures::stream::StreamExt;
|
use futures::stream::StreamExt;
|
||||||
use lancedb::arrow::SendableRecordBatchStream;
|
use lancedb::arrow::SendableRecordBatchStream;
|
||||||
use pyo3::{
|
use pyo3::{
|
||||||
exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, Py, PyAny, PyRef, PyResult, Python,
|
Bound, Py, PyAny, PyRef, PyResult, Python, exceptions::PyStopAsyncIteration, pyclass, pymethods,
|
||||||
};
|
};
|
||||||
use pyo3_async_runtimes::tokio::future_into_py;
|
use pyo3_async_runtimes::tokio::future_into_py;
|
||||||
|
|
||||||
|
|||||||
@@ -9,15 +9,16 @@ use lancedb::{
|
|||||||
database::{CreateTableMode, Database, ReadConsistency},
|
database::{CreateTableMode, Database, ReadConsistency},
|
||||||
};
|
};
|
||||||
use pyo3::{
|
use pyo3::{
|
||||||
|
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
|
||||||
exceptions::{PyRuntimeError, PyValueError},
|
exceptions::{PyRuntimeError, PyValueError},
|
||||||
pyclass, pyfunction, pymethods,
|
pyclass, pyfunction, pymethods,
|
||||||
types::{PyDict, PyDictMethods},
|
types::{PyDict, PyDictMethods},
|
||||||
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
|
|
||||||
};
|
};
|
||||||
use pyo3_async_runtimes::tokio::future_into_py;
|
use pyo3_async_runtimes::tokio::future_into_py;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
error::PythonErrorExt, storage_options::py_object_to_storage_options_provider, table::Table,
|
error::PythonErrorExt, namespace::extract_namespace_arc,
|
||||||
|
storage_options::py_object_to_storage_options_provider, table::Table,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[pyclass]
|
#[pyclass]
|
||||||
@@ -182,7 +183,8 @@ impl Connection {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyo3(signature = (name, namespace=vec![], storage_options = None, storage_options_provider=None, index_cache_size = None, location=None))]
|
#[allow(clippy::too_many_arguments)]
|
||||||
|
#[pyo3(signature = (name, namespace=vec![], storage_options = None, storage_options_provider=None, index_cache_size = None, location=None, namespace_client=None, managed_versioning=None))]
|
||||||
pub fn open_table(
|
pub fn open_table(
|
||||||
self_: PyRef<'_, Self>,
|
self_: PyRef<'_, Self>,
|
||||||
name: String,
|
name: String,
|
||||||
@@ -191,11 +193,13 @@ impl Connection {
|
|||||||
storage_options_provider: Option<Py<PyAny>>,
|
storage_options_provider: Option<Py<PyAny>>,
|
||||||
index_cache_size: Option<u32>,
|
index_cache_size: Option<u32>,
|
||||||
location: Option<String>,
|
location: Option<String>,
|
||||||
|
namespace_client: Option<Py<PyAny>>,
|
||||||
|
managed_versioning: Option<bool>,
|
||||||
) -> PyResult<Bound<'_, PyAny>> {
|
) -> PyResult<Bound<'_, PyAny>> {
|
||||||
let inner = self_.get_inner()?.clone();
|
let inner = self_.get_inner()?.clone();
|
||||||
|
|
||||||
let mut builder = inner.open_table(name);
|
let mut builder = inner.open_table(name);
|
||||||
builder = builder.namespace(namespace);
|
builder = builder.namespace(namespace.clone());
|
||||||
if let Some(storage_options) = storage_options {
|
if let Some(storage_options) = storage_options {
|
||||||
builder = builder.storage_options(storage_options);
|
builder = builder.storage_options(storage_options);
|
||||||
}
|
}
|
||||||
@@ -209,6 +213,20 @@ impl Connection {
|
|||||||
if let Some(location) = location {
|
if let Some(location) = location {
|
||||||
builder = builder.location(location);
|
builder = builder.location(location);
|
||||||
}
|
}
|
||||||
|
// Extract namespace client from Python object if provided
|
||||||
|
let ns_client = if let Some(ns_obj) = namespace_client {
|
||||||
|
let py = self_.py();
|
||||||
|
Some(extract_namespace_arc(py, ns_obj)?)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
if let Some(ns_client) = ns_client {
|
||||||
|
builder = builder.namespace_client(ns_client);
|
||||||
|
}
|
||||||
|
// Pass managed_versioning if provided to avoid redundant describe_table call
|
||||||
|
if let Some(enabled) = managed_versioning {
|
||||||
|
builder = builder.managed_versioning(enabled);
|
||||||
|
}
|
||||||
|
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
let table = builder.execute().await.infer_error()?;
|
let table = builder.execute().await.infer_error()?;
|
||||||
|
|||||||
@@ -2,10 +2,10 @@
|
|||||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
use pyo3::{
|
use pyo3::{
|
||||||
|
PyErr, PyResult, Python,
|
||||||
exceptions::{PyIOError, PyNotImplementedError, PyOSError, PyRuntimeError, PyValueError},
|
exceptions::{PyIOError, PyNotImplementedError, PyOSError, PyRuntimeError, PyValueError},
|
||||||
intern,
|
intern,
|
||||||
types::{PyAnyMethods, PyNone},
|
types::{PyAnyMethods, PyNone},
|
||||||
PyErr, PyResult, Python,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
use lancedb::error::Error as LanceError;
|
use lancedb::error::Error as LanceError;
|
||||||
|
|||||||
@@ -3,17 +3,17 @@
|
|||||||
|
|
||||||
use lancedb::index::vector::{IvfFlatIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder};
|
use lancedb::index::vector::{IvfFlatIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder};
|
||||||
use lancedb::index::{
|
use lancedb::index::{
|
||||||
|
Index as LanceDbIndex,
|
||||||
scalar::{BTreeIndexBuilder, FtsIndexBuilder},
|
scalar::{BTreeIndexBuilder, FtsIndexBuilder},
|
||||||
vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder},
|
vector::{IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder},
|
||||||
Index as LanceDbIndex,
|
|
||||||
};
|
};
|
||||||
use pyo3::types::PyStringMethods;
|
|
||||||
use pyo3::IntoPyObject;
|
use pyo3::IntoPyObject;
|
||||||
|
use pyo3::types::PyStringMethods;
|
||||||
use pyo3::{
|
use pyo3::{
|
||||||
|
Bound, FromPyObject, PyAny, PyResult, Python,
|
||||||
exceptions::{PyKeyError, PyValueError},
|
exceptions::{PyKeyError, PyValueError},
|
||||||
intern, pyclass, pymethods,
|
intern, pyclass, pymethods,
|
||||||
types::PyAnyMethods,
|
types::PyAnyMethods,
|
||||||
Bound, FromPyObject, PyAny, PyResult, Python,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::util::parse_distance_type;
|
use crate::util::parse_distance_type;
|
||||||
@@ -41,7 +41,12 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
|
|||||||
let inner_opts = FtsIndexBuilder::default()
|
let inner_opts = FtsIndexBuilder::default()
|
||||||
.base_tokenizer(params.base_tokenizer)
|
.base_tokenizer(params.base_tokenizer)
|
||||||
.language(&params.language)
|
.language(&params.language)
|
||||||
.map_err(|_| PyValueError::new_err(format!("LanceDB does not support the requested language: '{}'", params.language)))?
|
.map_err(|_| {
|
||||||
|
PyValueError::new_err(format!(
|
||||||
|
"LanceDB does not support the requested language: '{}'",
|
||||||
|
params.language
|
||||||
|
))
|
||||||
|
})?
|
||||||
.with_position(params.with_position)
|
.with_position(params.with_position)
|
||||||
.lower_case(params.lower_case)
|
.lower_case(params.lower_case)
|
||||||
.max_token_length(params.max_token_length)
|
.max_token_length(params.max_token_length)
|
||||||
@@ -52,7 +57,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
|
|||||||
.ngram_max_length(params.ngram_max_length)
|
.ngram_max_length(params.ngram_max_length)
|
||||||
.ngram_prefix_only(params.prefix_only);
|
.ngram_prefix_only(params.prefix_only);
|
||||||
Ok(LanceDbIndex::FTS(inner_opts))
|
Ok(LanceDbIndex::FTS(inner_opts))
|
||||||
},
|
}
|
||||||
"IvfFlat" => {
|
"IvfFlat" => {
|
||||||
let params = source.extract::<IvfFlatParams>()?;
|
let params = source.extract::<IvfFlatParams>()?;
|
||||||
let distance_type = parse_distance_type(params.distance_type)?;
|
let distance_type = parse_distance_type(params.distance_type)?;
|
||||||
@@ -64,10 +69,11 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
|
|||||||
ivf_flat_builder = ivf_flat_builder.num_partitions(num_partitions);
|
ivf_flat_builder = ivf_flat_builder.num_partitions(num_partitions);
|
||||||
}
|
}
|
||||||
if let Some(target_partition_size) = params.target_partition_size {
|
if let Some(target_partition_size) = params.target_partition_size {
|
||||||
ivf_flat_builder = ivf_flat_builder.target_partition_size(target_partition_size);
|
ivf_flat_builder =
|
||||||
|
ivf_flat_builder.target_partition_size(target_partition_size);
|
||||||
}
|
}
|
||||||
Ok(LanceDbIndex::IvfFlat(ivf_flat_builder))
|
Ok(LanceDbIndex::IvfFlat(ivf_flat_builder))
|
||||||
},
|
}
|
||||||
"IvfPq" => {
|
"IvfPq" => {
|
||||||
let params = source.extract::<IvfPqParams>()?;
|
let params = source.extract::<IvfPqParams>()?;
|
||||||
let distance_type = parse_distance_type(params.distance_type)?;
|
let distance_type = parse_distance_type(params.distance_type)?;
|
||||||
@@ -86,7 +92,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
|
|||||||
ivf_pq_builder = ivf_pq_builder.num_sub_vectors(num_sub_vectors);
|
ivf_pq_builder = ivf_pq_builder.num_sub_vectors(num_sub_vectors);
|
||||||
}
|
}
|
||||||
Ok(LanceDbIndex::IvfPq(ivf_pq_builder))
|
Ok(LanceDbIndex::IvfPq(ivf_pq_builder))
|
||||||
},
|
}
|
||||||
"IvfSq" => {
|
"IvfSq" => {
|
||||||
let params = source.extract::<IvfSqParams>()?;
|
let params = source.extract::<IvfSqParams>()?;
|
||||||
let distance_type = parse_distance_type(params.distance_type)?;
|
let distance_type = parse_distance_type(params.distance_type)?;
|
||||||
@@ -101,7 +107,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
|
|||||||
ivf_sq_builder = ivf_sq_builder.target_partition_size(target_partition_size);
|
ivf_sq_builder = ivf_sq_builder.target_partition_size(target_partition_size);
|
||||||
}
|
}
|
||||||
Ok(LanceDbIndex::IvfSq(ivf_sq_builder))
|
Ok(LanceDbIndex::IvfSq(ivf_sq_builder))
|
||||||
},
|
}
|
||||||
"IvfRq" => {
|
"IvfRq" => {
|
||||||
let params = source.extract::<IvfRqParams>()?;
|
let params = source.extract::<IvfRqParams>()?;
|
||||||
let distance_type = parse_distance_type(params.distance_type)?;
|
let distance_type = parse_distance_type(params.distance_type)?;
|
||||||
@@ -117,7 +123,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
|
|||||||
ivf_rq_builder = ivf_rq_builder.target_partition_size(target_partition_size);
|
ivf_rq_builder = ivf_rq_builder.target_partition_size(target_partition_size);
|
||||||
}
|
}
|
||||||
Ok(LanceDbIndex::IvfRq(ivf_rq_builder))
|
Ok(LanceDbIndex::IvfRq(ivf_rq_builder))
|
||||||
},
|
}
|
||||||
"HnswPq" => {
|
"HnswPq" => {
|
||||||
let params = source.extract::<IvfHnswPqParams>()?;
|
let params = source.extract::<IvfHnswPqParams>()?;
|
||||||
let distance_type = parse_distance_type(params.distance_type)?;
|
let distance_type = parse_distance_type(params.distance_type)?;
|
||||||
@@ -138,7 +144,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
|
|||||||
hnsw_pq_builder = hnsw_pq_builder.num_sub_vectors(num_sub_vectors);
|
hnsw_pq_builder = hnsw_pq_builder.num_sub_vectors(num_sub_vectors);
|
||||||
}
|
}
|
||||||
Ok(LanceDbIndex::IvfHnswPq(hnsw_pq_builder))
|
Ok(LanceDbIndex::IvfHnswPq(hnsw_pq_builder))
|
||||||
},
|
}
|
||||||
"HnswSq" => {
|
"HnswSq" => {
|
||||||
let params = source.extract::<IvfHnswSqParams>()?;
|
let params = source.extract::<IvfHnswSqParams>()?;
|
||||||
let distance_type = parse_distance_type(params.distance_type)?;
|
let distance_type = parse_distance_type(params.distance_type)?;
|
||||||
@@ -155,7 +161,7 @@ pub fn extract_index_params(source: &Option<Bound<'_, PyAny>>) -> PyResult<Lance
|
|||||||
hnsw_sq_builder = hnsw_sq_builder.target_partition_size(target_partition_size);
|
hnsw_sq_builder = hnsw_sq_builder.target_partition_size(target_partition_size);
|
||||||
}
|
}
|
||||||
Ok(LanceDbIndex::IvfHnswSq(hnsw_sq_builder))
|
Ok(LanceDbIndex::IvfHnswSq(hnsw_sq_builder))
|
||||||
},
|
}
|
||||||
not_supported => Err(PyValueError::new_err(format!(
|
not_supported => Err(PyValueError::new_err(format!(
|
||||||
"Invalid index type '{}'. Must be one of BTree, Bitmap, LabelList, FTS, IvfPq, IvfSq, IvfHnswPq, or IvfHnswSq",
|
"Invalid index type '{}'. Must be one of BTree, Bitmap, LabelList, FTS, IvfPq, IvfSq, IvfHnswPq, or IvfHnswSq",
|
||||||
not_supported
|
not_supported
|
||||||
|
|||||||
@@ -2,14 +2,14 @@
|
|||||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
use arrow::RecordBatchStream;
|
use arrow::RecordBatchStream;
|
||||||
use connection::{connect, Connection};
|
use connection::{Connection, connect};
|
||||||
use env_logger::Env;
|
use env_logger::Env;
|
||||||
use index::IndexConfig;
|
use index::IndexConfig;
|
||||||
use permutation::{PyAsyncPermutationBuilder, PyPermutationReader};
|
use permutation::{PyAsyncPermutationBuilder, PyPermutationReader};
|
||||||
use pyo3::{
|
use pyo3::{
|
||||||
pymodule,
|
Bound, PyResult, Python, pymodule,
|
||||||
types::{PyModule, PyModuleMethods},
|
types::{PyModule, PyModuleMethods},
|
||||||
wrap_pyfunction, Bound, PyResult, Python,
|
wrap_pyfunction,
|
||||||
};
|
};
|
||||||
use query::{FTSQuery, HybridQuery, Query, VectorQuery};
|
use query::{FTSQuery, HybridQuery, Query, VectorQuery};
|
||||||
use session::Session;
|
use session::Session;
|
||||||
@@ -23,6 +23,7 @@ pub mod connection;
|
|||||||
pub mod error;
|
pub mod error;
|
||||||
pub mod header;
|
pub mod header;
|
||||||
pub mod index;
|
pub mod index;
|
||||||
|
pub mod namespace;
|
||||||
pub mod permutation;
|
pub mod permutation;
|
||||||
pub mod query;
|
pub mod query;
|
||||||
pub mod session;
|
pub mod session;
|
||||||
|
|||||||
696
python/src/namespace.rs
Normal file
696
python/src/namespace.rs
Normal file
@@ -0,0 +1,696 @@
|
|||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
//! Namespace utilities for Python bindings
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use bytes::Bytes;
|
||||||
|
use lance_namespace::LanceNamespace as LanceNamespaceTrait;
|
||||||
|
use lance_namespace::models::*;
|
||||||
|
use pyo3::prelude::*;
|
||||||
|
use pyo3::types::PyDict;
|
||||||
|
|
||||||
|
/// Wrapper that allows any Python object implementing LanceNamespace protocol
|
||||||
|
/// to be used as a Rust LanceNamespace.
|
||||||
|
///
|
||||||
|
/// This is similar to PyLanceNamespace in lance's Python bindings - it wraps a Python
|
||||||
|
/// object and calls back into Python when namespace methods are invoked.
|
||||||
|
pub struct PyLanceNamespace {
|
||||||
|
py_namespace: Arc<Py<PyAny>>,
|
||||||
|
namespace_id: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PyLanceNamespace {
|
||||||
|
/// Create a new PyLanceNamespace wrapper around a Python namespace object.
|
||||||
|
pub fn new(_py: Python<'_>, py_namespace: &Bound<'_, PyAny>) -> PyResult<Self> {
|
||||||
|
let namespace_id = py_namespace
|
||||||
|
.call_method0("namespace_id")?
|
||||||
|
.extract::<String>()?;
|
||||||
|
|
||||||
|
Ok(Self {
|
||||||
|
py_namespace: Arc::new(py_namespace.clone().unbind()),
|
||||||
|
namespace_id,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create an Arc<dyn LanceNamespace> from a Python namespace object.
|
||||||
|
pub fn create_arc(
|
||||||
|
py: Python<'_>,
|
||||||
|
py_namespace: &Bound<'_, PyAny>,
|
||||||
|
) -> PyResult<Arc<dyn LanceNamespaceTrait>> {
|
||||||
|
let wrapper = Self::new(py, py_namespace)?;
|
||||||
|
Ok(Arc::new(wrapper))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Debug for PyLanceNamespace {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "PyLanceNamespace {{ id: {} }}", self.namespace_id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get or create the DictWithModelDump class in Python.
|
||||||
|
/// This class acts like a dict but also has model_dump() method.
|
||||||
|
/// This allows it to work with both:
|
||||||
|
/// - depythonize (which expects a dict/Mapping)
|
||||||
|
/// - Python code that calls .model_dump() (like DirectoryNamespace wrapper)
|
||||||
|
fn get_dict_with_model_dump_class(py: Python<'_>) -> PyResult<Bound<'_, PyAny>> {
|
||||||
|
// Use a module-level cache via __builtins__
|
||||||
|
let builtins = py.import("builtins")?;
|
||||||
|
if builtins.hasattr("_DictWithModelDump")? {
|
||||||
|
return builtins.getattr("_DictWithModelDump");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create the class using exec
|
||||||
|
let locals = PyDict::new(py);
|
||||||
|
py.run(
|
||||||
|
c"class DictWithModelDump(dict):
|
||||||
|
def model_dump(self):
|
||||||
|
return dict(self)",
|
||||||
|
None,
|
||||||
|
Some(&locals),
|
||||||
|
)?;
|
||||||
|
let class = locals.get_item("DictWithModelDump")?.ok_or_else(|| {
|
||||||
|
pyo3::exceptions::PyRuntimeError::new_err("Failed to create DictWithModelDump class")
|
||||||
|
})?;
|
||||||
|
|
||||||
|
// Cache it
|
||||||
|
builtins.setattr("_DictWithModelDump", &class)?;
|
||||||
|
Ok(class)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Helper to call a Python namespace method with JSON serialization.
|
||||||
|
/// For methods that take a request and return a response.
|
||||||
|
/// Uses DictWithModelDump to pass a dict that also has model_dump() method,
|
||||||
|
/// making it compatible with both depythonize and Python wrappers.
|
||||||
|
async fn call_py_method<Req, Resp>(
|
||||||
|
py_namespace: Arc<Py<PyAny>>,
|
||||||
|
method_name: &'static str,
|
||||||
|
request: Req,
|
||||||
|
) -> lance_core::Result<Resp>
|
||||||
|
where
|
||||||
|
Req: serde::Serialize + Send + 'static,
|
||||||
|
Resp: serde::de::DeserializeOwned + Send + 'static,
|
||||||
|
{
|
||||||
|
let request_json = serde_json::to_string(&request).map_err(|e| {
|
||||||
|
lance_core::Error::io(format!(
|
||||||
|
"Failed to serialize request for {}: {}",
|
||||||
|
method_name, e
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let response_json = tokio::task::spawn_blocking(move || {
|
||||||
|
Python::attach(|py| {
|
||||||
|
let json_module = py.import("json")?;
|
||||||
|
let request_dict = json_module.call_method1("loads", (&request_json,))?;
|
||||||
|
|
||||||
|
// Wrap dict in DictWithModelDump so it works with both depythonize and .model_dump()
|
||||||
|
let dict_class = get_dict_with_model_dump_class(py)?;
|
||||||
|
let request_arg = dict_class.call1((request_dict,))?;
|
||||||
|
|
||||||
|
// Call the Python method
|
||||||
|
let result = py_namespace.call_method1(py, method_name, (request_arg,))?;
|
||||||
|
|
||||||
|
// Convert response to dict, then to JSON
|
||||||
|
// Pydantic models have model_dump() method
|
||||||
|
let result_dict = if result.bind(py).hasattr("model_dump")? {
|
||||||
|
result.call_method0(py, "model_dump")?
|
||||||
|
} else {
|
||||||
|
result
|
||||||
|
};
|
||||||
|
let response_json: String = json_module
|
||||||
|
.call_method1("dumps", (result_dict,))?
|
||||||
|
.extract()?;
|
||||||
|
Ok::<_, PyErr>(response_json)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| lance_core::Error::io(format!("Task join error for {}: {}", method_name, e)))?
|
||||||
|
.map_err(|e: PyErr| lance_core::Error::io(format!("Python error in {}: {}", method_name, e)))?;
|
||||||
|
|
||||||
|
serde_json::from_str(&response_json).map_err(|e| {
|
||||||
|
lance_core::Error::io(format!(
|
||||||
|
"Failed to deserialize response from {}: {}",
|
||||||
|
method_name, e
|
||||||
|
))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Helper for methods that return () on success
|
||||||
|
async fn call_py_method_unit<Req>(
|
||||||
|
py_namespace: Arc<Py<PyAny>>,
|
||||||
|
method_name: &'static str,
|
||||||
|
request: Req,
|
||||||
|
) -> lance_core::Result<()>
|
||||||
|
where
|
||||||
|
Req: serde::Serialize + Send + 'static,
|
||||||
|
{
|
||||||
|
let request_json = serde_json::to_string(&request).map_err(|e| {
|
||||||
|
lance_core::Error::io(format!(
|
||||||
|
"Failed to serialize request for {}: {}",
|
||||||
|
method_name, e
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
tokio::task::spawn_blocking(move || {
|
||||||
|
Python::attach(|py| {
|
||||||
|
let json_module = py.import("json")?;
|
||||||
|
let request_dict = json_module.call_method1("loads", (&request_json,))?;
|
||||||
|
|
||||||
|
// Wrap dict in DictWithModelDump
|
||||||
|
let dict_class = get_dict_with_model_dump_class(py)?;
|
||||||
|
let request_arg = dict_class.call1((request_dict,))?;
|
||||||
|
|
||||||
|
// Call the Python method
|
||||||
|
py_namespace.call_method1(py, method_name, (request_arg,))?;
|
||||||
|
Ok::<_, PyErr>(())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| lance_core::Error::io(format!("Task join error for {}: {}", method_name, e)))?
|
||||||
|
.map_err(|e: PyErr| lance_core::Error::io(format!("Python error in {}: {}", method_name, e)))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Helper for methods that return a primitive type
|
||||||
|
async fn call_py_method_primitive<Req, Resp>(
|
||||||
|
py_namespace: Arc<Py<PyAny>>,
|
||||||
|
method_name: &'static str,
|
||||||
|
request: Req,
|
||||||
|
) -> lance_core::Result<Resp>
|
||||||
|
where
|
||||||
|
Req: serde::Serialize + Send + 'static,
|
||||||
|
Resp: for<'py> pyo3::FromPyObject<'py> + Send + 'static,
|
||||||
|
{
|
||||||
|
let request_json = serde_json::to_string(&request).map_err(|e| {
|
||||||
|
lance_core::Error::io(format!(
|
||||||
|
"Failed to serialize request for {}: {}",
|
||||||
|
method_name, e
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
tokio::task::spawn_blocking(move || {
|
||||||
|
Python::attach(|py| {
|
||||||
|
let json_module = py.import("json")?;
|
||||||
|
let request_dict = json_module.call_method1("loads", (&request_json,))?;
|
||||||
|
|
||||||
|
// Wrap dict in DictWithModelDump
|
||||||
|
let dict_class = get_dict_with_model_dump_class(py)?;
|
||||||
|
let request_arg = dict_class.call1((request_dict,))?;
|
||||||
|
|
||||||
|
// Call the Python method
|
||||||
|
let result = py_namespace.call_method1(py, method_name, (request_arg,))?;
|
||||||
|
let value: Resp = result.extract(py)?;
|
||||||
|
Ok::<_, PyErr>(value)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| lance_core::Error::io(format!("Task join error for {}: {}", method_name, e)))?
|
||||||
|
.map_err(|e: PyErr| lance_core::Error::io(format!("Python error in {}: {}", method_name, e)))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Helper for methods that return Bytes
|
||||||
|
async fn call_py_method_bytes<Req>(
|
||||||
|
py_namespace: Arc<Py<PyAny>>,
|
||||||
|
method_name: &'static str,
|
||||||
|
request: Req,
|
||||||
|
) -> lance_core::Result<Bytes>
|
||||||
|
where
|
||||||
|
Req: serde::Serialize + Send + 'static,
|
||||||
|
{
|
||||||
|
let request_json = serde_json::to_string(&request).map_err(|e| {
|
||||||
|
lance_core::Error::io(format!(
|
||||||
|
"Failed to serialize request for {}: {}",
|
||||||
|
method_name, e
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
tokio::task::spawn_blocking(move || {
|
||||||
|
Python::attach(|py| {
|
||||||
|
let json_module = py.import("json")?;
|
||||||
|
let request_dict = json_module.call_method1("loads", (&request_json,))?;
|
||||||
|
|
||||||
|
// Wrap dict in DictWithModelDump
|
||||||
|
let dict_class = get_dict_with_model_dump_class(py)?;
|
||||||
|
let request_arg = dict_class.call1((request_dict,))?;
|
||||||
|
|
||||||
|
// Call the Python method
|
||||||
|
let result = py_namespace.call_method1(py, method_name, (request_arg,))?;
|
||||||
|
let bytes_data: Vec<u8> = result.extract(py)?;
|
||||||
|
Ok::<_, PyErr>(Bytes::from(bytes_data))
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| lance_core::Error::io(format!("Task join error for {}: {}", method_name, e)))?
|
||||||
|
.map_err(|e: PyErr| lance_core::Error::io(format!("Python error in {}: {}", method_name, e)))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Helper for methods that take request + data and return a response
|
||||||
|
async fn call_py_method_with_data<Req, Resp>(
|
||||||
|
py_namespace: Arc<Py<PyAny>>,
|
||||||
|
method_name: &'static str,
|
||||||
|
request: Req,
|
||||||
|
data: Bytes,
|
||||||
|
) -> lance_core::Result<Resp>
|
||||||
|
where
|
||||||
|
Req: serde::Serialize + Send + 'static,
|
||||||
|
Resp: serde::de::DeserializeOwned + Send + 'static,
|
||||||
|
{
|
||||||
|
let request_json = serde_json::to_string(&request).map_err(|e| {
|
||||||
|
lance_core::Error::io(format!(
|
||||||
|
"Failed to serialize request for {}: {}",
|
||||||
|
method_name, e
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let response_json = tokio::task::spawn_blocking(move || {
|
||||||
|
Python::attach(|py| {
|
||||||
|
let json_module = py.import("json")?;
|
||||||
|
let request_dict = json_module.call_method1("loads", (&request_json,))?;
|
||||||
|
|
||||||
|
// Wrap dict in DictWithModelDump
|
||||||
|
let dict_class = get_dict_with_model_dump_class(py)?;
|
||||||
|
let request_arg = dict_class.call1((request_dict,))?;
|
||||||
|
|
||||||
|
// Pass request and bytes to Python method
|
||||||
|
let py_bytes = pyo3::types::PyBytes::new(py, &data);
|
||||||
|
let result = py_namespace.call_method1(py, method_name, (request_arg, py_bytes))?;
|
||||||
|
|
||||||
|
// Convert response dict to JSON
|
||||||
|
let response_json: String = json_module.call_method1("dumps", (result,))?.extract()?;
|
||||||
|
Ok::<_, PyErr>(response_json)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.map_err(|e| lance_core::Error::io(format!("Task join error for {}: {}", method_name, e)))?
|
||||||
|
.map_err(|e: PyErr| lance_core::Error::io(format!("Python error in {}: {}", method_name, e)))?;
|
||||||
|
|
||||||
|
serde_json::from_str(&response_json).map_err(|e| {
|
||||||
|
lance_core::Error::io(format!(
|
||||||
|
"Failed to deserialize response from {}: {}",
|
||||||
|
method_name, e
|
||||||
|
))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl LanceNamespaceTrait for PyLanceNamespace {
|
||||||
|
fn namespace_id(&self) -> String {
|
||||||
|
self.namespace_id.clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn list_namespaces(
|
||||||
|
&self,
|
||||||
|
request: ListNamespacesRequest,
|
||||||
|
) -> lance_core::Result<ListNamespacesResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "list_namespaces", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn describe_namespace(
|
||||||
|
&self,
|
||||||
|
request: DescribeNamespaceRequest,
|
||||||
|
) -> lance_core::Result<DescribeNamespaceResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "describe_namespace", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn create_namespace(
|
||||||
|
&self,
|
||||||
|
request: CreateNamespaceRequest,
|
||||||
|
) -> lance_core::Result<CreateNamespaceResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "create_namespace", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn drop_namespace(
|
||||||
|
&self,
|
||||||
|
request: DropNamespaceRequest,
|
||||||
|
) -> lance_core::Result<DropNamespaceResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "drop_namespace", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn namespace_exists(&self, request: NamespaceExistsRequest) -> lance_core::Result<()> {
|
||||||
|
call_py_method_unit(self.py_namespace.clone(), "namespace_exists", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn list_tables(
|
||||||
|
&self,
|
||||||
|
request: ListTablesRequest,
|
||||||
|
) -> lance_core::Result<ListTablesResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "list_tables", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn describe_table(
|
||||||
|
&self,
|
||||||
|
request: DescribeTableRequest,
|
||||||
|
) -> lance_core::Result<DescribeTableResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "describe_table", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn register_table(
|
||||||
|
&self,
|
||||||
|
request: RegisterTableRequest,
|
||||||
|
) -> lance_core::Result<RegisterTableResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "register_table", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn table_exists(&self, request: TableExistsRequest) -> lance_core::Result<()> {
|
||||||
|
call_py_method_unit(self.py_namespace.clone(), "table_exists", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn drop_table(&self, request: DropTableRequest) -> lance_core::Result<DropTableResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "drop_table", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn deregister_table(
|
||||||
|
&self,
|
||||||
|
request: DeregisterTableRequest,
|
||||||
|
) -> lance_core::Result<DeregisterTableResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "deregister_table", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn count_table_rows(&self, request: CountTableRowsRequest) -> lance_core::Result<i64> {
|
||||||
|
call_py_method_primitive(self.py_namespace.clone(), "count_table_rows", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn create_table(
|
||||||
|
&self,
|
||||||
|
request: CreateTableRequest,
|
||||||
|
request_data: Bytes,
|
||||||
|
) -> lance_core::Result<CreateTableResponse> {
|
||||||
|
call_py_method_with_data(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"create_table",
|
||||||
|
request,
|
||||||
|
request_data,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn declare_table(
|
||||||
|
&self,
|
||||||
|
request: DeclareTableRequest,
|
||||||
|
) -> lance_core::Result<DeclareTableResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "declare_table", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn insert_into_table(
|
||||||
|
&self,
|
||||||
|
request: InsertIntoTableRequest,
|
||||||
|
request_data: Bytes,
|
||||||
|
) -> lance_core::Result<InsertIntoTableResponse> {
|
||||||
|
call_py_method_with_data(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"insert_into_table",
|
||||||
|
request,
|
||||||
|
request_data,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn merge_insert_into_table(
|
||||||
|
&self,
|
||||||
|
request: MergeInsertIntoTableRequest,
|
||||||
|
request_data: Bytes,
|
||||||
|
) -> lance_core::Result<MergeInsertIntoTableResponse> {
|
||||||
|
call_py_method_with_data(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"merge_insert_into_table",
|
||||||
|
request,
|
||||||
|
request_data,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn update_table(
|
||||||
|
&self,
|
||||||
|
request: UpdateTableRequest,
|
||||||
|
) -> lance_core::Result<UpdateTableResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "update_table", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn delete_from_table(
|
||||||
|
&self,
|
||||||
|
request: DeleteFromTableRequest,
|
||||||
|
) -> lance_core::Result<DeleteFromTableResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "delete_from_table", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn query_table(&self, request: QueryTableRequest) -> lance_core::Result<Bytes> {
|
||||||
|
call_py_method_bytes(self.py_namespace.clone(), "query_table", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn create_table_index(
|
||||||
|
&self,
|
||||||
|
request: CreateTableIndexRequest,
|
||||||
|
) -> lance_core::Result<CreateTableIndexResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "create_table_index", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn list_table_indices(
|
||||||
|
&self,
|
||||||
|
request: ListTableIndicesRequest,
|
||||||
|
) -> lance_core::Result<ListTableIndicesResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "list_table_indices", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn describe_table_index_stats(
|
||||||
|
&self,
|
||||||
|
request: DescribeTableIndexStatsRequest,
|
||||||
|
) -> lance_core::Result<DescribeTableIndexStatsResponse> {
|
||||||
|
call_py_method(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"describe_table_index_stats",
|
||||||
|
request,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn describe_transaction(
|
||||||
|
&self,
|
||||||
|
request: DescribeTransactionRequest,
|
||||||
|
) -> lance_core::Result<DescribeTransactionResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "describe_transaction", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn alter_transaction(
|
||||||
|
&self,
|
||||||
|
request: AlterTransactionRequest,
|
||||||
|
) -> lance_core::Result<AlterTransactionResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "alter_transaction", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn create_table_scalar_index(
|
||||||
|
&self,
|
||||||
|
request: CreateTableIndexRequest,
|
||||||
|
) -> lance_core::Result<CreateTableScalarIndexResponse> {
|
||||||
|
call_py_method(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"create_table_scalar_index",
|
||||||
|
request,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn drop_table_index(
|
||||||
|
&self,
|
||||||
|
request: DropTableIndexRequest,
|
||||||
|
) -> lance_core::Result<DropTableIndexResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "drop_table_index", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn list_all_tables(
|
||||||
|
&self,
|
||||||
|
request: ListTablesRequest,
|
||||||
|
) -> lance_core::Result<ListTablesResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "list_all_tables", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn restore_table(
|
||||||
|
&self,
|
||||||
|
request: RestoreTableRequest,
|
||||||
|
) -> lance_core::Result<RestoreTableResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "restore_table", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn rename_table(
|
||||||
|
&self,
|
||||||
|
request: RenameTableRequest,
|
||||||
|
) -> lance_core::Result<RenameTableResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "rename_table", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn list_table_versions(
|
||||||
|
&self,
|
||||||
|
request: ListTableVersionsRequest,
|
||||||
|
) -> lance_core::Result<ListTableVersionsResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "list_table_versions", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn create_table_version(
|
||||||
|
&self,
|
||||||
|
request: CreateTableVersionRequest,
|
||||||
|
) -> lance_core::Result<CreateTableVersionResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "create_table_version", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn describe_table_version(
|
||||||
|
&self,
|
||||||
|
request: DescribeTableVersionRequest,
|
||||||
|
) -> lance_core::Result<DescribeTableVersionResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "describe_table_version", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn batch_delete_table_versions(
|
||||||
|
&self,
|
||||||
|
request: BatchDeleteTableVersionsRequest,
|
||||||
|
) -> lance_core::Result<BatchDeleteTableVersionsResponse> {
|
||||||
|
call_py_method(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"batch_delete_table_versions",
|
||||||
|
request,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn update_table_schema_metadata(
|
||||||
|
&self,
|
||||||
|
request: UpdateTableSchemaMetadataRequest,
|
||||||
|
) -> lance_core::Result<UpdateTableSchemaMetadataResponse> {
|
||||||
|
call_py_method(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"update_table_schema_metadata",
|
||||||
|
request,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_table_stats(
|
||||||
|
&self,
|
||||||
|
request: GetTableStatsRequest,
|
||||||
|
) -> lance_core::Result<GetTableStatsResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "get_table_stats", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn explain_table_query_plan(
|
||||||
|
&self,
|
||||||
|
request: ExplainTableQueryPlanRequest,
|
||||||
|
) -> lance_core::Result<String> {
|
||||||
|
call_py_method_primitive(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"explain_table_query_plan",
|
||||||
|
request,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn analyze_table_query_plan(
|
||||||
|
&self,
|
||||||
|
request: AnalyzeTableQueryPlanRequest,
|
||||||
|
) -> lance_core::Result<String> {
|
||||||
|
call_py_method_primitive(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"analyze_table_query_plan",
|
||||||
|
request,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn alter_table_add_columns(
|
||||||
|
&self,
|
||||||
|
request: AlterTableAddColumnsRequest,
|
||||||
|
) -> lance_core::Result<AlterTableAddColumnsResponse> {
|
||||||
|
call_py_method(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"alter_table_add_columns",
|
||||||
|
request,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn alter_table_alter_columns(
|
||||||
|
&self,
|
||||||
|
request: AlterTableAlterColumnsRequest,
|
||||||
|
) -> lance_core::Result<AlterTableAlterColumnsResponse> {
|
||||||
|
call_py_method(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"alter_table_alter_columns",
|
||||||
|
request,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn alter_table_drop_columns(
|
||||||
|
&self,
|
||||||
|
request: AlterTableDropColumnsRequest,
|
||||||
|
) -> lance_core::Result<AlterTableDropColumnsResponse> {
|
||||||
|
call_py_method(
|
||||||
|
self.py_namespace.clone(),
|
||||||
|
"alter_table_drop_columns",
|
||||||
|
request,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn list_table_tags(
|
||||||
|
&self,
|
||||||
|
request: ListTableTagsRequest,
|
||||||
|
) -> lance_core::Result<ListTableTagsResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "list_table_tags", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn create_table_tag(
|
||||||
|
&self,
|
||||||
|
request: CreateTableTagRequest,
|
||||||
|
) -> lance_core::Result<CreateTableTagResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "create_table_tag", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn delete_table_tag(
|
||||||
|
&self,
|
||||||
|
request: DeleteTableTagRequest,
|
||||||
|
) -> lance_core::Result<DeleteTableTagResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "delete_table_tag", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn update_table_tag(
|
||||||
|
&self,
|
||||||
|
request: UpdateTableTagRequest,
|
||||||
|
) -> lance_core::Result<UpdateTableTagResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "update_table_tag", request).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn get_table_tag_version(
|
||||||
|
&self,
|
||||||
|
request: GetTableTagVersionRequest,
|
||||||
|
) -> lance_core::Result<GetTableTagVersionResponse> {
|
||||||
|
call_py_method(self.py_namespace.clone(), "get_table_tag_version", request).await
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert Python dict to HashMap<String, String>
|
||||||
|
#[allow(dead_code)]
|
||||||
|
fn dict_to_hashmap(dict: &Bound<'_, PyDict>) -> PyResult<HashMap<String, String>> {
|
||||||
|
let mut map = HashMap::new();
|
||||||
|
for (key, value) in dict.iter() {
|
||||||
|
let key_str: String = key.extract()?;
|
||||||
|
let value_str: String = value.extract()?;
|
||||||
|
map.insert(key_str, value_str);
|
||||||
|
}
|
||||||
|
Ok(map)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extract an Arc<dyn LanceNamespace> from a Python namespace object.
|
||||||
|
///
|
||||||
|
/// This function wraps any Python namespace object with PyLanceNamespace.
|
||||||
|
/// The PyLanceNamespace wrapper uses DictWithModelDump to pass requests,
|
||||||
|
/// which works with both:
|
||||||
|
/// - Native namespaces (DirectoryNamespace, RestNamespace) that use depythonize (expects dict)
|
||||||
|
/// - Custom Python implementations that call .model_dump() on the request
|
||||||
|
pub fn extract_namespace_arc(
|
||||||
|
py: Python<'_>,
|
||||||
|
ns: Py<PyAny>,
|
||||||
|
) -> PyResult<Arc<dyn LanceNamespaceTrait>> {
|
||||||
|
let ns_ref = ns.bind(py);
|
||||||
|
PyLanceNamespace::create_arc(py, ns_ref)
|
||||||
|
}
|
||||||
@@ -16,10 +16,10 @@ use lancedb::{
|
|||||||
query::Select,
|
query::Select,
|
||||||
};
|
};
|
||||||
use pyo3::{
|
use pyo3::{
|
||||||
|
Bound, PyAny, PyRef, PyRefMut, PyResult, Python,
|
||||||
exceptions::PyRuntimeError,
|
exceptions::PyRuntimeError,
|
||||||
pyclass, pymethods,
|
pyclass, pymethods,
|
||||||
types::{PyAnyMethods, PyDict, PyDictMethods, PyType},
|
types::{PyAnyMethods, PyDict, PyDictMethods, PyType},
|
||||||
Bound, PyAny, PyRef, PyRefMut, PyResult, Python,
|
|
||||||
};
|
};
|
||||||
use pyo3_async_runtimes::tokio::future_into_py;
|
use pyo3_async_runtimes::tokio::future_into_py;
|
||||||
|
|
||||||
|
|||||||
@@ -4,9 +4,9 @@
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use arrow::array::make_array;
|
|
||||||
use arrow::array::Array;
|
use arrow::array::Array;
|
||||||
use arrow::array::ArrayData;
|
use arrow::array::ArrayData;
|
||||||
|
use arrow::array::make_array;
|
||||||
use arrow::pyarrow::FromPyArrow;
|
use arrow::pyarrow::FromPyArrow;
|
||||||
use arrow::pyarrow::IntoPyArrow;
|
use arrow::pyarrow::IntoPyArrow;
|
||||||
use arrow::pyarrow::ToPyArrow;
|
use arrow::pyarrow::ToPyArrow;
|
||||||
@@ -22,23 +22,23 @@ use lancedb::query::{
|
|||||||
VectorQuery as LanceDbVectorQuery,
|
VectorQuery as LanceDbVectorQuery,
|
||||||
};
|
};
|
||||||
use lancedb::table::AnyQuery;
|
use lancedb::table::AnyQuery;
|
||||||
use pyo3::prelude::{PyAnyMethods, PyDictMethods};
|
|
||||||
use pyo3::pyfunction;
|
|
||||||
use pyo3::pymethods;
|
|
||||||
use pyo3::types::PyList;
|
|
||||||
use pyo3::types::{PyDict, PyString};
|
|
||||||
use pyo3::Bound;
|
use pyo3::Bound;
|
||||||
use pyo3::IntoPyObject;
|
use pyo3::IntoPyObject;
|
||||||
use pyo3::PyAny;
|
use pyo3::PyAny;
|
||||||
use pyo3::PyRef;
|
use pyo3::PyRef;
|
||||||
use pyo3::PyResult;
|
use pyo3::PyResult;
|
||||||
use pyo3::Python;
|
use pyo3::Python;
|
||||||
use pyo3::{exceptions::PyRuntimeError, FromPyObject};
|
use pyo3::prelude::{PyAnyMethods, PyDictMethods};
|
||||||
|
use pyo3::pyfunction;
|
||||||
|
use pyo3::pymethods;
|
||||||
|
use pyo3::types::PyList;
|
||||||
|
use pyo3::types::{PyDict, PyString};
|
||||||
|
use pyo3::{FromPyObject, exceptions::PyRuntimeError};
|
||||||
|
use pyo3::{PyErr, pyclass};
|
||||||
use pyo3::{
|
use pyo3::{
|
||||||
exceptions::{PyNotImplementedError, PyValueError},
|
exceptions::{PyNotImplementedError, PyValueError},
|
||||||
intern,
|
intern,
|
||||||
};
|
};
|
||||||
use pyo3::{pyclass, PyErr};
|
|
||||||
use pyo3_async_runtimes::tokio::future_into_py;
|
use pyo3_async_runtimes::tokio::future_into_py;
|
||||||
|
|
||||||
use crate::util::parse_distance_type;
|
use crate::util::parse_distance_type;
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use lancedb::{ObjectStoreRegistry, Session as LanceSession};
|
use lancedb::{ObjectStoreRegistry, Session as LanceSession};
|
||||||
use pyo3::{pyclass, pymethods, PyResult};
|
use pyo3::{PyResult, pyclass, pymethods};
|
||||||
|
|
||||||
/// A session for managing caches and object stores across LanceDB operations.
|
/// A session for managing caches and object stores across LanceDB operations.
|
||||||
///
|
///
|
||||||
|
|||||||
@@ -66,13 +66,10 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
|
|||||||
.inner
|
.inner
|
||||||
.bind(py)
|
.bind(py)
|
||||||
.call_method0("fetch_storage_options")
|
.call_method0("fetch_storage_options")
|
||||||
.map_err(|e| lance_core::Error::IO {
|
.map_err(|e| lance_core::Error::io_source(Box::new(std::io::Error::other(format!(
|
||||||
source: Box::new(std::io::Error::other(format!(
|
"Failed to call fetch_storage_options: {}",
|
||||||
"Failed to call fetch_storage_options: {}",
|
e
|
||||||
e
|
)))))?;
|
||||||
))),
|
|
||||||
location: snafu::location!(),
|
|
||||||
})?;
|
|
||||||
|
|
||||||
// If result is None, return None
|
// If result is None, return None
|
||||||
if result.is_none() {
|
if result.is_none() {
|
||||||
@@ -81,26 +78,19 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
|
|||||||
|
|
||||||
// Extract the result dict - should be a flat Map<String, String>
|
// Extract the result dict - should be a flat Map<String, String>
|
||||||
let result_dict = result.downcast::<PyDict>().map_err(|_| {
|
let result_dict = result.downcast::<PyDict>().map_err(|_| {
|
||||||
lance_core::Error::InvalidInput {
|
lance_core::Error::invalid_input(
|
||||||
source: "fetch_storage_options() must return None or a dict of string key-value pairs".into(),
|
"fetch_storage_options() must return a dict of string key-value pairs or None",
|
||||||
location: snafu::location!(),
|
)
|
||||||
}
|
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
// Convert all entries to HashMap<String, String>
|
// Convert all entries to HashMap<String, String>
|
||||||
let mut storage_options = HashMap::new();
|
let mut storage_options = HashMap::new();
|
||||||
for (key, value) in result_dict.iter() {
|
for (key, value) in result_dict.iter() {
|
||||||
let key_str: String = key.extract().map_err(|e| {
|
let key_str: String = key.extract().map_err(|e| {
|
||||||
lance_core::Error::InvalidInput {
|
lance_core::Error::invalid_input(format!("Storage option key must be a string: {}", e))
|
||||||
source: format!("Storage option key must be a string: {}", e).into(),
|
|
||||||
location: snafu::location!(),
|
|
||||||
}
|
|
||||||
})?;
|
})?;
|
||||||
let value_str: String = value.extract().map_err(|e| {
|
let value_str: String = value.extract().map_err(|e| {
|
||||||
lance_core::Error::InvalidInput {
|
lance_core::Error::invalid_input(format!("Storage option value must be a string: {}", e))
|
||||||
source: format!("Storage option value must be a string: {}", e).into(),
|
|
||||||
location: snafu::location!(),
|
|
||||||
}
|
|
||||||
})?;
|
})?;
|
||||||
storage_options.insert(key_str, value_str);
|
storage_options.insert(key_str, value_str);
|
||||||
}
|
}
|
||||||
@@ -109,13 +99,10 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
.map_err(|e| lance_core::Error::IO {
|
.map_err(|e| lance_core::Error::io_source(Box::new(std::io::Error::other(format!(
|
||||||
source: Box::new(std::io::Error::other(format!(
|
"Task join error: {}",
|
||||||
"Task join error: {}",
|
e
|
||||||
e
|
)))))?
|
||||||
))),
|
|
||||||
location: snafu::location!(),
|
|
||||||
})?
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn provider_id(&self) -> String {
|
fn provider_id(&self) -> String {
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ use std::{collections::HashMap, sync::Arc};
|
|||||||
use crate::{
|
use crate::{
|
||||||
connection::Connection,
|
connection::Connection,
|
||||||
error::PythonErrorExt,
|
error::PythonErrorExt,
|
||||||
index::{extract_index_params, IndexConfig},
|
index::{IndexConfig, extract_index_params},
|
||||||
query::{Query, TakeQuery},
|
query::{Query, TakeQuery},
|
||||||
table::scannable::PyScannable,
|
table::scannable::PyScannable,
|
||||||
};
|
};
|
||||||
@@ -19,10 +19,10 @@ use lancedb::table::{
|
|||||||
Table as LanceDbTable,
|
Table as LanceDbTable,
|
||||||
};
|
};
|
||||||
use pyo3::{
|
use pyo3::{
|
||||||
|
Bound, FromPyObject, PyAny, PyRef, PyResult, Python,
|
||||||
exceptions::{PyKeyError, PyRuntimeError, PyValueError},
|
exceptions::{PyKeyError, PyRuntimeError, PyValueError},
|
||||||
pyclass, pymethods,
|
pyclass, pymethods,
|
||||||
types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods},
|
types::{IntoPyDict, PyAnyMethods, PyDict, PyDictMethods},
|
||||||
Bound, FromPyObject, PyAny, PyRef, PyResult, Python,
|
|
||||||
};
|
};
|
||||||
use pyo3_async_runtimes::tokio::future_into_py;
|
use pyo3_async_runtimes::tokio::future_into_py;
|
||||||
|
|
||||||
@@ -112,19 +112,24 @@ impl From<lancedb::table::AddResult> for AddResult {
|
|||||||
#[pyclass(get_all)]
|
#[pyclass(get_all)]
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct DeleteResult {
|
pub struct DeleteResult {
|
||||||
|
pub num_deleted_rows: u64,
|
||||||
pub version: u64,
|
pub version: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pymethods]
|
#[pymethods]
|
||||||
impl DeleteResult {
|
impl DeleteResult {
|
||||||
pub fn __repr__(&self) -> String {
|
pub fn __repr__(&self) -> String {
|
||||||
format!("DeleteResult(version={})", self.version)
|
format!(
|
||||||
|
"DeleteResult(num_deleted_rows={}, version={})",
|
||||||
|
self.num_deleted_rows, self.version
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<lancedb::table::DeleteResult> for DeleteResult {
|
impl From<lancedb::table::DeleteResult> for DeleteResult {
|
||||||
fn from(result: lancedb::table::DeleteResult) -> Self {
|
fn from(result: lancedb::table::DeleteResult) -> Self {
|
||||||
Self {
|
Self {
|
||||||
|
num_deleted_rows: result.num_deleted_rows,
|
||||||
version: result.version,
|
version: result.version,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -537,7 +542,7 @@ impl Table {
|
|||||||
let inner = self_.inner_ref()?.clone();
|
let inner = self_.inner_ref()?.clone();
|
||||||
future_into_py(self_.py(), async move {
|
future_into_py(self_.py(), async move {
|
||||||
let versions = inner.list_versions().await.infer_error()?;
|
let versions = inner.list_versions().await.infer_error()?;
|
||||||
let versions_as_dict = Python::attach(|py| {
|
Python::attach(|py| {
|
||||||
versions
|
versions
|
||||||
.iter()
|
.iter()
|
||||||
.map(|v| {
|
.map(|v| {
|
||||||
@@ -554,9 +559,7 @@ impl Table {
|
|||||||
Ok(dict.unbind())
|
Ok(dict.unbind())
|
||||||
})
|
})
|
||||||
.collect::<PyResult<Vec<_>>>()
|
.collect::<PyResult<Vec<_>>>()
|
||||||
});
|
})
|
||||||
|
|
||||||
versions_as_dict
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -10,11 +10,11 @@ use arrow::{
|
|||||||
};
|
};
|
||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
use lancedb::{
|
use lancedb::{
|
||||||
|
Error,
|
||||||
arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
|
arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
|
||||||
data::scannable::Scannable,
|
data::scannable::Scannable,
|
||||||
Error,
|
|
||||||
};
|
};
|
||||||
use pyo3::{types::PyAnyMethods, FromPyObject, Py, PyAny, Python};
|
use pyo3::{FromPyObject, Py, PyAny, Python, types::PyAnyMethods};
|
||||||
|
|
||||||
/// Adapter that implements Scannable for a Python reader factory callable.
|
/// Adapter that implements Scannable for a Python reader factory callable.
|
||||||
///
|
///
|
||||||
@@ -99,15 +99,15 @@ impl Scannable for PyScannable {
|
|||||||
// Channel closed. Check if the task panicked — a panic
|
// Channel closed. Check if the task panicked — a panic
|
||||||
// drops the sender without sending an error, so without
|
// drops the sender without sending an error, so without
|
||||||
// this check we'd silently return a truncated stream.
|
// this check we'd silently return a truncated stream.
|
||||||
if let Some(handle) = join_handle {
|
if let Some(handle) = join_handle
|
||||||
if let Err(join_err) = handle.await {
|
&& let Err(join_err) = handle.await
|
||||||
return Some((
|
{
|
||||||
Err(Error::Runtime {
|
return Some((
|
||||||
message: format!("Reader task panicked: {}", join_err),
|
Err(Error::Runtime {
|
||||||
}),
|
message: format!("Reader task panicked: {}", join_err),
|
||||||
(rx, None),
|
}),
|
||||||
));
|
(rx, None),
|
||||||
}
|
));
|
||||||
}
|
}
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,8 +5,9 @@ use std::sync::Mutex;
|
|||||||
|
|
||||||
use lancedb::DistanceType;
|
use lancedb::DistanceType;
|
||||||
use pyo3::{
|
use pyo3::{
|
||||||
|
PyResult,
|
||||||
exceptions::{PyRuntimeError, PyValueError},
|
exceptions::{PyRuntimeError, PyValueError},
|
||||||
pyfunction, PyResult,
|
pyfunction,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// A wrapper around a rust builder
|
/// A wrapper around a rust builder
|
||||||
|
|||||||
4
python/uv.lock
generated
4
python/uv.lock
generated
@@ -2006,7 +2006,7 @@ requires-dist = [
|
|||||||
{ name = "botocore", marker = "extra == 'embeddings'", specifier = ">=1.31.57" },
|
{ name = "botocore", marker = "extra == 'embeddings'", specifier = ">=1.31.57" },
|
||||||
{ name = "cohere", marker = "extra == 'embeddings'" },
|
{ name = "cohere", marker = "extra == 'embeddings'" },
|
||||||
{ name = "colpali-engine", marker = "extra == 'embeddings'", specifier = ">=0.3.10" },
|
{ name = "colpali-engine", marker = "extra == 'embeddings'", specifier = ">=0.3.10" },
|
||||||
{ name = "datafusion", marker = "extra == 'tests'" },
|
{ name = "datafusion", marker = "extra == 'tests'", specifier = "<52" },
|
||||||
{ name = "deprecation" },
|
{ name = "deprecation" },
|
||||||
{ name = "duckdb", marker = "extra == 'tests'" },
|
{ name = "duckdb", marker = "extra == 'tests'" },
|
||||||
{ name = "google-generativeai", marker = "extra == 'embeddings'" },
|
{ name = "google-generativeai", marker = "extra == 'embeddings'" },
|
||||||
@@ -2035,7 +2035,7 @@ requires-dist = [
|
|||||||
{ name = "pyarrow-stubs", marker = "extra == 'tests'" },
|
{ name = "pyarrow-stubs", marker = "extra == 'tests'" },
|
||||||
{ name = "pydantic", specifier = ">=1.10" },
|
{ name = "pydantic", specifier = ">=1.10" },
|
||||||
{ name = "pylance", marker = "extra == 'pylance'", specifier = ">=1.0.0b14" },
|
{ name = "pylance", marker = "extra == 'pylance'", specifier = ">=1.0.0b14" },
|
||||||
{ name = "pylance", marker = "extra == 'tests'", specifier = ">=1.0.0b14" },
|
{ name = "pylance", marker = "extra == 'tests'", specifier = ">=1.0.0b14,<3.0.0" },
|
||||||
{ name = "pyright", marker = "extra == 'dev'" },
|
{ name = "pyright", marker = "extra == 'dev'" },
|
||||||
{ name = "pytest", marker = "extra == 'tests'" },
|
{ name = "pytest", marker = "extra == 'tests'" },
|
||||||
{ name = "pytest-asyncio", marker = "extra == 'tests'" },
|
{ name = "pytest-asyncio", marker = "extra == 'tests'" },
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb"
|
name = "lancedb"
|
||||||
version = "0.27.0-beta.1"
|
version = "0.27.0-beta.4"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
@@ -25,9 +25,9 @@ datafusion-catalog.workspace = true
|
|||||||
datafusion-common.workspace = true
|
datafusion-common.workspace = true
|
||||||
datafusion-execution.workspace = true
|
datafusion-execution.workspace = true
|
||||||
datafusion-expr.workspace = true
|
datafusion-expr.workspace = true
|
||||||
datafusion-functions = "51.0"
|
datafusion-functions.workspace = true
|
||||||
datafusion-physical-expr.workspace = true
|
datafusion-physical-expr.workspace = true
|
||||||
datafusion-sql = "51.0"
|
datafusion-sql.workspace = true
|
||||||
datafusion-physical-plan.workspace = true
|
datafusion-physical-plan.workspace = true
|
||||||
datafusion.workspace = true
|
datafusion.workspace = true
|
||||||
object_store = { workspace = true }
|
object_store = { workspace = true }
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
# LanceDB Rust
|
# LanceDB Rust SDK
|
||||||
|
|
||||||
<a href="https://crates.io/crates/vectordb"></a>
|
<a href="https://crates.io/crates/vectordb"></a>
|
||||||
<a href="https://docs.rs/vectordb/latest/vectordb/"></a>
|
<a href="https://docs.rs/vectordb/latest/vectordb/"></a>
|
||||||
|
|||||||
@@ -9,10 +9,9 @@ use aws_config::Region;
|
|||||||
use aws_sdk_bedrockruntime::Client;
|
use aws_sdk_bedrockruntime::Client;
|
||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
use lancedb::{
|
use lancedb::{
|
||||||
connect,
|
Result, connect,
|
||||||
embeddings::{bedrock::BedrockEmbeddingFunction, EmbeddingDefinition, EmbeddingFunction},
|
embeddings::{EmbeddingDefinition, EmbeddingFunction, bedrock::BedrockEmbeddingFunction},
|
||||||
query::{ExecutableQuery, QueryBase},
|
query::{ExecutableQuery, QueryBase},
|
||||||
Result,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
|
|||||||
@@ -10,10 +10,10 @@ use futures::TryStreamExt;
|
|||||||
use lance_index::scalar::FullTextSearchQuery;
|
use lance_index::scalar::FullTextSearchQuery;
|
||||||
use lancedb::connection::Connection;
|
use lancedb::connection::Connection;
|
||||||
|
|
||||||
use lancedb::index::scalar::FtsIndexBuilder;
|
|
||||||
use lancedb::index::Index;
|
use lancedb::index::Index;
|
||||||
|
use lancedb::index::scalar::FtsIndexBuilder;
|
||||||
use lancedb::query::{ExecutableQuery, QueryBase};
|
use lancedb::query::{ExecutableQuery, QueryBase};
|
||||||
use lancedb::{connect, Result, Table};
|
use lancedb::{Result, Table, connect};
|
||||||
use rand::random;
|
use rand::random;
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
@@ -46,19 +46,21 @@ fn create_some_records() -> Result<Box<dyn arrow_array::RecordBatchReader + Send
|
|||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
let n_terms = 3;
|
let n_terms = 3;
|
||||||
let batches = RecordBatchIterator::new(
|
let batches = RecordBatchIterator::new(
|
||||||
vec![RecordBatch::try_new(
|
vec![
|
||||||
schema.clone(),
|
RecordBatch::try_new(
|
||||||
vec![
|
schema.clone(),
|
||||||
Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
|
vec![
|
||||||
Arc::new(StringArray::from_iter_values((0..TOTAL).map(|_| {
|
Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
|
||||||
(0..n_terms)
|
Arc::new(StringArray::from_iter_values((0..TOTAL).map(|_| {
|
||||||
.map(|_| words[random::<u32>() as usize % words.len()])
|
(0..n_terms)
|
||||||
.collect::<Vec<_>>()
|
.map(|_| words[random::<u32>() as usize % words.len()])
|
||||||
.join(" ")
|
.collect::<Vec<_>>()
|
||||||
}))),
|
.join(" ")
|
||||||
],
|
}))),
|
||||||
)
|
],
|
||||||
.unwrap()]
|
)
|
||||||
|
.unwrap(),
|
||||||
|
]
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(Ok),
|
.map(Ok),
|
||||||
schema.clone(),
|
schema.clone(),
|
||||||
|
|||||||
@@ -5,16 +5,15 @@ use arrow_array::{RecordBatch, StringArray};
|
|||||||
use arrow_schema::{DataType, Field, Schema};
|
use arrow_schema::{DataType, Field, Schema};
|
||||||
use futures::TryStreamExt;
|
use futures::TryStreamExt;
|
||||||
use lance_index::scalar::FullTextSearchQuery;
|
use lance_index::scalar::FullTextSearchQuery;
|
||||||
use lancedb::index::scalar::FtsIndexBuilder;
|
|
||||||
use lancedb::index::Index;
|
use lancedb::index::Index;
|
||||||
|
use lancedb::index::scalar::FtsIndexBuilder;
|
||||||
use lancedb::{
|
use lancedb::{
|
||||||
connect,
|
Result, Table, connect,
|
||||||
embeddings::{
|
embeddings::{
|
||||||
sentence_transformers::SentenceTransformersEmbeddings, EmbeddingDefinition,
|
EmbeddingDefinition, EmbeddingFunction,
|
||||||
EmbeddingFunction,
|
sentence_transformers::SentenceTransformersEmbeddings,
|
||||||
},
|
},
|
||||||
query::{QueryBase, QueryExecutionOptions},
|
query::{QueryBase, QueryExecutionOptions},
|
||||||
Result, Table,
|
|
||||||
};
|
};
|
||||||
use std::{iter::once, sync::Arc};
|
use std::{iter::once, sync::Arc};
|
||||||
|
|
||||||
|
|||||||
@@ -14,10 +14,10 @@ use arrow_schema::{DataType, Field, Schema};
|
|||||||
use futures::TryStreamExt;
|
use futures::TryStreamExt;
|
||||||
use lancedb::connection::Connection;
|
use lancedb::connection::Connection;
|
||||||
|
|
||||||
use lancedb::index::vector::IvfPqIndexBuilder;
|
|
||||||
use lancedb::index::Index;
|
use lancedb::index::Index;
|
||||||
|
use lancedb::index::vector::IvfPqIndexBuilder;
|
||||||
use lancedb::query::{ExecutableQuery, QueryBase};
|
use lancedb::query::{ExecutableQuery, QueryBase};
|
||||||
use lancedb::{connect, DistanceType, Result, Table};
|
use lancedb::{DistanceType, Result, Table, connect};
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> Result<()> {
|
async fn main() -> Result<()> {
|
||||||
@@ -51,19 +51,21 @@ fn create_some_records() -> Result<Box<dyn arrow_array::RecordBatchReader + Send
|
|||||||
|
|
||||||
// Create a RecordBatch stream.
|
// Create a RecordBatch stream.
|
||||||
let batches = RecordBatchIterator::new(
|
let batches = RecordBatchIterator::new(
|
||||||
vec![RecordBatch::try_new(
|
vec![
|
||||||
schema.clone(),
|
RecordBatch::try_new(
|
||||||
vec![
|
schema.clone(),
|
||||||
Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
|
vec![
|
||||||
Arc::new(
|
Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
|
||||||
FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
|
Arc::new(
|
||||||
(0..TOTAL).map(|_| Some(vec![Some(1.0); DIM])),
|
FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
|
||||||
DIM as i32,
|
(0..TOTAL).map(|_| Some(vec![Some(1.0); DIM])),
|
||||||
|
DIM as i32,
|
||||||
|
),
|
||||||
),
|
),
|
||||||
),
|
],
|
||||||
],
|
)
|
||||||
)
|
.unwrap(),
|
||||||
.unwrap()]
|
]
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(Ok),
|
.map(Ok),
|
||||||
schema.clone(),
|
schema.clone(),
|
||||||
|
|||||||
@@ -8,10 +8,9 @@ use std::{iter::once, sync::Arc};
|
|||||||
use arrow_array::{RecordBatch, StringArray};
|
use arrow_array::{RecordBatch, StringArray};
|
||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
use lancedb::{
|
use lancedb::{
|
||||||
connect,
|
Result, connect,
|
||||||
embeddings::{openai::OpenAIEmbeddingFunction, EmbeddingDefinition, EmbeddingFunction},
|
embeddings::{EmbeddingDefinition, EmbeddingFunction, openai::OpenAIEmbeddingFunction},
|
||||||
query::{ExecutableQuery, QueryBase},
|
query::{ExecutableQuery, QueryBase},
|
||||||
Result,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// --8<-- [end:imports]
|
// --8<-- [end:imports]
|
||||||
|
|||||||
@@ -7,13 +7,12 @@ use arrow_array::{RecordBatch, StringArray};
|
|||||||
use arrow_schema::{DataType, Field, Schema};
|
use arrow_schema::{DataType, Field, Schema};
|
||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
use lancedb::{
|
use lancedb::{
|
||||||
connect,
|
Result, connect,
|
||||||
embeddings::{
|
embeddings::{
|
||||||
sentence_transformers::SentenceTransformersEmbeddings, EmbeddingDefinition,
|
EmbeddingDefinition, EmbeddingFunction,
|
||||||
EmbeddingFunction,
|
sentence_transformers::SentenceTransformersEmbeddings,
|
||||||
},
|
},
|
||||||
query::{ExecutableQuery, QueryBase},
|
query::{ExecutableQuery, QueryBase},
|
||||||
Result,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ use futures::TryStreamExt;
|
|||||||
use lancedb::connection::Connection;
|
use lancedb::connection::Connection;
|
||||||
use lancedb::index::Index;
|
use lancedb::index::Index;
|
||||||
use lancedb::query::{ExecutableQuery, QueryBase};
|
use lancedb::query::{ExecutableQuery, QueryBase};
|
||||||
use lancedb::{connect, Result, Table as LanceDbTable};
|
use lancedb::{Result, Table as LanceDbTable, connect};
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> Result<()> {
|
async fn main() -> Result<()> {
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ use lance_datagen::{BatchCount, BatchGeneratorBuilder, RowCount};
|
|||||||
#[cfg(feature = "polars")]
|
#[cfg(feature = "polars")]
|
||||||
use {crate::polars_arrow_convertors, polars::frame::ArrowChunk, polars::prelude::DataFrame};
|
use {crate::polars_arrow_convertors, polars::frame::ArrowChunk, polars::prelude::DataFrame};
|
||||||
|
|
||||||
use crate::{error::Result, Error};
|
use crate::{Error, error::Result};
|
||||||
|
|
||||||
/// An iterator of batches that also has a schema
|
/// An iterator of batches that also has a schema
|
||||||
pub trait RecordBatchReader: Iterator<Item = Result<arrow_array::RecordBatch>> {
|
pub trait RecordBatchReader: Iterator<Item = Result<arrow_array::RecordBatch>> {
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ use lance_namespace::models::{
|
|||||||
#[cfg(feature = "aws")]
|
#[cfg(feature = "aws")]
|
||||||
use object_store::aws::AwsCredential;
|
use object_store::aws::AwsCredential;
|
||||||
|
|
||||||
|
use crate::Table;
|
||||||
use crate::connection::create_table::CreateTableBuilder;
|
use crate::connection::create_table::CreateTableBuilder;
|
||||||
use crate::data::scannable::Scannable;
|
use crate::data::scannable::Scannable;
|
||||||
use crate::database::listing::ListingDatabase;
|
use crate::database::listing::ListingDatabase;
|
||||||
@@ -31,7 +32,6 @@ use crate::remote::{
|
|||||||
client::ClientConfig,
|
client::ClientConfig,
|
||||||
db::{OPT_REMOTE_API_KEY, OPT_REMOTE_HOST_OVERRIDE, OPT_REMOTE_REGION},
|
db::{OPT_REMOTE_API_KEY, OPT_REMOTE_HOST_OVERRIDE, OPT_REMOTE_REGION},
|
||||||
};
|
};
|
||||||
use crate::Table;
|
|
||||||
use lance::io::ObjectStoreParams;
|
use lance::io::ObjectStoreParams;
|
||||||
pub use lance_encoding::version::LanceFileVersion;
|
pub use lance_encoding::version::LanceFileVersion;
|
||||||
#[cfg(feature = "remote")]
|
#[cfg(feature = "remote")]
|
||||||
@@ -136,6 +136,7 @@ impl OpenTableBuilder {
|
|||||||
lance_read_params: None,
|
lance_read_params: None,
|
||||||
location: None,
|
location: None,
|
||||||
namespace_client: None,
|
namespace_client: None,
|
||||||
|
managed_versioning: None,
|
||||||
},
|
},
|
||||||
embedding_registry,
|
embedding_registry,
|
||||||
}
|
}
|
||||||
@@ -235,6 +236,29 @@ impl OpenTableBuilder {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set a namespace client for managed versioning support.
|
||||||
|
///
|
||||||
|
/// When a namespace client is provided and the table has `managed_versioning` enabled,
|
||||||
|
/// the table will use the namespace's commit handler to notify the namespace of
|
||||||
|
/// version changes. This enables features like event emission for table modifications.
|
||||||
|
pub fn namespace_client(mut self, client: Arc<dyn lance_namespace::LanceNamespace>) -> Self {
|
||||||
|
self.request.namespace_client = Some(client);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set whether managed versioning is enabled for this table.
|
||||||
|
///
|
||||||
|
/// When set to `Some(true)`, the table will use namespace-managed commits.
|
||||||
|
/// When set to `Some(false)`, the table will use local commits even if namespace_client is set.
|
||||||
|
/// When set to `None` (default), the value will be fetched from the namespace if namespace_client is set.
|
||||||
|
///
|
||||||
|
/// This is typically set when the caller has already queried the namespace and knows the
|
||||||
|
/// managed_versioning status, avoiding a redundant describe_table call.
|
||||||
|
pub fn managed_versioning(mut self, enabled: bool) -> Self {
|
||||||
|
self.request.managed_versioning = Some(enabled);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
/// Open the table
|
/// Open the table
|
||||||
pub async fn execute(self) -> Result<Table> {
|
pub async fn execute(self) -> Result<Table> {
|
||||||
let table = self.parent.open_table(self.request).await?;
|
let table = self.parent.open_table(self.request).await?;
|
||||||
@@ -294,6 +318,12 @@ impl CloneTableBuilder {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set a namespace client for managed versioning support.
|
||||||
|
pub fn namespace_client(mut self, client: Arc<dyn lance_namespace::LanceNamespace>) -> Self {
|
||||||
|
self.request.namespace_client = Some(client);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
/// Execute the clone operation
|
/// Execute the clone operation
|
||||||
pub async fn execute(self) -> Result<Table> {
|
pub async fn execute(self) -> Result<Table> {
|
||||||
let parent = self.parent.clone();
|
let parent = self.parent.clone();
|
||||||
@@ -566,8 +596,11 @@ pub struct ConnectBuilder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "remote")]
|
#[cfg(feature = "remote")]
|
||||||
const ENV_VARS_TO_STORAGE_OPTS: [(&str, &str); 1] =
|
const ENV_VARS_TO_STORAGE_OPTS: [(&str, &str); 3] = [
|
||||||
[("AZURE_STORAGE_ACCOUNT_NAME", "azure_storage_account_name")];
|
("AZURE_STORAGE_ACCOUNT_NAME", "azure_storage_account_name"),
|
||||||
|
("AZURE_CLIENT_ID", "azure_client_id"),
|
||||||
|
("AZURE_TENANT_ID", "azure_tenant_id"),
|
||||||
|
];
|
||||||
|
|
||||||
impl ConnectBuilder {
|
impl ConnectBuilder {
|
||||||
/// Create a new [`ConnectOptions`] with the given database URI.
|
/// Create a new [`ConnectOptions`] with the given database URI.
|
||||||
@@ -758,10 +791,10 @@ impl ConnectBuilder {
|
|||||||
options: &mut HashMap<String, String>,
|
options: &mut HashMap<String, String>,
|
||||||
) {
|
) {
|
||||||
for (env_key, opt_key) in env_var_to_remote_storage_option {
|
for (env_key, opt_key) in env_var_to_remote_storage_option {
|
||||||
if let Ok(env_value) = std::env::var(env_key) {
|
if let Ok(env_value) = std::env::var(env_key)
|
||||||
if !options.contains_key(*opt_key) {
|
&& !options.contains_key(*opt_key)
|
||||||
options.insert((*opt_key).to_string(), env_value);
|
{
|
||||||
}
|
options.insert((*opt_key).to_string(), env_value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1011,14 +1044,13 @@ mod tests {
|
|||||||
#[cfg(feature = "remote")]
|
#[cfg(feature = "remote")]
|
||||||
#[test]
|
#[test]
|
||||||
fn test_apply_env_defaults() {
|
fn test_apply_env_defaults() {
|
||||||
let env_key = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_KEY";
|
let env_key = "PATH";
|
||||||
let env_val = "TEST_APPLY_ENV_DEFAULTS_ENVIRONMENT_VARIABLE_ENV_VAL";
|
let env_val = std::env::var(env_key).expect("PATH should be set in test environment");
|
||||||
let opts_key = "test_apply_env_defaults_environment_variable_opts_key";
|
let opts_key = "test_apply_env_defaults_environment_variable_opts_key";
|
||||||
std::env::set_var(env_key, env_val);
|
|
||||||
|
|
||||||
let mut options = HashMap::new();
|
let mut options = HashMap::new();
|
||||||
ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options);
|
ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options);
|
||||||
assert_eq!(Some(&env_val.to_string()), options.get(opts_key));
|
assert_eq!(Some(&env_val), options.get(opts_key));
|
||||||
|
|
||||||
options.insert(opts_key.to_string(), "EXPLICIT-VALUE".to_string());
|
options.insert(opts_key.to_string(), "EXPLICIT-VALUE".to_string());
|
||||||
ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options);
|
ConnectBuilder::apply_env_defaults(&[(env_key, opts_key)], &mut options);
|
||||||
|
|||||||
@@ -6,12 +6,12 @@ use std::sync::Arc;
|
|||||||
use lance_io::object_store::StorageOptionsProvider;
|
use lance_io::object_store::StorageOptionsProvider;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
|
Error, Result, Table,
|
||||||
connection::{merge_storage_options, set_storage_options_provider},
|
connection::{merge_storage_options, set_storage_options_provider},
|
||||||
data::scannable::{Scannable, WithEmbeddingsScannable},
|
data::scannable::{Scannable, WithEmbeddingsScannable},
|
||||||
database::{CreateTableMode, CreateTableRequest, Database},
|
database::{CreateTableMode, CreateTableRequest, Database},
|
||||||
embeddings::{EmbeddingDefinition, EmbeddingFunction, EmbeddingRegistry},
|
embeddings::{EmbeddingDefinition, EmbeddingFunction, EmbeddingRegistry},
|
||||||
table::WriteOptions,
|
table::WriteOptions,
|
||||||
Error, Result, Table,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
pub struct CreateTableBuilder {
|
pub struct CreateTableBuilder {
|
||||||
@@ -167,7 +167,7 @@ impl CreateTableBuilder {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use arrow_array::{
|
use arrow_array::{
|
||||||
record_batch, Array, FixedSizeListArray, Float32Array, RecordBatch, RecordBatchIterator,
|
Array, FixedSizeListArray, Float32Array, RecordBatch, RecordBatchIterator, record_batch,
|
||||||
};
|
};
|
||||||
use arrow_schema::{ArrowError, DataType, Field, Schema};
|
use arrow_schema::{ArrowError, DataType, Field, Schema};
|
||||||
use futures::TryStreamExt;
|
use futures::TryStreamExt;
|
||||||
@@ -380,11 +380,12 @@ mod tests {
|
|||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let other_schema = Arc::new(Schema::new(vec![Field::new("y", DataType::Int32, false)]));
|
let other_schema = Arc::new(Schema::new(vec![Field::new("y", DataType::Int32, false)]));
|
||||||
assert!(db
|
assert!(
|
||||||
.create_empty_table("test", other_schema.clone())
|
db.create_empty_table("test", other_schema.clone())
|
||||||
.execute()
|
.execute()
|
||||||
.await
|
.await
|
||||||
.is_err()); // TODO: assert what this error is
|
.is_err()
|
||||||
|
); // TODO: assert what this error is
|
||||||
let overwritten = db
|
let overwritten = db
|
||||||
.create_empty_table("test", other_schema.clone())
|
.create_empty_table("test", other_schema.clone())
|
||||||
.mode(CreateTableMode::Overwrite)
|
.mode(CreateTableMode::Overwrite)
|
||||||
|
|||||||
@@ -5,9 +5,9 @@ use std::collections::HashMap;
|
|||||||
|
|
||||||
use arrow::compute::kernels::{aggregate::bool_and, length::length};
|
use arrow::compute::kernels::{aggregate::bool_and, length::length};
|
||||||
use arrow_array::{
|
use arrow_array::{
|
||||||
|
Array, GenericListArray, OffsetSizeTrait, PrimitiveArray, RecordBatchReader,
|
||||||
cast::AsArray,
|
cast::AsArray,
|
||||||
types::{ArrowPrimitiveType, Int32Type, Int64Type},
|
types::{ArrowPrimitiveType, Int32Type, Int64Type},
|
||||||
Array, GenericListArray, OffsetSizeTrait, PrimitiveArray, RecordBatchReader,
|
|
||||||
};
|
};
|
||||||
use arrow_ord::cmp::eq;
|
use arrow_ord::cmp::eq;
|
||||||
use arrow_schema::DataType;
|
use arrow_schema::DataType;
|
||||||
@@ -78,7 +78,7 @@ pub fn infer_vector_columns(
|
|||||||
_ => {
|
_ => {
|
||||||
return Err(Error::Schema {
|
return Err(Error::Schema {
|
||||||
message: format!("Column {} is not a list", col_name),
|
message: format!("Column {} is not a list", col_name),
|
||||||
})
|
});
|
||||||
}
|
}
|
||||||
} {
|
} {
|
||||||
if let Some(Some(prev_dim)) = columns_to_infer.get(&col_name) {
|
if let Some(Some(prev_dim)) = columns_to_infer.get(&col_name) {
|
||||||
@@ -102,8 +102,8 @@ mod tests {
|
|||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
use arrow_array::{
|
use arrow_array::{
|
||||||
types::{Float32Type, Float64Type},
|
|
||||||
FixedSizeListArray, Float32Array, ListArray, RecordBatch, RecordBatchIterator, StringArray,
|
FixedSizeListArray, Float32Array, ListArray, RecordBatch, RecordBatchIterator, StringArray,
|
||||||
|
types::{Float32Type, Float64Type},
|
||||||
};
|
};
|
||||||
use arrow_schema::{DataType, Field, Schema};
|
use arrow_schema::{DataType, Field, Schema};
|
||||||
use std::{sync::Arc, vec};
|
use std::{sync::Arc, vec};
|
||||||
|
|||||||
@@ -4,10 +4,10 @@
|
|||||||
use std::{iter::repeat_with, sync::Arc};
|
use std::{iter::repeat_with, sync::Arc};
|
||||||
|
|
||||||
use arrow_array::{
|
use arrow_array::{
|
||||||
cast::AsArray,
|
|
||||||
types::{Float16Type, Float32Type, Float64Type, Int32Type, Int64Type},
|
|
||||||
Array, ArrowNumericType, FixedSizeListArray, PrimitiveArray, RecordBatch, RecordBatchIterator,
|
Array, ArrowNumericType, FixedSizeListArray, PrimitiveArray, RecordBatch, RecordBatchIterator,
|
||||||
RecordBatchReader,
|
RecordBatchReader,
|
||||||
|
cast::AsArray,
|
||||||
|
types::{Float16Type, Float32Type, Float64Type, Int32Type, Int64Type},
|
||||||
};
|
};
|
||||||
use arrow_cast::{can_cast_types, cast};
|
use arrow_cast::{can_cast_types, cast};
|
||||||
use arrow_schema::{ArrowError, DataType, Field, Schema};
|
use arrow_schema::{ArrowError, DataType, Field, Schema};
|
||||||
@@ -184,7 +184,7 @@ mod tests {
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use arrow_array::{
|
use arrow_array::{
|
||||||
FixedSizeListArray, Float16Array, Float32Array, Float64Array, Int32Array, Int8Array,
|
FixedSizeListArray, Float16Array, Float32Array, Float64Array, Int8Array, Int32Array,
|
||||||
RecordBatch, RecordBatchIterator, StringArray,
|
RecordBatch, RecordBatchIterator, StringArray,
|
||||||
};
|
};
|
||||||
use arrow_schema::Field;
|
use arrow_schema::Field;
|
||||||
|
|||||||
@@ -13,16 +13,16 @@ use crate::arrow::{
|
|||||||
SendableRecordBatchStream, SendableRecordBatchStreamExt, SimpleRecordBatchStream,
|
SendableRecordBatchStream, SendableRecordBatchStreamExt, SimpleRecordBatchStream,
|
||||||
};
|
};
|
||||||
use crate::embeddings::{
|
use crate::embeddings::{
|
||||||
compute_embeddings_for_batch, compute_output_schema, EmbeddingDefinition, EmbeddingFunction,
|
EmbeddingDefinition, EmbeddingFunction, EmbeddingRegistry, compute_embeddings_for_batch,
|
||||||
EmbeddingRegistry,
|
compute_output_schema,
|
||||||
};
|
};
|
||||||
use crate::table::{ColumnDefinition, ColumnKind, TableDefinition};
|
use crate::table::{ColumnDefinition, ColumnKind, TableDefinition};
|
||||||
use crate::{Error, Result};
|
use crate::{Error, Result};
|
||||||
use arrow_array::{ArrayRef, RecordBatch, RecordBatchIterator, RecordBatchReader};
|
use arrow_array::{ArrayRef, RecordBatch, RecordBatchIterator, RecordBatchReader};
|
||||||
use arrow_schema::{ArrowError, SchemaRef};
|
use arrow_schema::{ArrowError, SchemaRef};
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use futures::stream::once;
|
|
||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
|
use futures::stream::once;
|
||||||
use lance_datafusion::utils::StreamingWriteSource;
|
use lance_datafusion::utils::StreamingWriteSource;
|
||||||
|
|
||||||
pub trait Scannable: Send {
|
pub trait Scannable: Send {
|
||||||
|
|||||||
@@ -19,12 +19,12 @@ use std::sync::Arc;
|
|||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use lance::dataset::ReadParams;
|
use lance::dataset::ReadParams;
|
||||||
|
use lance_namespace::LanceNamespace;
|
||||||
use lance_namespace::models::{
|
use lance_namespace::models::{
|
||||||
CreateNamespaceRequest, CreateNamespaceResponse, DescribeNamespaceRequest,
|
CreateNamespaceRequest, CreateNamespaceResponse, DescribeNamespaceRequest,
|
||||||
DescribeNamespaceResponse, DropNamespaceRequest, DropNamespaceResponse, ListNamespacesRequest,
|
DescribeNamespaceResponse, DropNamespaceRequest, DropNamespaceResponse, ListNamespacesRequest,
|
||||||
ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
|
ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
|
||||||
};
|
};
|
||||||
use lance_namespace::LanceNamespace;
|
|
||||||
|
|
||||||
use crate::data::scannable::Scannable;
|
use crate::data::scannable::Scannable;
|
||||||
use crate::error::Result;
|
use crate::error::Result;
|
||||||
@@ -66,6 +66,10 @@ pub struct OpenTableRequest {
|
|||||||
/// Optional namespace client for server-side query execution.
|
/// Optional namespace client for server-side query execution.
|
||||||
/// When set, queries will be executed on the namespace server instead of locally.
|
/// When set, queries will be executed on the namespace server instead of locally.
|
||||||
pub namespace_client: Option<Arc<dyn LanceNamespace>>,
|
pub namespace_client: Option<Arc<dyn LanceNamespace>>,
|
||||||
|
/// Whether managed versioning is enabled for this table.
|
||||||
|
/// When Some(true), the table will use namespace-managed commits instead of local commits.
|
||||||
|
/// When None and namespace_client is provided, the value will be fetched from the namespace.
|
||||||
|
pub managed_versioning: Option<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Debug for OpenTableRequest {
|
impl std::fmt::Debug for OpenTableRequest {
|
||||||
@@ -77,6 +81,7 @@ impl std::fmt::Debug for OpenTableRequest {
|
|||||||
.field("lance_read_params", &self.lance_read_params)
|
.field("lance_read_params", &self.lance_read_params)
|
||||||
.field("location", &self.location)
|
.field("location", &self.location)
|
||||||
.field("namespace_client", &self.namespace_client)
|
.field("namespace_client", &self.namespace_client)
|
||||||
|
.field("managed_versioning", &self.managed_versioning)
|
||||||
.finish()
|
.finish()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -161,6 +166,9 @@ pub struct CloneTableRequest {
|
|||||||
/// Whether to perform a shallow clone (true) or deep clone (false). Defaults to true.
|
/// Whether to perform a shallow clone (true) or deep clone (false). Defaults to true.
|
||||||
/// Currently only shallow clone is supported.
|
/// Currently only shallow clone is supported.
|
||||||
pub is_shallow: bool,
|
pub is_shallow: bool,
|
||||||
|
/// Optional namespace client for managed versioning support.
|
||||||
|
/// When set, enables the commit handler to track table versions through the namespace.
|
||||||
|
pub namespace_client: Option<Arc<dyn LanceNamespace>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl CloneTableRequest {
|
impl CloneTableRequest {
|
||||||
@@ -172,6 +180,7 @@ impl CloneTableRequest {
|
|||||||
source_version: None,
|
source_version: None,
|
||||||
source_tag: None,
|
source_tag: None,
|
||||||
is_shallow: true,
|
is_shallow: true,
|
||||||
|
namespace_client: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ use std::path::Path;
|
|||||||
use std::{collections::HashMap, sync::Arc};
|
use std::{collections::HashMap, sync::Arc};
|
||||||
|
|
||||||
use lance::dataset::refs::Ref;
|
use lance::dataset::refs::Ref;
|
||||||
use lance::dataset::{builder::DatasetBuilder, ReadParams, WriteMode};
|
use lance::dataset::{ReadParams, WriteMode, builder::DatasetBuilder};
|
||||||
use lance::io::{ObjectStore, ObjectStoreParams, WrappingObjectStore};
|
use lance::io::{ObjectStore, ObjectStoreParams, WrappingObjectStore};
|
||||||
use lance_datafusion::utils::StreamingWriteSource;
|
use lance_datafusion::utils::StreamingWriteSource;
|
||||||
use lance_encoding::version::LanceFileVersion;
|
use lance_encoding::version::LanceFileVersion;
|
||||||
@@ -669,6 +669,7 @@ impl ListingDatabase {
|
|||||||
lance_read_params: None,
|
lance_read_params: None,
|
||||||
location: None,
|
location: None,
|
||||||
namespace_client: None,
|
namespace_client: None,
|
||||||
|
managed_versioning: None,
|
||||||
};
|
};
|
||||||
let req = (callback)(req);
|
let req = (callback)(req);
|
||||||
let table = self.open_table(req).await?;
|
let table = self.open_table(req).await?;
|
||||||
@@ -869,6 +870,7 @@ impl Database for ListingDatabase {
|
|||||||
Some(write_params),
|
Some(write_params),
|
||||||
self.read_consistency_interval,
|
self.read_consistency_interval,
|
||||||
request.namespace_client,
|
request.namespace_client,
|
||||||
|
false, // server_side_query_enabled - listing database doesn't support server-side queries
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
@@ -946,7 +948,9 @@ impl Database for ListingDatabase {
|
|||||||
self.store_wrapper.clone(),
|
self.store_wrapper.clone(),
|
||||||
None,
|
None,
|
||||||
self.read_consistency_interval,
|
self.read_consistency_interval,
|
||||||
None,
|
request.namespace_client,
|
||||||
|
false, // server_side_query_enabled - listing database doesn't support server-side queries
|
||||||
|
None, // managed_versioning - will be queried if namespace_client is provided
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
@@ -1022,6 +1026,8 @@ impl Database for ListingDatabase {
|
|||||||
Some(read_params),
|
Some(read_params),
|
||||||
self.read_consistency_interval,
|
self.read_consistency_interval,
|
||||||
request.namespace_client,
|
request.namespace_client,
|
||||||
|
false, // server_side_query_enabled - listing database doesn't support server-side queries
|
||||||
|
request.managed_versioning, // Pass through managed_versioning from request
|
||||||
)
|
)
|
||||||
.await?,
|
.await?,
|
||||||
);
|
);
|
||||||
@@ -1097,11 +1103,11 @@ impl Database for ListingDatabase {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::Table;
|
||||||
use crate::connection::ConnectRequest;
|
use crate::connection::ConnectRequest;
|
||||||
use crate::data::scannable::Scannable;
|
use crate::data::scannable::Scannable;
|
||||||
use crate::database::{CreateTableMode, CreateTableRequest};
|
use crate::database::{CreateTableMode, CreateTableRequest};
|
||||||
use crate::table::WriteOptions;
|
use crate::table::WriteOptions;
|
||||||
use crate::Table;
|
|
||||||
use arrow_array::{Int32Array, RecordBatch, StringArray};
|
use arrow_array::{Int32Array, RecordBatch, StringArray};
|
||||||
use arrow_schema::{DataType, Field, Schema};
|
use arrow_schema::{DataType, Field, Schema};
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
@@ -1162,6 +1168,7 @@ mod tests {
|
|||||||
source_version: None,
|
source_version: None,
|
||||||
source_tag: None,
|
source_tag: None,
|
||||||
is_shallow: true,
|
is_shallow: true,
|
||||||
|
namespace_client: None,
|
||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@@ -1222,6 +1229,7 @@ mod tests {
|
|||||||
source_version: None,
|
source_version: None,
|
||||||
source_tag: None,
|
source_tag: None,
|
||||||
is_shallow: true,
|
is_shallow: true,
|
||||||
|
namespace_client: None,
|
||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@@ -1281,6 +1289,7 @@ mod tests {
|
|||||||
source_version: None,
|
source_version: None,
|
||||||
source_tag: None,
|
source_tag: None,
|
||||||
is_shallow: true,
|
is_shallow: true,
|
||||||
|
namespace_client: None,
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
@@ -1317,6 +1326,7 @@ mod tests {
|
|||||||
source_version: None,
|
source_version: None,
|
||||||
source_tag: None,
|
source_tag: None,
|
||||||
is_shallow: false, // Request deep clone
|
is_shallow: false, // Request deep clone
|
||||||
|
namespace_client: None,
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
@@ -1357,6 +1367,7 @@ mod tests {
|
|||||||
source_version: None,
|
source_version: None,
|
||||||
source_tag: None,
|
source_tag: None,
|
||||||
is_shallow: true,
|
is_shallow: true,
|
||||||
|
namespace_client: None,
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
@@ -1397,6 +1408,7 @@ mod tests {
|
|||||||
source_version: None,
|
source_version: None,
|
||||||
source_tag: None,
|
source_tag: None,
|
||||||
is_shallow: true,
|
is_shallow: true,
|
||||||
|
namespace_client: None,
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
@@ -1416,6 +1428,7 @@ mod tests {
|
|||||||
source_version: None,
|
source_version: None,
|
||||||
source_tag: None,
|
source_tag: None,
|
||||||
is_shallow: true,
|
is_shallow: true,
|
||||||
|
namespace_client: None,
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
@@ -1452,6 +1465,7 @@ mod tests {
|
|||||||
source_version: Some(1),
|
source_version: Some(1),
|
||||||
source_tag: Some("v1.0".to_string()),
|
source_tag: Some("v1.0".to_string()),
|
||||||
is_shallow: true,
|
is_shallow: true,
|
||||||
|
namespace_client: None,
|
||||||
})
|
})
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
@@ -1525,6 +1539,7 @@ mod tests {
|
|||||||
source_version: Some(initial_version),
|
source_version: Some(initial_version),
|
||||||
source_tag: None,
|
source_tag: None,
|
||||||
is_shallow: true,
|
is_shallow: true,
|
||||||
|
namespace_client: None,
|
||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@@ -1603,6 +1618,7 @@ mod tests {
|
|||||||
source_version: None,
|
source_version: None,
|
||||||
source_tag: Some("v1.0".to_string()),
|
source_tag: Some("v1.0".to_string()),
|
||||||
is_shallow: true,
|
is_shallow: true,
|
||||||
|
namespace_client: None,
|
||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@@ -1654,6 +1670,7 @@ mod tests {
|
|||||||
source_version: None,
|
source_version: None,
|
||||||
source_tag: None,
|
source_tag: None,
|
||||||
is_shallow: true,
|
is_shallow: true,
|
||||||
|
namespace_client: None,
|
||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@@ -1746,6 +1763,7 @@ mod tests {
|
|||||||
source_version: None,
|
source_version: None,
|
||||||
source_tag: None,
|
source_tag: None,
|
||||||
is_shallow: true,
|
is_shallow: true,
|
||||||
|
namespace_client: None,
|
||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|||||||
@@ -7,18 +7,20 @@ use std::collections::HashMap;
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
|
use lance::io::commit::namespace_manifest::LanceNamespaceExternalManifestStore;
|
||||||
use lance_io::object_store::{ObjectStoreParams, StorageOptionsAccessor};
|
use lance_io::object_store::{ObjectStoreParams, StorageOptionsAccessor};
|
||||||
use lance_namespace::{
|
use lance_namespace::{
|
||||||
models::{
|
|
||||||
CreateEmptyTableRequest, CreateNamespaceRequest, CreateNamespaceResponse,
|
|
||||||
DeclareTableRequest, DescribeNamespaceRequest, DescribeNamespaceResponse,
|
|
||||||
DescribeTableRequest, DropNamespaceRequest, DropNamespaceResponse, DropTableRequest,
|
|
||||||
ListNamespacesRequest, ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
|
|
||||||
},
|
|
||||||
LanceNamespace,
|
LanceNamespace,
|
||||||
|
models::{
|
||||||
|
CreateNamespaceRequest, CreateNamespaceResponse, DeclareTableRequest,
|
||||||
|
DescribeNamespaceRequest, DescribeNamespaceResponse, DescribeTableRequest,
|
||||||
|
DropNamespaceRequest, DropNamespaceResponse, DropTableRequest, ListNamespacesRequest,
|
||||||
|
ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
use lance_namespace_impls::ConnectBuilder;
|
use lance_namespace_impls::ConnectBuilder;
|
||||||
use log::warn;
|
use lance_table::io::commit::CommitHandler;
|
||||||
|
use lance_table::io::commit::external_manifest::ExternalManifestCommitHandler;
|
||||||
|
|
||||||
use crate::database::ReadConsistency;
|
use crate::database::ReadConsistency;
|
||||||
use crate::error::{Error, Result};
|
use crate::error::{Error, Result};
|
||||||
@@ -206,83 +208,48 @@ impl Database for LanceNamespaceDatabase {
|
|||||||
let mut table_id = request.namespace.clone();
|
let mut table_id = request.namespace.clone();
|
||||||
table_id.push(request.name.clone());
|
table_id.push(request.name.clone());
|
||||||
|
|
||||||
// Try declare_table first, falling back to create_empty_table for backwards
|
|
||||||
// compatibility with older namespace clients that don't support declare_table
|
|
||||||
let declare_request = DeclareTableRequest {
|
let declare_request = DeclareTableRequest {
|
||||||
id: Some(table_id.clone()),
|
id: Some(table_id.clone()),
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
|
|
||||||
let (location, initial_storage_options) =
|
let (location, initial_storage_options, managed_versioning) = {
|
||||||
match self.namespace.declare_table(declare_request).await {
|
let response = self.namespace.declare_table(declare_request).await?;
|
||||||
Ok(response) => {
|
let loc = response.location.ok_or_else(|| Error::Runtime {
|
||||||
let loc = response.location.ok_or_else(|| Error::Runtime {
|
message: "Table location is missing from declare_table response".to_string(),
|
||||||
message: "Table location is missing from declare_table response"
|
})?;
|
||||||
.to_string(),
|
// Use storage options from response, fall back to self.storage_options
|
||||||
})?;
|
let opts = response
|
||||||
// Use storage options from response, fall back to self.storage_options
|
.storage_options
|
||||||
let opts = response
|
.or_else(|| Some(self.storage_options.clone()))
|
||||||
.storage_options
|
.filter(|o| !o.is_empty());
|
||||||
.or_else(|| Some(self.storage_options.clone()))
|
(loc, opts, response.managed_versioning)
|
||||||
.filter(|o| !o.is_empty());
|
};
|
||||||
(loc, opts)
|
|
||||||
}
|
|
||||||
Err(e) => {
|
|
||||||
// Check if the error is "not supported" and try create_empty_table as fallback
|
|
||||||
let err_str = e.to_string().to_lowercase();
|
|
||||||
if err_str.contains("not supported") || err_str.contains("not implemented") {
|
|
||||||
warn!(
|
|
||||||
"declare_table is not supported by the namespace client, \
|
|
||||||
falling back to deprecated create_empty_table. \
|
|
||||||
create_empty_table is deprecated and will be removed in Lance 3.0.0. \
|
|
||||||
Please upgrade your namespace client to support declare_table."
|
|
||||||
);
|
|
||||||
#[allow(deprecated)]
|
|
||||||
let create_empty_request = CreateEmptyTableRequest {
|
|
||||||
id: Some(table_id.clone()),
|
|
||||||
..Default::default()
|
|
||||||
};
|
|
||||||
|
|
||||||
#[allow(deprecated)]
|
// Build write params with storage options and commit handler
|
||||||
let create_response = self
|
let mut params = request.write_options.lance_write_params.unwrap_or_default();
|
||||||
.namespace
|
|
||||||
.create_empty_table(create_empty_request)
|
|
||||||
.await
|
|
||||||
.map_err(|e| Error::Runtime {
|
|
||||||
message: format!("Failed to create empty table: {}", e),
|
|
||||||
})?;
|
|
||||||
|
|
||||||
let loc = create_response.location.ok_or_else(|| Error::Runtime {
|
// Set up storage options if provided
|
||||||
message: "Table location is missing from create_empty_table response"
|
if let Some(storage_opts) = initial_storage_options {
|
||||||
.to_string(),
|
|
||||||
})?;
|
|
||||||
// For deprecated path, use self.storage_options
|
|
||||||
let opts = if self.storage_options.is_empty() {
|
|
||||||
None
|
|
||||||
} else {
|
|
||||||
Some(self.storage_options.clone())
|
|
||||||
};
|
|
||||||
(loc, opts)
|
|
||||||
} else {
|
|
||||||
return Err(Error::Runtime {
|
|
||||||
message: format!("Failed to declare table: {}", e),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let write_params = if let Some(storage_opts) = initial_storage_options {
|
|
||||||
let mut params = request.write_options.lance_write_params.unwrap_or_default();
|
|
||||||
let store_params = params
|
let store_params = params
|
||||||
.store_params
|
.store_params
|
||||||
.get_or_insert_with(ObjectStoreParams::default);
|
.get_or_insert_with(ObjectStoreParams::default);
|
||||||
store_params.storage_options_accessor = Some(Arc::new(
|
store_params.storage_options_accessor = Some(Arc::new(
|
||||||
StorageOptionsAccessor::with_static_options(storage_opts),
|
StorageOptionsAccessor::with_static_options(storage_opts),
|
||||||
));
|
));
|
||||||
Some(params)
|
}
|
||||||
} else {
|
|
||||||
request.write_options.lance_write_params
|
// Set up commit handler when managed_versioning is enabled
|
||||||
};
|
if managed_versioning == Some(true) {
|
||||||
|
let external_store =
|
||||||
|
LanceNamespaceExternalManifestStore::new(self.namespace.clone(), table_id.clone());
|
||||||
|
let commit_handler: Arc<dyn CommitHandler> = Arc::new(ExternalManifestCommitHandler {
|
||||||
|
external_manifest_store: Arc::new(external_store),
|
||||||
|
});
|
||||||
|
params.commit_handler = Some(commit_handler);
|
||||||
|
}
|
||||||
|
|
||||||
|
let write_params = Some(params);
|
||||||
|
|
||||||
let native_table = NativeTable::create_from_namespace(
|
let native_table = NativeTable::create_from_namespace(
|
||||||
self.namespace.clone(),
|
self.namespace.clone(),
|
||||||
|
|||||||
@@ -11,16 +11,16 @@ use lance_core::ROW_ID;
|
|||||||
use lance_datafusion::exec::SessionContextExt;
|
use lance_datafusion::exec::SessionContextExt;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
|
Error, Result, Table,
|
||||||
arrow::{SendableRecordBatchStream, SendableRecordBatchStreamExt, SimpleRecordBatchStream},
|
arrow::{SendableRecordBatchStream, SendableRecordBatchStreamExt, SimpleRecordBatchStream},
|
||||||
connect,
|
connect,
|
||||||
database::{CreateTableRequest, Database},
|
database::{CreateTableRequest, Database},
|
||||||
dataloader::permutation::{
|
dataloader::permutation::{
|
||||||
shuffle::{Shuffler, ShufflerConfig},
|
shuffle::{Shuffler, ShufflerConfig},
|
||||||
split::{SplitStrategy, Splitter, SPLIT_ID_COLUMN},
|
split::{SPLIT_ID_COLUMN, SplitStrategy, Splitter},
|
||||||
util::{rename_column, TemporaryDirectory},
|
util::{TemporaryDirectory, rename_column},
|
||||||
},
|
},
|
||||||
query::{ExecutableQuery, QueryBase, Select},
|
query::{ExecutableQuery, QueryBase, Select},
|
||||||
Error, Result, Table,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
pub const SRC_ROW_ID_COL: &str = "row_id";
|
pub const SRC_ROW_ID_COL: &str = "row_id";
|
||||||
|
|||||||
@@ -25,8 +25,8 @@ use futures::{StreamExt, TryStreamExt};
|
|||||||
use lance::dataset::scanner::DatasetRecordBatchStream;
|
use lance::dataset::scanner::DatasetRecordBatchStream;
|
||||||
use lance::io::RecordBatchStream;
|
use lance::io::RecordBatchStream;
|
||||||
use lance_arrow::RecordBatchExt;
|
use lance_arrow::RecordBatchExt;
|
||||||
use lance_core::error::LanceOptionExt;
|
|
||||||
use lance_core::ROW_ID;
|
use lance_core::ROW_ID;
|
||||||
|
use lance_core::error::LanceOptionExt;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
@@ -500,10 +500,10 @@ mod tests {
|
|||||||
use rand::seq::SliceRandom;
|
use rand::seq::SliceRandom;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
|
Table,
|
||||||
arrow::SendableRecordBatchStream,
|
arrow::SendableRecordBatchStream,
|
||||||
query::{ExecutableQuery, QueryBase},
|
query::{ExecutableQuery, QueryBase},
|
||||||
test_utils::datagen::{virtual_table, LanceDbDatagenExt},
|
test_utils::datagen::{LanceDbDatagenExt, virtual_table},
|
||||||
Table,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|||||||
@@ -18,12 +18,12 @@ use lance_io::{
|
|||||||
scheduler::{ScanScheduler, SchedulerConfig},
|
scheduler::{ScanScheduler, SchedulerConfig},
|
||||||
utils::CachedFileSize,
|
utils::CachedFileSize,
|
||||||
};
|
};
|
||||||
use rand::{seq::SliceRandom, Rng, RngCore};
|
use rand::{Rng, RngCore, seq::SliceRandom};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
|
|
||||||
dataloader::permutation::util::{non_crypto_rng, TemporaryDirectory},
|
|
||||||
Error, Result,
|
Error, Result,
|
||||||
|
arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
|
||||||
|
dataloader::permutation::util::{TemporaryDirectory, non_crypto_rng},
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
@@ -281,7 +281,7 @@ mod tests {
|
|||||||
use datafusion_expr::col;
|
use datafusion_expr::col;
|
||||||
use futures::TryStreamExt;
|
use futures::TryStreamExt;
|
||||||
use lance_datagen::{BatchCount, BatchGeneratorBuilder, ByteCount, RowCount, Seed};
|
use lance_datagen::{BatchCount, BatchGeneratorBuilder, ByteCount, RowCount, Seed};
|
||||||
use rand::{rngs::SmallRng, SeedableRng};
|
use rand::{SeedableRng, rngs::SmallRng};
|
||||||
|
|
||||||
fn test_gen() -> BatchGeneratorBuilder {
|
fn test_gen() -> BatchGeneratorBuilder {
|
||||||
lance_datagen::gen_batch()
|
lance_datagen::gen_batch()
|
||||||
|
|||||||
@@ -2,8 +2,8 @@
|
|||||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
use std::sync::{
|
use std::sync::{
|
||||||
atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
|
|
||||||
Arc,
|
Arc,
|
||||||
|
atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
|
||||||
};
|
};
|
||||||
|
|
||||||
use arrow_array::{Array, BooleanArray, RecordBatch, UInt64Array};
|
use arrow_array::{Array, BooleanArray, RecordBatch, UInt64Array};
|
||||||
@@ -15,13 +15,13 @@ use lance_arrow::SchemaExt;
|
|||||||
use lance_core::ROW_ID;
|
use lance_core::ROW_ID;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
|
Error, Result,
|
||||||
arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
|
arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
|
||||||
dataloader::{
|
dataloader::{
|
||||||
permutation::shuffle::{Shuffler, ShufflerConfig},
|
permutation::shuffle::{Shuffler, ShufflerConfig},
|
||||||
permutation::util::TemporaryDirectory,
|
permutation::util::TemporaryDirectory,
|
||||||
},
|
},
|
||||||
query::{Query, QueryBase, Select},
|
query::{Query, QueryBase, Select},
|
||||||
Error, Result,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
pub const SPLIT_ID_COLUMN: &str = "split_id";
|
pub const SPLIT_ID_COLUMN: &str = "split_id";
|
||||||
|
|||||||
@@ -7,12 +7,12 @@ use arrow_array::RecordBatch;
|
|||||||
use arrow_schema::{Fields, Schema};
|
use arrow_schema::{Fields, Schema};
|
||||||
use datafusion_execution::disk_manager::DiskManagerMode;
|
use datafusion_execution::disk_manager::DiskManagerMode;
|
||||||
use futures::TryStreamExt;
|
use futures::TryStreamExt;
|
||||||
use rand::{rngs::SmallRng, RngCore, SeedableRng};
|
use rand::{RngCore, SeedableRng, rngs::SmallRng};
|
||||||
use tempfile::TempDir;
|
use tempfile::TempDir;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
|
|
||||||
Error, Result,
|
Error, Result,
|
||||||
|
arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Directory to use for temporary files
|
/// Directory to use for temporary files
|
||||||
|
|||||||
@@ -23,9 +23,9 @@ use arrow_schema::{DataType, Field, SchemaBuilder, SchemaRef};
|
|||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
|
Error,
|
||||||
error::Result,
|
error::Result,
|
||||||
table::{ColumnDefinition, ColumnKind, TableDefinition},
|
table::{ColumnDefinition, ColumnKind, TableDefinition},
|
||||||
Error,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Trait for embedding functions
|
/// Trait for embedding functions
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ use arrow::array::{AsArray, Float32Builder};
|
|||||||
use arrow_array::{Array, ArrayRef, FixedSizeListArray, Float32Array};
|
use arrow_array::{Array, ArrayRef, FixedSizeListArray, Float32Array};
|
||||||
use arrow_data::ArrayData;
|
use arrow_data::ArrayData;
|
||||||
use arrow_schema::DataType;
|
use arrow_schema::DataType;
|
||||||
use serde_json::{json, Value};
|
use serde_json::{Value, json};
|
||||||
|
|
||||||
use super::EmbeddingFunction;
|
use super::EmbeddingFunction;
|
||||||
use crate::{Error, Result};
|
use crate::{Error, Result};
|
||||||
|
|||||||
@@ -8,9 +8,9 @@ use arrow_array::{Array, ArrayRef, FixedSizeListArray, Float32Array};
|
|||||||
use arrow_data::ArrayData;
|
use arrow_data::ArrayData;
|
||||||
use arrow_schema::DataType;
|
use arrow_schema::DataType;
|
||||||
use async_openai::{
|
use async_openai::{
|
||||||
|
Client,
|
||||||
config::OpenAIConfig,
|
config::OpenAIConfig,
|
||||||
types::{CreateEmbeddingRequest, Embedding, EmbeddingInput, EncodingFormat},
|
types::{CreateEmbeddingRequest, Embedding, EmbeddingInput, EncodingFormat},
|
||||||
Client,
|
|
||||||
};
|
};
|
||||||
use tokio::{runtime::Handle, task};
|
use tokio::{runtime::Handle, task};
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ use super::EmbeddingFunction;
|
|||||||
use arrow::{
|
use arrow::{
|
||||||
array::{AsArray, PrimitiveBuilder},
|
array::{AsArray, PrimitiveBuilder},
|
||||||
datatypes::{
|
datatypes::{
|
||||||
ArrowPrimitiveType, Float16Type, Float32Type, Float64Type, Int64Type, UInt32Type, UInt8Type,
|
ArrowPrimitiveType, Float16Type, Float32Type, Float64Type, Int64Type, UInt8Type, UInt32Type,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
use arrow_array::{Array, FixedSizeListArray, PrimitiveArray};
|
use arrow_array::{Array, FixedSizeListArray, PrimitiveArray};
|
||||||
@@ -16,8 +16,8 @@ use arrow_schema::DataType;
|
|||||||
use candle_core::{CpuStorage, Device, Layout, Storage, Tensor};
|
use candle_core::{CpuStorage, Device, Layout, Storage, Tensor};
|
||||||
use candle_nn::VarBuilder;
|
use candle_nn::VarBuilder;
|
||||||
use candle_transformers::models::bert::{BertModel, DTYPE};
|
use candle_transformers::models::bert::{BertModel, DTYPE};
|
||||||
use hf_hub::{api::sync::Api, Repo, RepoType};
|
use hf_hub::{Repo, RepoType, api::sync::Api};
|
||||||
use tokenizers::{tokenizer::Tokenizer, PaddingParams};
|
use tokenizers::{PaddingParams, tokenizer::Tokenizer};
|
||||||
|
|
||||||
/// Compute embeddings using huggingface sentence-transformers.
|
/// Compute embeddings using huggingface sentence-transformers.
|
||||||
pub struct SentenceTransformersEmbeddingsBuilder {
|
pub struct SentenceTransformersEmbeddingsBuilder {
|
||||||
@@ -230,7 +230,7 @@ impl SentenceTransformersEmbeddings {
|
|||||||
Storage::Cpu(CpuStorage::BF16(_)) => {
|
Storage::Cpu(CpuStorage::BF16(_)) => {
|
||||||
return Err(crate::Error::Runtime {
|
return Err(crate::Error::Runtime {
|
||||||
message: "unsupported data type".to_string(),
|
message: "unsupported data type".to_string(),
|
||||||
})
|
});
|
||||||
}
|
}
|
||||||
_ => unreachable!("we already moved the tensor to the CPU device"),
|
_ => unreachable!("we already moved the tensor to the CPU device"),
|
||||||
};
|
};
|
||||||
@@ -298,12 +298,12 @@ impl SentenceTransformersEmbeddings {
|
|||||||
DataType::Utf8View => {
|
DataType::Utf8View => {
|
||||||
return Err(crate::Error::Runtime {
|
return Err(crate::Error::Runtime {
|
||||||
message: "Utf8View not yet implemented".to_string(),
|
message: "Utf8View not yet implemented".to_string(),
|
||||||
})
|
});
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
return Err(crate::Error::Runtime {
|
return Err(crate::Error::Runtime {
|
||||||
message: "invalid type".to_string(),
|
message: "invalid type".to_string(),
|
||||||
})
|
});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ pub use sql::expr_to_sql_string;
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use arrow_schema::DataType;
|
use arrow_schema::DataType;
|
||||||
use datafusion_expr::{expr_fn::cast, Expr, ScalarUDF};
|
use datafusion_expr::{Expr, ScalarUDF, expr_fn::cast};
|
||||||
use datafusion_functions::string::expr_fn as string_expr_fn;
|
use datafusion_functions::string::expr_fn as string_expr_fn;
|
||||||
|
|
||||||
pub use datafusion_expr::{col, lit};
|
pub use datafusion_expr::{col, lit};
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ use std::time::Duration;
|
|||||||
use vector::IvfFlatIndexBuilder;
|
use vector::IvfFlatIndexBuilder;
|
||||||
|
|
||||||
use crate::index::vector::IvfRqIndexBuilder;
|
use crate::index::vector::IvfRqIndexBuilder;
|
||||||
use crate::{table::BaseTable, DistanceType, Error, Result};
|
use crate::{DistanceType, Error, Result, table::BaseTable};
|
||||||
|
|
||||||
use self::{
|
use self::{
|
||||||
scalar::{BTreeIndexBuilder, BitmapIndexBuilder, LabelListIndexBuilder},
|
scalar::{BTreeIndexBuilder, BitmapIndexBuilder, LabelListIndexBuilder},
|
||||||
|
|||||||
@@ -27,7 +27,7 @@
|
|||||||
///
|
///
|
||||||
/// The btree index does not currently have any parameters though parameters such as the
|
/// The btree index does not currently have any parameters though parameters such as the
|
||||||
/// block size may be added in the future.
|
/// block size may be added in the future.
|
||||||
#[derive(Default, Debug, Clone)]
|
#[derive(Default, Debug, Clone, serde::Serialize)]
|
||||||
pub struct BTreeIndexBuilder {}
|
pub struct BTreeIndexBuilder {}
|
||||||
|
|
||||||
impl BTreeIndexBuilder {}
|
impl BTreeIndexBuilder {}
|
||||||
@@ -39,7 +39,7 @@ impl BTreeIndexBuilder {}
|
|||||||
/// This index works best for low-cardinality (i.e., less than 1000 unique values) columns,
|
/// This index works best for low-cardinality (i.e., less than 1000 unique values) columns,
|
||||||
/// where the number of unique values is small.
|
/// where the number of unique values is small.
|
||||||
/// The bitmap stores a list of row ids where the value is present.
|
/// The bitmap stores a list of row ids where the value is present.
|
||||||
#[derive(Debug, Clone, Default)]
|
#[derive(Debug, Clone, Default, serde::Serialize)]
|
||||||
pub struct BitmapIndexBuilder {}
|
pub struct BitmapIndexBuilder {}
|
||||||
|
|
||||||
/// Builder for LabelList index.
|
/// Builder for LabelList index.
|
||||||
@@ -48,10 +48,10 @@ pub struct BitmapIndexBuilder {}
|
|||||||
/// support queries with `array_contains_all` and `array_contains_any`
|
/// support queries with `array_contains_all` and `array_contains_any`
|
||||||
/// using an underlying bitmap index.
|
/// using an underlying bitmap index.
|
||||||
///
|
///
|
||||||
#[derive(Debug, Clone, Default)]
|
#[derive(Debug, Clone, Default, serde::Serialize)]
|
||||||
pub struct LabelListIndexBuilder {}
|
pub struct LabelListIndexBuilder {}
|
||||||
|
|
||||||
pub use lance_index::scalar::inverted::query::*;
|
|
||||||
pub use lance_index::scalar::FullTextSearchQuery;
|
pub use lance_index::scalar::FullTextSearchQuery;
|
||||||
pub use lance_index::scalar::InvertedIndexParams as FtsIndexBuilder;
|
pub use lance_index::scalar::InvertedIndexParams as FtsIndexBuilder;
|
||||||
pub use lance_index::scalar::InvertedIndexParams;
|
pub use lance_index::scalar::InvertedIndexParams;
|
||||||
|
pub use lance_index::scalar::inverted::query::*;
|
||||||
|
|||||||
@@ -7,6 +7,7 @@
|
|||||||
//! Vector indices are only supported on fixed-size-list (tensor) columns of floating point
|
//! Vector indices are only supported on fixed-size-list (tensor) columns of floating point
|
||||||
//! values
|
//! values
|
||||||
use lance::table::format::{IndexMetadata, Manifest};
|
use lance::table::format::{IndexMetadata, Manifest};
|
||||||
|
use serde::Serialize;
|
||||||
|
|
||||||
use crate::DistanceType;
|
use crate::DistanceType;
|
||||||
|
|
||||||
@@ -181,14 +182,17 @@ macro_rules! impl_hnsw_params_setter {
|
|||||||
/// The partitioning process is called IVF and the `num_partitions` parameter controls how many groups to create.
|
/// The partitioning process is called IVF and the `num_partitions` parameter controls how many groups to create.
|
||||||
///
|
///
|
||||||
/// Note that training an IVF Flat index on a large dataset is a slow operation and currently is also a memory intensive operation.
|
/// Note that training an IVF Flat index on a large dataset is a slow operation and currently is also a memory intensive operation.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone, Serialize)]
|
||||||
pub struct IvfFlatIndexBuilder {
|
pub struct IvfFlatIndexBuilder {
|
||||||
|
#[serde(rename = "metric_type")]
|
||||||
pub(crate) distance_type: DistanceType,
|
pub(crate) distance_type: DistanceType,
|
||||||
|
|
||||||
// IVF
|
// IVF
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub(crate) num_partitions: Option<u32>,
|
pub(crate) num_partitions: Option<u32>,
|
||||||
pub(crate) sample_rate: u32,
|
pub(crate) sample_rate: u32,
|
||||||
pub(crate) max_iterations: u32,
|
pub(crate) max_iterations: u32,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub(crate) target_partition_size: Option<u32>,
|
pub(crate) target_partition_size: Option<u32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -213,14 +217,17 @@ impl IvfFlatIndexBuilder {
|
|||||||
///
|
///
|
||||||
/// This index compresses vectors using scalar quantization and groups them into IVF partitions.
|
/// This index compresses vectors using scalar quantization and groups them into IVF partitions.
|
||||||
/// It offers a balance between search performance and storage footprint.
|
/// It offers a balance between search performance and storage footprint.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone, Serialize)]
|
||||||
pub struct IvfSqIndexBuilder {
|
pub struct IvfSqIndexBuilder {
|
||||||
|
#[serde(rename = "metric_type")]
|
||||||
pub(crate) distance_type: DistanceType,
|
pub(crate) distance_type: DistanceType,
|
||||||
|
|
||||||
// IVF
|
// IVF
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub(crate) num_partitions: Option<u32>,
|
pub(crate) num_partitions: Option<u32>,
|
||||||
pub(crate) sample_rate: u32,
|
pub(crate) sample_rate: u32,
|
||||||
pub(crate) max_iterations: u32,
|
pub(crate) max_iterations: u32,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub(crate) target_partition_size: Option<u32>,
|
pub(crate) target_partition_size: Option<u32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -261,18 +268,23 @@ impl IvfSqIndexBuilder {
|
|||||||
///
|
///
|
||||||
/// Note that training an IVF PQ index on a large dataset is a slow operation and
|
/// Note that training an IVF PQ index on a large dataset is a slow operation and
|
||||||
/// currently is also a memory intensive operation.
|
/// currently is also a memory intensive operation.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone, Serialize)]
|
||||||
pub struct IvfPqIndexBuilder {
|
pub struct IvfPqIndexBuilder {
|
||||||
|
#[serde(rename = "metric_type")]
|
||||||
pub(crate) distance_type: DistanceType,
|
pub(crate) distance_type: DistanceType,
|
||||||
|
|
||||||
// IVF
|
// IVF
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub(crate) num_partitions: Option<u32>,
|
pub(crate) num_partitions: Option<u32>,
|
||||||
pub(crate) sample_rate: u32,
|
pub(crate) sample_rate: u32,
|
||||||
pub(crate) max_iterations: u32,
|
pub(crate) max_iterations: u32,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub(crate) target_partition_size: Option<u32>,
|
pub(crate) target_partition_size: Option<u32>,
|
||||||
|
|
||||||
// PQ
|
// PQ
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub(crate) num_sub_vectors: Option<u32>,
|
pub(crate) num_sub_vectors: Option<u32>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub(crate) num_bits: Option<u32>,
|
pub(crate) num_bits: Option<u32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -323,14 +335,18 @@ pub(crate) fn suggested_num_sub_vectors(dim: u32) -> u32 {
|
|||||||
///
|
///
|
||||||
/// Note that training an IVF RQ index on a large dataset is a slow operation and
|
/// Note that training an IVF RQ index on a large dataset is a slow operation and
|
||||||
/// currently is also a memory intensive operation.
|
/// currently is also a memory intensive operation.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone, Serialize)]
|
||||||
pub struct IvfRqIndexBuilder {
|
pub struct IvfRqIndexBuilder {
|
||||||
// IVF
|
// IVF
|
||||||
|
#[serde(rename = "metric_type")]
|
||||||
pub(crate) distance_type: DistanceType,
|
pub(crate) distance_type: DistanceType,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub(crate) num_partitions: Option<u32>,
|
pub(crate) num_partitions: Option<u32>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub(crate) num_bits: Option<u32>,
|
pub(crate) num_bits: Option<u32>,
|
||||||
pub(crate) sample_rate: u32,
|
pub(crate) sample_rate: u32,
|
||||||
pub(crate) max_iterations: u32,
|
pub(crate) max_iterations: u32,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub(crate) target_partition_size: Option<u32>,
|
pub(crate) target_partition_size: Option<u32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -365,13 +381,16 @@ impl IvfRqIndexBuilder {
|
|||||||
/// quickly find the closest vectors to a query vector.
|
/// quickly find the closest vectors to a query vector.
|
||||||
///
|
///
|
||||||
/// The PQ (product quantizer) is used to compress the vectors as the same as IVF PQ.
|
/// The PQ (product quantizer) is used to compress the vectors as the same as IVF PQ.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone, Serialize)]
|
||||||
pub struct IvfHnswPqIndexBuilder {
|
pub struct IvfHnswPqIndexBuilder {
|
||||||
// IVF
|
// IVF
|
||||||
|
#[serde(rename = "metric_type")]
|
||||||
pub(crate) distance_type: DistanceType,
|
pub(crate) distance_type: DistanceType,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub(crate) num_partitions: Option<u32>,
|
pub(crate) num_partitions: Option<u32>,
|
||||||
pub(crate) sample_rate: u32,
|
pub(crate) sample_rate: u32,
|
||||||
pub(crate) max_iterations: u32,
|
pub(crate) max_iterations: u32,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub(crate) target_partition_size: Option<u32>,
|
pub(crate) target_partition_size: Option<u32>,
|
||||||
|
|
||||||
// HNSW
|
// HNSW
|
||||||
@@ -379,7 +398,9 @@ pub struct IvfHnswPqIndexBuilder {
|
|||||||
pub(crate) ef_construction: u32,
|
pub(crate) ef_construction: u32,
|
||||||
|
|
||||||
// PQ
|
// PQ
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub(crate) num_sub_vectors: Option<u32>,
|
pub(crate) num_sub_vectors: Option<u32>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub(crate) num_bits: Option<u32>,
|
pub(crate) num_bits: Option<u32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -415,13 +436,16 @@ impl IvfHnswPqIndexBuilder {
|
|||||||
///
|
///
|
||||||
/// The SQ (scalar quantizer) is used to compress the vectors,
|
/// The SQ (scalar quantizer) is used to compress the vectors,
|
||||||
/// each vector is mapped to a 8-bit integer vector, 4x compression ratio for float32 vector.
|
/// each vector is mapped to a 8-bit integer vector, 4x compression ratio for float32 vector.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone, Serialize)]
|
||||||
pub struct IvfHnswSqIndexBuilder {
|
pub struct IvfHnswSqIndexBuilder {
|
||||||
// IVF
|
// IVF
|
||||||
|
#[serde(rename = "metric_type")]
|
||||||
pub(crate) distance_type: DistanceType,
|
pub(crate) distance_type: DistanceType,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub(crate) num_partitions: Option<u32>,
|
pub(crate) num_partitions: Option<u32>,
|
||||||
pub(crate) sample_rate: u32,
|
pub(crate) sample_rate: u32,
|
||||||
pub(crate) max_iterations: u32,
|
pub(crate) max_iterations: u32,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub(crate) target_partition_size: Option<u32>,
|
pub(crate) target_partition_size: Option<u32>,
|
||||||
|
|
||||||
// HNSW
|
// HNSW
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
|
use crate::Error;
|
||||||
use crate::error::Result;
|
use crate::error::Result;
|
||||||
use crate::table::BaseTable;
|
use crate::table::BaseTable;
|
||||||
use crate::Error;
|
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use std::time::{Duration, Instant};
|
use std::time::{Duration, Instant};
|
||||||
use tokio::time::sleep;
|
use tokio::time::sleep;
|
||||||
|
|||||||
@@ -5,11 +5,11 @@
|
|||||||
|
|
||||||
use std::{fmt::Formatter, sync::Arc};
|
use std::{fmt::Formatter, sync::Arc};
|
||||||
|
|
||||||
use futures::{stream::BoxStream, TryFutureExt};
|
use futures::{TryFutureExt, stream::BoxStream};
|
||||||
use lance::io::WrappingObjectStore;
|
use lance::io::WrappingObjectStore;
|
||||||
use object_store::{
|
use object_store::{
|
||||||
path::Path, Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
|
Error, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
|
||||||
PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, UploadPart,
|
PutMultipartOptions, PutOptions, PutPayload, PutResult, Result, UploadPart, path::Path,
|
||||||
};
|
};
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
|
|||||||
@@ -10,8 +10,9 @@ use bytes::Bytes;
|
|||||||
use futures::stream::BoxStream;
|
use futures::stream::BoxStream;
|
||||||
use lance::io::WrappingObjectStore;
|
use lance::io::WrappingObjectStore;
|
||||||
use object_store::{
|
use object_store::{
|
||||||
path::Path, GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
|
GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore,
|
||||||
PutMultipartOptions, PutOptions, PutPayload, PutResult, Result as OSResult, UploadPart,
|
PutMultipartOptions, PutOptions, PutPayload, PutResult, Result as OSResult, UploadPart,
|
||||||
|
path::Path,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Debug, Default)]
|
#[derive(Debug, Default)]
|
||||||
|
|||||||
@@ -5,26 +5,26 @@ use std::sync::Arc;
|
|||||||
use std::{future::Future, time::Duration};
|
use std::{future::Future, time::Duration};
|
||||||
|
|
||||||
use arrow::compute::concat_batches;
|
use arrow::compute::concat_batches;
|
||||||
use arrow_array::{make_array, Array, Float16Array, Float32Array, Float64Array};
|
use arrow_array::{Array, Float16Array, Float32Array, Float64Array, make_array};
|
||||||
use arrow_schema::{DataType, SchemaRef};
|
use arrow_schema::{DataType, SchemaRef};
|
||||||
use datafusion_expr::Expr;
|
use datafusion_expr::Expr;
|
||||||
use datafusion_physical_plan::ExecutionPlan;
|
use datafusion_physical_plan::ExecutionPlan;
|
||||||
use futures::{stream, try_join, FutureExt, TryFutureExt, TryStreamExt};
|
use futures::{FutureExt, TryFutureExt, TryStreamExt, stream, try_join};
|
||||||
use half::f16;
|
use half::f16;
|
||||||
use lance::dataset::{scanner::DatasetRecordBatchStream, ROW_ID};
|
use lance::dataset::{ROW_ID, scanner::DatasetRecordBatchStream};
|
||||||
use lance_arrow::RecordBatchExt;
|
use lance_arrow::RecordBatchExt;
|
||||||
use lance_datafusion::exec::execute_plan;
|
use lance_datafusion::exec::execute_plan;
|
||||||
use lance_index::scalar::inverted::SCORE_COL;
|
|
||||||
use lance_index::scalar::FullTextSearchQuery;
|
use lance_index::scalar::FullTextSearchQuery;
|
||||||
|
use lance_index::scalar::inverted::SCORE_COL;
|
||||||
use lance_index::vector::DIST_COL;
|
use lance_index::vector::DIST_COL;
|
||||||
use lance_io::stream::RecordBatchStreamAdapter;
|
use lance_io::stream::RecordBatchStreamAdapter;
|
||||||
|
|
||||||
|
use crate::DistanceType;
|
||||||
use crate::error::{Error, Result};
|
use crate::error::{Error, Result};
|
||||||
use crate::rerankers::rrf::RRFReranker;
|
use crate::rerankers::rrf::RRFReranker;
|
||||||
use crate::rerankers::{check_reranker_result, NormalizeMethod, Reranker};
|
use crate::rerankers::{NormalizeMethod, Reranker, check_reranker_result};
|
||||||
use crate::table::BaseTable;
|
use crate::table::BaseTable;
|
||||||
use crate::utils::TimeoutStream;
|
use crate::utils::TimeoutStream;
|
||||||
use crate::DistanceType;
|
|
||||||
use crate::{arrow::SendableRecordBatchStream, table::AnyQuery};
|
use crate::{arrow::SendableRecordBatchStream, table::AnyQuery};
|
||||||
|
|
||||||
mod hybrid;
|
mod hybrid;
|
||||||
@@ -161,10 +161,11 @@ impl IntoQueryVector for &dyn Array {
|
|||||||
if data_type != self.data_type() {
|
if data_type != self.data_type() {
|
||||||
Err(Error::InvalidInput {
|
Err(Error::InvalidInput {
|
||||||
message: format!(
|
message: format!(
|
||||||
"failed to create query vector, the input data type was {:?} but the expected data type was {:?}",
|
"failed to create query vector, the input data type was {:?} but the expected data type was {:?}",
|
||||||
self.data_type(),
|
self.data_type(),
|
||||||
data_type
|
data_type
|
||||||
)})
|
),
|
||||||
|
})
|
||||||
} else {
|
} else {
|
||||||
let data = self.to_data();
|
let data = self.to_data();
|
||||||
Ok(make_array(data))
|
Ok(make_array(data))
|
||||||
@@ -186,7 +187,7 @@ impl IntoQueryVector for &[f16] {
|
|||||||
DataType::Float32 => {
|
DataType::Float32 => {
|
||||||
let arr: Vec<f32> = self.iter().map(|x| f32::from(*x)).collect();
|
let arr: Vec<f32> = self.iter().map(|x| f32::from(*x)).collect();
|
||||||
Ok(Arc::new(Float32Array::from(arr)))
|
Ok(Arc::new(Float32Array::from(arr)))
|
||||||
},
|
}
|
||||||
DataType::Float64 => {
|
DataType::Float64 => {
|
||||||
let arr: Vec<f64> = self.iter().map(|x| f64::from(*x)).collect();
|
let arr: Vec<f64> = self.iter().map(|x| f64::from(*x)).collect();
|
||||||
Ok(Arc::new(Float64Array::from(arr)))
|
Ok(Arc::new(Float64Array::from(arr)))
|
||||||
@@ -194,8 +195,7 @@ impl IntoQueryVector for &[f16] {
|
|||||||
_ => Err(Error::InvalidInput {
|
_ => Err(Error::InvalidInput {
|
||||||
message: format!(
|
message: format!(
|
||||||
"failed to create query vector, the input data type was &[f16] but the embedding model \"{}\" expected data type {:?}",
|
"failed to create query vector, the input data type was &[f16] but the embedding model \"{}\" expected data type {:?}",
|
||||||
embedding_model_label,
|
embedding_model_label, data_type
|
||||||
data_type
|
|
||||||
),
|
),
|
||||||
}),
|
}),
|
||||||
}
|
}
|
||||||
@@ -216,7 +216,7 @@ impl IntoQueryVector for &[f32] {
|
|||||||
DataType::Float32 => {
|
DataType::Float32 => {
|
||||||
let arr: Vec<f32> = self.to_vec();
|
let arr: Vec<f32> = self.to_vec();
|
||||||
Ok(Arc::new(Float32Array::from(arr)))
|
Ok(Arc::new(Float32Array::from(arr)))
|
||||||
},
|
}
|
||||||
DataType::Float64 => {
|
DataType::Float64 => {
|
||||||
let arr: Vec<f64> = self.iter().map(|x| *x as f64).collect();
|
let arr: Vec<f64> = self.iter().map(|x| *x as f64).collect();
|
||||||
Ok(Arc::new(Float64Array::from(arr)))
|
Ok(Arc::new(Float64Array::from(arr)))
|
||||||
@@ -224,8 +224,7 @@ impl IntoQueryVector for &[f32] {
|
|||||||
_ => Err(Error::InvalidInput {
|
_ => Err(Error::InvalidInput {
|
||||||
message: format!(
|
message: format!(
|
||||||
"failed to create query vector, the input data type was &[f32] but the embedding model \"{}\" expected data type {:?}",
|
"failed to create query vector, the input data type was &[f32] but the embedding model \"{}\" expected data type {:?}",
|
||||||
embedding_model_label,
|
embedding_model_label, data_type
|
||||||
data_type
|
|
||||||
),
|
),
|
||||||
}),
|
}),
|
||||||
}
|
}
|
||||||
@@ -239,26 +238,25 @@ impl IntoQueryVector for &[f64] {
|
|||||||
embedding_model_label: &str,
|
embedding_model_label: &str,
|
||||||
) -> Result<Arc<dyn Array>> {
|
) -> Result<Arc<dyn Array>> {
|
||||||
match data_type {
|
match data_type {
|
||||||
DataType::Float16 => {
|
DataType::Float16 => {
|
||||||
let arr: Vec<f16> = self.iter().map(|x| f16::from_f64(*x)).collect();
|
let arr: Vec<f16> = self.iter().map(|x| f16::from_f64(*x)).collect();
|
||||||
Ok(Arc::new(Float16Array::from(arr)))
|
Ok(Arc::new(Float16Array::from(arr)))
|
||||||
}
|
|
||||||
DataType::Float32 => {
|
|
||||||
let arr: Vec<f32> = self.iter().map(|x| *x as f32).collect();
|
|
||||||
Ok(Arc::new(Float32Array::from(arr)))
|
|
||||||
},
|
|
||||||
DataType::Float64 => {
|
|
||||||
let arr: Vec<f64> = self.to_vec();
|
|
||||||
Ok(Arc::new(Float64Array::from(arr)))
|
|
||||||
}
|
|
||||||
_ => Err(Error::InvalidInput {
|
|
||||||
message: format!(
|
|
||||||
"failed to create query vector, the input data type was &[f64] but the embedding model \"{}\" expected data type {:?}",
|
|
||||||
embedding_model_label,
|
|
||||||
data_type
|
|
||||||
),
|
|
||||||
}),
|
|
||||||
}
|
}
|
||||||
|
DataType::Float32 => {
|
||||||
|
let arr: Vec<f32> = self.iter().map(|x| *x as f32).collect();
|
||||||
|
Ok(Arc::new(Float32Array::from(arr)))
|
||||||
|
}
|
||||||
|
DataType::Float64 => {
|
||||||
|
let arr: Vec<f64> = self.to_vec();
|
||||||
|
Ok(Arc::new(Float64Array::from(arr)))
|
||||||
|
}
|
||||||
|
_ => Err(Error::InvalidInput {
|
||||||
|
message: format!(
|
||||||
|
"failed to create query vector, the input data type was &[f64] but the embedding model \"{}\" expected data type {:?}",
|
||||||
|
embedding_model_label, data_type
|
||||||
|
),
|
||||||
|
}),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1011,13 +1009,13 @@ impl VectorQuery {
|
|||||||
message: "minimum_nprobes must be greater than 0".to_string(),
|
message: "minimum_nprobes must be greater than 0".to_string(),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
if let Some(maximum_nprobes) = self.request.maximum_nprobes {
|
if let Some(maximum_nprobes) = self.request.maximum_nprobes
|
||||||
if minimum_nprobes > maximum_nprobes {
|
&& minimum_nprobes > maximum_nprobes
|
||||||
return Err(Error::InvalidInput {
|
{
|
||||||
message: "minimum_nprobes must be less than or equal to maximum_nprobes"
|
return Err(Error::InvalidInput {
|
||||||
.to_string(),
|
message: "minimum_nprobes must be less than or equal to maximum_nprobes"
|
||||||
});
|
.to_string(),
|
||||||
}
|
});
|
||||||
}
|
}
|
||||||
self.request.minimum_nprobes = minimum_nprobes;
|
self.request.minimum_nprobes = minimum_nprobes;
|
||||||
Ok(self)
|
Ok(self)
|
||||||
@@ -1407,8 +1405,8 @@ mod tests {
|
|||||||
use super::*;
|
use super::*;
|
||||||
use arrow::{array::downcast_array, compute::concat_batches, datatypes::Int32Type};
|
use arrow::{array::downcast_array, compute::concat_batches, datatypes::Int32Type};
|
||||||
use arrow_array::{
|
use arrow_array::{
|
||||||
cast::AsArray, types::Float32Type, FixedSizeListArray, Float32Array, Int32Array,
|
FixedSizeListArray, Float32Array, Int32Array, RecordBatch, StringArray, cast::AsArray,
|
||||||
RecordBatch, StringArray,
|
types::Float32Type,
|
||||||
};
|
};
|
||||||
use arrow_schema::{DataType, Field as ArrowField, Schema as ArrowSchema};
|
use arrow_schema::{DataType, Field as ArrowField, Schema as ArrowSchema};
|
||||||
use futures::{StreamExt, TryStreamExt};
|
use futures::{StreamExt, TryStreamExt};
|
||||||
@@ -1416,7 +1414,7 @@ mod tests {
|
|||||||
use rand::seq::IndexedRandom;
|
use rand::seq::IndexedRandom;
|
||||||
use tempfile::tempdir;
|
use tempfile::tempdir;
|
||||||
|
|
||||||
use crate::{connect, database::CreateTableMode, index::Index, Table};
|
use crate::{Table, connect, database::CreateTableMode, index::Index};
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_setters_getters() {
|
async fn test_setters_getters() {
|
||||||
@@ -1754,11 +1752,13 @@ mod tests {
|
|||||||
.limit(1)
|
.limit(1)
|
||||||
.execute()
|
.execute()
|
||||||
.await;
|
.await;
|
||||||
assert!(error_result
|
assert!(
|
||||||
.err()
|
error_result
|
||||||
.unwrap()
|
.err()
|
||||||
.to_string()
|
.unwrap()
|
||||||
.contains("No vector column found to match with the query vector dimension: 3"));
|
.to_string()
|
||||||
|
.contains("No vector column found to match with the query vector dimension: 3")
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
@@ -2010,7 +2010,7 @@ mod tests {
|
|||||||
|
|
||||||
// Sample 1 - 3 tokens for each string value
|
// Sample 1 - 3 tokens for each string value
|
||||||
let tokens = ["a", "b", "c", "d", "e"];
|
let tokens = ["a", "b", "c", "d", "e"];
|
||||||
use rand::{rng, Rng};
|
use rand::{Rng, rng};
|
||||||
|
|
||||||
let mut rng = rng();
|
let mut rng = rng();
|
||||||
let text: StringArray = (0..nrows)
|
let text: StringArray = (0..nrows)
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ use arrow::compute::{
|
|||||||
kernels::numeric::{div, sub},
|
kernels::numeric::{div, sub},
|
||||||
max, min,
|
max, min,
|
||||||
};
|
};
|
||||||
use arrow_array::{cast::downcast_array, Float32Array, RecordBatch};
|
use arrow_array::{Float32Array, RecordBatch, cast::downcast_array};
|
||||||
use arrow_schema::{DataType, Field, Schema, SortOptions};
|
use arrow_schema::{DataType, Field, Schema, SortOptions};
|
||||||
use lance::dataset::ROW_ID;
|
use lance::dataset::ROW_ID;
|
||||||
use lance_index::{scalar::inverted::SCORE_COL, vector::DIST_COL};
|
use lance_index::{scalar::inverted::SCORE_COL, vector::DIST_COL};
|
||||||
@@ -253,7 +253,10 @@ mod test {
|
|||||||
let result = rank(batch.clone(), "bad_col", None);
|
let result = rank(batch.clone(), "bad_col", None);
|
||||||
match result {
|
match result {
|
||||||
Err(Error::InvalidInput { message }) => {
|
Err(Error::InvalidInput { message }) => {
|
||||||
assert_eq!("expected column bad_col not found in rank. found columns [\"name\", \"score\"]", message);
|
assert_eq!(
|
||||||
|
"expected column bad_col not found in rank. found columns [\"name\", \"score\"]",
|
||||||
|
message
|
||||||
|
);
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
panic!("expected invalid input error, received {:?}", result)
|
panic!("expected invalid input error, received {:?}", result)
|
||||||
|
|||||||
@@ -4,8 +4,8 @@
|
|||||||
use http::HeaderName;
|
use http::HeaderName;
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use reqwest::{
|
use reqwest::{
|
||||||
header::{HeaderMap, HeaderValue},
|
|
||||||
Body, Request, RequestBuilder, Response,
|
Body, Request, RequestBuilder, Response,
|
||||||
|
header::{HeaderMap, HeaderValue},
|
||||||
};
|
};
|
||||||
use std::{collections::HashMap, future::Future, str::FromStr, sync::Arc, time::Duration};
|
use std::{collections::HashMap, future::Future, str::FromStr, sync::Arc, time::Duration};
|
||||||
|
|
||||||
@@ -446,13 +446,23 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
|
|||||||
})?,
|
})?,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
if let Some(v) = options.0.get("azure_storage_account_name") {
|
// Map azure storage options to x-azure-* headers.
|
||||||
headers.insert(
|
// The option key uses underscores (e.g. "azure_client_id") while the
|
||||||
HeaderName::from_static("x-azure-storage-account-name"),
|
// header uses hyphens (e.g. "x-azure-client-id").
|
||||||
HeaderValue::from_str(v).map_err(|_| Error::InvalidInput {
|
let azure_opts: [(&str, &str); 3] = [
|
||||||
message: format!("non-ascii storage account name '{}' provided", db_name),
|
("azure_storage_account_name", "x-azure-storage-account-name"),
|
||||||
})?,
|
("azure_client_id", "x-azure-client-id"),
|
||||||
);
|
("azure_tenant_id", "x-azure-tenant-id"),
|
||||||
|
];
|
||||||
|
for (opt_key, header_name) in azure_opts {
|
||||||
|
if let Some(v) = options.0.get(opt_key) {
|
||||||
|
headers.insert(
|
||||||
|
HeaderName::from_static(header_name),
|
||||||
|
HeaderValue::from_str(v).map_err(|_| Error::InvalidInput {
|
||||||
|
message: format!("non-ascii value '{}' for option '{}'", v, opt_key),
|
||||||
|
})?,
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (key, value) in &config.extra_headers {
|
for (key, value) in &config.extra_headers {
|
||||||
@@ -650,14 +660,13 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
|
|||||||
pub fn extract_request_id(&self, request: &mut Request) -> String {
|
pub fn extract_request_id(&self, request: &mut Request) -> String {
|
||||||
// Set a request id.
|
// Set a request id.
|
||||||
// TODO: allow the user to supply this, through middleware?
|
// TODO: allow the user to supply this, through middleware?
|
||||||
let request_id = if let Some(request_id) = request.headers().get(REQUEST_ID_HEADER) {
|
if let Some(request_id) = request.headers().get(REQUEST_ID_HEADER) {
|
||||||
request_id.to_str().unwrap().to_string()
|
request_id.to_str().unwrap().to_string()
|
||||||
} else {
|
} else {
|
||||||
let request_id = uuid::Uuid::new_v4().to_string();
|
let request_id = uuid::Uuid::new_v4().to_string();
|
||||||
self.set_request_id(request, &request_id);
|
self.set_request_id(request, &request_id);
|
||||||
request_id
|
request_id
|
||||||
};
|
}
|
||||||
request_id
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Set the request ID header
|
/// Set the request ID header
|
||||||
@@ -1076,4 +1085,34 @@ mod tests {
|
|||||||
_ => panic!("Expected Runtime error"),
|
_ => panic!("Expected Runtime error"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_default_headers_azure_opts() {
|
||||||
|
let mut opts = HashMap::new();
|
||||||
|
opts.insert(
|
||||||
|
"azure_storage_account_name".to_string(),
|
||||||
|
"myaccount".to_string(),
|
||||||
|
);
|
||||||
|
opts.insert("azure_client_id".to_string(), "my-client-id".to_string());
|
||||||
|
opts.insert("azure_tenant_id".to_string(), "my-tenant-id".to_string());
|
||||||
|
let remote_opts = RemoteOptions::new(opts);
|
||||||
|
|
||||||
|
let headers = RestfulLanceDbClient::<Sender>::default_headers(
|
||||||
|
"test-key",
|
||||||
|
"us-east-1",
|
||||||
|
"testdb",
|
||||||
|
false,
|
||||||
|
&remote_opts,
|
||||||
|
None,
|
||||||
|
&ClientConfig::default(),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
headers.get("x-azure-storage-account-name").unwrap(),
|
||||||
|
"myaccount"
|
||||||
|
);
|
||||||
|
assert_eq!(headers.get("x-azure-client-id").unwrap(), "my-client-id");
|
||||||
|
assert_eq!(headers.get("x-azure-tenant-id").unwrap(), "my-tenant-id");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ use lance_namespace::models::{
|
|||||||
ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
|
ListNamespacesResponse, ListTablesRequest, ListTablesResponse,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use crate::Error;
|
||||||
use crate::database::{
|
use crate::database::{
|
||||||
CloneTableRequest, CreateTableMode, CreateTableRequest, Database, DatabaseOptions,
|
CloneTableRequest, CreateTableMode, CreateTableRequest, Database, DatabaseOptions,
|
||||||
OpenTableRequest, ReadConsistency, TableNamesRequest,
|
OpenTableRequest, ReadConsistency, TableNamesRequest,
|
||||||
@@ -23,12 +24,11 @@ use crate::database::{
|
|||||||
use crate::error::Result;
|
use crate::error::Result;
|
||||||
use crate::remote::util::stream_as_body;
|
use crate::remote::util::stream_as_body;
|
||||||
use crate::table::BaseTable;
|
use crate::table::BaseTable;
|
||||||
use crate::Error;
|
|
||||||
|
|
||||||
|
use super::ARROW_STREAM_CONTENT_TYPE;
|
||||||
use super::client::{ClientConfig, HttpSend, RequestResultExt, RestfulLanceDbClient, Sender};
|
use super::client::{ClientConfig, HttpSend, RequestResultExt, RestfulLanceDbClient, Sender};
|
||||||
use super::table::RemoteTable;
|
use super::table::RemoteTable;
|
||||||
use super::util::parse_server_version;
|
use super::util::parse_server_version;
|
||||||
use super::ARROW_STREAM_CONTENT_TYPE;
|
|
||||||
|
|
||||||
// Request structure for the remote clone table API
|
// Request structure for the remote clone table API
|
||||||
#[derive(serde::Serialize)]
|
#[derive(serde::Serialize)]
|
||||||
@@ -249,9 +249,9 @@ impl RemoteDatabase {
|
|||||||
#[cfg(all(test, feature = "remote"))]
|
#[cfg(all(test, feature = "remote"))]
|
||||||
mod test_utils {
|
mod test_utils {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::remote::ClientConfig;
|
||||||
use crate::remote::client::test_utils::MockSender;
|
use crate::remote::client::test_utils::MockSender;
|
||||||
use crate::remote::client::test_utils::{client_with_handler, client_with_handler_and_config};
|
use crate::remote::client::test_utils::{client_with_handler, client_with_handler_and_config};
|
||||||
use crate::remote::ClientConfig;
|
|
||||||
|
|
||||||
impl RemoteDatabase<MockSender> {
|
impl RemoteDatabase<MockSender> {
|
||||||
pub fn new_mock<F, T>(handler: F) -> Self
|
pub fn new_mock<F, T>(handler: F) -> Self
|
||||||
@@ -464,6 +464,7 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
|
|||||||
lance_read_params: None,
|
lance_read_params: None,
|
||||||
location: None,
|
location: None,
|
||||||
namespace_client: None,
|
namespace_client: None,
|
||||||
|
managed_versioning: None,
|
||||||
};
|
};
|
||||||
let req = (callback)(req);
|
let req = (callback)(req);
|
||||||
self.open_table(req).await
|
self.open_table(req).await
|
||||||
@@ -777,7 +778,12 @@ impl RemoteOptions {
|
|||||||
|
|
||||||
impl From<StorageOptions> for RemoteOptions {
|
impl From<StorageOptions> for RemoteOptions {
|
||||||
fn from(options: StorageOptions) -> Self {
|
fn from(options: StorageOptions) -> Self {
|
||||||
let supported_opts = vec!["account_name", "azure_storage_account_name"];
|
let supported_opts = vec![
|
||||||
|
"account_name",
|
||||||
|
"azure_storage_account_name",
|
||||||
|
"azure_client_id",
|
||||||
|
"azure_tenant_id",
|
||||||
|
];
|
||||||
let mut filtered = HashMap::new();
|
let mut filtered = HashMap::new();
|
||||||
for opt in supported_opts {
|
for opt in supported_opts {
|
||||||
if let Some(v) = options.0.get(opt) {
|
if let Some(v) = options.0.get(opt) {
|
||||||
@@ -799,9 +805,9 @@ mod tests {
|
|||||||
|
|
||||||
use crate::connection::ConnectBuilder;
|
use crate::connection::ConnectBuilder;
|
||||||
use crate::{
|
use crate::{
|
||||||
database::CreateTableMode,
|
|
||||||
remote::{ClientConfig, HeaderProvider, ARROW_STREAM_CONTENT_TYPE, JSON_CONTENT_TYPE},
|
|
||||||
Connection, Error,
|
Connection, Error,
|
||||||
|
database::CreateTableMode,
|
||||||
|
remote::{ARROW_STREAM_CONTENT_TYPE, ClientConfig, HeaderProvider, JSON_CONTENT_TYPE},
|
||||||
};
|
};
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
// SPDX-License-Identifier: Apache-2.0
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||||
|
|
||||||
use crate::remote::RetryConfig;
|
|
||||||
use crate::Error;
|
use crate::Error;
|
||||||
|
use crate::remote::RetryConfig;
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
|
|||||||
@@ -6,15 +6,14 @@ pub mod insert;
|
|||||||
use self::insert::RemoteInsertExec;
|
use self::insert::RemoteInsertExec;
|
||||||
use crate::expr::expr_to_sql_string;
|
use crate::expr::expr_to_sql_string;
|
||||||
|
|
||||||
|
use super::ARROW_STREAM_CONTENT_TYPE;
|
||||||
use super::client::RequestResultExt;
|
use super::client::RequestResultExt;
|
||||||
use super::client::{HttpSend, RestfulLanceDbClient, Sender};
|
use super::client::{HttpSend, RestfulLanceDbClient, Sender};
|
||||||
use super::db::ServerVersion;
|
use super::db::ServerVersion;
|
||||||
use super::ARROW_STREAM_CONTENT_TYPE;
|
|
||||||
use crate::index::waiter::wait_for_index;
|
|
||||||
use crate::index::Index;
|
use crate::index::Index;
|
||||||
use crate::index::IndexStatistics;
|
use crate::index::IndexStatistics;
|
||||||
|
use crate::index::waiter::wait_for_index;
|
||||||
use crate::query::{QueryFilter, QueryRequest, Select, VectorQueryRequest};
|
use crate::query::{QueryFilter, QueryRequest, Select, VectorQueryRequest};
|
||||||
use crate::table::query::create_multi_vector_plan;
|
|
||||||
use crate::table::AddColumnsResult;
|
use crate::table::AddColumnsResult;
|
||||||
use crate::table::AddResult;
|
use crate::table::AddResult;
|
||||||
use crate::table::AlterColumnsResult;
|
use crate::table::AlterColumnsResult;
|
||||||
@@ -23,19 +22,20 @@ use crate::table::DropColumnsResult;
|
|||||||
use crate::table::MergeResult;
|
use crate::table::MergeResult;
|
||||||
use crate::table::Tags;
|
use crate::table::Tags;
|
||||||
use crate::table::UpdateResult;
|
use crate::table::UpdateResult;
|
||||||
|
use crate::table::query::create_multi_vector_plan;
|
||||||
use crate::table::{AnyQuery, Filter, TableStatistics};
|
use crate::table::{AnyQuery, Filter, TableStatistics};
|
||||||
use crate::utils::background_cache::BackgroundCache;
|
use crate::utils::background_cache::BackgroundCache;
|
||||||
use crate::utils::{supported_btree_data_type, supported_vector_data_type};
|
use crate::utils::{supported_btree_data_type, supported_vector_data_type};
|
||||||
|
use crate::{DistanceType, Error};
|
||||||
use crate::{
|
use crate::{
|
||||||
error::Result,
|
error::Result,
|
||||||
index::{IndexBuilder, IndexConfig},
|
index::{IndexBuilder, IndexConfig},
|
||||||
query::QueryExecutionOptions,
|
query::QueryExecutionOptions,
|
||||||
table::{
|
table::{
|
||||||
merge::MergeInsertBuilder, AddDataBuilder, BaseTable, OptimizeAction, OptimizeStats,
|
AddDataBuilder, BaseTable, OptimizeAction, OptimizeStats, TableDefinition, UpdateBuilder,
|
||||||
TableDefinition, UpdateBuilder,
|
merge::MergeInsertBuilder,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
use crate::{DistanceType, Error};
|
|
||||||
use arrow_array::{RecordBatch, RecordBatchIterator, RecordBatchReader};
|
use arrow_array::{RecordBatch, RecordBatchIterator, RecordBatchReader};
|
||||||
use arrow_ipc::reader::FileReader;
|
use arrow_ipc::reader::FileReader;
|
||||||
use arrow_schema::{DataType, SchemaRef};
|
use arrow_schema::{DataType, SchemaRef};
|
||||||
@@ -50,7 +50,7 @@ use lance::arrow::json::{JsonDataType, JsonSchema};
|
|||||||
use lance::dataset::refs::TagContents;
|
use lance::dataset::refs::TagContents;
|
||||||
use lance::dataset::scanner::DatasetRecordBatchStream;
|
use lance::dataset::scanner::DatasetRecordBatchStream;
|
||||||
use lance::dataset::{ColumnAlteration, NewColumnTransform, Version};
|
use lance::dataset::{ColumnAlteration, NewColumnTransform, Version};
|
||||||
use lance_datafusion::exec::{execute_plan, OneShotExec};
|
use lance_datafusion::exec::{OneShotExec, execute_plan};
|
||||||
use reqwest::{RequestBuilder, Response};
|
use reqwest::{RequestBuilder, Response};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use serde_json::Number;
|
use serde_json::Number;
|
||||||
@@ -612,8 +612,8 @@ impl<S: HttpSend> RemoteTable<S> {
|
|||||||
message: format!(
|
message: format!(
|
||||||
"Cannot mutate table reference fixed at version {}. Call checkout_latest() to get a mutable table reference.",
|
"Cannot mutate table reference fixed at version {}. Call checkout_latest() to get a mutable table reference.",
|
||||||
version
|
version
|
||||||
)
|
),
|
||||||
})
|
}),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -697,10 +697,10 @@ impl<S: HttpSend> RemoteTable<S> {
|
|||||||
Error::Retry { status_code, .. } => *status_code,
|
Error::Retry { status_code, .. } => *status_code,
|
||||||
_ => None,
|
_ => None,
|
||||||
};
|
};
|
||||||
if let Some(status_code) = status_code {
|
if let Some(status_code) = status_code
|
||||||
if Self::should_invalidate_cache_for_status(status_code) {
|
&& Self::should_invalidate_cache_for_status(status_code)
|
||||||
self.invalidate_schema_cache();
|
{
|
||||||
}
|
self.invalidate_schema_cache();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -783,9 +783,9 @@ impl<S: HttpSend> std::fmt::Display for RemoteTable<S> {
|
|||||||
#[cfg(all(test, feature = "remote"))]
|
#[cfg(all(test, feature = "remote"))]
|
||||||
mod test_utils {
|
mod test_utils {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::remote::client::test_utils::client_with_handler;
|
|
||||||
use crate::remote::client::test_utils::{client_with_handler_and_config, MockSender};
|
|
||||||
use crate::remote::ClientConfig;
|
use crate::remote::ClientConfig;
|
||||||
|
use crate::remote::client::test_utils::client_with_handler;
|
||||||
|
use crate::remote::client::test_utils::{MockSender, client_with_handler_and_config};
|
||||||
|
|
||||||
impl RemoteTable<MockSender> {
|
impl RemoteTable<MockSender> {
|
||||||
pub fn new_mock<F, T>(name: String, handler: F, version: Option<semver::Version>) -> Self
|
pub fn new_mock<F, T>(name: String, handler: F, version: Option<semver::Version>) -> Self
|
||||||
@@ -1227,7 +1227,10 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
|||||||
let body = response.text().await.err_to_http(request_id.clone())?;
|
let body = response.text().await.err_to_http(request_id.clone())?;
|
||||||
if body.trim().is_empty() {
|
if body.trim().is_empty() {
|
||||||
// Backward compatible with old servers
|
// Backward compatible with old servers
|
||||||
return Ok(DeleteResult { version: 0 });
|
return Ok(DeleteResult {
|
||||||
|
num_deleted_rows: 0,
|
||||||
|
version: 0,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
let delete_response: DeleteResult =
|
let delete_response: DeleteResult =
|
||||||
serde_json::from_str(&body).map_err(|e| Error::Http {
|
serde_json::from_str(&body).map_err(|e| Error::Http {
|
||||||
@@ -1248,13 +1251,13 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
|||||||
0 => {
|
0 => {
|
||||||
return Err(Error::InvalidInput {
|
return Err(Error::InvalidInput {
|
||||||
message: "No columns specified".into(),
|
message: "No columns specified".into(),
|
||||||
})
|
});
|
||||||
}
|
}
|
||||||
1 => index.columns.pop().unwrap(),
|
1 => index.columns.pop().unwrap(),
|
||||||
_ => {
|
_ => {
|
||||||
return Err(Error::NotSupported {
|
return Err(Error::NotSupported {
|
||||||
message: "Indices over multiple columns not yet supported".into(),
|
message: "Indices over multiple columns not yet supported".into(),
|
||||||
})
|
});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let mut body = serde_json::json!({
|
let mut body = serde_json::json!({
|
||||||
@@ -1273,73 +1276,24 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
match index.index {
|
fn to_json(params: &impl serde::Serialize) -> crate::Result<serde_json::Value> {
|
||||||
// TODO: Should we pass the actual index parameters? SaaS does not
|
serde_json::to_value(params).map_err(|e| Error::InvalidInput {
|
||||||
// yet support them.
|
message: format!("failed to serialize index params {:?}", e),
|
||||||
Index::IvfFlat(index) => {
|
})
|
||||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_FLAT".to_string());
|
}
|
||||||
body[METRIC_TYPE_KEY] =
|
|
||||||
serde_json::Value::String(index.distance_type.to_string().to_lowercase());
|
// Map each Index variant to its wire type name and serializable params.
|
||||||
if let Some(num_partitions) = index.num_partitions {
|
// Auto is special-cased since it needs schema inspection.
|
||||||
body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
|
let (index_type_str, params) = match &index.index {
|
||||||
}
|
Index::IvfFlat(p) => ("IVF_FLAT", Some(to_json(p)?)),
|
||||||
}
|
Index::IvfPq(p) => ("IVF_PQ", Some(to_json(p)?)),
|
||||||
Index::IvfPq(index) => {
|
Index::IvfSq(p) => ("IVF_SQ", Some(to_json(p)?)),
|
||||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_PQ".to_string());
|
Index::IvfHnswSq(p) => ("IVF_HNSW_SQ", Some(to_json(p)?)),
|
||||||
body[METRIC_TYPE_KEY] =
|
Index::IvfRq(p) => ("IVF_RQ", Some(to_json(p)?)),
|
||||||
serde_json::Value::String(index.distance_type.to_string().to_lowercase());
|
Index::BTree(p) => ("BTREE", Some(to_json(p)?)),
|
||||||
if let Some(num_partitions) = index.num_partitions {
|
Index::Bitmap(p) => ("BITMAP", Some(to_json(p)?)),
|
||||||
body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
|
Index::LabelList(p) => ("LABEL_LIST", Some(to_json(p)?)),
|
||||||
}
|
Index::FTS(p) => ("FTS", Some(to_json(p)?)),
|
||||||
if let Some(num_bits) = index.num_bits {
|
|
||||||
body["num_bits"] = serde_json::Value::Number(num_bits.into());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Index::IvfSq(index) => {
|
|
||||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_SQ".to_string());
|
|
||||||
body[METRIC_TYPE_KEY] =
|
|
||||||
serde_json::Value::String(index.distance_type.to_string().to_lowercase());
|
|
||||||
if let Some(num_partitions) = index.num_partitions {
|
|
||||||
body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Index::IvfHnswSq(index) => {
|
|
||||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_HNSW_SQ".to_string());
|
|
||||||
body[METRIC_TYPE_KEY] =
|
|
||||||
serde_json::Value::String(index.distance_type.to_string().to_lowercase());
|
|
||||||
if let Some(num_partitions) = index.num_partitions {
|
|
||||||
body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Index::IvfRq(index) => {
|
|
||||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_RQ".to_string());
|
|
||||||
body[METRIC_TYPE_KEY] =
|
|
||||||
serde_json::Value::String(index.distance_type.to_string().to_lowercase());
|
|
||||||
if let Some(num_partitions) = index.num_partitions {
|
|
||||||
body["num_partitions"] = serde_json::Value::Number(num_partitions.into());
|
|
||||||
}
|
|
||||||
if let Some(num_bits) = index.num_bits {
|
|
||||||
body["num_bits"] = serde_json::Value::Number(num_bits.into());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Index::BTree(_) => {
|
|
||||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("BTREE".to_string());
|
|
||||||
}
|
|
||||||
Index::Bitmap(_) => {
|
|
||||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("BITMAP".to_string());
|
|
||||||
}
|
|
||||||
Index::LabelList(_) => {
|
|
||||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("LABEL_LIST".to_string());
|
|
||||||
}
|
|
||||||
Index::FTS(fts) => {
|
|
||||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("FTS".to_string());
|
|
||||||
let params = serde_json::to_value(&fts).map_err(|e| Error::InvalidInput {
|
|
||||||
message: format!("failed to serialize FTS index params {:?}", e),
|
|
||||||
})?;
|
|
||||||
for (key, value) in params.as_object().unwrap() {
|
|
||||||
body[key] = value.clone();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Index::Auto => {
|
Index::Auto => {
|
||||||
let schema = self.schema().await?;
|
let schema = self.schema().await?;
|
||||||
let field = schema
|
let field = schema
|
||||||
@@ -1348,11 +1302,11 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
|||||||
message: format!("Column {} not found in schema", column),
|
message: format!("Column {} not found in schema", column),
|
||||||
})?;
|
})?;
|
||||||
if supported_vector_data_type(field.data_type()) {
|
if supported_vector_data_type(field.data_type()) {
|
||||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("IVF_PQ".to_string());
|
|
||||||
body[METRIC_TYPE_KEY] =
|
body[METRIC_TYPE_KEY] =
|
||||||
serde_json::Value::String(DistanceType::L2.to_string().to_lowercase());
|
serde_json::Value::String(DistanceType::L2.to_string().to_lowercase());
|
||||||
|
("IVF_PQ", None)
|
||||||
} else if supported_btree_data_type(field.data_type()) {
|
} else if supported_btree_data_type(field.data_type()) {
|
||||||
body[INDEX_TYPE_KEY] = serde_json::Value::String("BTREE".to_string());
|
("BTREE", None)
|
||||||
} else {
|
} else {
|
||||||
return Err(Error::NotSupported {
|
return Err(Error::NotSupported {
|
||||||
message: format!(
|
message: format!(
|
||||||
@@ -1366,10 +1320,17 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
|||||||
_ => {
|
_ => {
|
||||||
return Err(Error::NotSupported {
|
return Err(Error::NotSupported {
|
||||||
message: "Index type not supported".into(),
|
message: "Index type not supported".into(),
|
||||||
})
|
});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
body[INDEX_TYPE_KEY] = index_type_str.into();
|
||||||
|
if let Some(params) = params {
|
||||||
|
for (key, value) in params.as_object().expect("params should be a JSON object") {
|
||||||
|
body[key] = value.clone();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let request = request.json(&body);
|
let request = request.json(&body);
|
||||||
|
|
||||||
let (request_id, response) = self.send(request, true).await?;
|
let (request_id, response) = self.send(request, true).await?;
|
||||||
@@ -1810,8 +1771,8 @@ impl TryFrom<MergeInsertBuilder> for MergeInsertRequest {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
use std::{collections::HashMap, pin::Pin};
|
use std::{collections::HashMap, pin::Pin};
|
||||||
|
|
||||||
@@ -1820,25 +1781,27 @@ mod tests {
|
|||||||
use crate::table::AddDataMode;
|
use crate::table::AddDataMode;
|
||||||
|
|
||||||
use arrow::{array::AsArray, compute::concat_batches, datatypes::Int32Type};
|
use arrow::{array::AsArray, compute::concat_batches, datatypes::Int32Type};
|
||||||
use arrow_array::{record_batch, Int32Array, RecordBatch, RecordBatchIterator};
|
use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, record_batch};
|
||||||
use arrow_schema::{DataType, Field, Schema};
|
use arrow_schema::{DataType, Field, Schema};
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use futures::{future::BoxFuture, StreamExt, TryFutureExt};
|
use futures::{StreamExt, TryFutureExt, future::BoxFuture};
|
||||||
use lance_index::scalar::inverted::query::MatchQuery;
|
use lance_index::scalar::inverted::query::MatchQuery;
|
||||||
use lance_index::scalar::{FullTextSearchQuery, InvertedIndexParams};
|
use lance_index::scalar::{FullTextSearchQuery, InvertedIndexParams};
|
||||||
use reqwest::Body;
|
use reqwest::Body;
|
||||||
use rstest::rstest;
|
use rstest::rstest;
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
|
|
||||||
use crate::index::vector::{IvfFlatIndexBuilder, IvfHnswSqIndexBuilder};
|
use crate::index::vector::{
|
||||||
use crate::remote::db::DEFAULT_SERVER_VERSION;
|
IvfFlatIndexBuilder, IvfHnswSqIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder,
|
||||||
|
};
|
||||||
use crate::remote::JSON_CONTENT_TYPE;
|
use crate::remote::JSON_CONTENT_TYPE;
|
||||||
|
use crate::remote::db::DEFAULT_SERVER_VERSION;
|
||||||
use crate::utils::background_cache::clock;
|
use crate::utils::background_cache::clock;
|
||||||
use crate::{
|
use crate::{
|
||||||
index::{vector::IvfPqIndexBuilder, Index, IndexStatistics, IndexType},
|
DistanceType, Error, Table,
|
||||||
|
index::{Index, IndexStatistics, IndexType, vector::IvfPqIndexBuilder},
|
||||||
query::{ExecutableQuery, QueryBase},
|
query::{ExecutableQuery, QueryBase},
|
||||||
remote::ARROW_FILE_CONTENT_TYPE,
|
remote::ARROW_FILE_CONTENT_TYPE,
|
||||||
DistanceType, Error, Table,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
@@ -2067,11 +2030,13 @@ mod tests {
|
|||||||
.unwrap(),
|
.unwrap(),
|
||||||
"/v1/table/my_table/insert/" => {
|
"/v1/table/my_table/insert/" => {
|
||||||
assert_eq!(request.method(), "POST");
|
assert_eq!(request.method(), "POST");
|
||||||
assert!(request
|
assert!(
|
||||||
.url()
|
request
|
||||||
.query_pairs()
|
.url()
|
||||||
.filter(|(k, _)| k == "mode")
|
.query_pairs()
|
||||||
.all(|(_, v)| v == "append"));
|
.filter(|(k, _)| k == "mode")
|
||||||
|
.all(|(_, v)| v == "append")
|
||||||
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
request.headers().get("Content-Type").unwrap(),
|
request.headers().get("Content-Type").unwrap(),
|
||||||
ARROW_STREAM_CONTENT_TYPE
|
ARROW_STREAM_CONTENT_TYPE
|
||||||
@@ -2992,6 +2957,8 @@ mod tests {
|
|||||||
"IVF_FLAT",
|
"IVF_FLAT",
|
||||||
json!({
|
json!({
|
||||||
"metric_type": "hamming",
|
"metric_type": "hamming",
|
||||||
|
"sample_rate": 256,
|
||||||
|
"max_iterations": 50,
|
||||||
}),
|
}),
|
||||||
Index::IvfFlat(IvfFlatIndexBuilder::default().distance_type(DistanceType::Hamming)),
|
Index::IvfFlat(IvfFlatIndexBuilder::default().distance_type(DistanceType::Hamming)),
|
||||||
),
|
),
|
||||||
@@ -3000,6 +2967,8 @@ mod tests {
|
|||||||
json!({
|
json!({
|
||||||
"metric_type": "hamming",
|
"metric_type": "hamming",
|
||||||
"num_partitions": 128,
|
"num_partitions": 128,
|
||||||
|
"sample_rate": 256,
|
||||||
|
"max_iterations": 50,
|
||||||
}),
|
}),
|
||||||
Index::IvfFlat(
|
Index::IvfFlat(
|
||||||
IvfFlatIndexBuilder::default()
|
IvfFlatIndexBuilder::default()
|
||||||
@@ -3011,6 +2980,8 @@ mod tests {
|
|||||||
"IVF_PQ",
|
"IVF_PQ",
|
||||||
json!({
|
json!({
|
||||||
"metric_type": "l2",
|
"metric_type": "l2",
|
||||||
|
"sample_rate": 256,
|
||||||
|
"max_iterations": 50,
|
||||||
}),
|
}),
|
||||||
Index::IvfPq(Default::default()),
|
Index::IvfPq(Default::default()),
|
||||||
),
|
),
|
||||||
@@ -3020,6 +2991,8 @@ mod tests {
|
|||||||
"metric_type": "cosine",
|
"metric_type": "cosine",
|
||||||
"num_partitions": 128,
|
"num_partitions": 128,
|
||||||
"num_bits": 4,
|
"num_bits": 4,
|
||||||
|
"sample_rate": 256,
|
||||||
|
"max_iterations": 50,
|
||||||
}),
|
}),
|
||||||
Index::IvfPq(
|
Index::IvfPq(
|
||||||
IvfPqIndexBuilder::default()
|
IvfPqIndexBuilder::default()
|
||||||
@@ -3028,10 +3001,29 @@ mod tests {
|
|||||||
.num_bits(4),
|
.num_bits(4),
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
"IVF_PQ",
|
||||||
|
json!({
|
||||||
|
"metric_type": "l2",
|
||||||
|
"num_sub_vectors": 16,
|
||||||
|
"sample_rate": 512,
|
||||||
|
"max_iterations": 100,
|
||||||
|
}),
|
||||||
|
Index::IvfPq(
|
||||||
|
IvfPqIndexBuilder::default()
|
||||||
|
.num_sub_vectors(16)
|
||||||
|
.sample_rate(512)
|
||||||
|
.max_iterations(100),
|
||||||
|
),
|
||||||
|
),
|
||||||
(
|
(
|
||||||
"IVF_HNSW_SQ",
|
"IVF_HNSW_SQ",
|
||||||
json!({
|
json!({
|
||||||
"metric_type": "l2",
|
"metric_type": "l2",
|
||||||
|
"sample_rate": 256,
|
||||||
|
"max_iterations": 50,
|
||||||
|
"m": 20,
|
||||||
|
"ef_construction": 300,
|
||||||
}),
|
}),
|
||||||
Index::IvfHnswSq(Default::default()),
|
Index::IvfHnswSq(Default::default()),
|
||||||
),
|
),
|
||||||
@@ -3040,11 +3032,65 @@ mod tests {
|
|||||||
json!({
|
json!({
|
||||||
"metric_type": "l2",
|
"metric_type": "l2",
|
||||||
"num_partitions": 128,
|
"num_partitions": 128,
|
||||||
|
"sample_rate": 256,
|
||||||
|
"max_iterations": 50,
|
||||||
|
"m": 40,
|
||||||
|
"ef_construction": 500,
|
||||||
}),
|
}),
|
||||||
Index::IvfHnswSq(
|
Index::IvfHnswSq(
|
||||||
IvfHnswSqIndexBuilder::default()
|
IvfHnswSqIndexBuilder::default()
|
||||||
.distance_type(DistanceType::L2)
|
.distance_type(DistanceType::L2)
|
||||||
.num_partitions(128),
|
.num_partitions(128)
|
||||||
|
.num_edges(40)
|
||||||
|
.ef_construction(500),
|
||||||
|
),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"IVF_SQ",
|
||||||
|
json!({
|
||||||
|
"metric_type": "l2",
|
||||||
|
"sample_rate": 256,
|
||||||
|
"max_iterations": 50,
|
||||||
|
}),
|
||||||
|
Index::IvfSq(Default::default()),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"IVF_SQ",
|
||||||
|
json!({
|
||||||
|
"metric_type": "cosine",
|
||||||
|
"num_partitions": 64,
|
||||||
|
"sample_rate": 256,
|
||||||
|
"max_iterations": 50,
|
||||||
|
}),
|
||||||
|
Index::IvfSq(
|
||||||
|
IvfSqIndexBuilder::default()
|
||||||
|
.distance_type(DistanceType::Cosine)
|
||||||
|
.num_partitions(64),
|
||||||
|
),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"IVF_RQ",
|
||||||
|
json!({
|
||||||
|
"metric_type": "l2",
|
||||||
|
"sample_rate": 256,
|
||||||
|
"max_iterations": 50,
|
||||||
|
}),
|
||||||
|
Index::IvfRq(Default::default()),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"IVF_RQ",
|
||||||
|
json!({
|
||||||
|
"metric_type": "cosine",
|
||||||
|
"num_partitions": 64,
|
||||||
|
"num_bits": 8,
|
||||||
|
"sample_rate": 256,
|
||||||
|
"max_iterations": 50,
|
||||||
|
}),
|
||||||
|
Index::IvfRq(
|
||||||
|
IvfRqIndexBuilder::default()
|
||||||
|
.distance_type(DistanceType::Cosine)
|
||||||
|
.num_partitions(64)
|
||||||
|
.num_bits(8),
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
// HNSW_PQ isn't yet supported on SaaS
|
// HNSW_PQ isn't yet supported on SaaS
|
||||||
@@ -3548,7 +3594,7 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn _make_table_with_indices(unindexed_rows: usize) -> Table {
|
fn _make_table_with_indices(unindexed_rows: usize) -> Table {
|
||||||
let table = Table::new_with_handler("my_table", move |request| {
|
Table::new_with_handler("my_table", move |request| {
|
||||||
assert_eq!(request.method(), "POST");
|
assert_eq!(request.method(), "POST");
|
||||||
|
|
||||||
let response_body = match request.url().path() {
|
let response_body = match request.url().path() {
|
||||||
@@ -3592,8 +3638,7 @@ mod tests {
|
|||||||
let body = serde_json::to_string(&response_body).unwrap();
|
let body = serde_json::to_string(&response_body).unwrap();
|
||||||
let status = if body == "null" { 404 } else { 200 };
|
let status = if body == "null" { 404 } else { 200 };
|
||||||
http::Response::builder().status(status).body(body).unwrap()
|
http::Response::builder().status(status).body(body).unwrap()
|
||||||
});
|
})
|
||||||
table
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
@@ -3804,8 +3849,8 @@ mod tests {
|
|||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn test_uri_caching() {
|
async fn test_uri_caching() {
|
||||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||||
|
|
||||||
let call_count = Arc::new(AtomicUsize::new(0));
|
let call_count = Arc::new(AtomicUsize::new(0));
|
||||||
let call_count_clone = call_count.clone();
|
let call_count_clone = call_count.clone();
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user