mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-05 19:32:56 +00:00
Compare commits
15 Commits
codex/upda
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9d129c7e86 | ||
|
|
44878dd9a5 | ||
|
|
4b5bb2d76c | ||
|
|
434f4124fc | ||
|
|
03a1a99270 | ||
|
|
0110e3b6f8 | ||
|
|
f1f85b0a84 | ||
|
|
d6daa08b54 | ||
|
|
17b71de22e | ||
|
|
a250d8e7df | ||
|
|
5a2b33581e | ||
|
|
3d254f61b0 | ||
|
|
d15e380be1 | ||
|
|
0baf807be0 | ||
|
|
76bcc78910 |
@@ -19,7 +19,7 @@ rustflags = [
|
|||||||
"-Wclippy::string_add_assign",
|
"-Wclippy::string_add_assign",
|
||||||
"-Wclippy::string_add",
|
"-Wclippy::string_add",
|
||||||
"-Wclippy::string_lit_as_bytes",
|
"-Wclippy::string_lit_as_bytes",
|
||||||
"-Wclippy::string_to_string",
|
"-Wclippy::implicit_clone",
|
||||||
"-Wclippy::use_self",
|
"-Wclippy::use_self",
|
||||||
"-Dclippy::cargo",
|
"-Dclippy::cargo",
|
||||||
"-Dclippy::dbg_macro",
|
"-Dclippy::dbg_macro",
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ runs:
|
|||||||
with:
|
with:
|
||||||
command: build
|
command: build
|
||||||
working-directory: python
|
working-directory: python
|
||||||
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
|
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||||
target: x86_64-unknown-linux-gnu
|
target: x86_64-unknown-linux-gnu
|
||||||
manylinux: ${{ inputs.manylinux }}
|
manylinux: ${{ inputs.manylinux }}
|
||||||
args: ${{ inputs.args }}
|
args: ${{ inputs.args }}
|
||||||
@@ -46,7 +46,7 @@ runs:
|
|||||||
with:
|
with:
|
||||||
command: build
|
command: build
|
||||||
working-directory: python
|
working-directory: python
|
||||||
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
|
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||||
target: aarch64-unknown-linux-gnu
|
target: aarch64-unknown-linux-gnu
|
||||||
manylinux: ${{ inputs.manylinux }}
|
manylinux: ${{ inputs.manylinux }}
|
||||||
args: ${{ inputs.args }}
|
args: ${{ inputs.args }}
|
||||||
|
|||||||
2
.github/workflows/build_mac_wheel/action.yml
vendored
2
.github/workflows/build_mac_wheel/action.yml
vendored
@@ -22,5 +22,5 @@ runs:
|
|||||||
command: build
|
command: build
|
||||||
# TODO: pass through interpreter
|
# TODO: pass through interpreter
|
||||||
args: ${{ inputs.args }}
|
args: ${{ inputs.args }}
|
||||||
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
|
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||||
working-directory: python
|
working-directory: python
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ runs:
|
|||||||
with:
|
with:
|
||||||
command: build
|
command: build
|
||||||
args: ${{ inputs.args }}
|
args: ${{ inputs.args }}
|
||||||
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
|
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||||
working-directory: python
|
working-directory: python
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
|
|||||||
@@ -98,3 +98,30 @@ jobs:
|
|||||||
|
|
||||||
printenv OPENAI_API_KEY | codex login --with-api-key
|
printenv OPENAI_API_KEY | codex login --with-api-key
|
||||||
codex --config shell_environment_policy.ignore_default_excludes=true exec --dangerously-bypass-approvals-and-sandbox "$(cat /tmp/codex-prompt.txt)"
|
codex --config shell_environment_policy.ignore_default_excludes=true exec --dangerously-bypass-approvals-and-sandbox "$(cat /tmp/codex-prompt.txt)"
|
||||||
|
|
||||||
|
- name: Trigger sophon dependency update
|
||||||
|
env:
|
||||||
|
TAG: ${{ inputs.tag }}
|
||||||
|
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
VERSION="${TAG#refs/tags/}"
|
||||||
|
VERSION="${VERSION#v}"
|
||||||
|
LANCEDB_BRANCH="codex/update-lance-${VERSION//[^a-zA-Z0-9]/-}"
|
||||||
|
|
||||||
|
echo "Triggering sophon workflow with:"
|
||||||
|
echo " lance_ref: ${TAG#refs/tags/}"
|
||||||
|
echo " lancedb_ref: ${LANCEDB_BRANCH}"
|
||||||
|
|
||||||
|
gh workflow run codex-bump-lancedb-lance.yml \
|
||||||
|
--repo lancedb/sophon \
|
||||||
|
-f lance_ref="${TAG#refs/tags/}" \
|
||||||
|
-f lancedb_ref="${LANCEDB_BRANCH}"
|
||||||
|
|
||||||
|
- name: Show latest sophon workflow run
|
||||||
|
env:
|
||||||
|
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
echo "Latest sophon workflow run:"
|
||||||
|
gh run list --repo lancedb/sophon --workflow codex-bump-lancedb-lance.yml --limit 1 --json databaseId,url,displayTitle
|
||||||
|
|||||||
6
.github/workflows/docs.yml
vendored
6
.github/workflows/docs.yml
vendored
@@ -24,7 +24,7 @@ env:
|
|||||||
# according to: https://matklad.github.io/2021/09/04/fast-rust-builds.html
|
# according to: https://matklad.github.io/2021/09/04/fast-rust-builds.html
|
||||||
# CI builds are faster with incremental disabled.
|
# CI builds are faster with incremental disabled.
|
||||||
CARGO_INCREMENTAL: "0"
|
CARGO_INCREMENTAL: "0"
|
||||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lancedb/"
|
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
# Single deploy job since we're just deploying
|
# Single deploy job since we're just deploying
|
||||||
@@ -50,8 +50,8 @@ jobs:
|
|||||||
- name: Build Python
|
- name: Build Python
|
||||||
working-directory: python
|
working-directory: python
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .
|
python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .
|
||||||
python -m pip install --extra-index-url https://pypi.fury.io/lancedb/ -r ../docs/requirements.txt
|
python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -r ../docs/requirements.txt
|
||||||
- name: Set up node
|
- name: Set up node
|
||||||
uses: actions/setup-node@v3
|
uses: actions/setup-node@v3
|
||||||
with:
|
with:
|
||||||
|
|||||||
2
.github/workflows/lance-release-timer.yml
vendored
2
.github/workflows/lance-release-timer.yml
vendored
@@ -59,4 +59,4 @@ jobs:
|
|||||||
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
|
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
|
||||||
run: |
|
run: |
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
gh run list --workflow codex-update-lance-dependency.yml --limit 1 --json databaseId,htmlUrl,displayTitle
|
gh run list --workflow codex-update-lance-dependency.yml --limit 1 --json databaseId,url,displayTitle
|
||||||
|
|||||||
4
.github/workflows/pypi-publish.yml
vendored
4
.github/workflows/pypi-publish.yml
vendored
@@ -11,7 +11,7 @@ on:
|
|||||||
- Cargo.toml # Change in dependency frequently breaks builds
|
- Cargo.toml # Change in dependency frequently breaks builds
|
||||||
|
|
||||||
env:
|
env:
|
||||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lancedb/"
|
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
linux:
|
linux:
|
||||||
@@ -65,7 +65,7 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
config:
|
config:
|
||||||
- target: x86_64-apple-darwin
|
- target: x86_64-apple-darwin
|
||||||
runner: macos-13
|
runner: macos-15-large
|
||||||
- target: aarch64-apple-darwin
|
- target: aarch64-apple-darwin
|
||||||
runner: warp-macos-14-arm64-6x
|
runner: warp-macos-14-arm64-6x
|
||||||
env:
|
env:
|
||||||
|
|||||||
10
.github/workflows/python.yml
vendored
10
.github/workflows/python.yml
vendored
@@ -18,7 +18,7 @@ env:
|
|||||||
# Color output for pytest is off by default.
|
# Color output for pytest is off by default.
|
||||||
PYTEST_ADDOPTS: "--color=yes"
|
PYTEST_ADDOPTS: "--color=yes"
|
||||||
FORCE_COLOR: "1"
|
FORCE_COLOR: "1"
|
||||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lancedb/"
|
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"
|
||||||
RUST_BACKTRACE: "1"
|
RUST_BACKTRACE: "1"
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
@@ -79,7 +79,7 @@ jobs:
|
|||||||
doctest:
|
doctest:
|
||||||
name: "Doctest"
|
name: "Doctest"
|
||||||
timeout-minutes: 30
|
timeout-minutes: 30
|
||||||
runs-on: "ubuntu-24.04"
|
runs-on: ubuntu-2404-8x-x64
|
||||||
defaults:
|
defaults:
|
||||||
run:
|
run:
|
||||||
shell: bash
|
shell: bash
|
||||||
@@ -100,7 +100,7 @@ jobs:
|
|||||||
sudo apt install -y protobuf-compiler
|
sudo apt install -y protobuf-compiler
|
||||||
- name: Install
|
- name: Install
|
||||||
run: |
|
run: |
|
||||||
pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests,dev,embeddings]
|
pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests,dev,embeddings]
|
||||||
pip install tantivy
|
pip install tantivy
|
||||||
pip install mlx
|
pip install mlx
|
||||||
- name: Doctest
|
- name: Doctest
|
||||||
@@ -149,7 +149,7 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
config:
|
config:
|
||||||
- name: x86
|
- name: x86
|
||||||
runner: macos-13
|
runner: macos-15-large
|
||||||
- name: Arm
|
- name: Arm
|
||||||
runner: macos-14
|
runner: macos-14
|
||||||
runs-on: "${{ matrix.config.runner }}"
|
runs-on: "${{ matrix.config.runner }}"
|
||||||
@@ -226,7 +226,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
pip install "pydantic<2"
|
pip install "pydantic<2"
|
||||||
pip install pyarrow==16
|
pip install pyarrow==16
|
||||||
pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests]
|
pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests]
|
||||||
pip install tantivy
|
pip install tantivy
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
run: pytest -m "not slow and not s3_test" -x -v --durations=30 python/tests
|
run: pytest -m "not slow and not s3_test" -x -v --durations=30 python/tests
|
||||||
|
|||||||
2
.github/workflows/run_tests/action.yml
vendored
2
.github/workflows/run_tests/action.yml
vendored
@@ -15,7 +15,7 @@ runs:
|
|||||||
- name: Install lancedb
|
- name: Install lancedb
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
pip3 install --extra-index-url https://pypi.fury.io/lancedb/ $(ls target/wheels/lancedb-*.whl)[tests,dev]
|
pip3 install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ $(ls target/wheels/lancedb-*.whl)[tests,dev]
|
||||||
- name: Setup localstack for integration tests
|
- name: Setup localstack for integration tests
|
||||||
if: ${{ inputs.integration == 'true' }}
|
if: ${{ inputs.integration == 'true' }}
|
||||||
shell: bash
|
shell: bash
|
||||||
|
|||||||
2
.github/workflows/rust.yml
vendored
2
.github/workflows/rust.yml
vendored
@@ -122,7 +122,7 @@ jobs:
|
|||||||
timeout-minutes: 30
|
timeout-minutes: 30
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
mac-runner: ["macos-13", "macos-14"]
|
mac-runner: ["macos-14", "macos-15"]
|
||||||
runs-on: "${{ matrix.mac-runner }}"
|
runs-on: "${{ matrix.mac-runner }}"
|
||||||
defaults:
|
defaults:
|
||||||
run:
|
run:
|
||||||
|
|||||||
767
Cargo.lock
generated
767
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
28
Cargo.toml
28
Cargo.toml
@@ -15,20 +15,20 @@ categories = ["database-implementations"]
|
|||||||
rust-version = "1.78.0"
|
rust-version = "1.78.0"
|
||||||
|
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
lance = { "version" = "=1.0.0-beta.5", default-features = false, "tag" = "v1.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance = { "version" = "=1.0.0-beta.16", default-features = false, "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-core = { "version" = "=1.0.0-beta.5", "tag" = "v1.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-core = { "version" = "=1.0.0-beta.16", "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-datagen = { "version" = "=1.0.0-beta.5", "tag" = "v1.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-datagen = { "version" = "=1.0.0-beta.16", "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-file = { "version" = "=1.0.0-beta.5", "tag" = "v1.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-file = { "version" = "=1.0.0-beta.16", "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-io = { "version" = "=1.0.0-beta.5", default-features = false, "tag" = "v1.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-io = { "version" = "=1.0.0-beta.16", default-features = false, "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-index = { "version" = "=1.0.0-beta.5", "tag" = "v1.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-index = { "version" = "=1.0.0-beta.16", "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-linalg = { "version" = "=1.0.0-beta.5", "tag" = "v1.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-linalg = { "version" = "=1.0.0-beta.16", "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-namespace = { "version" = "=1.0.0-beta.5", "tag" = "v1.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-namespace = { "version" = "=1.0.0-beta.16", "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-namespace-impls = { "version" = "=1.0.0-beta.5", "features" = ["dir-aws", "dir-gcp", "dir-azure", "dir-oss", "rest"], "tag" = "v1.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-namespace-impls = { "version" = "=1.0.0-beta.16", default-features = false, "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-table = { "version" = "=1.0.0-beta.5", "tag" = "v1.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-table = { "version" = "=1.0.0-beta.16", "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-testing = { "version" = "=1.0.0-beta.5", "tag" = "v1.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-testing = { "version" = "=1.0.0-beta.16", "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-datafusion = { "version" = "=1.0.0-beta.5", "tag" = "v1.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-datafusion = { "version" = "=1.0.0-beta.16", "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-encoding = { "version" = "=1.0.0-beta.5", "tag" = "v1.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-encoding = { "version" = "=1.0.0-beta.16", "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
lance-arrow = { "version" = "=1.0.0-beta.5", "tag" = "v1.0.0-beta.5", "git" = "https://github.com/lance-format/lance.git" }
|
lance-arrow = { "version" = "=1.0.0-beta.16", "tag" = "v1.0.0-beta.16", "git" = "https://github.com/lance-format/lance.git" }
|
||||||
ahash = "0.8"
|
ahash = "0.8"
|
||||||
# Note that this one does not include pyarrow
|
# Note that this one does not include pyarrow
|
||||||
arrow = { version = "56.2", optional = false }
|
arrow = { version = "56.2", optional = false }
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
|
|||||||
console.log(results);
|
console.log(results);
|
||||||
```
|
```
|
||||||
|
|
||||||
The [quickstart](https://lancedb.com/docs/quickstart/basic-usage/) contains a more complete example.
|
The [quickstart](https://lancedb.com/docs/quickstart/basic-usage/) contains more complete examples.
|
||||||
|
|
||||||
## Development
|
## Development
|
||||||
|
|
||||||
|
|||||||
@@ -147,7 +147,7 @@ A new PermutationBuilder instance
|
|||||||
#### Example
|
#### Example
|
||||||
|
|
||||||
```ts
|
```ts
|
||||||
builder.splitCalculated("user_id % 3");
|
builder.splitCalculated({ calculation: "user_id % 3" });
|
||||||
```
|
```
|
||||||
|
|
||||||
***
|
***
|
||||||
|
|||||||
@@ -118,7 +118,7 @@ export class PermutationBuilder {
|
|||||||
* @returns A new PermutationBuilder instance
|
* @returns A new PermutationBuilder instance
|
||||||
* @example
|
* @example
|
||||||
* ```ts
|
* ```ts
|
||||||
* builder.splitCalculated("user_id % 3");
|
* builder.splitCalculated({ calculation: "user_id % 3" });
|
||||||
* ```
|
* ```
|
||||||
*/
|
*/
|
||||||
splitCalculated(options: SplitCalculatedOptions): PermutationBuilder {
|
splitCalculated(options: SplitCalculatedOptions): PermutationBuilder {
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
[tool.bumpversion]
|
[tool.bumpversion]
|
||||||
current_version = "0.25.4-beta.2"
|
current_version = "0.25.4-beta.3"
|
||||||
parse = """(?x)
|
parse = """(?x)
|
||||||
(?P<major>0|[1-9]\\d*)\\.
|
(?P<major>0|[1-9]\\d*)\\.
|
||||||
(?P<minor>0|[1-9]\\d*)\\.
|
(?P<minor>0|[1-9]\\d*)\\.
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "lancedb-python"
|
name = "lancedb-python"
|
||||||
version = "0.25.4-beta.2"
|
version = "0.25.4-beta.3"
|
||||||
edition.workspace = true
|
edition.workspace = true
|
||||||
description = "Python bindings for LanceDB"
|
description = "Python bindings for LanceDB"
|
||||||
license.workspace = true
|
license.workspace = true
|
||||||
|
|||||||
@@ -1,11 +1,11 @@
|
|||||||
PIP_EXTRA_INDEX_URL ?= https://pypi.fury.io/lancedb/
|
PIP_EXTRA_INDEX_URL ?= https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/
|
||||||
|
|
||||||
help: ## Show this help.
|
help: ## Show this help.
|
||||||
@sed -ne '/@sed/!s/## //p' $(MAKEFILE_LIST)
|
@sed -ne '/@sed/!s/## //p' $(MAKEFILE_LIST)
|
||||||
|
|
||||||
.PHONY: develop
|
.PHONY: develop
|
||||||
develop: ## Install the package in development mode.
|
develop: ## Install the package in development mode.
|
||||||
PIP_EXTRA_INDEX_URL=$(PIP_EXTRA_INDEX_URL) maturin develop --extras tests,dev,embeddings
|
PIP_EXTRA_INDEX_URL="$(PIP_EXTRA_INDEX_URL)" maturin develop --extras tests,dev,embeddings
|
||||||
|
|
||||||
.PHONY: format
|
.PHONY: format
|
||||||
format: ## Format the code.
|
format: ## Format the code.
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ dependencies = [
|
|||||||
"pyarrow>=16",
|
"pyarrow>=16",
|
||||||
"pydantic>=1.10",
|
"pydantic>=1.10",
|
||||||
"tqdm>=4.27.0",
|
"tqdm>=4.27.0",
|
||||||
"lance-namespace>=0.0.21"
|
"lance-namespace>=0.2.1"
|
||||||
]
|
]
|
||||||
description = "lancedb"
|
description = "lancedb"
|
||||||
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
|
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
|
||||||
@@ -45,7 +45,7 @@ repository = "https://github.com/lancedb/lancedb"
|
|||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
pylance = [
|
pylance = [
|
||||||
"pylance>=0.25",
|
"pylance>=1.0.0b14",
|
||||||
]
|
]
|
||||||
tests = [
|
tests = [
|
||||||
"aiohttp",
|
"aiohttp",
|
||||||
@@ -59,7 +59,7 @@ tests = [
|
|||||||
"polars>=0.19, <=1.3.0",
|
"polars>=0.19, <=1.3.0",
|
||||||
"tantivy",
|
"tantivy",
|
||||||
"pyarrow-stubs",
|
"pyarrow-stubs",
|
||||||
"pylance>=1.0.0b4",
|
"pylance>=1.0.0b14",
|
||||||
"requests",
|
"requests",
|
||||||
"datafusion",
|
"datafusion",
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -194,6 +194,10 @@ class DBConnection(EnforceOverrides):
|
|||||||
connection will be inherited by the table, but can be overridden here.
|
connection will be inherited by the table, but can be overridden here.
|
||||||
See available options at
|
See available options at
|
||||||
<https://lancedb.com/docs/storage/>
|
<https://lancedb.com/docs/storage/>
|
||||||
|
|
||||||
|
To enable stable row IDs (row IDs remain stable after compaction,
|
||||||
|
update, delete, and merges), set `new_table_enable_stable_row_ids`
|
||||||
|
to `"true"` in storage_options when connecting to the database.
|
||||||
data_storage_version: optional, str, default "stable"
|
data_storage_version: optional, str, default "stable"
|
||||||
Deprecated. Set `storage_options` when connecting to the database and set
|
Deprecated. Set `storage_options` when connecting to the database and set
|
||||||
`new_table_data_storage_version` in the options.
|
`new_table_data_storage_version` in the options.
|
||||||
@@ -1079,6 +1083,10 @@ class AsyncConnection(object):
|
|||||||
See available options at
|
See available options at
|
||||||
<https://lancedb.com/docs/storage/>
|
<https://lancedb.com/docs/storage/>
|
||||||
|
|
||||||
|
To enable stable row IDs (row IDs remain stable after compaction,
|
||||||
|
update, delete, and merges), set `new_table_enable_stable_row_ids`
|
||||||
|
to `"true"` in storage_options when connecting to the database.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
AsyncTable
|
AsyncTable
|
||||||
|
|||||||
@@ -609,9 +609,19 @@ class IvfPq:
|
|||||||
class IvfRq:
|
class IvfRq:
|
||||||
"""Describes an IVF RQ Index
|
"""Describes an IVF RQ Index
|
||||||
|
|
||||||
IVF-RQ (Residual Quantization) stores a compressed copy of each vector using
|
IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization
|
||||||
residual quantization and organizes them into IVF partitions. Parameters
|
and organizes them into IVF partitions.
|
||||||
largely mirror IVF-PQ for consistency.
|
|
||||||
|
The compression scheme is called RabitQ quantization. Each dimension is
|
||||||
|
quantized into a small number of bits. The parameters `num_bits` and
|
||||||
|
`num_partitions` control this process, providing a tradeoff between
|
||||||
|
index size (and thus search speed) and index accuracy.
|
||||||
|
|
||||||
|
The partitioning process is called IVF and the `num_partitions` parameter
|
||||||
|
controls how many groups to create.
|
||||||
|
|
||||||
|
Note that training an IVF RQ index on a large dataset is a slow operation
|
||||||
|
and currently is also a memory intensive operation.
|
||||||
|
|
||||||
Attributes
|
Attributes
|
||||||
----------
|
----------
|
||||||
@@ -628,7 +638,7 @@ class IvfRq:
|
|||||||
Number of IVF partitions to create.
|
Number of IVF partitions to create.
|
||||||
|
|
||||||
num_bits: int, default 1
|
num_bits: int, default 1
|
||||||
Number of bits to encode each dimension.
|
Number of bits to encode each dimension in the RabitQ codebook.
|
||||||
|
|
||||||
max_iterations: int, default 50
|
max_iterations: int, default 50
|
||||||
Max iterations to train kmeans when computing IVF partitions.
|
Max iterations to train kmeans when computing IVF partitions.
|
||||||
|
|||||||
@@ -127,13 +127,17 @@ class LanceNamespaceStorageOptionsProvider(StorageOptionsProvider):
|
|||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
>>> from lance_namespace import connect as namespace_connect
|
Create a provider and fetch storage options::
|
||||||
>>> namespace = namespace_connect("rest", {"url": "https://..."})
|
|
||||||
>>> provider = LanceNamespaceStorageOptionsProvider(
|
from lance_namespace import connect as namespace_connect
|
||||||
... namespace=namespace,
|
|
||||||
... table_id=["my_namespace", "my_table"]
|
# Connect to namespace (requires a running namespace server)
|
||||||
... )
|
namespace = namespace_connect("rest", {"uri": "https://..."})
|
||||||
>>> options = provider.fetch_storage_options()
|
provider = LanceNamespaceStorageOptionsProvider(
|
||||||
|
namespace=namespace,
|
||||||
|
table_id=["my_namespace", "my_table"]
|
||||||
|
)
|
||||||
|
options = provider.fetch_storage_options()
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, namespace: LanceNamespace, table_id: List[str]):
|
def __init__(self, namespace: LanceNamespace, table_id: List[str]):
|
||||||
|
|||||||
@@ -2429,9 +2429,8 @@ class AsyncQueryBase(object):
|
|||||||
>>> from lancedb import connect_async
|
>>> from lancedb import connect_async
|
||||||
>>> async def doctest_example():
|
>>> async def doctest_example():
|
||||||
... conn = await connect_async("./.lancedb")
|
... conn = await connect_async("./.lancedb")
|
||||||
... table = await conn.create_table("my_table", [{"vector": [99, 99]}])
|
... table = await conn.create_table("my_table", [{"vector": [99.0, 99.0]}])
|
||||||
... query = [100, 100]
|
... plan = await table.query().nearest_to([1.0, 2.0]).explain_plan(True)
|
||||||
... plan = await table.query().nearest_to([1, 2]).explain_plan(True)
|
|
||||||
... print(plan)
|
... print(plan)
|
||||||
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
||||||
ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
|
ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
|
||||||
@@ -2440,6 +2439,7 @@ class AsyncQueryBase(object):
|
|||||||
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST, _rowid@1 ASC NULLS LAST], preserve_partitioning=[false]
|
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST, _rowid@1 ASC NULLS LAST], preserve_partitioning=[false]
|
||||||
KNNVectorDistance: metric=l2
|
KNNVectorDistance: metric=l2
|
||||||
LanceRead: uri=..., projection=[vector], ...
|
LanceRead: uri=..., projection=[vector], ...
|
||||||
|
<BLANKLINE>
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
@@ -3141,10 +3141,9 @@ class AsyncHybridQuery(AsyncStandardQuery, AsyncVectorQueryBase):
|
|||||||
>>> from lancedb.index import FTS
|
>>> from lancedb.index import FTS
|
||||||
>>> async def doctest_example():
|
>>> async def doctest_example():
|
||||||
... conn = await connect_async("./.lancedb")
|
... conn = await connect_async("./.lancedb")
|
||||||
... table = await conn.create_table("my_table", [{"vector": [99, 99], "text": "hello world"}])
|
... table = await conn.create_table("my_table", [{"vector": [99.0, 99.0], "text": "hello world"}])
|
||||||
... await table.create_index("text", config=FTS(with_position=False))
|
... await table.create_index("text", config=FTS(with_position=False))
|
||||||
... query = [100, 100]
|
... plan = await table.query().nearest_to([1.0, 2.0]).nearest_to_text("hello").explain_plan(True)
|
||||||
... plan = await table.query().nearest_to([1, 2]).nearest_to_text("hello").explain_plan(True)
|
|
||||||
... print(plan)
|
... print(plan)
|
||||||
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
||||||
Vector Search Plan:
|
Vector Search Plan:
|
||||||
@@ -3418,9 +3417,8 @@ class BaseQueryBuilder(object):
|
|||||||
>>> from lancedb import connect_async
|
>>> from lancedb import connect_async
|
||||||
>>> async def doctest_example():
|
>>> async def doctest_example():
|
||||||
... conn = await connect_async("./.lancedb")
|
... conn = await connect_async("./.lancedb")
|
||||||
... table = await conn.create_table("my_table", [{"vector": [99, 99]}])
|
... table = await conn.create_table("my_table", [{"vector": [99.0, 99.0]}])
|
||||||
... query = [100, 100]
|
... plan = await table.query().nearest_to([1.0, 2.0]).explain_plan(True)
|
||||||
... plan = await table.query().nearest_to([1, 2]).explain_plan(True)
|
|
||||||
... print(plan)
|
... print(plan)
|
||||||
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
||||||
ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
|
ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance]
|
||||||
@@ -3429,6 +3427,7 @@ class BaseQueryBuilder(object):
|
|||||||
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST, _rowid@1 ASC NULLS LAST], preserve_partitioning=[false]
|
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST, _rowid@1 ASC NULLS LAST], preserve_partitioning=[false]
|
||||||
KNNVectorDistance: metric=l2
|
KNNVectorDistance: metric=l2
|
||||||
LanceRead: uri=..., projection=[vector], ...
|
LanceRead: uri=..., projection=[vector], ...
|
||||||
|
<BLANKLINE>
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
|
|||||||
@@ -1018,7 +1018,7 @@ class Table(ABC):
|
|||||||
... .when_not_matched_insert_all() \\
|
... .when_not_matched_insert_all() \\
|
||||||
... .execute(new_data)
|
... .execute(new_data)
|
||||||
>>> res
|
>>> res
|
||||||
MergeResult(version=2, num_updated_rows=2, num_inserted_rows=1, num_deleted_rows=0)
|
MergeResult(version=2, num_updated_rows=2, num_inserted_rows=1, num_deleted_rows=0, num_attempts=1)
|
||||||
>>> # The order of new rows is non-deterministic since we use
|
>>> # The order of new rows is non-deterministic since we use
|
||||||
>>> # a hash-join as part of this operation and so we sort here
|
>>> # a hash-join as part of this operation and so we sort here
|
||||||
>>> table.to_arrow().sort_by("a").to_pandas()
|
>>> table.to_arrow().sort_by("a").to_pandas()
|
||||||
@@ -3634,7 +3634,7 @@ class AsyncTable:
|
|||||||
... .when_not_matched_insert_all() \\
|
... .when_not_matched_insert_all() \\
|
||||||
... .execute(new_data)
|
... .execute(new_data)
|
||||||
>>> res
|
>>> res
|
||||||
MergeResult(version=2, num_updated_rows=2, num_inserted_rows=1, num_deleted_rows=0)
|
MergeResult(version=2, num_updated_rows=2, num_inserted_rows=1, num_deleted_rows=0, num_attempts=1)
|
||||||
>>> # The order of new rows is non-deterministic since we use
|
>>> # The order of new rows is non-deterministic since we use
|
||||||
>>> # a hash-join as part of this operation and so we sort here
|
>>> # a hash-join as part of this operation and so we sort here
|
||||||
>>> table.to_arrow().sort_by("a").to_pandas()
|
>>> table.to_arrow().sort_by("a").to_pandas()
|
||||||
|
|||||||
@@ -441,6 +441,150 @@ async def test_create_table_v2_manifest_paths_async(tmp_path):
|
|||||||
assert re.match(r"\d{20}\.manifest", manifest)
|
assert re.match(r"\d{20}\.manifest", manifest)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_create_table_stable_row_ids_via_storage_options(tmp_path):
|
||||||
|
"""Test stable_row_ids via storage_options at connect time."""
|
||||||
|
import lance
|
||||||
|
|
||||||
|
# Connect with stable row IDs enabled as default for new tables
|
||||||
|
db_with = await lancedb.connect_async(
|
||||||
|
tmp_path, storage_options={"new_table_enable_stable_row_ids": "true"}
|
||||||
|
)
|
||||||
|
# Connect without stable row IDs (default)
|
||||||
|
db_without = await lancedb.connect_async(
|
||||||
|
tmp_path, storage_options={"new_table_enable_stable_row_ids": "false"}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create table using connection with stable row IDs enabled
|
||||||
|
await db_with.create_table(
|
||||||
|
"with_stable_via_opts",
|
||||||
|
data=[{"id": i} for i in range(10)],
|
||||||
|
)
|
||||||
|
lance_ds_with = lance.dataset(tmp_path / "with_stable_via_opts.lance")
|
||||||
|
fragments_with = lance_ds_with.get_fragments()
|
||||||
|
assert len(fragments_with) > 0
|
||||||
|
assert fragments_with[0].metadata.row_id_meta is not None
|
||||||
|
|
||||||
|
# Create table using connection without stable row IDs
|
||||||
|
await db_without.create_table(
|
||||||
|
"without_stable_via_opts",
|
||||||
|
data=[{"id": i} for i in range(10)],
|
||||||
|
)
|
||||||
|
lance_ds_without = lance.dataset(tmp_path / "without_stable_via_opts.lance")
|
||||||
|
fragments_without = lance_ds_without.get_fragments()
|
||||||
|
assert len(fragments_without) > 0
|
||||||
|
assert fragments_without[0].metadata.row_id_meta is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_table_stable_row_ids_via_storage_options_sync(tmp_path):
|
||||||
|
"""Test that enable_stable_row_ids can be set via storage_options (sync API)."""
|
||||||
|
# Connect with stable row IDs enabled as default for new tables
|
||||||
|
db_with = lancedb.connect(
|
||||||
|
tmp_path, storage_options={"new_table_enable_stable_row_ids": "true"}
|
||||||
|
)
|
||||||
|
# Connect with stable row IDs explicitly disabled (same as the default)
|
||||||
|
db_without = lancedb.connect(
|
||||||
|
tmp_path, storage_options={"new_table_enable_stable_row_ids": "false"}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create table using connection with stable row IDs enabled
|
||||||
|
tbl_with = db_with.create_table(
|
||||||
|
"with_stable_sync",
|
||||||
|
data=[{"id": i} for i in range(10)],
|
||||||
|
)
|
||||||
|
lance_ds_with = tbl_with.to_lance()
|
||||||
|
fragments_with = lance_ds_with.get_fragments()
|
||||||
|
assert len(fragments_with) > 0
|
||||||
|
assert fragments_with[0].metadata.row_id_meta is not None
|
||||||
|
|
||||||
|
# Create table using connection without stable row IDs
|
||||||
|
tbl_without = db_without.create_table(
|
||||||
|
"without_stable_sync",
|
||||||
|
data=[{"id": i} for i in range(10)],
|
||||||
|
)
|
||||||
|
lance_ds_without = tbl_without.to_lance()
|
||||||
|
fragments_without = lance_ds_without.get_fragments()
|
||||||
|
assert len(fragments_without) > 0
|
||||||
|
assert fragments_without[0].metadata.row_id_meta is None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_create_table_stable_row_ids_table_level_override(tmp_path):
|
||||||
|
"""Test that stable_row_ids can be enabled/disabled at create_table level."""
|
||||||
|
import lance
|
||||||
|
|
||||||
|
# Connect without any stable row ID setting
|
||||||
|
db_default = await lancedb.connect_async(tmp_path)
|
||||||
|
|
||||||
|
# Connect with stable row IDs enabled at connection level
|
||||||
|
db_with_stable = await lancedb.connect_async(
|
||||||
|
tmp_path, storage_options={"new_table_enable_stable_row_ids": "true"}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Case 1: No connection setting, enable at table level
|
||||||
|
await db_default.create_table(
|
||||||
|
"table_level_enabled",
|
||||||
|
data=[{"id": i} for i in range(10)],
|
||||||
|
storage_options={"new_table_enable_stable_row_ids": "true"},
|
||||||
|
)
|
||||||
|
lance_ds = lance.dataset(tmp_path / "table_level_enabled.lance")
|
||||||
|
fragments = lance_ds.get_fragments()
|
||||||
|
assert len(fragments) > 0
|
||||||
|
assert fragments[0].metadata.row_id_meta is not None, (
|
||||||
|
"Table should have stable row IDs when enabled at table level"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Case 2: Connection has stable row IDs, override with false at table level
|
||||||
|
await db_with_stable.create_table(
|
||||||
|
"table_level_disabled",
|
||||||
|
data=[{"id": i} for i in range(10)],
|
||||||
|
storage_options={"new_table_enable_stable_row_ids": "false"},
|
||||||
|
)
|
||||||
|
lance_ds = lance.dataset(tmp_path / "table_level_disabled.lance")
|
||||||
|
fragments = lance_ds.get_fragments()
|
||||||
|
assert len(fragments) > 0
|
||||||
|
assert fragments[0].metadata.row_id_meta is None, (
|
||||||
|
"Table should NOT have stable row IDs when disabled at table level"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_table_stable_row_ids_table_level_override_sync(tmp_path):
|
||||||
|
"""Test that stable_row_ids can be enabled/disabled at create_table level (sync)."""
|
||||||
|
# Connect without any stable row ID setting
|
||||||
|
db_default = lancedb.connect(tmp_path)
|
||||||
|
|
||||||
|
# Connect with stable row IDs enabled at connection level
|
||||||
|
db_with_stable = lancedb.connect(
|
||||||
|
tmp_path, storage_options={"new_table_enable_stable_row_ids": "true"}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Case 1: No connection setting, enable at table level
|
||||||
|
tbl = db_default.create_table(
|
||||||
|
"table_level_enabled_sync",
|
||||||
|
data=[{"id": i} for i in range(10)],
|
||||||
|
storage_options={"new_table_enable_stable_row_ids": "true"},
|
||||||
|
)
|
||||||
|
lance_ds = tbl.to_lance()
|
||||||
|
fragments = lance_ds.get_fragments()
|
||||||
|
assert len(fragments) > 0
|
||||||
|
assert fragments[0].metadata.row_id_meta is not None, (
|
||||||
|
"Table should have stable row IDs when enabled at table level"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Case 2: Connection has stable row IDs, override with false at table level
|
||||||
|
tbl = db_with_stable.create_table(
|
||||||
|
"table_level_disabled_sync",
|
||||||
|
data=[{"id": i} for i in range(10)],
|
||||||
|
storage_options={"new_table_enable_stable_row_ids": "false"},
|
||||||
|
)
|
||||||
|
lance_ds = tbl.to_lance()
|
||||||
|
fragments = lance_ds.get_fragments()
|
||||||
|
assert len(fragments) > 0
|
||||||
|
assert fragments[0].metadata.row_id_meta is None, (
|
||||||
|
"Table should NOT have stable row IDs when disabled at table level"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_open_table_sync(tmp_db: lancedb.DBConnection):
|
def test_open_table_sync(tmp_db: lancedb.DBConnection):
|
||||||
tmp_db.create_table("test", data=[{"id": 0}])
|
tmp_db.create_table("test", data=[{"id": 0}])
|
||||||
assert tmp_db.open_table("test").count_rows() == 1
|
assert tmp_db.open_table("test").count_rows() == 1
|
||||||
|
|||||||
@@ -690,7 +690,7 @@ impl FTSQuery {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_query(&self) -> String {
|
pub fn get_query(&self) -> String {
|
||||||
self.fts_query.query.query().to_owned()
|
self.fts_query.query.query().clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn to_query_request(&self) -> PyQueryRequest {
|
pub fn to_query_request(&self) -> PyQueryRequest {
|
||||||
|
|||||||
@@ -105,12 +105,12 @@ test-log = "0.2"
|
|||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["aws", "gcs", "azure", "dynamodb", "oss"]
|
default = ["aws", "gcs", "azure", "dynamodb", "oss"]
|
||||||
aws = ["lance/aws", "lance-io/aws"]
|
aws = ["lance/aws", "lance-io/aws", "lance-namespace-impls/dir-aws"]
|
||||||
oss = ["lance/oss", "lance-io/oss"]
|
oss = ["lance/oss", "lance-io/oss", "lance-namespace-impls/dir-oss"]
|
||||||
gcs = ["lance/gcp", "lance-io/gcp"]
|
gcs = ["lance/gcp", "lance-io/gcp", "lance-namespace-impls/dir-gcp"]
|
||||||
azure = ["lance/azure", "lance-io/azure"]
|
azure = ["lance/azure", "lance-io/azure", "lance-namespace-impls/dir-azure"]
|
||||||
dynamodb = ["lance/dynamodb", "aws"]
|
dynamodb = ["lance/dynamodb", "aws"]
|
||||||
remote = ["dep:reqwest", "dep:http"]
|
remote = ["dep:reqwest", "dep:http", "lance-namespace-impls/rest"]
|
||||||
fp16kernels = ["lance-linalg/fp16kernels"]
|
fp16kernels = ["lance-linalg/fp16kernels"]
|
||||||
s3-test = []
|
s3-test = []
|
||||||
bedrock = ["dep:aws-sdk-bedrockruntime"]
|
bedrock = ["dep:aws-sdk-bedrockruntime"]
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ pub const LANCE_FILE_EXTENSION: &str = "lance";
|
|||||||
|
|
||||||
pub const OPT_NEW_TABLE_STORAGE_VERSION: &str = "new_table_data_storage_version";
|
pub const OPT_NEW_TABLE_STORAGE_VERSION: &str = "new_table_data_storage_version";
|
||||||
pub const OPT_NEW_TABLE_V2_MANIFEST_PATHS: &str = "new_table_enable_v2_manifest_paths";
|
pub const OPT_NEW_TABLE_V2_MANIFEST_PATHS: &str = "new_table_enable_v2_manifest_paths";
|
||||||
|
pub const OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS: &str = "new_table_enable_stable_row_ids";
|
||||||
|
|
||||||
/// Controls how new tables should be created
|
/// Controls how new tables should be created
|
||||||
#[derive(Clone, Debug, Default)]
|
#[derive(Clone, Debug, Default)]
|
||||||
@@ -48,6 +49,12 @@ pub struct NewTableConfig {
|
|||||||
/// V2 manifest paths are more efficient than V1 manifest paths but are not
|
/// V2 manifest paths are more efficient than V1 manifest paths but are not
|
||||||
/// supported by old clients.
|
/// supported by old clients.
|
||||||
pub enable_v2_manifest_paths: Option<bool>,
|
pub enable_v2_manifest_paths: Option<bool>,
|
||||||
|
/// Whether to enable stable row IDs for new tables
|
||||||
|
///
|
||||||
|
/// When enabled, row IDs remain stable after compaction, update, delete,
|
||||||
|
/// and merges. This is useful for materialized views and other use cases
|
||||||
|
/// that need to track source rows across these operations.
|
||||||
|
pub enable_stable_row_ids: Option<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Options specific to the listing database
|
/// Options specific to the listing database
|
||||||
@@ -87,6 +94,14 @@ impl ListingDatabaseOptions {
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
.transpose()?,
|
.transpose()?,
|
||||||
|
enable_stable_row_ids: map
|
||||||
|
.get(OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS)
|
||||||
|
.map(|s| {
|
||||||
|
s.parse::<bool>().map_err(|_| Error::InvalidInput {
|
||||||
|
message: format!("enable_stable_row_ids must be a boolean, received {}", s),
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.transpose()?,
|
||||||
};
|
};
|
||||||
// We just assume that any options that are not new table config options are storage options
|
// We just assume that any options that are not new table config options are storage options
|
||||||
let storage_options = map
|
let storage_options = map
|
||||||
@@ -94,6 +109,7 @@ impl ListingDatabaseOptions {
|
|||||||
.filter(|(key, _)| {
|
.filter(|(key, _)| {
|
||||||
key.as_str() != OPT_NEW_TABLE_STORAGE_VERSION
|
key.as_str() != OPT_NEW_TABLE_STORAGE_VERSION
|
||||||
&& key.as_str() != OPT_NEW_TABLE_V2_MANIFEST_PATHS
|
&& key.as_str() != OPT_NEW_TABLE_V2_MANIFEST_PATHS
|
||||||
|
&& key.as_str() != OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS
|
||||||
})
|
})
|
||||||
.map(|(key, value)| (key.clone(), value.clone()))
|
.map(|(key, value)| (key.clone(), value.clone()))
|
||||||
.collect();
|
.collect();
|
||||||
@@ -118,6 +134,12 @@ impl DatabaseOptions for ListingDatabaseOptions {
|
|||||||
enable_v2_manifest_paths.to_string(),
|
enable_v2_manifest_paths.to_string(),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
if let Some(enable_stable_row_ids) = self.new_table_config.enable_stable_row_ids {
|
||||||
|
map.insert(
|
||||||
|
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
|
||||||
|
enable_stable_row_ids.to_string(),
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -475,7 +497,7 @@ impl ListingDatabase {
|
|||||||
// this error is not lance::Error::DatasetNotFound, as the method
|
// this error is not lance::Error::DatasetNotFound, as the method
|
||||||
// `remove_dir_all` may be used to remove something that is not a dataset
|
// `remove_dir_all` may be used to remove something that is not a dataset
|
||||||
lance::Error::NotFound { .. } => Error::TableNotFound {
|
lance::Error::NotFound { .. } => Error::TableNotFound {
|
||||||
name: name.to_owned(),
|
name: name.clone(),
|
||||||
source: Box::new(err),
|
source: Box::new(err),
|
||||||
},
|
},
|
||||||
_ => Error::from(err),
|
_ => Error::from(err),
|
||||||
@@ -497,7 +519,7 @@ impl ListingDatabase {
|
|||||||
fn extract_storage_overrides(
|
fn extract_storage_overrides(
|
||||||
&self,
|
&self,
|
||||||
request: &CreateTableRequest,
|
request: &CreateTableRequest,
|
||||||
) -> Result<(Option<LanceFileVersion>, Option<bool>)> {
|
) -> Result<(Option<LanceFileVersion>, Option<bool>, Option<bool>)> {
|
||||||
let storage_options = request
|
let storage_options = request
|
||||||
.write_options
|
.write_options
|
||||||
.lance_write_params
|
.lance_write_params
|
||||||
@@ -518,7 +540,19 @@ impl ListingDatabase {
|
|||||||
message: "enable_v2_manifest_paths must be a boolean".to_string(),
|
message: "enable_v2_manifest_paths must be a boolean".to_string(),
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
Ok((storage_version_override, v2_manifest_override))
|
let stable_row_ids_override = storage_options
|
||||||
|
.and_then(|opts| opts.get(OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS))
|
||||||
|
.map(|s| s.parse::<bool>())
|
||||||
|
.transpose()
|
||||||
|
.map_err(|_| Error::InvalidInput {
|
||||||
|
message: "enable_stable_row_ids must be a boolean".to_string(),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
storage_version_override,
|
||||||
|
v2_manifest_override,
|
||||||
|
stable_row_ids_override,
|
||||||
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Prepare write parameters for table creation
|
/// Prepare write parameters for table creation
|
||||||
@@ -527,6 +561,7 @@ impl ListingDatabase {
|
|||||||
request: &CreateTableRequest,
|
request: &CreateTableRequest,
|
||||||
storage_version_override: Option<LanceFileVersion>,
|
storage_version_override: Option<LanceFileVersion>,
|
||||||
v2_manifest_override: Option<bool>,
|
v2_manifest_override: Option<bool>,
|
||||||
|
stable_row_ids_override: Option<bool>,
|
||||||
) -> lance::dataset::WriteParams {
|
) -> lance::dataset::WriteParams {
|
||||||
let mut write_params = request
|
let mut write_params = request
|
||||||
.write_options
|
.write_options
|
||||||
@@ -571,6 +606,13 @@ impl ListingDatabase {
|
|||||||
write_params.enable_v2_manifest_paths = enable_v2_manifest_paths;
|
write_params.enable_v2_manifest_paths = enable_v2_manifest_paths;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Apply enable_stable_row_ids: table-level override takes precedence over connection config
|
||||||
|
if let Some(enable_stable_row_ids) =
|
||||||
|
stable_row_ids_override.or(self.new_table_config.enable_stable_row_ids)
|
||||||
|
{
|
||||||
|
write_params.enable_stable_row_ids = enable_stable_row_ids;
|
||||||
|
}
|
||||||
|
|
||||||
if matches!(&request.mode, CreateTableMode::Overwrite) {
|
if matches!(&request.mode, CreateTableMode::Overwrite) {
|
||||||
write_params.mode = WriteMode::Overwrite;
|
write_params.mode = WriteMode::Overwrite;
|
||||||
}
|
}
|
||||||
@@ -706,11 +748,15 @@ impl Database for ListingDatabase {
|
|||||||
.clone()
|
.clone()
|
||||||
.unwrap_or_else(|| self.table_uri(&request.name).unwrap());
|
.unwrap_or_else(|| self.table_uri(&request.name).unwrap());
|
||||||
|
|
||||||
let (storage_version_override, v2_manifest_override) =
|
let (storage_version_override, v2_manifest_override, stable_row_ids_override) =
|
||||||
self.extract_storage_overrides(&request)?;
|
self.extract_storage_overrides(&request)?;
|
||||||
|
|
||||||
let write_params =
|
let write_params = self.prepare_write_params(
|
||||||
self.prepare_write_params(&request, storage_version_override, v2_manifest_override);
|
&request,
|
||||||
|
storage_version_override,
|
||||||
|
v2_manifest_override,
|
||||||
|
stable_row_ids_override,
|
||||||
|
);
|
||||||
|
|
||||||
let data_schema = request.data.arrow_schema();
|
let data_schema = request.data.arrow_schema();
|
||||||
|
|
||||||
@@ -921,7 +967,7 @@ impl Database for ListingDatabase {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::connection::ConnectRequest;
|
use crate::connection::ConnectRequest;
|
||||||
use crate::database::{CreateTableData, CreateTableMode, CreateTableRequest};
|
use crate::database::{CreateTableData, CreateTableMode, CreateTableRequest, WriteOptions};
|
||||||
use crate::table::{Table, TableDefinition};
|
use crate::table::{Table, TableDefinition};
|
||||||
use arrow_array::{Int32Array, RecordBatch, StringArray};
|
use arrow_array::{Int32Array, RecordBatch, StringArray};
|
||||||
use arrow_schema::{DataType, Field, Schema};
|
use arrow_schema::{DataType, Field, Schema};
|
||||||
@@ -1621,4 +1667,267 @@ mod tests {
|
|||||||
// Cloned table should have all 8 rows from the latest version
|
// Cloned table should have all 8 rows from the latest version
|
||||||
assert_eq!(cloned_table.count_rows(None).await.unwrap(), 8);
|
assert_eq!(cloned_table.count_rows(None).await.unwrap(), 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_create_table_with_stable_row_ids_connection_level() {
|
||||||
|
let tempdir = tempdir().unwrap();
|
||||||
|
let uri = tempdir.path().to_str().unwrap();
|
||||||
|
|
||||||
|
// Create database with stable row IDs enabled at connection level
|
||||||
|
let mut options = HashMap::new();
|
||||||
|
options.insert(
|
||||||
|
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
|
||||||
|
"true".to_string(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let request = ConnectRequest {
|
||||||
|
uri: uri.to_string(),
|
||||||
|
#[cfg(feature = "remote")]
|
||||||
|
client_config: Default::default(),
|
||||||
|
options,
|
||||||
|
read_consistency_interval: None,
|
||||||
|
session: None,
|
||||||
|
};
|
||||||
|
|
||||||
|
let db = ListingDatabase::connect_with_options(&request)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Verify the config was parsed correctly
|
||||||
|
assert_eq!(db.new_table_config.enable_stable_row_ids, Some(true));
|
||||||
|
|
||||||
|
// Create a table - it should inherit the stable row IDs setting
|
||||||
|
let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
|
||||||
|
|
||||||
|
let batch = RecordBatch::try_new(
|
||||||
|
schema.clone(),
|
||||||
|
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let reader = Box::new(arrow_array::RecordBatchIterator::new(
|
||||||
|
vec![Ok(batch)],
|
||||||
|
schema.clone(),
|
||||||
|
));
|
||||||
|
|
||||||
|
let table = db
|
||||||
|
.create_table(CreateTableRequest {
|
||||||
|
name: "test_stable".to_string(),
|
||||||
|
namespace: vec![],
|
||||||
|
data: CreateTableData::Data(reader),
|
||||||
|
mode: CreateTableMode::Create,
|
||||||
|
write_options: Default::default(),
|
||||||
|
location: None,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Verify table was created successfully
|
||||||
|
assert_eq!(table.count_rows(None).await.unwrap(), 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_create_table_with_stable_row_ids_table_level() {
|
||||||
|
let (_tempdir, db) = setup_database().await;
|
||||||
|
|
||||||
|
// Verify connection has no stable row IDs config
|
||||||
|
assert_eq!(db.new_table_config.enable_stable_row_ids, None);
|
||||||
|
|
||||||
|
// Create a table with stable row IDs enabled at table level via storage_options
|
||||||
|
let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
|
||||||
|
|
||||||
|
let batch = RecordBatch::try_new(
|
||||||
|
schema.clone(),
|
||||||
|
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let reader = Box::new(arrow_array::RecordBatchIterator::new(
|
||||||
|
vec![Ok(batch)],
|
||||||
|
schema.clone(),
|
||||||
|
));
|
||||||
|
|
||||||
|
let mut storage_options = HashMap::new();
|
||||||
|
storage_options.insert(
|
||||||
|
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
|
||||||
|
"true".to_string(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let write_options = WriteOptions {
|
||||||
|
lance_write_params: Some(lance::dataset::WriteParams {
|
||||||
|
store_params: Some(lance::io::ObjectStoreParams {
|
||||||
|
storage_options: Some(storage_options),
|
||||||
|
..Default::default()
|
||||||
|
}),
|
||||||
|
..Default::default()
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
|
||||||
|
let table = db
|
||||||
|
.create_table(CreateTableRequest {
|
||||||
|
name: "test_stable_table_level".to_string(),
|
||||||
|
namespace: vec![],
|
||||||
|
data: CreateTableData::Data(reader),
|
||||||
|
mode: CreateTableMode::Create,
|
||||||
|
write_options,
|
||||||
|
location: None,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Verify table was created successfully
|
||||||
|
assert_eq!(table.count_rows(None).await.unwrap(), 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_create_table_stable_row_ids_table_overrides_connection() {
|
||||||
|
let tempdir = tempdir().unwrap();
|
||||||
|
let uri = tempdir.path().to_str().unwrap();
|
||||||
|
|
||||||
|
// Create database with stable row IDs enabled at connection level
|
||||||
|
let mut options = HashMap::new();
|
||||||
|
options.insert(
|
||||||
|
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
|
||||||
|
"true".to_string(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let request = ConnectRequest {
|
||||||
|
uri: uri.to_string(),
|
||||||
|
#[cfg(feature = "remote")]
|
||||||
|
client_config: Default::default(),
|
||||||
|
options,
|
||||||
|
read_consistency_interval: None,
|
||||||
|
session: None,
|
||||||
|
};
|
||||||
|
|
||||||
|
let db = ListingDatabase::connect_with_options(&request)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
assert_eq!(db.new_table_config.enable_stable_row_ids, Some(true));
|
||||||
|
|
||||||
|
// Create table with stable row IDs disabled at table level (overrides connection)
|
||||||
|
let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
|
||||||
|
|
||||||
|
let batch = RecordBatch::try_new(
|
||||||
|
schema.clone(),
|
||||||
|
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let reader = Box::new(arrow_array::RecordBatchIterator::new(
|
||||||
|
vec![Ok(batch)],
|
||||||
|
schema.clone(),
|
||||||
|
));
|
||||||
|
|
||||||
|
let mut storage_options = HashMap::new();
|
||||||
|
storage_options.insert(
|
||||||
|
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
|
||||||
|
"false".to_string(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let write_options = WriteOptions {
|
||||||
|
lance_write_params: Some(lance::dataset::WriteParams {
|
||||||
|
store_params: Some(lance::io::ObjectStoreParams {
|
||||||
|
storage_options: Some(storage_options),
|
||||||
|
..Default::default()
|
||||||
|
}),
|
||||||
|
..Default::default()
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
|
||||||
|
let table = db
|
||||||
|
.create_table(CreateTableRequest {
|
||||||
|
name: "test_override".to_string(),
|
||||||
|
namespace: vec![],
|
||||||
|
data: CreateTableData::Data(reader),
|
||||||
|
mode: CreateTableMode::Create,
|
||||||
|
write_options,
|
||||||
|
location: None,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
// Verify table was created successfully
|
||||||
|
assert_eq!(table.count_rows(None).await.unwrap(), 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_stable_row_ids_invalid_value() {
|
||||||
|
let tempdir = tempdir().unwrap();
|
||||||
|
let uri = tempdir.path().to_str().unwrap();
|
||||||
|
|
||||||
|
// Try to create database with invalid stable row IDs value
|
||||||
|
let mut options = HashMap::new();
|
||||||
|
options.insert(
|
||||||
|
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
|
||||||
|
"not_a_boolean".to_string(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let request = ConnectRequest {
|
||||||
|
uri: uri.to_string(),
|
||||||
|
#[cfg(feature = "remote")]
|
||||||
|
client_config: Default::default(),
|
||||||
|
options,
|
||||||
|
read_consistency_interval: None,
|
||||||
|
session: None,
|
||||||
|
};
|
||||||
|
|
||||||
|
let result = ListingDatabase::connect_with_options(&request).await;
|
||||||
|
|
||||||
|
assert!(result.is_err());
|
||||||
|
assert!(matches!(
|
||||||
|
result.unwrap_err(),
|
||||||
|
Error::InvalidInput { message } if message.contains("enable_stable_row_ids must be a boolean")
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_stable_row_ids_config_serialization() {
|
||||||
|
// Test that ListingDatabaseOptions correctly serializes stable_row_ids
|
||||||
|
let mut options = HashMap::new();
|
||||||
|
options.insert(
|
||||||
|
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
|
||||||
|
"true".to_string(),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Parse the options
|
||||||
|
let db_options = ListingDatabaseOptions::parse_from_map(&options).unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
db_options.new_table_config.enable_stable_row_ids,
|
||||||
|
Some(true)
|
||||||
|
);
|
||||||
|
|
||||||
|
// Serialize back to map
|
||||||
|
let mut serialized = HashMap::new();
|
||||||
|
db_options.serialize_into_map(&mut serialized);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
serialized.get(OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS),
|
||||||
|
Some(&"true".to_string())
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_stable_row_ids_config_parse_false() {
|
||||||
|
let mut options = HashMap::new();
|
||||||
|
options.insert(
|
||||||
|
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
|
||||||
|
"false".to_string(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let db_options = ListingDatabaseOptions::parse_from_map(&options).unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
db_options.new_table_config.enable_stable_row_ids,
|
||||||
|
Some(false)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_stable_row_ids_config_not_set() {
|
||||||
|
let options = HashMap::new();
|
||||||
|
|
||||||
|
let db_options = ListingDatabaseOptions::parse_from_map(&options).unwrap();
|
||||||
|
assert_eq!(db_options.new_table_config.enable_stable_row_ids, None);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user