mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-21 14:00:40 +00:00
Compare commits
28 Commits
release/v0
...
dependabot
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
80b8022255 | ||
|
|
2d5298b6ee | ||
|
|
4cb9147bbf | ||
|
|
54a1982ef1 | ||
|
|
5bfde47a8e | ||
|
|
049b0c8f09 | ||
|
|
20556e23a9 | ||
|
|
01e272c0b0 | ||
|
|
ad1634a0a5 | ||
|
|
5d1c28922a | ||
|
|
53c2164b84 | ||
|
|
6286ee8192 | ||
|
|
af8ca2ad5e | ||
|
|
aac6c62459 | ||
|
|
8df2fff75f | ||
|
|
0d30b31998 | ||
|
|
6a431ff0a0 | ||
|
|
ab2c5adf5e | ||
|
|
f02c4cad90 | ||
|
|
7b74c3dd91 | ||
|
|
13c6dae9a3 | ||
|
|
64aeee84a8 | ||
|
|
5b45e44ce3 | ||
|
|
f893589356 | ||
|
|
df4ad9f851 | ||
|
|
9330a9b851 | ||
|
|
02de07576e | ||
|
|
81617fd3d9 |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.28.0-beta.11"
|
||||
current_version = "0.29.1-beta.0"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
5
.github/dependabot.yml
vendored
5
.github/dependabot.yml
vendored
@@ -11,6 +11,11 @@ updates:
|
||||
schedule:
|
||||
interval: weekly
|
||||
open-pull-requests-limit: 10
|
||||
# Only update Cargo.lock, never widen/raise the version requirements in
|
||||
# Cargo.toml. The goal is keeping the lockfile (and the binaries we ship)
|
||||
# current on security fixes, not forcing our library's consumers onto
|
||||
# newer minimum versions.
|
||||
versioning-strategy: lockfile-only
|
||||
groups:
|
||||
rust-minor-patch:
|
||||
update-types:
|
||||
|
||||
12
.github/workflows/codex-fix-ci.yml
vendored
12
.github/workflows/codex-fix-ci.yml
vendored
@@ -45,7 +45,9 @@ jobs:
|
||||
- name: Set up Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 20
|
||||
# pnpm 11 (used by the nodejs install step below) requires
|
||||
# Node >= 22.13; use 24 since 22 hits EOL in October.
|
||||
node-version: 24
|
||||
|
||||
- name: Install Codex CLI
|
||||
run: npm install -g @openai/codex
|
||||
@@ -79,10 +81,14 @@ jobs:
|
||||
java-version: '11'
|
||||
cache: maven
|
||||
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 11.1.1
|
||||
- name: Install Node.js dependencies for TypeScript bindings
|
||||
run: |
|
||||
cd nodejs
|
||||
npm ci
|
||||
pnpm install --frozen-lockfile
|
||||
|
||||
- name: Configure git user
|
||||
run: |
|
||||
@@ -137,7 +143,7 @@ jobs:
|
||||
- For Rust test failures: Run the specific test with "cargo test -p <crate> <test_name>"
|
||||
- For Python test failures: Build with "cd python && maturin develop" then run "pytest <specific_test_file>::<test_name>"
|
||||
- For Java test failures: Run "cd java && mvn test -Dtest=<TestClass>#<testMethod>"
|
||||
- For TypeScript test failures: Run "cd nodejs && npm run build && npm test -- --testNamePattern='<test_name>'"
|
||||
- For TypeScript test failures: Run "cd nodejs && pnpm build && pnpm test -- --testNamePattern='<test_name>'"
|
||||
- Do NOT run the full test suite - only run the tests that were failing
|
||||
|
||||
7. If the additional guidelines are provided, follow them as well.
|
||||
|
||||
114
.github/workflows/nodejs.yml
vendored
114
.github/workflows/nodejs.yml
vendored
@@ -42,11 +42,17 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 11.1.1
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 20
|
||||
cache: 'npm'
|
||||
cache-dependency-path: nodejs/package-lock.json
|
||||
# pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
|
||||
# in October. The library itself still supports Node >= 18
|
||||
# (see test matrix below).
|
||||
node-version: 24
|
||||
cache: 'pnpm'
|
||||
cache-dependency-path: nodejs/pnpm-lock.yaml
|
||||
- uses: actions-rust-lang/setup-rust-toolchain@v1
|
||||
with:
|
||||
components: rustfmt, clippy
|
||||
@@ -61,11 +67,13 @@ jobs:
|
||||
run: cargo clippy --profile ci --all --all-features -- -D warnings
|
||||
- name: Lint Typescript
|
||||
run: |
|
||||
npm ci
|
||||
npm run lint-ci
|
||||
pnpm install --frozen-lockfile
|
||||
pnpm lint-ci
|
||||
- name: Lint examples
|
||||
working-directory: nodejs/examples
|
||||
run: npm ci && npm run lint-ci
|
||||
# The `@lancedb/lancedb` dep points at file:../dist; pnpm errors if
|
||||
# that dir is missing, so create an empty one for lint-only runs.
|
||||
run: mkdir -p ../dist && pnpm install --frozen-lockfile && pnpm lint-ci
|
||||
linux:
|
||||
name: Linux (NodeJS ${{ matrix.node-version }})
|
||||
timeout-minutes: 30
|
||||
@@ -82,14 +90,18 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- uses: actions/setup-node@v4
|
||||
name: Setup Node.js 20 for build
|
||||
- uses: pnpm/action-setup@v4
|
||||
with:
|
||||
# @napi-rs/cli v3 requires Node >= 20.12 (via @inquirer/prompts@8).
|
||||
# Build always on Node 20; tests run on the matrix version below.
|
||||
node-version: 20
|
||||
cache: 'npm'
|
||||
cache-dependency-path: nodejs/package-lock.json
|
||||
version: 11.1.1
|
||||
- uses: actions/setup-node@v4
|
||||
name: Setup Node.js 24 for build
|
||||
with:
|
||||
# pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
|
||||
# in October. Build/install runs on Node 24; tests run on the
|
||||
# matrix version below using direct jest invocation.
|
||||
node-version: 24
|
||||
cache: 'pnpm'
|
||||
cache-dependency-path: nodejs/pnpm-lock.yaml
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
@@ -97,48 +109,58 @@ jobs:
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
- name: Build
|
||||
run: |
|
||||
npm ci --include=optional
|
||||
npm run build:debug -- --profile ci
|
||||
pnpm install --frozen-lockfile
|
||||
# No `--` separator: pnpm forwards it literally, which would
|
||||
# make napi-rs treat `--profile ci` as a cargo passthrough arg.
|
||||
pnpm build:debug --profile ci
|
||||
pnpm tsc
|
||||
- name: Setup examples
|
||||
working-directory: nodejs/examples
|
||||
run: pnpm install --frozen-lockfile
|
||||
- name: Check docs
|
||||
run: |
|
||||
# We run this as part of the job because the binary needs to be built
|
||||
# first to export the types of the native code.
|
||||
set -e
|
||||
# `pnpm docs` would invoke pnpm's built-in `docs` command, not
|
||||
# the script — use `pnpm run docs`.
|
||||
pnpm run docs
|
||||
if ! git diff --exit-code -- ../ ':(exclude)Cargo.lock'; then
|
||||
echo "Docs need to be updated"
|
||||
echo "Run 'pnpm run docs', fix any warnings, and commit the changes."
|
||||
exit 1
|
||||
fi
|
||||
- uses: actions/setup-node@v4
|
||||
name: Setup Node.js ${{ matrix.node-version }} for test
|
||||
with:
|
||||
node-version: ${{ matrix.node-version }}
|
||||
- name: Compile TypeScript
|
||||
run: npm run tsc
|
||||
- name: Setup localstack
|
||||
working-directory: .
|
||||
run: docker compose up --detach --wait
|
||||
- name: Test
|
||||
env:
|
||||
S3_TEST: "1"
|
||||
run: npm run test
|
||||
- name: Setup examples
|
||||
working-directory: nodejs/examples
|
||||
run: npm ci
|
||||
# Newer @smithy/core uses dynamic ESM imports.
|
||||
NODE_OPTIONS: "--experimental-vm-modules"
|
||||
# Invoke jest directly because pnpm 11 itself requires Node 22+
|
||||
# while the matrix tests on older Node versions.
|
||||
run: npx jest --verbose
|
||||
- name: Test examples
|
||||
working-directory: ./
|
||||
env:
|
||||
OPENAI_API_KEY: test
|
||||
OPENAI_BASE_URL: http://0.0.0.0:8000
|
||||
NODE_OPTIONS: "--experimental-vm-modules"
|
||||
run: |
|
||||
python ci/mock_openai.py &
|
||||
cd nodejs/examples
|
||||
npm test
|
||||
- name: Check docs
|
||||
run: |
|
||||
# We run this as part of the job because the binary needs to be built
|
||||
# first to export the types of the native code.
|
||||
set -e
|
||||
npm ci
|
||||
npm run docs
|
||||
if ! git diff --exit-code -- ../ ':(exclude)Cargo.lock'; then
|
||||
echo "Docs need to be updated"
|
||||
echo "Run 'npm run docs', fix any warnings, and commit the changes."
|
||||
exit 1
|
||||
fi
|
||||
npx jest --testEnvironment jest-environment-node-single-context --verbose
|
||||
macos:
|
||||
timeout-minutes: 30
|
||||
runs-on: "macos-14"
|
||||
# macos-15 ships a newer linker; the older macos-14 linker fails to insert
|
||||
# branch islands when the debug cdylib's __text section exceeds the 128 MB
|
||||
# AArch64 B/BL branch range.
|
||||
runs-on: "macos-15"
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
@@ -148,20 +170,28 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 11.1.1
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 20
|
||||
cache: 'npm'
|
||||
cache-dependency-path: nodejs/package-lock.json
|
||||
# pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
|
||||
# in October.
|
||||
node-version: 24
|
||||
cache: 'pnpm'
|
||||
cache-dependency-path: nodejs/pnpm-lock.yaml
|
||||
- uses: dtolnay/rust-toolchain@stable
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
brew install protobuf
|
||||
- name: Build
|
||||
run: |
|
||||
npm ci --include=optional
|
||||
npm run build:debug -- --profile ci
|
||||
npm run tsc
|
||||
pnpm install --frozen-lockfile
|
||||
# No `--` separator: pnpm forwards it literally, which would
|
||||
# make napi-rs treat `--profile ci` as a cargo passthrough arg.
|
||||
pnpm build:debug --profile ci
|
||||
pnpm tsc
|
||||
- name: Test
|
||||
run: |
|
||||
npm run test
|
||||
pnpm test
|
||||
|
||||
53
.github/workflows/npm-publish.yml
vendored
53
.github/workflows/npm-publish.yml
vendored
@@ -171,13 +171,18 @@ jobs:
|
||||
working-directory: nodejs
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 11.1.1
|
||||
- name: Setup node
|
||||
uses: actions/setup-node@v4
|
||||
if: ${{ !matrix.settings.docker }}
|
||||
with:
|
||||
node-version: 20
|
||||
cache: npm
|
||||
cache-dependency-path: nodejs/package-lock.json
|
||||
# pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
|
||||
# in October.
|
||||
node-version: 24
|
||||
cache: pnpm
|
||||
cache-dependency-path: nodejs/pnpm-lock.yaml
|
||||
- name: Install
|
||||
uses: dtolnay/rust-toolchain@stable
|
||||
if: ${{ !matrix.settings.docker }}
|
||||
@@ -195,7 +200,7 @@ jobs:
|
||||
target/
|
||||
key: nodejs-${{ matrix.settings.target }}-cargo-${{ matrix.settings.host }}
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
run: pnpm install --frozen-lockfile
|
||||
- name: Install Zig
|
||||
uses: mlugg/setup-zig@v2
|
||||
if: ${{ contains(matrix.settings.target, 'musl') }}
|
||||
@@ -248,7 +253,7 @@ jobs:
|
||||
# one to do the upload.
|
||||
- name: Make generic artifacts
|
||||
if: ${{ matrix.settings.target == 'aarch64-apple-darwin' }}
|
||||
run: npm run tsc
|
||||
run: pnpm tsc
|
||||
- name: Upload Generic Artifacts
|
||||
if: ${{ matrix.settings.target == 'aarch64-apple-darwin' }}
|
||||
uses: actions/upload-artifact@v4
|
||||
@@ -283,14 +288,24 @@ jobs:
|
||||
working-directory: nodejs
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Setup node
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 11.1.1
|
||||
- name: Setup Node.js 24 for install
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
# pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
|
||||
# in October.
|
||||
node-version: 24
|
||||
cache: pnpm
|
||||
cache-dependency-path: nodejs/pnpm-lock.yaml
|
||||
- name: Install dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
- name: Setup Node.js ${{ matrix.node }} for test
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: ${{ matrix.node }}
|
||||
cache: npm
|
||||
cache-dependency-path: nodejs/package-lock.json
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
- name: Download artifacts
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
@@ -311,7 +326,9 @@ jobs:
|
||||
- name: Move built files
|
||||
run: cp dist/native.d.ts dist/native.js dist/*.node lancedb/
|
||||
- name: Test bindings
|
||||
run: npm test
|
||||
# Invoke jest directly because pnpm 11 itself requires Node 22+
|
||||
# while the matrix tests on older Node versions.
|
||||
run: npx jest --verbose
|
||||
publish:
|
||||
name: Publish
|
||||
runs-on: ubuntu-latest
|
||||
@@ -323,15 +340,19 @@ jobs:
|
||||
- test-lancedb
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 11.1.1
|
||||
- name: Setup node
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 24
|
||||
cache: npm
|
||||
cache-dependency-path: nodejs/package-lock.json
|
||||
cache: pnpm
|
||||
cache-dependency-path: nodejs/pnpm-lock.yaml
|
||||
registry-url: "https://registry.npmjs.org"
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
run: pnpm install --frozen-lockfile
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: nodejs-dist
|
||||
@@ -351,7 +372,7 @@ jobs:
|
||||
- name: Display structure of downloaded files
|
||||
run: find dist && find nodejs-artifacts
|
||||
- name: Move artifacts
|
||||
run: npx napi artifacts -d nodejs-artifacts
|
||||
run: pnpm exec napi artifacts -d nodejs-artifacts
|
||||
- name: List packages
|
||||
run: find npm
|
||||
- name: Publish
|
||||
|
||||
2
.github/workflows/python.yml
vendored
2
.github/workflows/python.yml
vendored
@@ -205,7 +205,7 @@ jobs:
|
||||
- name: Delete wheels
|
||||
run: rm -rf target/wheels
|
||||
pydantic1x:
|
||||
timeout-minutes: 30
|
||||
timeout-minutes: 60
|
||||
runs-on: "ubuntu-24.04"
|
||||
defaults:
|
||||
run:
|
||||
|
||||
20
.github/workflows/rust.yml
vendored
20
.github/workflows/rust.yml
vendored
@@ -233,6 +233,26 @@ jobs:
|
||||
cargo update -p aws-sdk-sso --precise 1.62.0
|
||||
cargo update -p aws-sdk-ssooidc --precise 1.63.0
|
||||
cargo update -p aws-sdk-sts --precise 1.63.0
|
||||
# aws-runtime/sigv4/credential-types/types and the aws-smithy-*
|
||||
# crates bumped their MSRV to 1.91.1 in late 2026; pin to the last
|
||||
# 1.91.0-compatible versions. The order matters — each downgrade
|
||||
# only succeeds once everything that still pins it at a higher
|
||||
# version has itself been downgraded.
|
||||
cargo update -p aws-runtime --precise 1.5.12
|
||||
cargo update -p aws-types --precise 1.3.9
|
||||
cargo update -p aws-sigv4 --precise 1.3.5
|
||||
cargo update -p aws-credential-types --precise 1.2.8
|
||||
cargo update -p aws-smithy-checksums --precise 0.63.9
|
||||
cargo update -p aws-smithy-runtime --precise 1.9.3
|
||||
cargo update -p aws-smithy-http --precise 0.62.4
|
||||
cargo update -p aws-smithy-eventstream --precise 0.60.12
|
||||
cargo update -p aws-smithy-http-client --precise 1.1.3
|
||||
cargo update -p aws-smithy-observability --precise 0.1.4
|
||||
cargo update -p aws-smithy-query --precise 0.60.8
|
||||
cargo update -p aws-smithy-runtime-api --precise 1.9.1
|
||||
cargo update -p aws-smithy-async --precise 1.2.6
|
||||
cargo update -p aws-smithy-types --precise 1.3.5
|
||||
cargo update -p aws-smithy-xml --precise 0.60.11
|
||||
cargo update -p home --precise 0.5.9
|
||||
- name: cargo +${{ matrix.msrv }} check
|
||||
env:
|
||||
|
||||
25
AGENTS.md
25
AGENTS.md
@@ -17,9 +17,30 @@ Common commands:
|
||||
* Run tests: `cargo test --quiet --features remote --tests`
|
||||
* Run specific test: `cargo test --quiet --features remote -p <package_name> --test <test_name>`
|
||||
* Lint: `cargo clippy --quiet --features remote --tests --examples`
|
||||
* Format: `cargo fmt --all`
|
||||
* Format Rust: `cargo fmt --all`
|
||||
* Format Python: `ruff format .`
|
||||
* Lint Python: `ruff check .`
|
||||
* Bootstrap Python dev env: `cd python && uv run --extra tests --extra dev maturin develop --extras tests,dev`
|
||||
* Run Python tests: `cd python && uv run --extra tests pytest python/tests -vv --durations=10 -m "not slow and not s3_test"`
|
||||
* Run specific Python test: `cd python && uv run --extra tests pytest python/tests/<test_file>.py::<test_name> -q`
|
||||
|
||||
Before committing changes, run formatting.
|
||||
For Python validation, prefer the uv-managed environment declared by `python/uv.lock`.
|
||||
Do not treat system `python`, global `pytest`, or missing editable-install errors as
|
||||
final blockers; bootstrap or enter the uv environment instead. If `lancedb._lancedb`
|
||||
is missing or stale, or if Rust/PyO3 binding code changed, rebuild the Python
|
||||
extension with the bootstrap command above before running tests.
|
||||
|
||||
Before committing changes, run formatting for every language you touched. At minimum:
|
||||
|
||||
* Rust changes: run `cargo fmt --all`.
|
||||
* Python changes: run `ruff format .` and `ruff check .` from the repository root,
|
||||
and run targeted tests through `cd python && uv run ...`.
|
||||
* TypeScript changes: run the relevant `npm`/`pnpm` lint, format, build, and docs commands in `nodejs`.
|
||||
|
||||
Before creating a PR, make sure the PR title follows Conventional Commits, such as
|
||||
`fix: support nested field paths in native index creation` or
|
||||
`feat(python): add dataset multiprocessing support`. The semantic-release check uses the
|
||||
PR title and body as the merge commit message, so a non-conventional PR title will fail CI.
|
||||
|
||||
## Coding tips
|
||||
|
||||
|
||||
1591
Cargo.lock
generated
1591
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
28
Cargo.toml
28
Cargo.toml
@@ -13,20 +13,20 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.91.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=7.0.0-beta.7", default-features = false, "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=7.0.0-beta.7", default-features = false, "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=7.0.0-beta.7", default-features = false, "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=7.0.0-beta.7", "tag" = "v7.0.0-beta.7", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance = { "version" = "=7.0.0-beta.13", default-features = false, "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=7.0.0-beta.13", default-features = false, "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=7.0.0-beta.13", default-features = false, "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
ahash = "0.8"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "58.0.0", optional = false }
|
||||
|
||||
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
|
||||
<dependency>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-core</artifactId>
|
||||
<version>0.28.0-beta.11</version>
|
||||
<version>0.29.1-beta.0</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
|
||||
@@ -12,20 +12,22 @@ Typescript.
|
||||
* `src/`: Rust bindings source code
|
||||
* `lancedb/`: Typescript package source code
|
||||
* `__test__/`: Unit tests
|
||||
* `examples/`: An npm package with the examples shown in the documentation
|
||||
* `examples/`: A pnpm package with the examples shown in the documentation
|
||||
|
||||
## Development environment
|
||||
|
||||
To set up your development environment, you will need to install the following:
|
||||
|
||||
1. Node.js 14 or later
|
||||
2. Rust's package manager, Cargo. Use [rustup](https://rustup.rs/) to install.
|
||||
3. [protoc](https://grpc.io/docs/protoc-installation/) (Protocol Buffers compiler)
|
||||
1. Node.js 22 or later (required by pnpm 11)
|
||||
2. [pnpm](https://pnpm.io/installation) 11 or later (or run via `corepack enable`,
|
||||
which uses the `packageManager` field in `package.json`)
|
||||
3. Rust's package manager, Cargo. Use [rustup](https://rustup.rs/) to install.
|
||||
4. [protoc](https://grpc.io/docs/protoc-installation/) (Protocol Buffers compiler)
|
||||
|
||||
Initial setup:
|
||||
|
||||
```shell
|
||||
npm install
|
||||
pnpm install
|
||||
```
|
||||
|
||||
### Commit Hooks
|
||||
@@ -39,38 +41,38 @@ pre-commit install
|
||||
|
||||
## Development
|
||||
|
||||
Most common development commands can be run using the npm scripts.
|
||||
Most common development commands can be run using the pnpm scripts.
|
||||
|
||||
Build the package
|
||||
|
||||
```shell
|
||||
npm install
|
||||
npm run build
|
||||
pnpm install
|
||||
pnpm build
|
||||
```
|
||||
|
||||
Lint:
|
||||
|
||||
```shell
|
||||
npm run lint
|
||||
pnpm lint
|
||||
```
|
||||
|
||||
Format and fix lints:
|
||||
|
||||
```shell
|
||||
npm run lint-fix
|
||||
pnpm lint-fix
|
||||
```
|
||||
|
||||
Run tests:
|
||||
|
||||
```shell
|
||||
npm test
|
||||
pnpm test
|
||||
```
|
||||
|
||||
To run a single test:
|
||||
|
||||
```shell
|
||||
# Single file: table.test.ts
|
||||
npm test -- table.test.ts
|
||||
pnpm test -- table.test.ts
|
||||
# Single test: 'merge insert' in table.test.ts
|
||||
npm test -- table.test.ts --testNamePattern=merge\ insert
|
||||
pnpm test -- table.test.ts --testNamePattern=merge\ insert
|
||||
```
|
||||
|
||||
@@ -148,6 +148,33 @@ Creates a new empty Table
|
||||
|
||||
***
|
||||
|
||||
### createNamespace()
|
||||
|
||||
```ts
|
||||
abstract createNamespace(namespacePath, options?): Promise<CreateNamespaceResponse>
|
||||
```
|
||||
|
||||
Create a new namespace at the given path.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **namespacePath**: `string`[]
|
||||
The namespace path to create.
|
||||
|
||||
* **options?**: `Partial`<[`CreateNamespaceOptions`](../interfaces/CreateNamespaceOptions.md)>
|
||||
Creation `mode`
|
||||
("create" | "exist_ok" | "overwrite") and optional `properties`
|
||||
to attach to the namespace.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`CreateNamespaceResponse`](../interfaces/CreateNamespaceResponse.md)>
|
||||
|
||||
The properties of the
|
||||
created namespace and an optional transaction id.
|
||||
|
||||
***
|
||||
|
||||
### createTable()
|
||||
|
||||
#### createTable(options, namespacePath)
|
||||
@@ -230,6 +257,29 @@ Creates a new Table and initialize it with new data.
|
||||
|
||||
***
|
||||
|
||||
### describeNamespace()
|
||||
|
||||
```ts
|
||||
abstract describeNamespace(namespacePath): Promise<DescribeNamespaceResponse>
|
||||
```
|
||||
|
||||
Describe a namespace, returning its properties.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **namespacePath**: `string`[]
|
||||
The namespace path to describe, in
|
||||
parent → child order, e.g. `["analytics", "sales"]`.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`DescribeNamespaceResponse`](../interfaces/DescribeNamespaceResponse.md)>
|
||||
|
||||
The namespace's properties
|
||||
(may be undefined if the namespace has none).
|
||||
|
||||
***
|
||||
|
||||
### display()
|
||||
|
||||
```ts
|
||||
@@ -263,6 +313,36 @@ Drop all tables in the database.
|
||||
|
||||
***
|
||||
|
||||
### dropNamespace()
|
||||
|
||||
```ts
|
||||
abstract dropNamespace(namespacePath, options?): Promise<DropNamespaceResponse>
|
||||
```
|
||||
|
||||
Drop a namespace.
|
||||
|
||||
Use `behavior: "cascade"` to also drop everything contained in the
|
||||
namespace (sub-namespaces and tables). The default `"restrict"`
|
||||
behavior refuses to drop a non-empty namespace.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **namespacePath**: `string`[]
|
||||
The namespace path to drop.
|
||||
|
||||
* **options?**: `Partial`<[`DropNamespaceOptions`](../interfaces/DropNamespaceOptions.md)>
|
||||
`mode` ("skip" | "fail"
|
||||
for missing-namespace handling) and `behavior` ("restrict" | "cascade").
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`DropNamespaceResponse`](../interfaces/DropNamespaceResponse.md)>
|
||||
|
||||
Any properties returned by
|
||||
the server and an optional transaction id.
|
||||
|
||||
***
|
||||
|
||||
### dropTable()
|
||||
|
||||
```ts
|
||||
@@ -299,6 +379,36 @@ Return true if the connection has not been closed
|
||||
|
||||
***
|
||||
|
||||
### listNamespaces()
|
||||
|
||||
```ts
|
||||
abstract listNamespaces(namespacePath?, options?): Promise<ListNamespacesResponse>
|
||||
```
|
||||
|
||||
List the immediate child namespaces under the given parent.
|
||||
|
||||
Results may be paginated. To retrieve subsequent pages, pass the
|
||||
`pageToken` returned by a previous call.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **namespacePath?**: `string`[]
|
||||
The parent namespace path. Defaults
|
||||
to the root namespace if omitted.
|
||||
|
||||
* **options?**: `Partial`<[`ListNamespacesOptions`](../interfaces/ListNamespacesOptions.md)>
|
||||
Pagination options
|
||||
(`pageToken`, `limit`).
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`ListNamespacesResponse`](../interfaces/ListNamespacesResponse.md)>
|
||||
|
||||
Child namespace names and
|
||||
an optional token for fetching the next page.
|
||||
|
||||
***
|
||||
|
||||
### openTable()
|
||||
|
||||
```ts
|
||||
@@ -327,6 +437,39 @@ Open a table in the database.
|
||||
|
||||
***
|
||||
|
||||
### renameTable()
|
||||
|
||||
```ts
|
||||
abstract renameTable(
|
||||
currentName,
|
||||
newName,
|
||||
options?): Promise<void>
|
||||
```
|
||||
|
||||
Rename a table.
|
||||
|
||||
Currently only supported by LanceDB Cloud. Local OSS connections and
|
||||
namespace-backed connections (via [connectNamespace](../functions/connectNamespace.md)) reject with
|
||||
a "not supported" error.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **currentName**: `string`
|
||||
The current name of the table.
|
||||
|
||||
* **newName**: `string`
|
||||
The new name for the table.
|
||||
|
||||
* **options?**: [`RenameTableOptions`](../interfaces/RenameTableOptions.md)
|
||||
Optional namespace paths. When
|
||||
`newNamespacePath` is omitted the table stays in `namespacePath`.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`void`>
|
||||
|
||||
***
|
||||
|
||||
### tableNames()
|
||||
|
||||
#### tableNames(options)
|
||||
|
||||
@@ -343,6 +343,30 @@ This is useful for pagination.
|
||||
|
||||
***
|
||||
|
||||
### orderBy()
|
||||
|
||||
```ts
|
||||
orderBy(ordering): this
|
||||
```
|
||||
|
||||
Sort the results by the specified column(s).
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **ordering**: [`ColumnOrdering`](../interfaces/ColumnOrdering.md) \| [`ColumnOrdering`](../interfaces/ColumnOrdering.md)[]
|
||||
|
||||
#### Returns
|
||||
|
||||
`this`
|
||||
|
||||
This query builder.
|
||||
|
||||
#### Inherited from
|
||||
|
||||
`StandardQueryBase.orderBy`
|
||||
|
||||
***
|
||||
|
||||
### outputSchema()
|
||||
|
||||
```ts
|
||||
|
||||
173
docs/src/js/classes/Scannable.md
Normal file
173
docs/src/js/classes/Scannable.md
Normal file
@@ -0,0 +1,173 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / Scannable
|
||||
|
||||
# Class: Scannable
|
||||
|
||||
A data source that can be scanned as a stream of Arrow `RecordBatch`es.
|
||||
|
||||
`Scannable` wraps the schema + optional row count + rescannable flag and
|
||||
a callback that yields batches one at a time. It is passed to consumers
|
||||
(e.g. `Table.add`, `createTable`, `mergeInsert` — follow-up work) that
|
||||
need to pull data without materializing the full dataset in JS memory.
|
||||
|
||||
Batches cross the JS↔Rust boundary as Arrow IPC Stream messages; a fresh
|
||||
writer serializes each batch, and the Rust side decodes it with
|
||||
`arrow_ipc::reader::StreamReader`. One batch is in flight at a time.
|
||||
|
||||
## Properties
|
||||
|
||||
### numRows
|
||||
|
||||
```ts
|
||||
readonly numRows: null | number;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### rescannable
|
||||
|
||||
```ts
|
||||
readonly rescannable: boolean;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### schema
|
||||
|
||||
```ts
|
||||
readonly schema: Schema<any>;
|
||||
```
|
||||
|
||||
## Methods
|
||||
|
||||
### fromFactory()
|
||||
|
||||
```ts
|
||||
static fromFactory(
|
||||
schema,
|
||||
factory,
|
||||
opts): Promise<Scannable>
|
||||
```
|
||||
|
||||
Build a Scannable from an explicit schema and a factory that returns a
|
||||
fresh batch iterator on each call.
|
||||
|
||||
The factory is invoked once per scan. Each iterator yields
|
||||
`RecordBatch`es matching the declared schema. Use this when you need
|
||||
direct control over the pull loop — for example, to wrap a streaming
|
||||
source whose batches are produced lazily.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **schema**: `Schema`<`any`>
|
||||
The Arrow schema of the produced batches.
|
||||
|
||||
* **factory**
|
||||
Called at the start of each scan to produce a batch
|
||||
iterator. Must be idempotent when `rescannable` is true.
|
||||
|
||||
* **opts**: [`ScannableOptions`](../interfaces/ScannableOptions.md) = `{}`
|
||||
Optional hints. `rescannable` defaults to `true`; set to
|
||||
`false` if calling `factory()` twice would not reproduce the same data.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Scannable`](Scannable.md)>
|
||||
|
||||
***
|
||||
|
||||
### fromIterable()
|
||||
|
||||
```ts
|
||||
static fromIterable(
|
||||
schema,
|
||||
iter,
|
||||
opts): Promise<Scannable>
|
||||
```
|
||||
|
||||
Build a Scannable from an iterable of `RecordBatch`es. `rescannable`
|
||||
defaults to `false`. Pass an explicit schema so the consumer can
|
||||
validate before any batch is pulled.
|
||||
|
||||
`opts.rescannable: true` is honest for replayable iterables (Arrays,
|
||||
Sets, or custom iterables whose `[Symbol.iterator]()` returns a fresh
|
||||
iterator each call). It is rejected for one-shot iterables (generators,
|
||||
async generators, or already-an-iterator inputs) because their
|
||||
`[Symbol.iterator]()` returns the same exhausted object on the second
|
||||
scan. For replayable sources outside this shape, use
|
||||
`fromFactory(schema, () => createIter(), { rescannable: true })`.
|
||||
|
||||
Note: when `opts.rescannable` is `true`, the constructor calls
|
||||
`[Symbol.iterator]()` once on the input to perform the structural check.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **schema**: `Schema`<`any`>
|
||||
|
||||
* **iter**: `Iterable`<`RecordBatch`<`any`>> \| `AsyncIterable`<`RecordBatch`<`any`>>
|
||||
|
||||
* **opts**: [`ScannableOptions`](../interfaces/ScannableOptions.md) = `{}`
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Scannable`](Scannable.md)>
|
||||
|
||||
***
|
||||
|
||||
### fromRecordBatchReader()
|
||||
|
||||
```ts
|
||||
static fromRecordBatchReader(reader, opts): Promise<Scannable>
|
||||
```
|
||||
|
||||
Build a Scannable from an Arrow `RecordBatchReader`. A reader can only
|
||||
be consumed once; `rescannable` defaults to `false`.
|
||||
|
||||
The reader must already be opened (via `.open()`) so its `.schema` is
|
||||
populated. `RecordBatchReader.from(...)` returns an unopened reader.
|
||||
|
||||
`opts.rescannable: true` is rejected because `RecordBatchReader` is a
|
||||
self-iterator (its `[Symbol.iterator]()` returns itself), and this
|
||||
constructor does not call `reader.reset()` between scans, so a second
|
||||
scan would always see an exhausted reader. For genuinely replayable
|
||||
sources, use
|
||||
`fromFactory(schema, () => openReader(), { rescannable: true })`,
|
||||
which mints a fresh reader on each scan.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **reader**: `RecordBatchReader`<`any`>
|
||||
|
||||
* **opts**: [`ScannableOptions`](../interfaces/ScannableOptions.md) = `{}`
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Scannable`](Scannable.md)>
|
||||
|
||||
***
|
||||
|
||||
### fromTable()
|
||||
|
||||
```ts
|
||||
static fromTable(table, opts): Promise<Scannable>
|
||||
```
|
||||
|
||||
Build a Scannable from an in-memory Arrow `Table`. Always rescannable;
|
||||
the table's batches are replayed on each scan.
|
||||
|
||||
The table's row count is authoritative: `opts.numRows` must either be
|
||||
omitted or equal to `table.numRows`. `opts.rescannable` of `false` is
|
||||
rejected because in-memory Tables are always rescannable.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **table**: `Table`<`any`>
|
||||
|
||||
* **opts**: [`ScannableOptions`](../interfaces/ScannableOptions.md) = `{}`
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Scannable`](Scannable.md)>
|
||||
@@ -690,6 +690,74 @@ of the given query
|
||||
|
||||
***
|
||||
|
||||
### setLsmWriteSpec()
|
||||
|
||||
```ts
|
||||
abstract setLsmWriteSpec(spec): Promise<void>
|
||||
```
|
||||
|
||||
Install an [LsmWriteSpec](../interfaces/LsmWriteSpec.md) on this table, selecting Lance's MemWAL
|
||||
LSM-style write path for future `mergeInsert` calls.
|
||||
|
||||
`LsmWriteSpec` chooses one of three sharding strategies via `specType`:
|
||||
|
||||
- `"bucket"` — hash-bucket writes by the single-column unenforced primary
|
||||
key (`column` and `numBuckets` required).
|
||||
- `"identity"` — shard by the raw value of a scalar `column`.
|
||||
- `"unsharded"` — route every write to a single shard.
|
||||
|
||||
All variants require the table to have an unenforced primary key
|
||||
([Table#setUnenforcedPrimaryKey](Table.md#setunenforcedprimarykey)); bucket sharding additionally
|
||||
requires it to be the single column being bucketed.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **spec**: [`LsmWriteSpec`](../interfaces/LsmWriteSpec.md)
|
||||
The sharding spec to install.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`void`>
|
||||
|
||||
#### Example
|
||||
|
||||
```ts
|
||||
await table.setUnenforcedPrimaryKey("id");
|
||||
await table.setLsmWriteSpec({
|
||||
specType: "bucket",
|
||||
column: "id",
|
||||
numBuckets: 16,
|
||||
maintainedIndexes: ["id_idx"],
|
||||
});
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### setUnenforcedPrimaryKey()
|
||||
|
||||
```ts
|
||||
abstract setUnenforcedPrimaryKey(columns): Promise<void>
|
||||
```
|
||||
|
||||
Set the unenforced primary key for this table to a single column.
|
||||
|
||||
"Unenforced" means LanceDB does not check uniqueness on writes; the
|
||||
column is recorded in the schema as the primary key for use by features
|
||||
such as `merge_insert`. Only single-column primary keys are supported,
|
||||
and the key cannot be changed once set.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **columns**: `string` \| `string`[]
|
||||
The primary key column. A one-element
|
||||
array is also accepted; passing more than one column is rejected.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`void`>
|
||||
|
||||
***
|
||||
|
||||
### stats()
|
||||
|
||||
```ts
|
||||
@@ -793,6 +861,23 @@ Return the table as an arrow table
|
||||
|
||||
***
|
||||
|
||||
### unsetLsmWriteSpec()
|
||||
|
||||
```ts
|
||||
abstract unsetLsmWriteSpec(): Promise<void>
|
||||
```
|
||||
|
||||
Remove the [LsmWriteSpec](../interfaces/LsmWriteSpec.md) from this table, reverting to the standard
|
||||
`mergeInsert` write path.
|
||||
|
||||
Errors if no spec is currently set.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`void`>
|
||||
|
||||
***
|
||||
|
||||
### update()
|
||||
|
||||
#### update(opts)
|
||||
|
||||
@@ -498,6 +498,30 @@ This is useful for pagination.
|
||||
|
||||
***
|
||||
|
||||
### orderBy()
|
||||
|
||||
```ts
|
||||
orderBy(ordering): this
|
||||
```
|
||||
|
||||
Sort the results by the specified column(s).
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **ordering**: [`ColumnOrdering`](../interfaces/ColumnOrdering.md) \| [`ColumnOrdering`](../interfaces/ColumnOrdering.md)[]
|
||||
|
||||
#### Returns
|
||||
|
||||
`this`
|
||||
|
||||
This query builder.
|
||||
|
||||
#### Inherited from
|
||||
|
||||
`StandardQueryBase.orderBy`
|
||||
|
||||
***
|
||||
|
||||
### outputSchema()
|
||||
|
||||
```ts
|
||||
|
||||
131
docs/src/js/functions/connectNamespace.md
Normal file
131
docs/src/js/functions/connectNamespace.md
Normal file
@@ -0,0 +1,131 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / connectNamespace
|
||||
|
||||
# Function: connectNamespace()
|
||||
|
||||
## connectNamespace(implName, config, options)
|
||||
|
||||
```ts
|
||||
function connectNamespace(
|
||||
implName,
|
||||
config,
|
||||
options?): Promise<Connection>
|
||||
```
|
||||
|
||||
Connect to a LanceDB database through a namespace.
|
||||
|
||||
Unlike [connect](connect.md), which routes by URI scheme (local path vs.
|
||||
`db://` cloud), `connectNamespace` always returns a namespace-backed
|
||||
connection. The `implName` selects the namespace implementation:
|
||||
|
||||
- `"dir"` — directory namespace, configured with [DirNamespaceConfig](../interfaces/DirNamespaceConfig.md).
|
||||
- `"rest"` — remote REST catalog, configured with [RestNamespaceConfig](../interfaces/RestNamespaceConfig.md).
|
||||
- Any other string — full module path for a custom implementation,
|
||||
configured with a free-form string-keyed `properties` map.
|
||||
|
||||
### Parameters
|
||||
|
||||
* **implName**: `"dir"`
|
||||
|
||||
* **config**: [`DirNamespaceConfig`](../interfaces/DirNamespaceConfig.md)
|
||||
|
||||
* **options?**: `Partial`<[`ConnectNamespaceOptions`](../interfaces/ConnectNamespaceOptions.md)>
|
||||
|
||||
### Returns
|
||||
|
||||
`Promise`<[`Connection`](../classes/Connection.md)>
|
||||
|
||||
### Examples
|
||||
|
||||
```ts
|
||||
const db = await connectNamespace("dir", { root: "/path/to/db" });
|
||||
await db.createTable("users", [{ id: 1 }]);
|
||||
```
|
||||
|
||||
```ts
|
||||
const db = await connectNamespace("rest", {
|
||||
uri: "https://catalog.example.com",
|
||||
headers: { "x-api-key": process.env.CATALOG_KEY ?? "" },
|
||||
});
|
||||
```
|
||||
|
||||
```ts
|
||||
const db = await connectNamespace("my.custom.Namespace", {
|
||||
endpoint: "...",
|
||||
});
|
||||
```
|
||||
|
||||
## connectNamespace(implName, config, options)
|
||||
|
||||
```ts
|
||||
function connectNamespace(
|
||||
implName,
|
||||
config,
|
||||
options?): Promise<Connection>
|
||||
```
|
||||
|
||||
Connect through the built-in REST namespace.
|
||||
|
||||
Configured with [RestNamespaceConfig](../interfaces/RestNamespaceConfig.md). See the function-level
|
||||
documentation above for the full surface, examples, and how this
|
||||
relates to [connect](connect.md).
|
||||
|
||||
### Parameters
|
||||
|
||||
* **implName**: `"rest"`
|
||||
|
||||
* **config**: [`RestNamespaceConfig`](../interfaces/RestNamespaceConfig.md)
|
||||
|
||||
* **options?**: `Partial`<[`ConnectNamespaceOptions`](../interfaces/ConnectNamespaceOptions.md)>
|
||||
|
||||
### Returns
|
||||
|
||||
`Promise`<[`Connection`](../classes/Connection.md)>
|
||||
|
||||
### Example
|
||||
|
||||
```ts
|
||||
const db = await connectNamespace("rest", {
|
||||
uri: "https://catalog.example.com",
|
||||
headers: { "x-api-key": process.env.CATALOG_KEY ?? "" },
|
||||
});
|
||||
```
|
||||
|
||||
## connectNamespace(implName, properties, options)
|
||||
|
||||
```ts
|
||||
function connectNamespace(
|
||||
implName,
|
||||
properties,
|
||||
options?): Promise<Connection>
|
||||
```
|
||||
|
||||
Connect through a custom namespace implementation by full module path,
|
||||
configured with a free-form string-keyed `properties` map. Use the
|
||||
typed overloads above for the built-in `"dir"` and `"rest"` impls.
|
||||
|
||||
See the function-level documentation above for examples and how this
|
||||
relates to [connect](connect.md).
|
||||
|
||||
### Parameters
|
||||
|
||||
* **implName**: `string`
|
||||
|
||||
* **properties**: `Record`<`string`, `string`>
|
||||
|
||||
* **options?**: `Partial`<[`ConnectNamespaceOptions`](../interfaces/ConnectNamespaceOptions.md)>
|
||||
|
||||
### Returns
|
||||
|
||||
`Promise`<[`Connection`](../classes/Connection.md)>
|
||||
|
||||
### Example
|
||||
|
||||
```ts
|
||||
const db = await connectNamespace("my.custom.Namespace", {
|
||||
endpoint: "...",
|
||||
});
|
||||
```
|
||||
@@ -32,6 +32,7 @@
|
||||
- [PhraseQuery](classes/PhraseQuery.md)
|
||||
- [Query](classes/Query.md)
|
||||
- [QueryBase](classes/QueryBase.md)
|
||||
- [Scannable](classes/Scannable.md)
|
||||
- [Session](classes/Session.md)
|
||||
- [StaticHeaderProvider](classes/StaticHeaderProvider.md)
|
||||
- [Table](classes/Table.md)
|
||||
@@ -50,11 +51,19 @@
|
||||
- [AlterColumnsResult](interfaces/AlterColumnsResult.md)
|
||||
- [ClientConfig](interfaces/ClientConfig.md)
|
||||
- [ColumnAlteration](interfaces/ColumnAlteration.md)
|
||||
- [ColumnOrdering](interfaces/ColumnOrdering.md)
|
||||
- [CompactionStats](interfaces/CompactionStats.md)
|
||||
- [ConnectNamespaceOptions](interfaces/ConnectNamespaceOptions.md)
|
||||
- [ConnectionOptions](interfaces/ConnectionOptions.md)
|
||||
- [CreateNamespaceOptions](interfaces/CreateNamespaceOptions.md)
|
||||
- [CreateNamespaceResponse](interfaces/CreateNamespaceResponse.md)
|
||||
- [CreateTableOptions](interfaces/CreateTableOptions.md)
|
||||
- [DeleteResult](interfaces/DeleteResult.md)
|
||||
- [DescribeNamespaceResponse](interfaces/DescribeNamespaceResponse.md)
|
||||
- [DirNamespaceConfig](interfaces/DirNamespaceConfig.md)
|
||||
- [DropColumnsResult](interfaces/DropColumnsResult.md)
|
||||
- [DropNamespaceOptions](interfaces/DropNamespaceOptions.md)
|
||||
- [DropNamespaceResponse](interfaces/DropNamespaceResponse.md)
|
||||
- [ExecutableQuery](interfaces/ExecutableQuery.md)
|
||||
- [FragmentStatistics](interfaces/FragmentStatistics.md)
|
||||
- [FragmentSummaryStats](interfaces/FragmentSummaryStats.md)
|
||||
@@ -69,13 +78,19 @@
|
||||
- [IvfFlatOptions](interfaces/IvfFlatOptions.md)
|
||||
- [IvfPqOptions](interfaces/IvfPqOptions.md)
|
||||
- [IvfRqOptions](interfaces/IvfRqOptions.md)
|
||||
- [ListNamespacesOptions](interfaces/ListNamespacesOptions.md)
|
||||
- [ListNamespacesResponse](interfaces/ListNamespacesResponse.md)
|
||||
- [LsmWriteSpec](interfaces/LsmWriteSpec.md)
|
||||
- [MergeResult](interfaces/MergeResult.md)
|
||||
- [OpenTableOptions](interfaces/OpenTableOptions.md)
|
||||
- [OptimizeOptions](interfaces/OptimizeOptions.md)
|
||||
- [OptimizeStats](interfaces/OptimizeStats.md)
|
||||
- [QueryExecutionOptions](interfaces/QueryExecutionOptions.md)
|
||||
- [RemovalStats](interfaces/RemovalStats.md)
|
||||
- [RenameTableOptions](interfaces/RenameTableOptions.md)
|
||||
- [RestNamespaceConfig](interfaces/RestNamespaceConfig.md)
|
||||
- [RetryConfig](interfaces/RetryConfig.md)
|
||||
- [ScannableOptions](interfaces/ScannableOptions.md)
|
||||
- [ShuffleOptions](interfaces/ShuffleOptions.md)
|
||||
- [SplitCalculatedOptions](interfaces/SplitCalculatedOptions.md)
|
||||
- [SplitHashOptions](interfaces/SplitHashOptions.md)
|
||||
@@ -90,6 +105,7 @@
|
||||
- [UpdateResult](interfaces/UpdateResult.md)
|
||||
- [Version](interfaces/Version.md)
|
||||
- [WriteExecutionOptions](interfaces/WriteExecutionOptions.md)
|
||||
- [WriteProgress](interfaces/WriteProgress.md)
|
||||
|
||||
## Type Aliases
|
||||
|
||||
@@ -107,6 +123,7 @@
|
||||
|
||||
- [RecordBatchIterator](functions/RecordBatchIterator.md)
|
||||
- [connect](functions/connect.md)
|
||||
- [connectNamespace](functions/connectNamespace.md)
|
||||
- [makeArrowTable](functions/makeArrowTable.md)
|
||||
- [packBits](functions/packBits.md)
|
||||
- [permutationBuilder](functions/permutationBuilder.md)
|
||||
|
||||
@@ -19,3 +19,39 @@ mode: "append" | "overwrite";
|
||||
If "append" (the default) then the new data will be added to the table
|
||||
|
||||
If "overwrite" then the new data will replace the existing data in the table.
|
||||
|
||||
***
|
||||
|
||||
### progress()
|
||||
|
||||
```ts
|
||||
progress: (progress) => void;
|
||||
```
|
||||
|
||||
Optional callback invoked periodically with write progress.
|
||||
|
||||
The callback is fired once per batch written and once more with
|
||||
`done: true` when the write completes. Calls are dispatched
|
||||
asynchronously to the JS event loop and never block the write — a slow
|
||||
callback will queue events rather than back-pressure the writer.
|
||||
|
||||
Errors thrown from the callback are logged with `console.warn` and
|
||||
swallowed — they do not abort the write.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **progress**: [`WriteProgress`](WriteProgress.md)
|
||||
|
||||
#### Returns
|
||||
|
||||
`void`
|
||||
|
||||
#### Example
|
||||
|
||||
```ts
|
||||
await table.add(data, {
|
||||
progress: (p) => {
|
||||
console.log(`${p.outputRows}/${p.totalRows ?? "?"} rows`);
|
||||
},
|
||||
});
|
||||
```
|
||||
|
||||
31
docs/src/js/interfaces/ColumnOrdering.md
Normal file
31
docs/src/js/interfaces/ColumnOrdering.md
Normal file
@@ -0,0 +1,31 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / ColumnOrdering
|
||||
|
||||
# Interface: ColumnOrdering
|
||||
|
||||
## Properties
|
||||
|
||||
### ascending?
|
||||
|
||||
```ts
|
||||
optional ascending: boolean;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### columnName
|
||||
|
||||
```ts
|
||||
columnName: string;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### nullsFirst?
|
||||
|
||||
```ts
|
||||
optional nullsFirst: boolean;
|
||||
```
|
||||
54
docs/src/js/interfaces/ConnectNamespaceOptions.md
Normal file
54
docs/src/js/interfaces/ConnectNamespaceOptions.md
Normal file
@@ -0,0 +1,54 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / ConnectNamespaceOptions
|
||||
|
||||
# Interface: ConnectNamespaceOptions
|
||||
|
||||
## Properties
|
||||
|
||||
### namespaceClientProperties?
|
||||
|
||||
```ts
|
||||
optional namespaceClientProperties: Record<string, string>;
|
||||
```
|
||||
|
||||
Extra properties for the backing namespace client.
|
||||
|
||||
***
|
||||
|
||||
### readConsistencyInterval?
|
||||
|
||||
```ts
|
||||
optional readConsistencyInterval: number;
|
||||
```
|
||||
|
||||
The interval, in seconds, at which to check for updates to the table
|
||||
from other processes. If None, then consistency is not checked. For
|
||||
performance reasons, this is the default. For strong consistency, set
|
||||
this to zero seconds. Then every read will check for updates from other
|
||||
processes. As a compromise, you can set this to a non-zero value for
|
||||
eventual consistency.
|
||||
|
||||
***
|
||||
|
||||
### session?
|
||||
|
||||
```ts
|
||||
optional session: Session;
|
||||
```
|
||||
|
||||
The session to use for this connection. Holds shared caches and other
|
||||
session-specific state.
|
||||
|
||||
***
|
||||
|
||||
### storageOptions?
|
||||
|
||||
```ts
|
||||
optional storageOptions: Record<string, string>;
|
||||
```
|
||||
|
||||
Configuration for object storage. The available options are described
|
||||
at https://docs.lancedb.com/storage/
|
||||
27
docs/src/js/interfaces/CreateNamespaceOptions.md
Normal file
27
docs/src/js/interfaces/CreateNamespaceOptions.md
Normal file
@@ -0,0 +1,27 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / CreateNamespaceOptions
|
||||
|
||||
# Interface: CreateNamespaceOptions
|
||||
|
||||
## Properties
|
||||
|
||||
### mode?
|
||||
|
||||
```ts
|
||||
optional mode: "overwrite" | "create" | "exist_ok";
|
||||
```
|
||||
|
||||
Creation mode.
|
||||
|
||||
***
|
||||
|
||||
### properties?
|
||||
|
||||
```ts
|
||||
optional properties: Record<string, string>;
|
||||
```
|
||||
|
||||
Properties to set on the new namespace.
|
||||
23
docs/src/js/interfaces/CreateNamespaceResponse.md
Normal file
23
docs/src/js/interfaces/CreateNamespaceResponse.md
Normal file
@@ -0,0 +1,23 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / CreateNamespaceResponse
|
||||
|
||||
# Interface: CreateNamespaceResponse
|
||||
|
||||
## Properties
|
||||
|
||||
### properties?
|
||||
|
||||
```ts
|
||||
optional properties: Record<string, string>;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### transactionId?
|
||||
|
||||
```ts
|
||||
optional transactionId: string;
|
||||
```
|
||||
15
docs/src/js/interfaces/DescribeNamespaceResponse.md
Normal file
15
docs/src/js/interfaces/DescribeNamespaceResponse.md
Normal file
@@ -0,0 +1,15 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / DescribeNamespaceResponse
|
||||
|
||||
# Interface: DescribeNamespaceResponse
|
||||
|
||||
## Properties
|
||||
|
||||
### properties?
|
||||
|
||||
```ts
|
||||
optional properties: Record<string, string>;
|
||||
```
|
||||
47
docs/src/js/interfaces/DirNamespaceConfig.md
Normal file
47
docs/src/js/interfaces/DirNamespaceConfig.md
Normal file
@@ -0,0 +1,47 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / DirNamespaceConfig
|
||||
|
||||
# Interface: DirNamespaceConfig
|
||||
|
||||
Configuration for the built-in directory namespace (`"dir"`).
|
||||
|
||||
The directory namespace stores tables under a single root path (local
|
||||
filesystem or object storage URI). See
|
||||
[https://docs.lancedb.com/namespaces](https://docs.lancedb.com/namespaces) for the documented surface;
|
||||
less-common knobs live under [DirNamespaceConfig.extraProperties](DirNamespaceConfig.md#extraproperties).
|
||||
|
||||
## Properties
|
||||
|
||||
### extraProperties?
|
||||
|
||||
```ts
|
||||
optional extraProperties: Record<string, string>;
|
||||
```
|
||||
|
||||
Additional raw properties passed verbatim to the namespace
|
||||
implementation (e.g. `storage.*`, `credential_vendor.*`). Typed
|
||||
fields above take precedence on key collision.
|
||||
|
||||
***
|
||||
|
||||
### manifestEnabled?
|
||||
|
||||
```ts
|
||||
optional manifestEnabled: boolean;
|
||||
```
|
||||
|
||||
Whether to maintain a namespace manifest at the root. Required for
|
||||
child namespaces. Defaults to true on the impl side.
|
||||
|
||||
***
|
||||
|
||||
### root
|
||||
|
||||
```ts
|
||||
root: string;
|
||||
```
|
||||
|
||||
Root path or URI containing the LanceDB tables.
|
||||
27
docs/src/js/interfaces/DropNamespaceOptions.md
Normal file
27
docs/src/js/interfaces/DropNamespaceOptions.md
Normal file
@@ -0,0 +1,27 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / DropNamespaceOptions
|
||||
|
||||
# Interface: DropNamespaceOptions
|
||||
|
||||
## Properties
|
||||
|
||||
### behavior?
|
||||
|
||||
```ts
|
||||
optional behavior: "restrict" | "cascade";
|
||||
```
|
||||
|
||||
Refuse to drop if non-empty (restrict) or drop recursively (cascade).
|
||||
|
||||
***
|
||||
|
||||
### mode?
|
||||
|
||||
```ts
|
||||
optional mode: "fail" | "skip";
|
||||
```
|
||||
|
||||
Whether to skip if the namespace doesn't exist, or fail.
|
||||
23
docs/src/js/interfaces/DropNamespaceResponse.md
Normal file
23
docs/src/js/interfaces/DropNamespaceResponse.md
Normal file
@@ -0,0 +1,23 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / DropNamespaceResponse
|
||||
|
||||
# Interface: DropNamespaceResponse
|
||||
|
||||
## Properties
|
||||
|
||||
### properties?
|
||||
|
||||
```ts
|
||||
optional properties: Record<string, string>;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### transactionId?
|
||||
|
||||
```ts
|
||||
optional transactionId: string[];
|
||||
```
|
||||
27
docs/src/js/interfaces/ListNamespacesOptions.md
Normal file
27
docs/src/js/interfaces/ListNamespacesOptions.md
Normal file
@@ -0,0 +1,27 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / ListNamespacesOptions
|
||||
|
||||
# Interface: ListNamespacesOptions
|
||||
|
||||
## Properties
|
||||
|
||||
### limit?
|
||||
|
||||
```ts
|
||||
optional limit: number;
|
||||
```
|
||||
|
||||
An optional limit to the number of results to return.
|
||||
|
||||
***
|
||||
|
||||
### pageToken?
|
||||
|
||||
```ts
|
||||
optional pageToken: string;
|
||||
```
|
||||
|
||||
Token from a previous response for pagination.
|
||||
23
docs/src/js/interfaces/ListNamespacesResponse.md
Normal file
23
docs/src/js/interfaces/ListNamespacesResponse.md
Normal file
@@ -0,0 +1,23 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / ListNamespacesResponse
|
||||
|
||||
# Interface: ListNamespacesResponse
|
||||
|
||||
## Properties
|
||||
|
||||
### namespaces
|
||||
|
||||
```ts
|
||||
namespaces: string[];
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### pageToken?
|
||||
|
||||
```ts
|
||||
optional pageToken: string;
|
||||
```
|
||||
64
docs/src/js/interfaces/LsmWriteSpec.md
Normal file
64
docs/src/js/interfaces/LsmWriteSpec.md
Normal file
@@ -0,0 +1,64 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / LsmWriteSpec
|
||||
|
||||
# Interface: LsmWriteSpec
|
||||
|
||||
Specification selecting Lance's MemWAL LSM-style write path for
|
||||
`mergeInsert`.
|
||||
|
||||
`specType` is `"bucket"`, `"identity"`, or `"unsharded"`. For `"bucket"`,
|
||||
`column` and `numBuckets` are required; for `"identity"`, `column` is
|
||||
required.
|
||||
|
||||
## Properties
|
||||
|
||||
### column?
|
||||
|
||||
```ts
|
||||
optional column: string;
|
||||
```
|
||||
|
||||
Bucket and identity variants: the sharding column.
|
||||
|
||||
***
|
||||
|
||||
### maintainedIndexes?
|
||||
|
||||
```ts
|
||||
optional maintainedIndexes: string[];
|
||||
```
|
||||
|
||||
Names of indexes the MemWAL should keep up to date during writes.
|
||||
|
||||
***
|
||||
|
||||
### numBuckets?
|
||||
|
||||
```ts
|
||||
optional numBuckets: number;
|
||||
```
|
||||
|
||||
Bucket variant: the number of buckets, in `[1, 1024]`.
|
||||
|
||||
***
|
||||
|
||||
### specType
|
||||
|
||||
```ts
|
||||
specType: "bucket" | "identity" | "unsharded";
|
||||
```
|
||||
|
||||
One of `"bucket"`, `"identity"`, or `"unsharded"`.
|
||||
|
||||
***
|
||||
|
||||
### writerConfigDefaults?
|
||||
|
||||
```ts
|
||||
optional writerConfigDefaults: Record<string, string>;
|
||||
```
|
||||
|
||||
Default `ShardWriter` configuration recorded in the MemWAL index.
|
||||
29
docs/src/js/interfaces/RenameTableOptions.md
Normal file
29
docs/src/js/interfaces/RenameTableOptions.md
Normal file
@@ -0,0 +1,29 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / RenameTableOptions
|
||||
|
||||
# Interface: RenameTableOptions
|
||||
|
||||
## Properties
|
||||
|
||||
### namespacePath?
|
||||
|
||||
```ts
|
||||
optional namespacePath: string[];
|
||||
```
|
||||
|
||||
The namespace path of the table being renamed. Defaults to the root
|
||||
namespace (`[]`) when omitted.
|
||||
|
||||
***
|
||||
|
||||
### newNamespacePath?
|
||||
|
||||
```ts
|
||||
optional newNamespacePath: string[];
|
||||
```
|
||||
|
||||
The namespace path to move the table to as part of the rename. When
|
||||
omitted the table stays in `namespacePath`.
|
||||
47
docs/src/js/interfaces/RestNamespaceConfig.md
Normal file
47
docs/src/js/interfaces/RestNamespaceConfig.md
Normal file
@@ -0,0 +1,47 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / RestNamespaceConfig
|
||||
|
||||
# Interface: RestNamespaceConfig
|
||||
|
||||
Configuration for the built-in REST namespace (`"rest"`).
|
||||
|
||||
The REST namespace talks to a remote catalog server over HTTP. See
|
||||
[https://docs.lancedb.com/namespaces](https://docs.lancedb.com/namespaces) for the documented surface;
|
||||
less-common knobs (TLS, metrics) live under
|
||||
[RestNamespaceConfig.extraProperties](RestNamespaceConfig.md#extraproperties).
|
||||
|
||||
## Properties
|
||||
|
||||
### extraProperties?
|
||||
|
||||
```ts
|
||||
optional extraProperties: Record<string, string>;
|
||||
```
|
||||
|
||||
Additional raw properties passed verbatim to the namespace
|
||||
implementation (e.g. `tls.*`, `ops_metrics_enabled`, `delimiter`).
|
||||
Typed fields above take precedence on key collision.
|
||||
|
||||
***
|
||||
|
||||
### headers?
|
||||
|
||||
```ts
|
||||
optional headers: Record<string, string>;
|
||||
```
|
||||
|
||||
HTTP headers forwarded with each request. Keys are passed through
|
||||
as-is (e.g. `"x-api-key"`, `"Authorization"`).
|
||||
|
||||
***
|
||||
|
||||
### uri
|
||||
|
||||
```ts
|
||||
uri: string;
|
||||
```
|
||||
|
||||
Catalog endpoint URL.
|
||||
29
docs/src/js/interfaces/ScannableOptions.md
Normal file
29
docs/src/js/interfaces/ScannableOptions.md
Normal file
@@ -0,0 +1,29 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / ScannableOptions
|
||||
|
||||
# Interface: ScannableOptions
|
||||
|
||||
## Properties
|
||||
|
||||
### numRows?
|
||||
|
||||
```ts
|
||||
optional numRows: number;
|
||||
```
|
||||
|
||||
Hint about the number of rows. Not validated against the stream.
|
||||
|
||||
***
|
||||
|
||||
### rescannable?
|
||||
|
||||
```ts
|
||||
optional rescannable: boolean;
|
||||
```
|
||||
|
||||
Whether the source can be scanned more than once. Defaults to `true` for
|
||||
`fromTable` / `fromFactory` and `false` for `fromIterable` /
|
||||
`fromRecordBatchReader`.
|
||||
84
docs/src/js/interfaces/WriteProgress.md
Normal file
84
docs/src/js/interfaces/WriteProgress.md
Normal file
@@ -0,0 +1,84 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / WriteProgress
|
||||
|
||||
# Interface: WriteProgress
|
||||
|
||||
Progress snapshot for a write operation, delivered to the `progress`
|
||||
callback passed to [Table.add](../classes/Table.md#add).
|
||||
|
||||
## Properties
|
||||
|
||||
### activeTasks
|
||||
|
||||
```ts
|
||||
activeTasks: number;
|
||||
```
|
||||
|
||||
Number of parallel write tasks currently in flight.
|
||||
|
||||
***
|
||||
|
||||
### done
|
||||
|
||||
```ts
|
||||
done: boolean;
|
||||
```
|
||||
|
||||
`true` for the final callback; `false` otherwise.
|
||||
|
||||
***
|
||||
|
||||
### elapsedSeconds
|
||||
|
||||
```ts
|
||||
elapsedSeconds: number;
|
||||
```
|
||||
|
||||
Wall-clock seconds since the write started.
|
||||
|
||||
***
|
||||
|
||||
### outputBytes
|
||||
|
||||
```ts
|
||||
outputBytes: number;
|
||||
```
|
||||
|
||||
Number of bytes written so far.
|
||||
|
||||
***
|
||||
|
||||
### outputRows
|
||||
|
||||
```ts
|
||||
outputRows: number;
|
||||
```
|
||||
|
||||
Number of rows written so far.
|
||||
|
||||
***
|
||||
|
||||
### totalRows?
|
||||
|
||||
```ts
|
||||
optional totalRows: number;
|
||||
```
|
||||
|
||||
Total rows expected, when the input source reports it.
|
||||
|
||||
Always set on the final callback (the one with `done: true`), falling
|
||||
back to the actual number of rows written when the source could not
|
||||
report a row count up front.
|
||||
|
||||
***
|
||||
|
||||
### totalTasks
|
||||
|
||||
```ts
|
||||
totalTasks: number;
|
||||
```
|
||||
|
||||
Total number of parallel write tasks (the write parallelism).
|
||||
@@ -166,6 +166,12 @@ lists the indices that LanceDb supports.
|
||||
|
||||
::: lancedb.index.IvfFlat
|
||||
|
||||
::: lancedb.index.IvfSq
|
||||
|
||||
::: lancedb.index.IvfRq
|
||||
|
||||
::: lancedb.index.HnswFlat
|
||||
|
||||
::: lancedb.table.IndexStatistics
|
||||
|
||||
## Querying (Asynchronous)
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.28.0-beta.11</version>
|
||||
<version>0.29.1-beta.0</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.28.0-beta.11</version>
|
||||
<version>0.29.1-beta.0</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>LanceDB Java SDK Parent POM</description>
|
||||
@@ -28,7 +28,7 @@
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<arrow.version>15.0.0</arrow.version>
|
||||
<lance-core.version>7.0.0-beta.7</lance-core.version>
|
||||
<lance-core.version>7.0.0-beta.13</lance-core.version>
|
||||
<spotless.skip>false</spotless.skip>
|
||||
<spotless.version>2.30.0</spotless.version>
|
||||
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
|
||||
|
||||
@@ -3,11 +3,11 @@ The core Rust library is in the `../rust/lancedb` directory, the rust binding
|
||||
code is in the `src/` directory and the typescript bindings are in
|
||||
the `lancedb/` directory.
|
||||
|
||||
Whenever you change the Rust code, you will need to recompile: `npm run build`.
|
||||
Whenever you change the Rust code, you will need to recompile: `pnpm build`.
|
||||
|
||||
Common commands:
|
||||
* Build: `npm run build`
|
||||
* Lint: `npm run lint`
|
||||
* Fix lints: `npm run lint-fix`
|
||||
* Test: `npm test`
|
||||
* Run single test file: `npm test __test__/arrow.test.ts`
|
||||
* Build: `pnpm build`
|
||||
* Lint: `pnpm lint`
|
||||
* Fix lints: `pnpm lint-fix`
|
||||
* Test: `pnpm test`
|
||||
* Run single test file: `pnpm test __test__/arrow.test.ts`
|
||||
|
||||
@@ -12,20 +12,22 @@ Typescript.
|
||||
* `src/`: Rust bindings source code
|
||||
* `lancedb/`: Typescript package source code
|
||||
* `__test__/`: Unit tests
|
||||
* `examples/`: An npm package with the examples shown in the documentation
|
||||
* `examples/`: A pnpm package with the examples shown in the documentation
|
||||
|
||||
## Development environment
|
||||
|
||||
To set up your development environment, you will need to install the following:
|
||||
|
||||
1. Node.js 14 or later
|
||||
2. Rust's package manager, Cargo. Use [rustup](https://rustup.rs/) to install.
|
||||
3. [protoc](https://grpc.io/docs/protoc-installation/) (Protocol Buffers compiler)
|
||||
1. Node.js 22 or later (required by pnpm 11)
|
||||
2. [pnpm](https://pnpm.io/installation) 11 or later (or run via `corepack enable`,
|
||||
which uses the `packageManager` field in `package.json`)
|
||||
3. Rust's package manager, Cargo. Use [rustup](https://rustup.rs/) to install.
|
||||
4. [protoc](https://grpc.io/docs/protoc-installation/) (Protocol Buffers compiler)
|
||||
|
||||
Initial setup:
|
||||
|
||||
```shell
|
||||
npm install
|
||||
pnpm install
|
||||
```
|
||||
|
||||
### Commit Hooks
|
||||
@@ -39,38 +41,38 @@ pre-commit install
|
||||
|
||||
## Development
|
||||
|
||||
Most common development commands can be run using the npm scripts.
|
||||
Most common development commands can be run using the pnpm scripts.
|
||||
|
||||
Build the package
|
||||
|
||||
```shell
|
||||
npm install
|
||||
npm run build
|
||||
pnpm install
|
||||
pnpm build
|
||||
```
|
||||
|
||||
Lint:
|
||||
|
||||
```shell
|
||||
npm run lint
|
||||
pnpm lint
|
||||
```
|
||||
|
||||
Format and fix lints:
|
||||
|
||||
```shell
|
||||
npm run lint-fix
|
||||
pnpm lint-fix
|
||||
```
|
||||
|
||||
Run tests:
|
||||
|
||||
```shell
|
||||
npm test
|
||||
pnpm test
|
||||
```
|
||||
|
||||
To run a single test:
|
||||
|
||||
```shell
|
||||
# Single file: table.test.ts
|
||||
npm test -- table.test.ts
|
||||
pnpm test -- table.test.ts
|
||||
# Single test: 'merge insert' in table.test.ts
|
||||
npm test -- table.test.ts --testNamePattern=merge\ insert
|
||||
pnpm test -- table.test.ts --testNamePattern=merge\ insert
|
||||
```
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.28.0-beta.11"
|
||||
version = "0.29.1-beta.0"
|
||||
publish = false
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
@@ -22,6 +22,7 @@ arrow-schema.workspace = true
|
||||
env_logger.workspace = true
|
||||
futures.workspace = true
|
||||
lancedb = { path = "../rust/lancedb", default-features = false }
|
||||
lance-namespace.workspace = true
|
||||
napi = { version = "3.8.3", default-features = false, features = [
|
||||
"napi9",
|
||||
"async"
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
import { readdirSync } from "fs";
|
||||
import { Field, Float64, Schema } from "apache-arrow";
|
||||
import * as tmp from "tmp";
|
||||
import { Connection, Table, connect } from "../lancedb";
|
||||
import { Connection, Table, connect, connectNamespace } from "../lancedb";
|
||||
import { LocalTable } from "../lancedb/table";
|
||||
|
||||
describe("when connecting", () => {
|
||||
@@ -47,6 +47,14 @@ describe("given a connection", () => {
|
||||
await db.close();
|
||||
expect(db.isOpen()).toBe(false);
|
||||
await expect(db.tableNames()).rejects.toThrow("Connection is closed");
|
||||
await expect(db.renameTable("a", "b")).rejects.toThrow(
|
||||
"Connection is closed",
|
||||
);
|
||||
});
|
||||
|
||||
it("should report renameTable as unsupported on an OSS connection", async () => {
|
||||
await db.createTable("a", [{ id: 1 }]);
|
||||
await expect(db.renameTable("a", "b")).rejects.toThrow(/not supported/);
|
||||
});
|
||||
it("should be able to create a table from an object arg `createTable(options)`, or args `createTable(name, data, options)`", async () => {
|
||||
let tbl = await db.createTable("test", [{ id: 1 }, { id: 2 }]);
|
||||
@@ -306,3 +314,186 @@ describe("clone table functionality", () => {
|
||||
).rejects.toThrow("Deep clone is not yet implemented");
|
||||
});
|
||||
});
|
||||
|
||||
describe("namespaces", () => {
|
||||
let tmpDir: tmp.DirResult;
|
||||
let db: Connection;
|
||||
|
||||
beforeEach(async () => {
|
||||
tmpDir = tmp.dirSync({ unsafeCleanup: true });
|
||||
// The local DirectoryNamespace backend only supports child namespaces
|
||||
// when manifest mode is enabled (see lance-namespace-impls/src/dir.rs).
|
||||
db = await connect(tmpDir.name, {
|
||||
// biome-ignore lint/style/useNamingConvention: opaque backend property key, must match Rust
|
||||
namespaceClientProperties: { manifest_enabled: "true" },
|
||||
});
|
||||
});
|
||||
afterEach(() => tmpDir.removeCallback());
|
||||
|
||||
it("should create and describe a namespace", async () => {
|
||||
await db.createNamespace(["myns"]);
|
||||
const desc = await db.describeNamespace(["myns"]);
|
||||
expect(desc).toBeDefined();
|
||||
});
|
||||
|
||||
it("should list namespaces created at the root", async () => {
|
||||
await db.createNamespace(["alpha"]);
|
||||
await db.createNamespace(["beta"]);
|
||||
const list = await db.listNamespaces();
|
||||
expect(list.namespaces).toEqual(expect.arrayContaining(["alpha", "beta"]));
|
||||
});
|
||||
|
||||
it("should list child namespaces under a parent", async () => {
|
||||
await db.createNamespace(["parent"]);
|
||||
await db.createNamespace(["parent", "child"]);
|
||||
const list = await db.listNamespaces(["parent"]);
|
||||
expect(list.namespaces).toContain("child");
|
||||
});
|
||||
|
||||
it("should drop a namespace", async () => {
|
||||
await db.createNamespace(["ephemeral"]);
|
||||
await db.dropNamespace(["ephemeral"]);
|
||||
const list = await db.listNamespaces();
|
||||
expect(list.namespaces).not.toContain("ephemeral");
|
||||
});
|
||||
|
||||
it("should raise an error on any namespace op after close", async () => {
|
||||
await db.close();
|
||||
await expect(db.describeNamespace(["foo"])).rejects.toThrow(
|
||||
"Connection is closed",
|
||||
);
|
||||
await expect(db.listNamespaces()).rejects.toThrow("Connection is closed");
|
||||
await expect(db.createNamespace(["foo"])).rejects.toThrow(
|
||||
"Connection is closed",
|
||||
);
|
||||
await expect(db.dropNamespace(["foo"])).rejects.toThrow(
|
||||
"Connection is closed",
|
||||
);
|
||||
});
|
||||
|
||||
it("should raise an understandable error when describing a non-existent namespace", async () => {
|
||||
await expect(db.describeNamespace(["does-not-exist"])).rejects.toThrow(
|
||||
/not found/i,
|
||||
);
|
||||
});
|
||||
|
||||
it("should raise an error when creating a namespace that already exists", async () => {
|
||||
await db.createNamespace(["dup"]);
|
||||
await expect(db.createNamespace(["dup"])).rejects.toThrow();
|
||||
});
|
||||
|
||||
it("should reject an unrecognized createNamespace mode with a clear error", async () => {
|
||||
await expect(
|
||||
// biome-ignore lint/suspicious/noExplicitAny: deliberately bypass TS to test runtime validation
|
||||
db.createNamespace(["x"], { mode: "frobnicate" as any }),
|
||||
).rejects.toThrow(/Invalid mode 'frobnicate'/);
|
||||
});
|
||||
|
||||
it("should reject an unrecognized dropNamespace mode with a clear error", async () => {
|
||||
await db.createNamespace(["x"]);
|
||||
await expect(
|
||||
// biome-ignore lint/suspicious/noExplicitAny: deliberately bypass TS to test runtime validation
|
||||
db.dropNamespace(["x"], { mode: "frobnicate" as any }),
|
||||
).rejects.toThrow(/Invalid mode 'frobnicate'/);
|
||||
});
|
||||
|
||||
it("should reject an unrecognized dropNamespace behavior with a clear error", async () => {
|
||||
await db.createNamespace(["x"]);
|
||||
await expect(
|
||||
// biome-ignore lint/suspicious/noExplicitAny: deliberately bypass TS to test runtime validation
|
||||
db.dropNamespace(["x"], { behavior: "frobnicate" as any }),
|
||||
).rejects.toThrow(/Invalid behavior 'frobnicate'/);
|
||||
});
|
||||
});
|
||||
|
||||
describe("connectNamespace", () => {
|
||||
let tmpDir: tmp.DirResult;
|
||||
beforeEach(() => {
|
||||
tmpDir = tmp.dirSync({ unsafeCleanup: true });
|
||||
});
|
||||
afterEach(() => tmpDir.removeCallback());
|
||||
|
||||
it("connects via the dir implementation and supports table ops", async () => {
|
||||
const db = await connectNamespace("dir", { root: tmpDir.name });
|
||||
await db.createTable("users", [{ id: 1 }, { id: 2 }]);
|
||||
await expect(db.tableNames()).resolves.toContain("users");
|
||||
});
|
||||
|
||||
it("throws a clear error when implName is empty", async () => {
|
||||
await expect(connectNamespace("", {})).rejects.toThrow(
|
||||
"implName must be a non-empty string",
|
||||
);
|
||||
});
|
||||
|
||||
it("throws when the namespace implementation is unknown", async () => {
|
||||
await expect(connectNamespace("not-a-real-impl", {})).rejects.toThrow();
|
||||
});
|
||||
|
||||
it("passes storage options through to the namespace", async () => {
|
||||
const db = await connectNamespace(
|
||||
"dir",
|
||||
{ root: tmpDir.name },
|
||||
{ storageOptions: { newTableDataStorageVersion: "stable" } },
|
||||
);
|
||||
await db.createTable("plumbing", [{ id: 1 }]);
|
||||
await expect(db.tableNames()).resolves.toContain("plumbing");
|
||||
});
|
||||
|
||||
it("supports child namespaces when manifestEnabled is true on the dir config", async () => {
|
||||
const writer = await connectNamespace("dir", {
|
||||
root: tmpDir.name,
|
||||
manifestEnabled: true,
|
||||
});
|
||||
await writer.createNamespace(["analytics"]);
|
||||
await writer.createTable("orders", [{ id: 1 }, { id: 2 }], ["analytics"]);
|
||||
await writer.close();
|
||||
|
||||
const reader = await connectNamespace("dir", {
|
||||
root: tmpDir.name,
|
||||
manifestEnabled: true,
|
||||
});
|
||||
await expect(reader.tableNames(["analytics"])).resolves.toContain("orders");
|
||||
const orders = await reader.openTable("orders", ["analytics"]);
|
||||
await expect(orders.countRows()).resolves.toBe(2);
|
||||
});
|
||||
|
||||
it("merges extraProperties into the dir config and is overridden by typed fields", async () => {
|
||||
// Two observable assertions:
|
||||
// - Typed `root` overrides extraProperties.root: createTable would fail
|
||||
// under the bogus path if the override didn't happen.
|
||||
// - extraProperties.manifest_enabled="false" is honored end-to-end. Child
|
||||
// namespaces require manifest mode (default true), so explicitly
|
||||
// disabling it via extraProperties must make createNamespace reject. If
|
||||
// extraProperties pass-through were silently broken, the default would
|
||||
// let createNamespace succeed.
|
||||
const db = await connectNamespace("dir", {
|
||||
root: tmpDir.name,
|
||||
extraProperties: {
|
||||
root: "/should/be/overridden",
|
||||
// biome-ignore lint/style/useNamingConvention: backend property key
|
||||
manifest_enabled: "false",
|
||||
},
|
||||
});
|
||||
await db.createTable("base", [{ id: 1 }]);
|
||||
await expect(db.tableNames()).resolves.toContain("base");
|
||||
await expect(db.createNamespace(["analytics"])).rejects.toThrow();
|
||||
});
|
||||
|
||||
it("flows unknown top-level keys through when implName is dynamic (no silent drop)", async () => {
|
||||
// Routes via the third overload because `impl` is `string`, not the
|
||||
// literal `"dir"`. The dispatcher still notices the runtime value is
|
||||
// "dir", but unknown keys like `manifest_enabled` must not be silently
|
||||
// dropped during the conversion.
|
||||
//
|
||||
// Asserting a *negative* outcome (manifest disabled -> createNamespace
|
||||
// rejects) is required for observability, since the backend default for
|
||||
// `manifest_enabled` is true.
|
||||
const impl: string = "dir";
|
||||
const db = await connectNamespace(impl, {
|
||||
root: tmpDir.name,
|
||||
// biome-ignore lint/style/useNamingConvention: backend property key
|
||||
manifest_enabled: "false",
|
||||
});
|
||||
await expect(db.createNamespace(["mixed"])).rejects.toThrow();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -109,3 +109,209 @@ describe("Query outputSchema", () => {
|
||||
expect(schema.fields.length).toBe(3);
|
||||
});
|
||||
});
|
||||
|
||||
describe("Query orderBy", () => {
|
||||
let tmpDir: tmp.DirResult;
|
||||
let table: Table;
|
||||
|
||||
beforeEach(async () => {
|
||||
tmpDir = tmp.dirSync({ unsafeCleanup: true });
|
||||
const db = await connect(tmpDir.name);
|
||||
|
||||
// Create table with numeric data for sorting
|
||||
const schema = new Schema([
|
||||
new Field("id", new Int64(), true),
|
||||
new Field("score", new Float32(), true),
|
||||
new Field("name", new Utf8(), true),
|
||||
]);
|
||||
|
||||
const data = makeArrowTable(
|
||||
[
|
||||
{ id: 1n, score: 3.5, name: "charlie" },
|
||||
{ id: 2n, score: 1.2, name: "alice" },
|
||||
{ id: 3n, score: 2.8, name: "bob" },
|
||||
{ id: 4n, score: 0.5, name: "david" },
|
||||
{ id: 5n, score: 4.1, name: "eve" },
|
||||
],
|
||||
{ schema },
|
||||
);
|
||||
table = await db.createTable("test", data);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
tmpDir.removeCallback();
|
||||
});
|
||||
|
||||
it("should sort by single column ascending", async () => {
|
||||
const results = await table
|
||||
.query()
|
||||
.orderBy({ columnName: "score", ascending: true, nullsFirst: false })
|
||||
.toArray();
|
||||
|
||||
expect(results.length).toBe(5);
|
||||
// Verify ascending order
|
||||
expect(results[0].score).toBeCloseTo(0.5, 0.001);
|
||||
expect(results[1].score).toBeCloseTo(1.2, 0.001);
|
||||
expect(results[2].score).toBeCloseTo(2.8, 0.001);
|
||||
expect(results[3].score).toBeCloseTo(3.5, 0.001);
|
||||
expect(results[4].score).toBeCloseTo(4.1, 0.001);
|
||||
});
|
||||
|
||||
it("should sort by single column descending", async () => {
|
||||
const results = await table
|
||||
.query()
|
||||
.orderBy({ columnName: "score", ascending: false, nullsFirst: false })
|
||||
.toArray();
|
||||
|
||||
expect(results.length).toBe(5);
|
||||
// Verify descending order
|
||||
expect(results[0].score).toBeCloseTo(4.1, 0.001);
|
||||
expect(results[1].score).toBeCloseTo(3.5, 0.001);
|
||||
expect(results[2].score).toBeCloseTo(2.8, 0.001);
|
||||
expect(results[3].score).toBeCloseTo(1.2, 0.001);
|
||||
expect(results[4].score).toBeCloseTo(0.5, 0.001);
|
||||
});
|
||||
|
||||
it("should use ascending as default direction", async () => {
|
||||
const results = await table
|
||||
.query()
|
||||
.orderBy({ columnName: "score" })
|
||||
.toArray();
|
||||
|
||||
expect(results.length).toBe(5);
|
||||
// Verify ascending order (default)
|
||||
expect(results[0].score).toBeCloseTo(0.5, 0.001);
|
||||
expect(results[1].score).toBeCloseTo(1.2, 0.001);
|
||||
expect(results[2].score).toBeCloseTo(2.8, 0.001);
|
||||
expect(results[3].score).toBeCloseTo(3.5, 0.001);
|
||||
expect(results[4].score).toBeCloseTo(4.1, 0.001);
|
||||
});
|
||||
|
||||
it("should sort by string column", async () => {
|
||||
const results = await table
|
||||
.query()
|
||||
.orderBy({ columnName: "name" })
|
||||
.toArray();
|
||||
|
||||
expect(results.length).toBe(5);
|
||||
// Verify alphabetical order
|
||||
expect(results[0].name).toBe("alice");
|
||||
expect(results[1].name).toBe("bob");
|
||||
expect(results[2].name).toBe("charlie");
|
||||
expect(results[3].name).toBe("david");
|
||||
expect(results[4].name).toBe("eve");
|
||||
});
|
||||
|
||||
it("should support method chaining with where", async () => {
|
||||
const results = await table
|
||||
.query()
|
||||
.where("score > 2.0")
|
||||
.orderBy({ columnName: "score" })
|
||||
.toArray();
|
||||
expect(results.length).toBe(3);
|
||||
// Verify filtered and sorted
|
||||
expect(results[0].score).toBeCloseTo(2.8, 0.001);
|
||||
expect(results[1].score).toBeCloseTo(3.5, 0.001);
|
||||
expect(results[2].score).toBeCloseTo(4.1, 0.001);
|
||||
});
|
||||
|
||||
it("should support method chaining with limit", async () => {
|
||||
const results = await table
|
||||
.query()
|
||||
.orderBy({ columnName: "score", ascending: false })
|
||||
.limit(3)
|
||||
.toArray();
|
||||
|
||||
expect(results.length).toBe(3);
|
||||
// Verify top 3 in descending order
|
||||
expect(results[0].score).toBeCloseTo(4.1, 0.001);
|
||||
expect(results[1].score).toBeCloseTo(3.5, 0.001);
|
||||
expect(results[2].score).toBeCloseTo(2.8, 0.001);
|
||||
});
|
||||
|
||||
it("should support method chaining with offset", async () => {
|
||||
const results = await table
|
||||
.query()
|
||||
.orderBy({ columnName: "score" })
|
||||
.offset(2)
|
||||
.limit(2)
|
||||
.toArray();
|
||||
|
||||
expect(results.length).toBe(2);
|
||||
// Verify results skip first 2 and take next 2
|
||||
expect(results[0].score).toBeCloseTo(2.8, 0.001);
|
||||
expect(results[1].score).toBeCloseTo(3.5, 0.001);
|
||||
});
|
||||
|
||||
it("should support method chaining with select", async () => {
|
||||
const results = await table
|
||||
.query()
|
||||
.orderBy({ columnName: "name" })
|
||||
.select(["name", "score"])
|
||||
.toArray();
|
||||
|
||||
expect(results.length).toBe(5);
|
||||
// Verify only selected columns are present
|
||||
expect(Object.keys(results[0])).toEqual(["name", "score"]);
|
||||
expect(Object.keys(results[4])).toEqual(["name", "score"]);
|
||||
// Verify sorted by name
|
||||
expect(results[0].name).toBe("alice");
|
||||
expect(results[4].name).toBe("eve");
|
||||
});
|
||||
|
||||
it("should support complex method chaining", async () => {
|
||||
const results = await table
|
||||
.query()
|
||||
.where("score > 1.0")
|
||||
.orderBy({ columnName: "score", ascending: false })
|
||||
.limit(3)
|
||||
.select(["id", "score", "name"])
|
||||
.toArray();
|
||||
|
||||
expect(results.length).toBe(3);
|
||||
// Verify filtered, sorted, limited, and projected
|
||||
expect(results[0].score).toBeCloseTo(4.1, 0.001);
|
||||
expect(results[1].score).toBeCloseTo(3.5, 0.001);
|
||||
expect(results[2].score).toBeCloseTo(2.8, 0.001);
|
||||
expect(Object.keys(results[0])).toEqual(["id", "score", "name"]);
|
||||
});
|
||||
|
||||
it("should support multi-column ordering and null placement", async () => {
|
||||
const schema = new Schema([
|
||||
new Field("group", new Int64(), true),
|
||||
new Field("score", new Float32(), true),
|
||||
new Field("name", new Utf8(), true),
|
||||
]);
|
||||
|
||||
const data = makeArrowTable(
|
||||
[
|
||||
{ group: 1n, score: null, name: "z" },
|
||||
{ group: 1n, score: 1.0, name: "b" },
|
||||
{ group: 1n, score: 1.0, name: "a" },
|
||||
{ group: 2n, score: 0.5, name: "c" },
|
||||
],
|
||||
{ schema },
|
||||
);
|
||||
const nullTable = await (await connect(tmpDir.name)).createTable(
|
||||
"test_multi_order",
|
||||
data,
|
||||
{ mode: "overwrite" },
|
||||
);
|
||||
|
||||
const results = await nullTable
|
||||
.query()
|
||||
.orderBy([
|
||||
{ columnName: "group", ascending: true, nullsFirst: false },
|
||||
{ columnName: "score", ascending: true, nullsFirst: true },
|
||||
{ columnName: "name", ascending: true, nullsFirst: false },
|
||||
])
|
||||
.toArray();
|
||||
|
||||
expect(results.map((r) => [r.group, r.score, r.name])).toEqual([
|
||||
[1n, null, "z"],
|
||||
[1n, 1.0, "a"],
|
||||
[1n, 1.0, "b"],
|
||||
[2n, 0.5, "c"],
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -617,4 +617,68 @@ describe("remote connection", () => {
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe("renameTable", () => {
|
||||
async function captureRenameRequest(
|
||||
call: (db: Connection) => Promise<void>,
|
||||
): Promise<{ url: string; body: Record<string, unknown> }> {
|
||||
let captured: { url: string; body: Record<string, unknown> } | undefined;
|
||||
await withMockDatabase((req, res) => {
|
||||
let raw = "";
|
||||
req.on("data", (chunk) => {
|
||||
raw += chunk;
|
||||
});
|
||||
req.on("end", () => {
|
||||
captured = {
|
||||
url: req.url ?? "",
|
||||
body: raw ? JSON.parse(raw) : {},
|
||||
};
|
||||
res.writeHead(200, { "Content-Type": "application/json" }).end("");
|
||||
});
|
||||
}, call);
|
||||
if (!captured) {
|
||||
throw new Error("mock server never saw a request");
|
||||
}
|
||||
return captured;
|
||||
}
|
||||
|
||||
it("sends rename request for a table in the root namespace", async () => {
|
||||
const { url, body } = await captureRenameRequest(async (db) => {
|
||||
await db.renameTable("table1", "table2");
|
||||
});
|
||||
expect(url).toBe("/v1/table/table1/rename/");
|
||||
// biome-ignore lint/style/useNamingConvention: snake_case mandated by the server wire format
|
||||
expect(body).toEqual({ new_table_name: "table2" });
|
||||
});
|
||||
|
||||
it("omits new_namespace when only the current namespace is supplied", async () => {
|
||||
// Safe-default check: passing namespacePath alone must not send
|
||||
// `new_namespace`, so the server keeps the table in its current
|
||||
// namespace instead of silently moving it to root.
|
||||
const { url, body } = await captureRenameRequest(async (db) => {
|
||||
await db.renameTable("table1", "table2", {
|
||||
namespacePath: ["ns1"],
|
||||
});
|
||||
});
|
||||
expect(url).toBe("/v1/table/ns1$table1/rename/");
|
||||
// biome-ignore lint/style/useNamingConvention: snake_case mandated by the server wire format
|
||||
expect(body).toEqual({ new_table_name: "table2" });
|
||||
});
|
||||
|
||||
it("includes new_namespace in the body for a cross-namespace rename", async () => {
|
||||
const { url, body } = await captureRenameRequest(async (db) => {
|
||||
await db.renameTable("table1", "table2", {
|
||||
namespacePath: ["ns1"],
|
||||
newNamespacePath: ["ns2"],
|
||||
});
|
||||
});
|
||||
expect(url).toBe("/v1/table/ns1$table1/rename/");
|
||||
expect(body).toEqual({
|
||||
// biome-ignore lint/style/useNamingConvention: snake_case mandated by the server wire format
|
||||
new_table_name: "table2",
|
||||
// biome-ignore lint/style/useNamingConvention: snake_case mandated by the server wire format
|
||||
new_namespace: ["ns2"],
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
438
nodejs/__test__/scannable.test.ts
Normal file
438
nodejs/__test__/scannable.test.ts
Normal file
@@ -0,0 +1,438 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
import {
|
||||
Field,
|
||||
Float16,
|
||||
Int32,
|
||||
type RecordBatch,
|
||||
RecordBatchReader,
|
||||
Schema,
|
||||
tableToIPC,
|
||||
} from "apache-arrow";
|
||||
import { makeArrowTable, makeEmptyTable } from "../lancedb/arrow";
|
||||
import { Scannable } from "../lancedb/scannable";
|
||||
|
||||
function makeTable() {
|
||||
return makeArrowTable(
|
||||
[
|
||||
{ id: 1, name: "a" },
|
||||
{ id: 2, name: "b" },
|
||||
{ id: 3, name: "c" },
|
||||
],
|
||||
{ vectorColumns: {} },
|
||||
);
|
||||
}
|
||||
|
||||
async function makeReader(): Promise<RecordBatchReader> {
|
||||
// `RecordBatchReader.from()` returns an unopened reader; `.schema` is only
|
||||
// populated after `.open()`. Opening sync readers is synchronous.
|
||||
const reader = RecordBatchReader.from(tableToIPC(makeTable()));
|
||||
return reader.open() as RecordBatchReader;
|
||||
}
|
||||
|
||||
describe("Scannable", () => {
|
||||
describe("fromTable", () => {
|
||||
test("reflects schema, numRows, and defaults rescannable=true", async () => {
|
||||
const table = makeTable();
|
||||
const scannable = await Scannable.fromTable(table);
|
||||
|
||||
expect(scannable.schema).toBe(table.schema);
|
||||
expect(scannable.numRows).toBe(table.numRows);
|
||||
expect(scannable.rescannable).toBe(true);
|
||||
});
|
||||
|
||||
test("throws when opts.numRows does not match table.numRows", async () => {
|
||||
await expect(
|
||||
Scannable.fromTable(makeTable(), { numRows: 42 }),
|
||||
).rejects.toThrow(/does not match table\.numRows/);
|
||||
});
|
||||
|
||||
test("throws when opts.rescannable is false", async () => {
|
||||
await expect(
|
||||
Scannable.fromTable(makeTable(), { rescannable: false }),
|
||||
).rejects.toThrow(/always rescannable/);
|
||||
});
|
||||
});
|
||||
|
||||
describe("fromRecordBatchReader", () => {
|
||||
test("reflects schema and defaults numRows=null, rescannable=false", async () => {
|
||||
const reader = await makeReader();
|
||||
const scannable = await Scannable.fromRecordBatchReader(reader);
|
||||
|
||||
expect(scannable.schema).toBe(reader.schema);
|
||||
expect(scannable.numRows).toBeNull();
|
||||
expect(scannable.rescannable).toBe(false);
|
||||
});
|
||||
|
||||
test("honors numRows override", async () => {
|
||||
const scannable = await Scannable.fromRecordBatchReader(
|
||||
await makeReader(),
|
||||
{ numRows: 3 },
|
||||
);
|
||||
|
||||
expect(scannable.numRows).toBe(3);
|
||||
expect(scannable.rescannable).toBe(false);
|
||||
});
|
||||
|
||||
test("rescannable: false explicit does not throw", async () => {
|
||||
const reader = await makeReader();
|
||||
const scannable = await Scannable.fromRecordBatchReader(reader, {
|
||||
rescannable: false,
|
||||
});
|
||||
expect(scannable.rescannable).toBe(false);
|
||||
});
|
||||
|
||||
test("throws when opts.rescannable is true", async () => {
|
||||
const reader = await makeReader();
|
||||
await expect(
|
||||
Scannable.fromRecordBatchReader(reader, { rescannable: true }),
|
||||
).rejects.toThrow(/does not accept rescannable/);
|
||||
});
|
||||
|
||||
test("throws when opts.rescannable is true even alongside numRows", async () => {
|
||||
const reader = await makeReader();
|
||||
await expect(
|
||||
Scannable.fromRecordBatchReader(reader, {
|
||||
numRows: 3,
|
||||
rescannable: true,
|
||||
}),
|
||||
).rejects.toThrow(/does not accept rescannable/);
|
||||
});
|
||||
});
|
||||
|
||||
describe("fromIterable", () => {
|
||||
test("accepts a sync iterable of batches", async () => {
|
||||
const table = makeTable();
|
||||
const scannable = await Scannable.fromIterable(
|
||||
table.schema,
|
||||
table.batches,
|
||||
);
|
||||
|
||||
expect(scannable.schema).toBe(table.schema);
|
||||
expect(scannable.numRows).toBeNull();
|
||||
expect(scannable.rescannable).toBe(false);
|
||||
});
|
||||
|
||||
test("accepts an async iterable of batches", async () => {
|
||||
const table = makeTable();
|
||||
async function* generator(): AsyncGenerator<RecordBatch> {
|
||||
for (const batch of table.batches) {
|
||||
yield batch;
|
||||
}
|
||||
}
|
||||
|
||||
const scannable = await Scannable.fromIterable(table.schema, generator());
|
||||
expect(scannable.schema).toBe(table.schema);
|
||||
expect(scannable.rescannable).toBe(false);
|
||||
});
|
||||
|
||||
describe("rescannable: true detection", () => {
|
||||
// Replayable inputs: [Symbol.iterator]() / [Symbol.asyncIterator]()
|
||||
// returns a fresh iterator each call. Must NOT throw.
|
||||
|
||||
test("Array passes (fresh ArrayIterator each call)", async () => {
|
||||
const table = makeTable();
|
||||
const scannable = await Scannable.fromIterable(
|
||||
table.schema,
|
||||
table.batches,
|
||||
{ rescannable: true },
|
||||
);
|
||||
expect(scannable.rescannable).toBe(true);
|
||||
});
|
||||
|
||||
test("Set passes (fresh SetIterator each call)", async () => {
|
||||
const table = makeTable();
|
||||
const set = new Set<RecordBatch>(table.batches);
|
||||
const scannable = await Scannable.fromIterable(table.schema, set, {
|
||||
rescannable: true,
|
||||
});
|
||||
expect(scannable.rescannable).toBe(true);
|
||||
});
|
||||
|
||||
test("custom Iterable returning a fresh iterator passes", async () => {
|
||||
const table = makeTable();
|
||||
const replayable: Iterable<RecordBatch> = {
|
||||
[Symbol.iterator]() {
|
||||
return table.batches[Symbol.iterator]();
|
||||
},
|
||||
};
|
||||
const scannable = await Scannable.fromIterable(
|
||||
table.schema,
|
||||
replayable,
|
||||
{ rescannable: true },
|
||||
);
|
||||
expect(scannable.rescannable).toBe(true);
|
||||
});
|
||||
|
||||
test("object with generator method passes (fresh generator each call)", async () => {
|
||||
const table = makeTable();
|
||||
const replayable: Iterable<RecordBatch> = {
|
||||
*[Symbol.iterator]() {
|
||||
for (const batch of table.batches) yield batch;
|
||||
},
|
||||
};
|
||||
const scannable = await Scannable.fromIterable(
|
||||
table.schema,
|
||||
replayable,
|
||||
{ rescannable: true },
|
||||
);
|
||||
expect(scannable.rescannable).toBe(true);
|
||||
});
|
||||
|
||||
test("empty Array passes (replayable degenerate case)", async () => {
|
||||
const schema = makeTable().schema;
|
||||
const scannable = await Scannable.fromIterable(
|
||||
schema,
|
||||
[] as RecordBatch[],
|
||||
{ rescannable: true },
|
||||
);
|
||||
expect(scannable.rescannable).toBe(true);
|
||||
});
|
||||
|
||||
// One-shot inputs: [Symbol.iterator]() / [Symbol.asyncIterator]()
|
||||
// returns the same object, or the input is already-an-iterator.
|
||||
// Must throw with a /one-shot/ message.
|
||||
|
||||
test("sync generator throws", async () => {
|
||||
const table = makeTable();
|
||||
function* generator(): Generator<RecordBatch> {
|
||||
for (const batch of table.batches) yield batch;
|
||||
}
|
||||
await expect(
|
||||
Scannable.fromIterable(table.schema, generator(), {
|
||||
rescannable: true,
|
||||
}),
|
||||
).rejects.toThrow(/one-shot/);
|
||||
});
|
||||
|
||||
test("async generator throws", async () => {
|
||||
const table = makeTable();
|
||||
async function* generator(): AsyncGenerator<RecordBatch> {
|
||||
for (const batch of table.batches) yield batch;
|
||||
}
|
||||
await expect(
|
||||
Scannable.fromIterable(table.schema, generator(), {
|
||||
rescannable: true,
|
||||
}),
|
||||
).rejects.toThrow(/one-shot/);
|
||||
});
|
||||
|
||||
test("empty generator throws (one-shot degenerate case)", async () => {
|
||||
const schema = makeTable().schema;
|
||||
function* generator(): Generator<RecordBatch> {
|
||||
// intentionally empty; yields nothing.
|
||||
}
|
||||
await expect(
|
||||
Scannable.fromIterable(schema, generator(), { rescannable: true }),
|
||||
).rejects.toThrow(/one-shot/);
|
||||
});
|
||||
|
||||
test("custom self-iterator throws", async () => {
|
||||
const table = makeTable();
|
||||
const batches = table.batches;
|
||||
let i = 0;
|
||||
const oneShot: Iterable<RecordBatch> & Iterator<RecordBatch> = {
|
||||
[Symbol.iterator]() {
|
||||
return this;
|
||||
},
|
||||
next() {
|
||||
if (i >= batches.length) {
|
||||
return { done: true, value: undefined };
|
||||
}
|
||||
return { done: false, value: batches[i++] };
|
||||
},
|
||||
};
|
||||
await expect(
|
||||
Scannable.fromIterable(table.schema, oneShot, { rescannable: true }),
|
||||
).rejects.toThrow(/one-shot/);
|
||||
});
|
||||
|
||||
test("Array.values() (IterableIterator) throws", async () => {
|
||||
const table = makeTable();
|
||||
const iter = table.batches.values();
|
||||
await expect(
|
||||
Scannable.fromIterable(table.schema, iter, { rescannable: true }),
|
||||
).rejects.toThrow(/one-shot/);
|
||||
});
|
||||
|
||||
test("raw iterator (only `.next`) throws", async () => {
|
||||
const table = makeTable();
|
||||
const batches = table.batches;
|
||||
let i = 0;
|
||||
const rawIter = {
|
||||
next(): IteratorResult<RecordBatch> {
|
||||
if (i >= batches.length) {
|
||||
return { done: true, value: undefined };
|
||||
}
|
||||
return { done: false, value: batches[i++] };
|
||||
},
|
||||
};
|
||||
await expect(
|
||||
Scannable.fromIterable(
|
||||
table.schema,
|
||||
rawIter as unknown as Iterable<RecordBatch>,
|
||||
{ rescannable: true },
|
||||
),
|
||||
).rejects.toThrow(/one-shot/);
|
||||
});
|
||||
|
||||
// Edge: null/undefined must not crash the detection helper. The
|
||||
// null check belongs to `normalizeIterator` and only fires when a
|
||||
// scan starts.
|
||||
|
||||
test("null input does not crash detection at construction", async () => {
|
||||
const schema = makeTable().schema;
|
||||
await expect(
|
||||
Scannable.fromIterable(
|
||||
schema,
|
||||
null as unknown as Iterable<RecordBatch>,
|
||||
{
|
||||
rescannable: true,
|
||||
},
|
||||
),
|
||||
).resolves.toBeDefined();
|
||||
});
|
||||
|
||||
test("undefined input does not crash detection at construction", async () => {
|
||||
const schema = makeTable().schema;
|
||||
await expect(
|
||||
Scannable.fromIterable(
|
||||
schema,
|
||||
undefined as unknown as Iterable<RecordBatch>,
|
||||
{ rescannable: true },
|
||||
),
|
||||
).resolves.toBeDefined();
|
||||
});
|
||||
|
||||
// Default (rescannable omitted) skips the check entirely, so even
|
||||
// pathological inputs construct without throwing here.
|
||||
|
||||
test("rescannable omitted skips detection entirely (generator passes)", async () => {
|
||||
const table = makeTable();
|
||||
function* generator(): Generator<RecordBatch> {
|
||||
for (const batch of table.batches) yield batch;
|
||||
}
|
||||
const scannable = await Scannable.fromIterable(
|
||||
table.schema,
|
||||
generator(),
|
||||
);
|
||||
expect(scannable.rescannable).toBe(false);
|
||||
});
|
||||
|
||||
test("rescannable: false explicit skips detection entirely (generator passes)", async () => {
|
||||
const table = makeTable();
|
||||
function* generator(): Generator<RecordBatch> {
|
||||
for (const batch of table.batches) yield batch;
|
||||
}
|
||||
const scannable = await Scannable.fromIterable(
|
||||
table.schema,
|
||||
generator(),
|
||||
{ rescannable: false },
|
||||
);
|
||||
expect(scannable.rescannable).toBe(false);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("fromFactory", () => {
|
||||
test("defaults rescannable=true and does not invoke the factory eagerly", async () => {
|
||||
const table = makeTable();
|
||||
const factory = jest.fn(() => table.batches);
|
||||
|
||||
const scannable = await Scannable.fromFactory(table.schema, factory);
|
||||
|
||||
expect(scannable.schema).toBe(table.schema);
|
||||
expect(scannable.rescannable).toBe(true);
|
||||
expect(factory).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
test("honors rescannable and numRows overrides", async () => {
|
||||
const table = makeTable();
|
||||
const scannable = await Scannable.fromFactory(
|
||||
table.schema,
|
||||
() => table.batches,
|
||||
{ numRows: 7, rescannable: false },
|
||||
);
|
||||
|
||||
expect(scannable.numRows).toBe(7);
|
||||
expect(scannable.rescannable).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("validation", () => {
|
||||
test("throws when numRows is negative", async () => {
|
||||
await expect(
|
||||
Scannable.fromFactory(makeTable().schema, () => [], { numRows: -1 }),
|
||||
).rejects.toThrow(/non-negative/);
|
||||
});
|
||||
|
||||
test("throws when numRows is not an integer", async () => {
|
||||
await expect(
|
||||
Scannable.fromFactory(makeTable().schema, () => [], { numRows: 3.5 }),
|
||||
).rejects.toThrow(/integer/);
|
||||
});
|
||||
});
|
||||
|
||||
describe("native handle", () => {
|
||||
test("exposes a native handle via inner", async () => {
|
||||
const scannable = await Scannable.fromTable(makeTable());
|
||||
expect(scannable.inner).toBeDefined();
|
||||
expect(typeof scannable.inner).toBe("object");
|
||||
expect(scannable.inner).not.toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
// Schema-variety construction tests. Each asserts that construction
|
||||
// succeeds against a richer Arrow schema, which transitively exercises
|
||||
// schema serialization and the Rust-side `ipc_file_to_schema` for types
|
||||
// beyond flat primitives.
|
||||
describe("schema variety", () => {
|
||||
test("accepts an empty table", async () => {
|
||||
const schema = new Schema([new Field("id", new Int32(), true)]);
|
||||
const table = makeEmptyTable(schema);
|
||||
const scannable = await Scannable.fromTable(table);
|
||||
|
||||
expect(scannable.numRows).toBe(0);
|
||||
expect(scannable.schema).toBe(table.schema);
|
||||
});
|
||||
|
||||
test("accepts nested struct and list columns", async () => {
|
||||
const table = makeArrowTable(
|
||||
[
|
||||
{ id: 1, point: { x: 0, y: 0 }, tags: ["a", "b"] },
|
||||
{ id: 2, point: { x: 1, y: 2 }, tags: ["c"] },
|
||||
],
|
||||
{ vectorColumns: {} },
|
||||
);
|
||||
const scannable = await Scannable.fromTable(table);
|
||||
|
||||
expect(scannable.schema).toBe(table.schema);
|
||||
expect(scannable.numRows).toBe(2);
|
||||
});
|
||||
|
||||
test("accepts a FixedSizeList (vector) column", async () => {
|
||||
const table = makeArrowTable(
|
||||
[
|
||||
{ id: 1, vec: [1, 2, 3] },
|
||||
{ id: 2, vec: [4, 5, 6] },
|
||||
],
|
||||
{ vectorColumns: { vec: { type: new Float16() } } },
|
||||
);
|
||||
const scannable = await Scannable.fromTable(table);
|
||||
|
||||
expect(scannable.schema).toBe(table.schema);
|
||||
expect(scannable.numRows).toBe(2);
|
||||
});
|
||||
|
||||
test("accepts a table with many columns", async () => {
|
||||
const row: Record<string, number> = {};
|
||||
for (let i = 0; i < 50; i++) row[`c${i}`] = i;
|
||||
const table = makeArrowTable([row, row], { vectorColumns: {} });
|
||||
const scannable = await Scannable.fromTable(table);
|
||||
|
||||
expect(scannable.schema.fields.length).toBe(50);
|
||||
expect(scannable.numRows).toBe(2);
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -115,6 +115,48 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
await expect(table.countRows()).resolves.toBe(1);
|
||||
});
|
||||
|
||||
it("should invoke the progress callback", async () => {
|
||||
const events: import("../lancedb").WriteProgress[] = [];
|
||||
await table.add([{ id: 1 }, { id: 2 }, { id: 3 }], {
|
||||
progress: (p) => events.push(p),
|
||||
});
|
||||
|
||||
expect(events.length).toBeGreaterThan(0);
|
||||
const last = events[events.length - 1];
|
||||
expect(last.done).toBe(true);
|
||||
// Earlier callbacks must have done=false.
|
||||
for (const ev of events.slice(0, -1)) {
|
||||
expect(ev.done).toBe(false);
|
||||
}
|
||||
// outputRows reflects the rows added in this call, not table size.
|
||||
expect(last.outputRows).toBe(3);
|
||||
// The input source (an array) reports a row count, so totalRows is set.
|
||||
expect(last.totalRows).toBe(3);
|
||||
// outputRows is monotonic.
|
||||
for (let i = 1; i < events.length; i++) {
|
||||
expect(events[i].outputRows).toBeGreaterThanOrEqual(
|
||||
events[i - 1].outputRows,
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
it("should swallow errors thrown from the progress callback", async () => {
|
||||
const warn = jest
|
||||
.spyOn(console, "warn")
|
||||
.mockImplementation(() => undefined);
|
||||
try {
|
||||
const res = await table.add([{ id: 1 }, { id: 2 }], {
|
||||
progress: () => {
|
||||
throw new Error("callback bomb");
|
||||
},
|
||||
});
|
||||
expect(res.version).toBeGreaterThan(0);
|
||||
expect(warn).toHaveBeenCalled();
|
||||
} finally {
|
||||
warn.mockRestore();
|
||||
}
|
||||
});
|
||||
|
||||
it("should let me close the table", async () => {
|
||||
expect(table.isOpen()).toBe(true);
|
||||
table.close();
|
||||
@@ -2348,3 +2390,130 @@ describe("when creating a table with Float32Array vectors", () => {
|
||||
expect((fsl.children[0].type as Float32).precision).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
describe("setUnenforcedPrimaryKey", () => {
|
||||
let tmpDir: tmp.DirResult;
|
||||
|
||||
beforeEach(() => {
|
||||
tmpDir = tmp.dirSync({ unsafeCleanup: true });
|
||||
});
|
||||
afterEach(() => tmpDir.removeCallback());
|
||||
|
||||
it("sets a single-column primary key (string or one-element array)", async () => {
|
||||
const conn = await connect(tmpDir.name);
|
||||
const schema = new arrow.Schema([
|
||||
new arrow.Field("id", new arrow.Int64(), false),
|
||||
]);
|
||||
const t1 = await conn.createEmptyTable("t1", schema);
|
||||
await t1.setUnenforcedPrimaryKey("id");
|
||||
|
||||
const t2 = await conn.createEmptyTable("t2", schema);
|
||||
await t2.setUnenforcedPrimaryKey(["id"]);
|
||||
});
|
||||
|
||||
it("rejects a compound primary key", async () => {
|
||||
const conn = await connect(tmpDir.name);
|
||||
const table = await conn.createEmptyTable(
|
||||
"t",
|
||||
new arrow.Schema([
|
||||
new arrow.Field("id", new arrow.Int64(), false),
|
||||
new arrow.Field("name", new arrow.Utf8(), false),
|
||||
]),
|
||||
);
|
||||
await expect(
|
||||
table.setUnenforcedPrimaryKey(["id", "name"]),
|
||||
).rejects.toThrow();
|
||||
});
|
||||
|
||||
it("rejects changing the primary key once set", async () => {
|
||||
const conn = await connect(tmpDir.name);
|
||||
const table = await conn.createEmptyTable(
|
||||
"t",
|
||||
new arrow.Schema([
|
||||
new arrow.Field("id", new arrow.Int64(), false),
|
||||
new arrow.Field("name", new arrow.Utf8(), false),
|
||||
]),
|
||||
);
|
||||
await table.setUnenforcedPrimaryKey("id");
|
||||
await expect(table.setUnenforcedPrimaryKey("name")).rejects.toThrow();
|
||||
await expect(table.setUnenforcedPrimaryKey("id")).rejects.toThrow();
|
||||
});
|
||||
});
|
||||
|
||||
describe("setLsmWriteSpec / unsetLsmWriteSpec", () => {
|
||||
let tmpDir: tmp.DirResult;
|
||||
|
||||
beforeEach(() => {
|
||||
tmpDir = tmp.dirSync({ unsafeCleanup: true });
|
||||
});
|
||||
afterEach(() => tmpDir.removeCallback());
|
||||
|
||||
async function makeTable(conn: Connection): Promise<Table> {
|
||||
return await conn.createEmptyTable(
|
||||
"t",
|
||||
new arrow.Schema([new arrow.Field("id", new arrow.Int64(), false)]),
|
||||
);
|
||||
}
|
||||
|
||||
it("installs and removes a bucket spec", async () => {
|
||||
const conn = await connect(tmpDir.name);
|
||||
const table = await makeTable(conn);
|
||||
|
||||
await table.setUnenforcedPrimaryKey("id");
|
||||
await table.setLsmWriteSpec({
|
||||
specType: "bucket",
|
||||
column: "id",
|
||||
numBuckets: 4,
|
||||
});
|
||||
await table.unsetLsmWriteSpec();
|
||||
// A second unset errors — there is no spec left to remove.
|
||||
await expect(table.unsetLsmWriteSpec()).rejects.toThrow();
|
||||
// A fresh spec can be installed after unset.
|
||||
await table.setLsmWriteSpec({
|
||||
specType: "bucket",
|
||||
column: "id",
|
||||
numBuckets: 8,
|
||||
});
|
||||
});
|
||||
|
||||
it("installs an unsharded spec", async () => {
|
||||
const conn = await connect(tmpDir.name);
|
||||
const table = await makeTable(conn);
|
||||
|
||||
await table.setUnenforcedPrimaryKey("id");
|
||||
await table.setLsmWriteSpec({ specType: "unsharded" });
|
||||
await table.unsetLsmWriteSpec();
|
||||
});
|
||||
|
||||
it("installs an identity spec", async () => {
|
||||
const conn = await connect(tmpDir.name);
|
||||
const table = await makeTable(conn);
|
||||
|
||||
await table.setUnenforcedPrimaryKey("id");
|
||||
await table.setLsmWriteSpec({ specType: "identity", column: "id" });
|
||||
await table.unsetLsmWriteSpec();
|
||||
});
|
||||
|
||||
it("rejects an invalid spec", async () => {
|
||||
const conn = await connect(tmpDir.name);
|
||||
const table = await makeTable(conn);
|
||||
|
||||
await table.setUnenforcedPrimaryKey("id");
|
||||
// num_buckets out of range.
|
||||
await expect(
|
||||
table.setLsmWriteSpec({
|
||||
specType: "bucket",
|
||||
column: "id",
|
||||
numBuckets: 0,
|
||||
}),
|
||||
).rejects.toThrow();
|
||||
// Column mismatch.
|
||||
await expect(
|
||||
table.setLsmWriteSpec({
|
||||
specType: "bucket",
|
||||
column: "missing",
|
||||
numBuckets: 4,
|
||||
}),
|
||||
).rejects.toThrow();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -38,5 +38,14 @@ test("filtering examples", async () => {
|
||||
// --8<-- [start:sql_search]
|
||||
await tbl.query().where("id = 10").limit(10).toArray();
|
||||
// --8<-- [end:sql_search]
|
||||
|
||||
// --8<-- [start:orderby_search]
|
||||
await tbl
|
||||
.query()
|
||||
.where("id > 10")
|
||||
.orderBy({ columnName: "id", ascending: false })
|
||||
.limit(5)
|
||||
.toArray();
|
||||
// --8<-- [end:orderby_search]
|
||||
});
|
||||
});
|
||||
|
||||
4810
nodejs/examples/package-lock.json
generated
4810
nodejs/examples/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -11,16 +11,17 @@
|
||||
"test": "node --experimental-vm-modules node_modules/.bin/jest --testEnvironment jest-environment-node-single-context --verbose",
|
||||
"lint": "biome check *.ts && biome format *.ts",
|
||||
"lint-ci": "biome ci .",
|
||||
"lint-fix": "biome check --write *.ts && npm run format",
|
||||
"lint-fix": "biome check --write *.ts && pnpm format",
|
||||
"format": "biome format --write *.ts"
|
||||
},
|
||||
"author": "Lance Devs",
|
||||
"license": "Apache-2.0",
|
||||
"packageManager": "pnpm@11.1.1",
|
||||
"dependencies": {
|
||||
"@huggingface/transformers": "^3.0.2",
|
||||
"@huggingface/transformers": "3.0.2",
|
||||
"@lancedb/lancedb": "file:../dist",
|
||||
"openai": "^4.29.2",
|
||||
"sharp": "^0.33.5"
|
||||
"openai": "4.29.2",
|
||||
"sharp": "0.33.5"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@biomejs/biome": "^1.7.3",
|
||||
|
||||
3466
nodejs/examples/pnpm-lock.yaml
generated
Normal file
3466
nodejs/examples/pnpm-lock.yaml
generated
Normal file
File diff suppressed because it is too large
Load Diff
13
nodejs/examples/pnpm-workspace.yaml
Normal file
13
nodejs/examples/pnpm-workspace.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
# Block resolution of versions less than 24h old (Shai-Hulud window).
|
||||
# This is the pnpm 11 default but pinned here so it's visible to
|
||||
# reviewers and survives a future pnpm major flipping the default.
|
||||
minimumReleaseAge: 1440
|
||||
|
||||
# Fail install if a transitive dep tries to run an unapproved script.
|
||||
strictDepBuilds: true
|
||||
|
||||
allowBuilds:
|
||||
'@biomejs/biome': true
|
||||
onnxruntime-node: true
|
||||
protobufjs: true
|
||||
sharp: true
|
||||
@@ -1291,6 +1291,18 @@ export async function fromRecordBatchToBuffer(
|
||||
return Buffer.from(await writer.toUint8Array());
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a buffer containing a single record batch using the Arrow IPC Stream
|
||||
* serialization. Each call produces a self-contained Stream message (schema +
|
||||
* batch + EOS) suitable for incremental decode by `arrow_ipc::reader::StreamReader`.
|
||||
*/
|
||||
export async function fromRecordBatchToStreamBuffer(
|
||||
batch: RecordBatch,
|
||||
): Promise<Buffer> {
|
||||
const writer = RecordBatchStreamWriter.writeAll([batch]);
|
||||
return Buffer.from(await writer.toUint8Array());
|
||||
}
|
||||
|
||||
/**
|
||||
* Serialize an Arrow Table into a buffer using the Arrow IPC Stream serialization
|
||||
*
|
||||
|
||||
@@ -16,6 +16,18 @@ import {
|
||||
} from "./arrow";
|
||||
import { EmbeddingFunctionConfig, getRegistry } from "./embedding/registry";
|
||||
import { Connection as LanceDbConnection } from "./native";
|
||||
import type {
|
||||
CreateNamespaceResponse,
|
||||
DescribeNamespaceResponse,
|
||||
DropNamespaceResponse,
|
||||
ListNamespacesResponse,
|
||||
} from "./native";
|
||||
export type {
|
||||
CreateNamespaceResponse,
|
||||
DescribeNamespaceResponse,
|
||||
DropNamespaceResponse,
|
||||
ListNamespacesResponse,
|
||||
};
|
||||
import { sanitizeTable } from "./sanitize";
|
||||
import { LocalTable, Table } from "./table";
|
||||
|
||||
@@ -110,6 +122,41 @@ export interface TableNamesOptions {
|
||||
/** An optional limit to the number of results to return. */
|
||||
limit?: number;
|
||||
}
|
||||
|
||||
export interface ListNamespacesOptions {
|
||||
/** Token from a previous response for pagination. */
|
||||
pageToken?: string;
|
||||
/** An optional limit to the number of results to return. */
|
||||
limit?: number;
|
||||
}
|
||||
|
||||
export interface CreateNamespaceOptions {
|
||||
/** Creation mode. */
|
||||
mode?: "create" | "exist_ok" | "overwrite";
|
||||
/** Properties to set on the new namespace. */
|
||||
properties?: Record<string, string>;
|
||||
}
|
||||
|
||||
export interface DropNamespaceOptions {
|
||||
/** Whether to skip if the namespace doesn't exist, or fail. */
|
||||
mode?: "skip" | "fail";
|
||||
/** Refuse to drop if non-empty (restrict) or drop recursively (cascade). */
|
||||
behavior?: "restrict" | "cascade";
|
||||
}
|
||||
|
||||
export interface RenameTableOptions {
|
||||
/**
|
||||
* The namespace path of the table being renamed. Defaults to the root
|
||||
* namespace (`[]`) when omitted.
|
||||
*/
|
||||
namespacePath?: string[];
|
||||
/**
|
||||
* The namespace path to move the table to as part of the rename. When
|
||||
* omitted the table stays in `namespacePath`.
|
||||
*/
|
||||
newNamespacePath?: string[];
|
||||
}
|
||||
|
||||
/**
|
||||
* A LanceDB Connection that allows you to open tables and create new ones.
|
||||
*
|
||||
@@ -268,6 +315,69 @@ export abstract class Connection {
|
||||
*/
|
||||
abstract dropAllTables(namespacePath?: string[]): Promise<void>;
|
||||
|
||||
/**
|
||||
* Describe a namespace, returning its properties.
|
||||
*
|
||||
* @param {string[]} namespacePath - The namespace path to describe, in
|
||||
* parent → child order, e.g. `["analytics", "sales"]`.
|
||||
* @returns {Promise<DescribeNamespaceResponse>} The namespace's properties
|
||||
* (may be undefined if the namespace has none).
|
||||
*/
|
||||
abstract describeNamespace(
|
||||
namespacePath: string[],
|
||||
): Promise<DescribeNamespaceResponse>;
|
||||
|
||||
/**
|
||||
* List the immediate child namespaces under the given parent.
|
||||
*
|
||||
* Results may be paginated. To retrieve subsequent pages, pass the
|
||||
* `pageToken` returned by a previous call.
|
||||
*
|
||||
* @param {string[]} namespacePath - The parent namespace path. Defaults
|
||||
* to the root namespace if omitted.
|
||||
* @param {Partial<ListNamespacesOptions>} options - Pagination options
|
||||
* (`pageToken`, `limit`).
|
||||
* @returns {Promise<ListNamespacesResponse>} Child namespace names and
|
||||
* an optional token for fetching the next page.
|
||||
*/
|
||||
abstract listNamespaces(
|
||||
namespacePath?: string[],
|
||||
options?: Partial<ListNamespacesOptions>,
|
||||
): Promise<ListNamespacesResponse>;
|
||||
|
||||
/**
|
||||
* Create a new namespace at the given path.
|
||||
*
|
||||
* @param {string[]} namespacePath - The namespace path to create.
|
||||
* @param {Partial<CreateNamespaceOptions>} options - Creation `mode`
|
||||
* ("create" | "exist_ok" | "overwrite") and optional `properties`
|
||||
* to attach to the namespace.
|
||||
* @returns {Promise<CreateNamespaceResponse>} The properties of the
|
||||
* created namespace and an optional transaction id.
|
||||
*/
|
||||
abstract createNamespace(
|
||||
namespacePath: string[],
|
||||
options?: Partial<CreateNamespaceOptions>,
|
||||
): Promise<CreateNamespaceResponse>;
|
||||
|
||||
/**
|
||||
* Drop a namespace.
|
||||
*
|
||||
* Use `behavior: "cascade"` to also drop everything contained in the
|
||||
* namespace (sub-namespaces and tables). The default `"restrict"`
|
||||
* behavior refuses to drop a non-empty namespace.
|
||||
*
|
||||
* @param {string[]} namespacePath - The namespace path to drop.
|
||||
* @param {Partial<DropNamespaceOptions>} options - `mode` ("skip" | "fail"
|
||||
* for missing-namespace handling) and `behavior` ("restrict" | "cascade").
|
||||
* @returns {Promise<DropNamespaceResponse>} Any properties returned by
|
||||
* the server and an optional transaction id.
|
||||
*/
|
||||
abstract dropNamespace(
|
||||
namespacePath: string[],
|
||||
options?: Partial<DropNamespaceOptions>,
|
||||
): Promise<DropNamespaceResponse>;
|
||||
|
||||
/**
|
||||
* Clone a table from a source table.
|
||||
*
|
||||
@@ -294,6 +404,24 @@ export abstract class Connection {
|
||||
isShallow?: boolean;
|
||||
},
|
||||
): Promise<Table>;
|
||||
|
||||
/**
|
||||
* Rename a table.
|
||||
*
|
||||
* Currently only supported by LanceDB Cloud. Local OSS connections and
|
||||
* namespace-backed connections (via {@link connectNamespace}) reject with
|
||||
* a "not supported" error.
|
||||
*
|
||||
* @param {string} currentName - The current name of the table.
|
||||
* @param {string} newName - The new name for the table.
|
||||
* @param {RenameTableOptions} options - Optional namespace paths. When
|
||||
* `newNamespacePath` is omitted the table stays in `namespacePath`.
|
||||
*/
|
||||
abstract renameTable(
|
||||
currentName: string,
|
||||
newName: string,
|
||||
options?: RenameTableOptions,
|
||||
): Promise<void>;
|
||||
}
|
||||
|
||||
/** @hideconstructor */
|
||||
@@ -515,6 +643,58 @@ export class LocalConnection extends Connection {
|
||||
async dropAllTables(namespacePath?: string[]): Promise<void> {
|
||||
return this.inner.dropAllTables(namespacePath ?? []);
|
||||
}
|
||||
|
||||
describeNamespace(
|
||||
namespacePath: string[],
|
||||
): Promise<DescribeNamespaceResponse> {
|
||||
return this.inner.describeNamespace(namespacePath);
|
||||
}
|
||||
|
||||
listNamespaces(
|
||||
namespacePath?: string[],
|
||||
options?: Partial<ListNamespacesOptions>,
|
||||
): Promise<ListNamespacesResponse> {
|
||||
return this.inner.listNamespaces(
|
||||
namespacePath ?? [],
|
||||
options?.pageToken,
|
||||
options?.limit,
|
||||
);
|
||||
}
|
||||
|
||||
createNamespace(
|
||||
namespacePath: string[],
|
||||
options?: Partial<CreateNamespaceOptions>,
|
||||
): Promise<CreateNamespaceResponse> {
|
||||
return this.inner.createNamespace(
|
||||
namespacePath,
|
||||
options?.mode,
|
||||
options?.properties,
|
||||
);
|
||||
}
|
||||
|
||||
dropNamespace(
|
||||
namespacePath: string[],
|
||||
options?: Partial<DropNamespaceOptions>,
|
||||
): Promise<DropNamespaceResponse> {
|
||||
return this.inner.dropNamespace(
|
||||
namespacePath,
|
||||
options?.mode,
|
||||
options?.behavior,
|
||||
);
|
||||
}
|
||||
|
||||
async renameTable(
|
||||
currentName: string,
|
||||
newName: string,
|
||||
options?: RenameTableOptions,
|
||||
): Promise<void> {
|
||||
return this.inner.renameTable(
|
||||
currentName,
|
||||
newName,
|
||||
options?.namespacePath ?? [],
|
||||
options?.newNamespacePath,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -8,6 +8,7 @@ import {
|
||||
} from "./connection";
|
||||
|
||||
import {
|
||||
ConnectNamespaceOptions,
|
||||
ConnectionOptions,
|
||||
Connection as LanceDbConnection,
|
||||
JsHeaderProvider as NativeJsHeaderProvider,
|
||||
@@ -22,6 +23,7 @@ export { JsHeaderProvider as NativeJsHeaderProvider } from "./native.js";
|
||||
export {
|
||||
AddColumnsSql,
|
||||
ConnectionOptions,
|
||||
ConnectNamespaceOptions,
|
||||
IndexStatistics,
|
||||
IndexConfig,
|
||||
ClientConfig,
|
||||
@@ -62,6 +64,14 @@ export {
|
||||
CreateTableOptions,
|
||||
TableNamesOptions,
|
||||
OpenTableOptions,
|
||||
ListNamespacesOptions,
|
||||
CreateNamespaceOptions,
|
||||
DropNamespaceOptions,
|
||||
ListNamespacesResponse,
|
||||
CreateNamespaceResponse,
|
||||
DropNamespaceResponse,
|
||||
DescribeNamespaceResponse,
|
||||
RenameTableOptions,
|
||||
} from "./connection";
|
||||
|
||||
export { Session } from "./native.js";
|
||||
@@ -73,6 +83,7 @@ export {
|
||||
VectorQuery,
|
||||
TakeQuery,
|
||||
QueryExecutionOptions,
|
||||
ColumnOrdering,
|
||||
FullTextSearchOptions,
|
||||
RecordBatchIterator,
|
||||
FullTextQuery,
|
||||
@@ -103,6 +114,8 @@ export {
|
||||
UpdateOptions,
|
||||
OptimizeOptions,
|
||||
Version,
|
||||
WriteProgress,
|
||||
LsmWriteSpec,
|
||||
ColumnAlteration,
|
||||
} from "./table";
|
||||
|
||||
@@ -117,6 +130,7 @@ export { MergeInsertBuilder, WriteExecutionOptions } from "./merge";
|
||||
|
||||
export * as embedding from "./embedding";
|
||||
export { permutationBuilder, PermutationBuilder } from "./permutation";
|
||||
export { Scannable, ScannableOptions } from "./scannable";
|
||||
export * as rerankers from "./rerankers";
|
||||
export {
|
||||
SchemaLike,
|
||||
@@ -293,3 +307,197 @@ export async function connect(
|
||||
);
|
||||
return new LocalConnection(nativeConn);
|
||||
}
|
||||
|
||||
/**
|
||||
* Configuration for the built-in directory namespace (`"dir"`).
|
||||
*
|
||||
* The directory namespace stores tables under a single root path (local
|
||||
* filesystem or object storage URI). See
|
||||
* {@link https://docs.lancedb.com/namespaces} for the documented surface;
|
||||
* less-common knobs live under {@link DirNamespaceConfig.extraProperties}.
|
||||
*/
|
||||
export interface DirNamespaceConfig {
|
||||
/** Root path or URI containing the LanceDB tables. */
|
||||
root: string;
|
||||
/**
|
||||
* Whether to maintain a namespace manifest at the root. Required for
|
||||
* child namespaces. Defaults to true on the impl side.
|
||||
*/
|
||||
manifestEnabled?: boolean;
|
||||
/**
|
||||
* Additional raw properties passed verbatim to the namespace
|
||||
* implementation (e.g. `storage.*`, `credential_vendor.*`). Typed
|
||||
* fields above take precedence on key collision.
|
||||
*/
|
||||
extraProperties?: Record<string, string>;
|
||||
}
|
||||
|
||||
/**
|
||||
* Configuration for the built-in REST namespace (`"rest"`).
|
||||
*
|
||||
* The REST namespace talks to a remote catalog server over HTTP. See
|
||||
* {@link https://docs.lancedb.com/namespaces} for the documented surface;
|
||||
* less-common knobs (TLS, metrics) live under
|
||||
* {@link RestNamespaceConfig.extraProperties}.
|
||||
*/
|
||||
export interface RestNamespaceConfig {
|
||||
/** Catalog endpoint URL. */
|
||||
uri: string;
|
||||
/**
|
||||
* HTTP headers forwarded with each request. Keys are passed through
|
||||
* as-is (e.g. `"x-api-key"`, `"Authorization"`).
|
||||
*/
|
||||
headers?: Record<string, string>;
|
||||
/**
|
||||
* Additional raw properties passed verbatim to the namespace
|
||||
* implementation (e.g. `tls.*`, `ops_metrics_enabled`, `delimiter`).
|
||||
* Typed fields above take precedence on key collision.
|
||||
*/
|
||||
extraProperties?: Record<string, string>;
|
||||
}
|
||||
|
||||
function dirConfigToProperties(
|
||||
config: DirNamespaceConfig,
|
||||
): Record<string, string> {
|
||||
// Spread the whole input so that unknown keys (e.g. a raw `manifest_enabled`
|
||||
// passed via the dynamic-impl path) flow through instead of being dropped.
|
||||
// Typed transformations layer on top.
|
||||
const { manifestEnabled, extraProperties, ...rest } = config;
|
||||
const properties: Record<string, string> = {
|
||||
...(extraProperties ?? {}),
|
||||
...(rest as Record<string, string>),
|
||||
};
|
||||
if (manifestEnabled !== undefined) {
|
||||
properties.manifest_enabled = String(manifestEnabled);
|
||||
}
|
||||
return properties;
|
||||
}
|
||||
|
||||
function restConfigToProperties(
|
||||
config: RestNamespaceConfig,
|
||||
): Record<string, string> {
|
||||
const { headers, extraProperties, ...rest } = config;
|
||||
const properties: Record<string, string> = {
|
||||
...(extraProperties ?? {}),
|
||||
...(rest as Record<string, string>),
|
||||
};
|
||||
if (headers) {
|
||||
for (const [name, value] of Object.entries(headers)) {
|
||||
properties[`headers.${name}`] = value;
|
||||
}
|
||||
}
|
||||
return properties;
|
||||
}
|
||||
|
||||
/**
|
||||
* Connect to a LanceDB database through a namespace.
|
||||
*
|
||||
* Unlike {@link connect}, which routes by URI scheme (local path vs.
|
||||
* `db://` cloud), `connectNamespace` always returns a namespace-backed
|
||||
* connection. The `implName` selects the namespace implementation:
|
||||
*
|
||||
* - `"dir"` — directory namespace, configured with {@link DirNamespaceConfig}.
|
||||
* - `"rest"` — remote REST catalog, configured with {@link RestNamespaceConfig}.
|
||||
* - Any other string — full module path for a custom implementation,
|
||||
* configured with a free-form string-keyed `properties` map.
|
||||
*
|
||||
* @example Typed dir namespace
|
||||
* ```ts
|
||||
* const db = await connectNamespace("dir", { root: "/path/to/db" });
|
||||
* await db.createTable("users", [{ id: 1 }]);
|
||||
* ```
|
||||
*
|
||||
* @example Typed REST namespace with auth headers
|
||||
* ```ts
|
||||
* const db = await connectNamespace("rest", {
|
||||
* uri: "https://catalog.example.com",
|
||||
* headers: { "x-api-key": process.env.CATALOG_KEY ?? "" },
|
||||
* });
|
||||
* ```
|
||||
*
|
||||
* @example Custom implementation with raw properties
|
||||
* ```ts
|
||||
* const db = await connectNamespace("my.custom.Namespace", {
|
||||
* endpoint: "...",
|
||||
* });
|
||||
* ```
|
||||
*/
|
||||
export function connectNamespace(
|
||||
implName: "dir",
|
||||
config: DirNamespaceConfig,
|
||||
options?: Partial<ConnectNamespaceOptions>,
|
||||
): Promise<Connection>;
|
||||
/**
|
||||
* Connect through the built-in REST namespace.
|
||||
*
|
||||
* Configured with {@link RestNamespaceConfig}. See the function-level
|
||||
* documentation above for the full surface, examples, and how this
|
||||
* relates to {@link connect}.
|
||||
*
|
||||
* @example
|
||||
* ```ts
|
||||
* const db = await connectNamespace("rest", {
|
||||
* uri: "https://catalog.example.com",
|
||||
* headers: { "x-api-key": process.env.CATALOG_KEY ?? "" },
|
||||
* });
|
||||
* ```
|
||||
*/
|
||||
export function connectNamespace(
|
||||
implName: "rest",
|
||||
config: RestNamespaceConfig,
|
||||
options?: Partial<ConnectNamespaceOptions>,
|
||||
): Promise<Connection>;
|
||||
/**
|
||||
* Connect through a custom namespace implementation by full module path,
|
||||
* configured with a free-form string-keyed `properties` map. Use the
|
||||
* typed overloads above for the built-in `"dir"` and `"rest"` impls.
|
||||
*
|
||||
* See the function-level documentation above for examples and how this
|
||||
* relates to {@link connect}.
|
||||
*
|
||||
* @example
|
||||
* ```ts
|
||||
* const db = await connectNamespace("my.custom.Namespace", {
|
||||
* endpoint: "...",
|
||||
* });
|
||||
* ```
|
||||
*/
|
||||
export function connectNamespace(
|
||||
implName: string,
|
||||
properties: Record<string, string>,
|
||||
options?: Partial<ConnectNamespaceOptions>,
|
||||
): Promise<Connection>;
|
||||
export async function connectNamespace(
|
||||
implName: string,
|
||||
configOrProperties:
|
||||
| DirNamespaceConfig
|
||||
| RestNamespaceConfig
|
||||
| Record<string, string>,
|
||||
options?: Partial<ConnectNamespaceOptions>,
|
||||
): Promise<Connection> {
|
||||
let properties: Record<string, string>;
|
||||
if (implName === "dir") {
|
||||
properties = dirConfigToProperties(
|
||||
configOrProperties as DirNamespaceConfig,
|
||||
);
|
||||
} else if (implName === "rest") {
|
||||
properties = restConfigToProperties(
|
||||
configOrProperties as RestNamespaceConfig,
|
||||
);
|
||||
} else {
|
||||
properties = configOrProperties as Record<string, string>;
|
||||
}
|
||||
|
||||
const finalOptions: ConnectNamespaceOptions = (options ??
|
||||
{}) as ConnectNamespaceOptions;
|
||||
finalOptions.storageOptions = cleanseStorageOptions(
|
||||
finalOptions.storageOptions,
|
||||
);
|
||||
|
||||
const nativeConn = await LanceDbConnection.newWithNamespace(
|
||||
implName,
|
||||
properties,
|
||||
finalOptions,
|
||||
);
|
||||
return new LocalConnection(nativeConn);
|
||||
}
|
||||
|
||||
@@ -79,6 +79,12 @@ export interface QueryExecutionOptions {
|
||||
timeoutMs?: number;
|
||||
}
|
||||
|
||||
export interface ColumnOrdering {
|
||||
columnName: string;
|
||||
ascending?: boolean;
|
||||
nullsFirst?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Options that control the behavior of a full text search
|
||||
*/
|
||||
@@ -417,6 +423,21 @@ export class StandardQueryBase<
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sort the results by the specified column(s).
|
||||
* @returns This query builder.
|
||||
*/
|
||||
orderBy(ordering: ColumnOrdering | ColumnOrdering[]): this {
|
||||
const orderings = Array.isArray(ordering) ? ordering : [ordering];
|
||||
const normalized = orderings.map((o) => ({
|
||||
columnName: o.columnName,
|
||||
ascending: o.ascending ?? true,
|
||||
nullsFirst: o.nullsFirst ?? false,
|
||||
}));
|
||||
this.doCall((inner) => inner.orderBy(normalized));
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Skip searching un-indexed data. This can make search faster, but will miss
|
||||
* any data that is not yet indexed.
|
||||
|
||||
274
nodejs/lancedb/scannable.ts
Normal file
274
nodejs/lancedb/scannable.ts
Normal file
@@ -0,0 +1,274 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
import {
|
||||
Table as ArrowTable,
|
||||
RecordBatch,
|
||||
RecordBatchReader,
|
||||
Schema,
|
||||
} from "apache-arrow";
|
||||
import {
|
||||
fromRecordBatchToStreamBuffer,
|
||||
fromTableToBuffer,
|
||||
makeEmptyTable,
|
||||
} from "./arrow";
|
||||
import { NapiScannable } from "./native.js";
|
||||
|
||||
export interface ScannableOptions {
|
||||
/** Hint about the number of rows. Not validated against the stream. */
|
||||
numRows?: number;
|
||||
/**
|
||||
* Whether the source can be scanned more than once. Defaults to `true` for
|
||||
* `fromTable` / `fromFactory` and `false` for `fromIterable` /
|
||||
* `fromRecordBatchReader`.
|
||||
*/
|
||||
rescannable?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* A data source that can be scanned as a stream of Arrow `RecordBatch`es.
|
||||
*
|
||||
* `Scannable` wraps the schema + optional row count + rescannable flag and
|
||||
* a callback that yields batches one at a time. It is passed to consumers
|
||||
* (e.g. `Table.add`, `createTable`, `mergeInsert` — follow-up work) that
|
||||
* need to pull data without materializing the full dataset in JS memory.
|
||||
*
|
||||
* Batches cross the JS↔Rust boundary as Arrow IPC Stream messages; a fresh
|
||||
* writer serializes each batch, and the Rust side decodes it with
|
||||
* `arrow_ipc::reader::StreamReader`. One batch is in flight at a time.
|
||||
*/
|
||||
export class Scannable {
|
||||
readonly schema: Schema;
|
||||
readonly numRows: number | null;
|
||||
readonly rescannable: boolean;
|
||||
|
||||
/** @hidden */
|
||||
private readonly native: NapiScannable;
|
||||
|
||||
private constructor(
|
||||
native: NapiScannable,
|
||||
schema: Schema,
|
||||
numRows: number | null,
|
||||
rescannable: boolean,
|
||||
) {
|
||||
this.native = native;
|
||||
this.schema = schema;
|
||||
this.numRows = numRows;
|
||||
this.rescannable = rescannable;
|
||||
}
|
||||
|
||||
/** @hidden Access the native handle for passing through to Rust consumers. */
|
||||
get inner(): NapiScannable {
|
||||
return this.native;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a Scannable from an explicit schema and a factory that returns a
|
||||
* fresh batch iterator on each call.
|
||||
*
|
||||
* The factory is invoked once per scan. Each iterator yields
|
||||
* `RecordBatch`es matching the declared schema. Use this when you need
|
||||
* direct control over the pull loop — for example, to wrap a streaming
|
||||
* source whose batches are produced lazily.
|
||||
*
|
||||
* @param schema - The Arrow schema of the produced batches.
|
||||
* @param factory - Called at the start of each scan to produce a batch
|
||||
* iterator. Must be idempotent when `rescannable` is true.
|
||||
* @param opts - Optional hints. `rescannable` defaults to `true`; set to
|
||||
* `false` if calling `factory()` twice would not reproduce the same data.
|
||||
*/
|
||||
static async fromFactory(
|
||||
schema: Schema,
|
||||
factory: () =>
|
||||
| AsyncIterable<RecordBatch>
|
||||
| Iterable<RecordBatch>
|
||||
| AsyncIterator<RecordBatch>
|
||||
| Iterator<RecordBatch>,
|
||||
opts: ScannableOptions = {},
|
||||
): Promise<Scannable> {
|
||||
const numRows = opts.numRows ?? null;
|
||||
if (numRows != null && !Number.isInteger(numRows)) {
|
||||
throw new TypeError("numRows must be an integer");
|
||||
}
|
||||
const rescannable = opts.rescannable ?? true;
|
||||
|
||||
let iter: AsyncIterator<RecordBatch> | Iterator<RecordBatch> | null = null;
|
||||
const getNextBatch = async (isStart: boolean): Promise<Buffer | null> => {
|
||||
// `isStart` is true on the first pull of every new scan_as_stream.
|
||||
// Drop any cached iterator so factory() is re-invoked for the next scan
|
||||
if (isStart) {
|
||||
iter = null;
|
||||
}
|
||||
if (iter === null) {
|
||||
iter = normalizeIterator(factory());
|
||||
}
|
||||
const result = await iter.next();
|
||||
if (result.done) {
|
||||
iter = null;
|
||||
return null;
|
||||
}
|
||||
return fromRecordBatchToStreamBuffer(result.value);
|
||||
};
|
||||
|
||||
const schemaBuf = await fromTableToBuffer(makeEmptyTable(schema));
|
||||
const native = new NapiScannable(
|
||||
schemaBuf,
|
||||
numRows,
|
||||
rescannable,
|
||||
getNextBatch,
|
||||
);
|
||||
return new Scannable(native, schema, numRows, rescannable);
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a Scannable from an in-memory Arrow `Table`. Always rescannable;
|
||||
* the table's batches are replayed on each scan.
|
||||
*
|
||||
* The table's row count is authoritative: `opts.numRows` must either be
|
||||
* omitted or equal to `table.numRows`. `opts.rescannable` of `false` is
|
||||
* rejected because in-memory Tables are always rescannable.
|
||||
*/
|
||||
static async fromTable(
|
||||
table: ArrowTable,
|
||||
opts: ScannableOptions = {},
|
||||
): Promise<Scannable> {
|
||||
if (opts.numRows != null && opts.numRows !== table.numRows) {
|
||||
throw new TypeError(
|
||||
`opts.numRows (${opts.numRows}) does not match table.numRows (${table.numRows}). ` +
|
||||
`The table's row count is authoritative; omit numRows or pass the matching value.`,
|
||||
);
|
||||
}
|
||||
if (opts.rescannable === false) {
|
||||
throw new TypeError(
|
||||
`fromTable does not accept rescannable: false. ` +
|
||||
`In-memory Arrow Tables are always rescannable; omit the option or pass true.`,
|
||||
);
|
||||
}
|
||||
return Scannable.fromFactory(table.schema, () => table.batches, {
|
||||
numRows: table.numRows,
|
||||
rescannable: true,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a Scannable from an iterable of `RecordBatch`es. `rescannable`
|
||||
* defaults to `false`. Pass an explicit schema so the consumer can
|
||||
* validate before any batch is pulled.
|
||||
*
|
||||
* `opts.rescannable: true` is honest for replayable iterables (Arrays,
|
||||
* Sets, or custom iterables whose `[Symbol.iterator]()` returns a fresh
|
||||
* iterator each call). It is rejected for one-shot iterables (generators,
|
||||
* async generators, or already-an-iterator inputs) because their
|
||||
* `[Symbol.iterator]()` returns the same exhausted object on the second
|
||||
* scan. For replayable sources outside this shape, use
|
||||
* `fromFactory(schema, () => createIter(), { rescannable: true })`.
|
||||
*
|
||||
* Note: when `opts.rescannable` is `true`, the constructor calls
|
||||
* `[Symbol.iterator]()` once on the input to perform the structural check.
|
||||
*/
|
||||
static async fromIterable(
|
||||
schema: Schema,
|
||||
iter: AsyncIterable<RecordBatch> | Iterable<RecordBatch>,
|
||||
opts: ScannableOptions = {},
|
||||
): Promise<Scannable> {
|
||||
if (opts.rescannable === true && isOneShotIterable(iter)) {
|
||||
throw new TypeError(
|
||||
`fromIterable: rescannable: true is not honest for one-shot iterables ` +
|
||||
`(generators, async generators, or iterators where [Symbol.iterator]() ` +
|
||||
`returns the same object). The source would be exhausted after the first scan. ` +
|
||||
`Use fromFactory(schema, () => createIter(), { rescannable: true }) for sources ` +
|
||||
`where each call mints a fresh iterator.`,
|
||||
);
|
||||
}
|
||||
return Scannable.fromFactory(schema, () => iter, {
|
||||
numRows: opts.numRows,
|
||||
rescannable: opts.rescannable ?? false,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a Scannable from an Arrow `RecordBatchReader`. A reader can only
|
||||
* be consumed once; `rescannable` defaults to `false`.
|
||||
*
|
||||
* The reader must already be opened (via `.open()`) so its `.schema` is
|
||||
* populated. `RecordBatchReader.from(...)` returns an unopened reader.
|
||||
*
|
||||
* `opts.rescannable: true` is rejected because `RecordBatchReader` is a
|
||||
* self-iterator (its `[Symbol.iterator]()` returns itself), and this
|
||||
* constructor does not call `reader.reset()` between scans, so a second
|
||||
* scan would always see an exhausted reader. For genuinely replayable
|
||||
* sources, use
|
||||
* `fromFactory(schema, () => openReader(), { rescannable: true })`,
|
||||
* which mints a fresh reader on each scan.
|
||||
*/
|
||||
static async fromRecordBatchReader(
|
||||
reader: RecordBatchReader,
|
||||
opts: ScannableOptions = {},
|
||||
): Promise<Scannable> {
|
||||
if (opts.rescannable === true) {
|
||||
throw new TypeError(
|
||||
`fromRecordBatchReader does not accept rescannable: true. ` +
|
||||
`RecordBatchReader is a self-iterator (its [Symbol.iterator]() ` +
|
||||
`returns itself) and would be exhausted after the first scan. ` +
|
||||
`Use fromFactory(schema, () => openReader(), { rescannable: true }) ` +
|
||||
`for sources where each call mints a fresh reader.`,
|
||||
);
|
||||
}
|
||||
return Scannable.fromFactory(reader.schema, () => reader, {
|
||||
numRows: opts.numRows,
|
||||
rescannable: false,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
function normalizeIterator<T>(
|
||||
source: AsyncIterable<T> | Iterable<T> | AsyncIterator<T> | Iterator<T>,
|
||||
): AsyncIterator<T> | Iterator<T> {
|
||||
if (source == null) {
|
||||
throw new TypeError("Scannable factory returned null/undefined");
|
||||
}
|
||||
if (
|
||||
typeof (source as AsyncIterable<T>)[Symbol.asyncIterator] === "function"
|
||||
) {
|
||||
return (source as AsyncIterable<T>)[Symbol.asyncIterator]();
|
||||
}
|
||||
if (typeof (source as Iterable<T>)[Symbol.iterator] === "function") {
|
||||
return (source as Iterable<T>)[Symbol.iterator]();
|
||||
}
|
||||
// Already an iterator (has `.next`).
|
||||
if (typeof (source as Iterator<T>).next === "function") {
|
||||
return source as Iterator<T>;
|
||||
}
|
||||
throw new TypeError("Scannable factory returned a non-iterable value");
|
||||
}
|
||||
|
||||
// A "self-iterator" returns the same object from `[Symbol.iterator]()` /
|
||||
// `[Symbol.asyncIterator]()`. Generators behave this way, so they exhaust
|
||||
// after one pass. Replayable iterables (Array, Set, custom) return a fresh
|
||||
// iterator each call. Detection mirrors `normalizeIterator`'s ordering so
|
||||
// classification matches scan-time behavior.
|
||||
function isOneShotIterable(
|
||||
source: AsyncIterable<unknown> | Iterable<unknown>,
|
||||
): boolean {
|
||||
// null/undefined are not one-shot in any meaningful sense; let
|
||||
// `normalizeIterator` raise the actual error at scan time.
|
||||
if (source == null) return false;
|
||||
const ref = source as unknown;
|
||||
if (
|
||||
typeof (source as AsyncIterable<unknown>)[Symbol.asyncIterator] ===
|
||||
"function"
|
||||
) {
|
||||
const it = (source as AsyncIterable<unknown>)[
|
||||
Symbol.asyncIterator
|
||||
]() as unknown;
|
||||
return it === ref;
|
||||
}
|
||||
if (typeof (source as Iterable<unknown>)[Symbol.iterator] === "function") {
|
||||
const it = (source as Iterable<unknown>)[Symbol.iterator]() as unknown;
|
||||
return it === ref;
|
||||
}
|
||||
// Already-an-iterator (has `.next` but no `Symbol.iterator`) is by
|
||||
// definition one-shot.
|
||||
if (typeof (source as { next?: unknown }).next === "function") return true;
|
||||
return false;
|
||||
}
|
||||
@@ -46,6 +46,33 @@ import { sanitizeType } from "./sanitize";
|
||||
import { IntoSql, toSQL } from "./util";
|
||||
export { IndexConfig } from "./native";
|
||||
|
||||
/**
|
||||
* Progress snapshot for a write operation, delivered to the `progress`
|
||||
* callback passed to {@link Table.add}.
|
||||
*/
|
||||
export interface WriteProgress {
|
||||
/** Number of rows written so far. */
|
||||
outputRows: number;
|
||||
/** Number of bytes written so far. */
|
||||
outputBytes: number;
|
||||
/**
|
||||
* Total rows expected, when the input source reports it.
|
||||
*
|
||||
* Always set on the final callback (the one with `done: true`), falling
|
||||
* back to the actual number of rows written when the source could not
|
||||
* report a row count up front.
|
||||
*/
|
||||
totalRows?: number;
|
||||
/** Wall-clock seconds since the write started. */
|
||||
elapsedSeconds: number;
|
||||
/** Number of parallel write tasks currently in flight. */
|
||||
activeTasks: number;
|
||||
/** Total number of parallel write tasks (the write parallelism). */
|
||||
totalTasks: number;
|
||||
/** `true` for the final callback; `false` otherwise. */
|
||||
done: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Options for adding data to a table.
|
||||
*/
|
||||
@@ -56,6 +83,28 @@ export interface AddDataOptions {
|
||||
* If "overwrite" then the new data will replace the existing data in the table.
|
||||
*/
|
||||
mode: "append" | "overwrite";
|
||||
|
||||
/**
|
||||
* Optional callback invoked periodically with write progress.
|
||||
*
|
||||
* The callback is fired once per batch written and once more with
|
||||
* `done: true` when the write completes. Calls are dispatched
|
||||
* asynchronously to the JS event loop and never block the write — a slow
|
||||
* callback will queue events rather than back-pressure the writer.
|
||||
*
|
||||
* Errors thrown from the callback are logged with `console.warn` and
|
||||
* swallowed — they do not abort the write.
|
||||
*
|
||||
* @example
|
||||
* ```ts
|
||||
* await table.add(data, {
|
||||
* progress: (p) => {
|
||||
* console.log(`${p.outputRows}/${p.totalRows ?? "?"} rows`);
|
||||
* },
|
||||
* });
|
||||
* ```
|
||||
*/
|
||||
progress: (progress: WriteProgress) => void;
|
||||
}
|
||||
|
||||
export interface UpdateOptions {
|
||||
@@ -106,6 +155,27 @@ export interface Version {
|
||||
metadata: Record<string, string>;
|
||||
}
|
||||
|
||||
/**
|
||||
* Specification selecting Lance's MemWAL LSM-style write path for
|
||||
* `mergeInsert`.
|
||||
*
|
||||
* `specType` is `"bucket"`, `"identity"`, or `"unsharded"`. For `"bucket"`,
|
||||
* `column` and `numBuckets` are required; for `"identity"`, `column` is
|
||||
* required.
|
||||
*/
|
||||
export interface LsmWriteSpec {
|
||||
/** One of `"bucket"`, `"identity"`, or `"unsharded"`. */
|
||||
specType: "bucket" | "identity" | "unsharded";
|
||||
/** Bucket and identity variants: the sharding column. */
|
||||
column?: string;
|
||||
/** Bucket variant: the number of buckets, in `[1, 1024]`. */
|
||||
numBuckets?: number;
|
||||
/** Names of indexes the MemWAL should keep up to date during writes. */
|
||||
maintainedIndexes?: string[];
|
||||
/** Default `ShardWriter` configuration recorded in the MemWAL index. */
|
||||
writerConfigDefaults?: Record<string, string>;
|
||||
}
|
||||
|
||||
/**
|
||||
* A Table is a collection of Records in a LanceDB Database.
|
||||
*
|
||||
@@ -449,6 +519,54 @@ export abstract class Table {
|
||||
* containing the new version number of the table after dropping the columns.
|
||||
*/
|
||||
abstract dropColumns(columnNames: string[]): Promise<DropColumnsResult>;
|
||||
/**
|
||||
* Set the unenforced primary key for this table to a single column.
|
||||
*
|
||||
* "Unenforced" means LanceDB does not check uniqueness on writes; the
|
||||
* column is recorded in the schema as the primary key for use by features
|
||||
* such as `merge_insert`. Only single-column primary keys are supported,
|
||||
* and the key cannot be changed once set.
|
||||
* @param {string | string[]} columns The primary key column. A one-element
|
||||
* array is also accepted; passing more than one column is rejected.
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
abstract setUnenforcedPrimaryKey(columns: string | string[]): Promise<void>;
|
||||
/**
|
||||
* Install an {@link LsmWriteSpec} on this table, selecting Lance's MemWAL
|
||||
* LSM-style write path for future `mergeInsert` calls.
|
||||
*
|
||||
* `LsmWriteSpec` chooses one of three sharding strategies via `specType`:
|
||||
*
|
||||
* - `"bucket"` — hash-bucket writes by the single-column unenforced primary
|
||||
* key (`column` and `numBuckets` required).
|
||||
* - `"identity"` — shard by the raw value of a scalar `column`.
|
||||
* - `"unsharded"` — route every write to a single shard.
|
||||
*
|
||||
* All variants require the table to have an unenforced primary key
|
||||
* ({@link Table#setUnenforcedPrimaryKey}); bucket sharding additionally
|
||||
* requires it to be the single column being bucketed.
|
||||
* @param {LsmWriteSpec} spec The sharding spec to install.
|
||||
* @returns {Promise<void>}
|
||||
* @example
|
||||
* ```ts
|
||||
* await table.setUnenforcedPrimaryKey("id");
|
||||
* await table.setLsmWriteSpec({
|
||||
* specType: "bucket",
|
||||
* column: "id",
|
||||
* numBuckets: 16,
|
||||
* maintainedIndexes: ["id_idx"],
|
||||
* });
|
||||
* ```
|
||||
*/
|
||||
abstract setLsmWriteSpec(spec: LsmWriteSpec): Promise<void>;
|
||||
/**
|
||||
* Remove the {@link LsmWriteSpec} from this table, reverting to the standard
|
||||
* `mergeInsert` write path.
|
||||
*
|
||||
* Errors if no spec is currently set.
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
abstract unsetLsmWriteSpec(): Promise<void>;
|
||||
/** Retrieve the version of the table */
|
||||
|
||||
abstract version(): Promise<number>;
|
||||
@@ -636,7 +754,20 @@ export class LocalTable extends Table {
|
||||
const schema = await this.schema();
|
||||
|
||||
const buffer = await fromDataToBuffer(data, undefined, schema);
|
||||
return await this.inner.add(buffer, mode);
|
||||
// Wrap the user callback so a thrown error doesn't surface as an
|
||||
// unhandled exception (the callback fires from a napi threadsafe
|
||||
// function — exceptions there crash the process).
|
||||
const userProgress = options?.progress;
|
||||
const progress = userProgress
|
||||
? (p: WriteProgress) => {
|
||||
try {
|
||||
userProgress(p);
|
||||
} catch (e) {
|
||||
console.warn("Table.add progress callback threw:", e);
|
||||
}
|
||||
}
|
||||
: undefined;
|
||||
return await this.inner.add(buffer, mode, progress);
|
||||
}
|
||||
|
||||
async update(
|
||||
@@ -897,6 +1028,19 @@ export class LocalTable extends Table {
|
||||
return await this.inner.dropColumns(columnNames);
|
||||
}
|
||||
|
||||
async setUnenforcedPrimaryKey(columns: string | string[]): Promise<void> {
|
||||
const cols = typeof columns === "string" ? [columns] : columns;
|
||||
return await this.inner.setUnenforcedPrimaryKey(cols);
|
||||
}
|
||||
|
||||
async setLsmWriteSpec(spec: LsmWriteSpec): Promise<void> {
|
||||
return await this.inner.setLsmWriteSpec(spec);
|
||||
}
|
||||
|
||||
async unsetLsmWriteSpec(): Promise<void> {
|
||||
return await this.inner.unsetLsmWriteSpec();
|
||||
}
|
||||
|
||||
async version(): Promise<number> {
|
||||
return await this.inner.version();
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.28.0-beta.11",
|
||||
"version": "0.29.1-beta.0",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.28.0-beta.11",
|
||||
"version": "0.29.1-beta.0",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.28.0-beta.11",
|
||||
"version": "0.29.1-beta.0",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.28.0-beta.11",
|
||||
"version": "0.29.1-beta.0",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.28.0-beta.11",
|
||||
"version": "0.29.1-beta.0",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.28.0-beta.11",
|
||||
"version": "0.29.1-beta.0",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.28.0-beta.11",
|
||||
"version": "0.29.1-beta.0",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
|
||||
4719
nodejs/package-lock.json
generated
4719
nodejs/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -11,7 +11,7 @@
|
||||
"ann"
|
||||
],
|
||||
"private": false,
|
||||
"version": "0.28.0-beta.11",
|
||||
"version": "0.29.1-beta.0",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
@@ -38,15 +38,15 @@
|
||||
"url": "https://github.com/lancedb/lancedb"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@aws-sdk/client-dynamodb": "^3.33.0",
|
||||
"@aws-sdk/client-kms": "^3.33.0",
|
||||
"@aws-sdk/client-s3": "^3.33.0",
|
||||
"@aws-sdk/client-dynamodb": "3.1003.0",
|
||||
"@aws-sdk/client-kms": "3.1003.0",
|
||||
"@aws-sdk/client-s3": "3.1003.0",
|
||||
"@biomejs/biome": "^1.7.3",
|
||||
"@jest/globals": "^29.7.0",
|
||||
"@napi-rs/cli": "^3.5.1",
|
||||
"@napi-rs/cli": "3.5.1",
|
||||
"@types/axios": "^0.14.0",
|
||||
"@types/jest": "^29.1.2",
|
||||
"@types/node": "^22.7.4",
|
||||
"@types/node": "22.7.4",
|
||||
"@types/tmp": "^0.2.6",
|
||||
"apache-arrow-15": "npm:apache-arrow@15.0.0",
|
||||
"apache-arrow-16": "npm:apache-arrow@16.0.0",
|
||||
@@ -57,9 +57,9 @@
|
||||
"shx": "^0.3.4",
|
||||
"tmp": "^0.2.3",
|
||||
"ts-jest": "^29.1.2",
|
||||
"typedoc": "^0.26.4",
|
||||
"typedoc-plugin-markdown": "^4.2.1",
|
||||
"typescript": "^5.5.4",
|
||||
"typedoc": "0.26.4",
|
||||
"typedoc-plugin-markdown": "4.2.1",
|
||||
"typescript": "5.5.4",
|
||||
"typescript-eslint": "^7.1.0"
|
||||
},
|
||||
"ava": {
|
||||
@@ -68,15 +68,16 @@
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
},
|
||||
"packageManager": "pnpm@11.1.1",
|
||||
"cpu": ["x64", "arm64"],
|
||||
"os": ["darwin", "linux", "win32"],
|
||||
"scripts": {
|
||||
"artifacts": "napi artifacts",
|
||||
"build:debug": "napi build --platform --dts ../lancedb/native.d.ts --js ../lancedb/native.js --output-dir lancedb",
|
||||
"postbuild:debug": "shx mkdir -p dist && shx cp lancedb/*.node dist/",
|
||||
"postbuild:debug": "shx mkdir -p dist && shx cp lancedb/*.node dist/ && node -e \"require('fs').writeFileSync('dist/package.json', JSON.stringify({name:'@lancedb/lancedb',type:'commonjs'}))\"",
|
||||
"build:release": "napi build --platform --release --dts ../lancedb/native.d.ts --js ../lancedb/native.js --output-dir dist",
|
||||
"build": "npm run build:debug && npm run tsc",
|
||||
"build-release": "npm run build:release && npm run tsc",
|
||||
"build": "pnpm build:debug && pnpm tsc",
|
||||
"build-release": "pnpm build:release && pnpm tsc",
|
||||
"tsc": "tsc -b",
|
||||
"posttsc": "shx cp lancedb/native.d.ts dist/native.d.ts",
|
||||
"lint-ci": "biome ci .",
|
||||
@@ -86,7 +87,7 @@
|
||||
"lint-fix": "biome check --write . && biome format --write .",
|
||||
"prepublishOnly": "napi prepublish -t npm",
|
||||
"test": "jest --verbose",
|
||||
"integration": "S3_TEST=1 npm run test",
|
||||
"integration": "S3_TEST=1 pnpm test",
|
||||
"universal": "napi universalize",
|
||||
"version": "napi version"
|
||||
},
|
||||
@@ -94,8 +95,8 @@
|
||||
"reflect-metadata": "^0.2.2"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@huggingface/transformers": "^3.0.2",
|
||||
"openai": "^4.29.2"
|
||||
"@huggingface/transformers": "3.0.2",
|
||||
"openai": "4.29.2"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"apache-arrow": ">=15.0.0 <=18.1.0"
|
||||
|
||||
7317
nodejs/pnpm-lock.yaml
generated
Normal file
7317
nodejs/pnpm-lock.yaml
generated
Normal file
File diff suppressed because it is too large
Load Diff
18
nodejs/pnpm-workspace.yaml
Normal file
18
nodejs/pnpm-workspace.yaml
Normal file
@@ -0,0 +1,18 @@
|
||||
# Flat node_modules layout. The @napi-rs/cli build step fails to locate
|
||||
# the cdylib artifact under pnpm's isolated layout; the hoisted linker
|
||||
# mirrors npm's structure and unblocks the native build.
|
||||
nodeLinker: hoisted
|
||||
|
||||
# Block resolution of versions less than 24h old (Shai-Hulud window).
|
||||
# This is the pnpm 11 default but pinned here so it's visible to
|
||||
# reviewers and survives a future pnpm major flipping the default.
|
||||
minimumReleaseAge: 1440
|
||||
|
||||
# Fail install if a transitive dep tries to run an unapproved script.
|
||||
strictDepBuilds: true
|
||||
|
||||
allowBuilds:
|
||||
'@biomejs/biome': true
|
||||
onnxruntime-node: true
|
||||
protobufjs: true
|
||||
sharp: true
|
||||
@@ -8,12 +8,16 @@ use lancedb::database::{CreateTableMode, Database};
|
||||
use napi::bindgen_prelude::*;
|
||||
use napi_derive::*;
|
||||
|
||||
use crate::ConnectNamespaceOptions;
|
||||
use crate::ConnectionOptions;
|
||||
use crate::error::NapiErrorExt;
|
||||
use crate::header::JsHeaderProvider;
|
||||
use crate::table::Table;
|
||||
use lancedb::connection::{ConnectBuilder, Connection as LanceDBConnection};
|
||||
use lancedb::connection::{ConnectBuilder, Connection as LanceDBConnection, connect_namespace};
|
||||
|
||||
use lance_namespace::models::{
|
||||
CreateNamespaceRequest, DescribeNamespaceRequest, DropNamespaceRequest, ListNamespacesRequest,
|
||||
};
|
||||
use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema};
|
||||
|
||||
#[napi]
|
||||
@@ -21,6 +25,29 @@ pub struct Connection {
|
||||
inner: Option<LanceDBConnection>,
|
||||
}
|
||||
|
||||
#[napi(object)]
|
||||
pub struct DescribeNamespaceResponse {
|
||||
pub properties: Option<HashMap<String, String>>,
|
||||
}
|
||||
|
||||
#[napi(object)]
|
||||
pub struct ListNamespacesResponse {
|
||||
pub namespaces: Vec<String>,
|
||||
pub page_token: Option<String>,
|
||||
}
|
||||
|
||||
#[napi(object)]
|
||||
pub struct CreateNamespaceResponse {
|
||||
pub properties: Option<HashMap<String, String>>,
|
||||
pub transaction_id: Option<String>,
|
||||
}
|
||||
|
||||
#[napi(object)]
|
||||
pub struct DropNamespaceResponse {
|
||||
pub properties: Option<HashMap<String, String>>,
|
||||
pub transaction_id: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
impl Connection {
|
||||
pub(crate) fn inner_new(inner: LanceDBConnection) -> Self {
|
||||
Self { inner: Some(inner) }
|
||||
@@ -106,6 +133,39 @@ impl Connection {
|
||||
Ok(Self::inner_new(builder.execute().await.default_error()?))
|
||||
}
|
||||
|
||||
/// Create a new Connection instance backed by a namespace implementation.
|
||||
#[napi(factory)]
|
||||
pub async fn new_with_namespace(
|
||||
impl_name: String,
|
||||
properties: HashMap<String, String>,
|
||||
options: ConnectNamespaceOptions,
|
||||
) -> napi::Result<Self> {
|
||||
if impl_name.is_empty() {
|
||||
return Err(napi::Error::from_reason(
|
||||
"implName must be a non-empty string",
|
||||
));
|
||||
}
|
||||
|
||||
let mut builder = connect_namespace(&impl_name, properties);
|
||||
if let Some(interval) = options.read_consistency_interval {
|
||||
builder =
|
||||
builder.read_consistency_interval(std::time::Duration::from_secs_f64(interval));
|
||||
}
|
||||
if let Some(storage_options) = options.storage_options {
|
||||
for (key, value) in storage_options {
|
||||
builder = builder.storage_option(key, value);
|
||||
}
|
||||
}
|
||||
if let Some(namespace_client_properties) = options.namespace_client_properties {
|
||||
builder = builder.namespace_client_properties(namespace_client_properties);
|
||||
}
|
||||
if let Some(session) = options.session {
|
||||
builder = builder.session(session.inner.clone());
|
||||
}
|
||||
|
||||
Ok(Self::inner_new(builder.execute().await.default_error()?))
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn display(&self) -> napi::Result<String> {
|
||||
Ok(self.get_inner()?.to_string())
|
||||
@@ -273,4 +333,149 @@ impl Connection {
|
||||
let ns = namespace_path.unwrap_or_default();
|
||||
self.get_inner()?.drop_all_tables(&ns).await.default_error()
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
/// Describe a namespace and return its properties.
|
||||
pub async fn describe_namespace(
|
||||
&self,
|
||||
namespace_path: Vec<String>,
|
||||
) -> napi::Result<DescribeNamespaceResponse> {
|
||||
let req = DescribeNamespaceRequest {
|
||||
id: Some(namespace_path),
|
||||
..Default::default()
|
||||
};
|
||||
let resp = self
|
||||
.get_inner()?
|
||||
.describe_namespace(req)
|
||||
.await
|
||||
.default_error()?;
|
||||
Ok(DescribeNamespaceResponse {
|
||||
properties: resp.properties,
|
||||
})
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
/// List child namespaces under the given namespace path
|
||||
pub async fn list_namespaces(
|
||||
&self,
|
||||
namespace_path: Option<Vec<String>>,
|
||||
page_token: Option<String>,
|
||||
limit: Option<u32>,
|
||||
) -> napi::Result<ListNamespacesResponse> {
|
||||
let req = ListNamespacesRequest {
|
||||
id: namespace_path,
|
||||
page_token,
|
||||
limit: limit.map(|l| l as i32),
|
||||
..Default::default()
|
||||
};
|
||||
let resp = self
|
||||
.get_inner()?
|
||||
.list_namespaces(req)
|
||||
.await
|
||||
.default_error()?;
|
||||
Ok(ListNamespacesResponse {
|
||||
namespaces: resp.namespaces,
|
||||
page_token: resp.page_token,
|
||||
})
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
/// Create a new namespace with optional properties.
|
||||
pub async fn create_namespace(
|
||||
&self,
|
||||
namespace_path: Vec<String>,
|
||||
mode: Option<String>,
|
||||
properties: Option<HashMap<String, String>>,
|
||||
) -> napi::Result<CreateNamespaceResponse> {
|
||||
let mode_str = mode
|
||||
.map(|m| match m.to_lowercase().as_str() {
|
||||
"create" => Ok("Create".to_string()),
|
||||
"exist_ok" => Ok("ExistOk".to_string()),
|
||||
"overwrite" => Ok("Overwrite".to_string()),
|
||||
_ => Err(napi::Error::from_reason(format!(
|
||||
"Invalid mode '{}': expected one of 'create', 'exist_ok', 'overwrite'",
|
||||
m
|
||||
))),
|
||||
})
|
||||
.transpose()?;
|
||||
let req = CreateNamespaceRequest {
|
||||
id: Some(namespace_path),
|
||||
mode: mode_str,
|
||||
properties,
|
||||
..Default::default()
|
||||
};
|
||||
let resp = self
|
||||
.get_inner()?
|
||||
.create_namespace(req)
|
||||
.await
|
||||
.default_error()?;
|
||||
Ok(CreateNamespaceResponse {
|
||||
properties: resp.properties,
|
||||
transaction_id: resp.transaction_id,
|
||||
})
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
/// Drop a namespace.
|
||||
pub async fn drop_namespace(
|
||||
&self,
|
||||
namespace_path: Vec<String>,
|
||||
mode: Option<String>,
|
||||
behavior: Option<String>,
|
||||
) -> napi::Result<DropNamespaceResponse> {
|
||||
let mode_str = mode
|
||||
.map(|m| match m.to_lowercase().as_str() {
|
||||
"skip" => Ok("Skip".to_string()),
|
||||
"fail" => Ok("Fail".to_string()),
|
||||
_ => Err(napi::Error::from_reason(format!(
|
||||
"Invalid mode '{}': expected one of 'skip', 'fail'",
|
||||
m
|
||||
))),
|
||||
})
|
||||
.transpose()?;
|
||||
let behavior_str = behavior
|
||||
.map(|b| match b.to_lowercase().as_str() {
|
||||
"restrict" => Ok("Restrict".to_string()),
|
||||
"cascade" => Ok("Cascade".to_string()),
|
||||
_ => Err(napi::Error::from_reason(format!(
|
||||
"Invalid behavior '{}': expected one of 'restrict', 'cascade'",
|
||||
b
|
||||
))),
|
||||
})
|
||||
.transpose()?;
|
||||
let req = DropNamespaceRequest {
|
||||
id: Some(namespace_path),
|
||||
mode: mode_str,
|
||||
behavior: behavior_str,
|
||||
..Default::default()
|
||||
};
|
||||
let resp = self
|
||||
.get_inner()?
|
||||
.drop_namespace(req)
|
||||
.await
|
||||
.default_error()?;
|
||||
Ok(DropNamespaceResponse {
|
||||
properties: resp.properties,
|
||||
transaction_id: resp.transaction_id,
|
||||
})
|
||||
}
|
||||
|
||||
/// Rename a table. `current_namespace_path` and `new_namespace_path` default to
|
||||
/// the root namespace when omitted; the caller is expected to either pass both
|
||||
/// or pass neither.
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn rename_table(
|
||||
&self,
|
||||
current_name: String,
|
||||
new_name: String,
|
||||
current_namespace_path: Option<Vec<String>>,
|
||||
new_namespace_path: Option<Vec<String>>,
|
||||
) -> napi::Result<()> {
|
||||
let cur_ns = current_namespace_path.unwrap_or_default();
|
||||
let new_ns = new_namespace_path.unwrap_or_default();
|
||||
self.get_inner()?
|
||||
.rename_table(¤t_name, &new_name, &cur_ns, &new_ns)
|
||||
.await
|
||||
.default_error()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@ pub mod permutation;
|
||||
mod query;
|
||||
pub mod remote;
|
||||
mod rerankers;
|
||||
mod scannable;
|
||||
mod session;
|
||||
mod table;
|
||||
mod util;
|
||||
@@ -67,6 +68,26 @@ pub struct OpenTableOptions {
|
||||
pub storage_options: Option<HashMap<String, String>>,
|
||||
}
|
||||
|
||||
#[napi(object)]
|
||||
#[derive(Debug)]
|
||||
pub struct ConnectNamespaceOptions {
|
||||
/// The interval, in seconds, at which to check for updates to the table
|
||||
/// from other processes. If None, then consistency is not checked. For
|
||||
/// performance reasons, this is the default. For strong consistency, set
|
||||
/// this to zero seconds. Then every read will check for updates from other
|
||||
/// processes. As a compromise, you can set this to a non-zero value for
|
||||
/// eventual consistency.
|
||||
pub read_consistency_interval: Option<f64>,
|
||||
/// Configuration for object storage. The available options are described
|
||||
/// at https://docs.lancedb.com/storage/
|
||||
pub storage_options: Option<HashMap<String, String>>,
|
||||
/// Extra properties for the backing namespace client.
|
||||
pub namespace_client_properties: Option<HashMap<String, String>>,
|
||||
/// The session to use for this connection. Holds shared caches and other
|
||||
/// session-specific state.
|
||||
pub session: Option<session::Session>,
|
||||
}
|
||||
|
||||
#[napi_derive::module_init]
|
||||
fn init() {
|
||||
let env = Env::new()
|
||||
|
||||
@@ -3,6 +3,12 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::error::NapiErrorExt;
|
||||
use crate::error::convert_error;
|
||||
use crate::iterator::RecordBatchIterator;
|
||||
use crate::rerankers::RerankHybridCallbackArgs;
|
||||
use crate::rerankers::Reranker;
|
||||
use crate::util::{parse_distance_type, schema_to_buffer};
|
||||
use arrow_array::{
|
||||
Array, Float16Array as ArrowFloat16Array, Float32Array as ArrowFloat32Array,
|
||||
Float64Array as ArrowFloat64Array, UInt8Array as ArrowUInt8Array,
|
||||
@@ -19,16 +25,27 @@ use lancedb::query::QueryBase;
|
||||
use lancedb::query::QueryExecutionOptions;
|
||||
use lancedb::query::Select;
|
||||
use lancedb::query::TakeQuery as LanceDbTakeQuery;
|
||||
use lancedb::query::VectorQuery as LanceDbVectorQuery;
|
||||
use lancedb::query::{ColumnOrdering as LanceDbColumnOrdering, VectorQuery as LanceDbVectorQuery};
|
||||
use napi::bindgen_prelude::*;
|
||||
use napi_derive::napi;
|
||||
|
||||
use crate::error::NapiErrorExt;
|
||||
use crate::error::convert_error;
|
||||
use crate::iterator::RecordBatchIterator;
|
||||
use crate::rerankers::RerankHybridCallbackArgs;
|
||||
use crate::rerankers::Reranker;
|
||||
use crate::util::{parse_distance_type, schema_to_buffer};
|
||||
#[napi(object)]
|
||||
pub struct ColumnOrdering {
|
||||
pub ascending: bool,
|
||||
pub nulls_first: bool,
|
||||
pub column_name: String,
|
||||
}
|
||||
|
||||
impl From<ColumnOrdering> for LanceDbColumnOrdering {
|
||||
fn from(value: ColumnOrdering) -> Self {
|
||||
match (value.ascending, value.nulls_first) {
|
||||
(true, true) => Self::asc_nulls_first(value.column_name),
|
||||
(true, false) => Self::asc_nulls_last(value.column_name),
|
||||
(false, true) => Self::desc_nulls_first(value.column_name),
|
||||
(false, false) => Self::desc_nulls_last(value.column_name),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn bytes_to_arrow_array(data: Uint8Array, dtype: String) -> napi::Result<Arc<dyn Array>> {
|
||||
let buf = arrow_buffer::Buffer::from(data.to_vec());
|
||||
@@ -128,6 +145,18 @@ impl Query {
|
||||
self.inner = self.inner.clone().with_row_id();
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn order_by(&mut self, ordering: Option<Vec<ColumnOrdering>>) -> napi::Result<()> {
|
||||
let ordering = ordering.map(|ordering| {
|
||||
ordering
|
||||
.into_iter()
|
||||
.map(LanceDbColumnOrdering::from)
|
||||
.collect()
|
||||
});
|
||||
self.inner = self.inner.clone().order_by(ordering);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn output_schema(&self) -> napi::Result<Buffer> {
|
||||
let schema = self.inner.output_schema().await.default_error()?;
|
||||
@@ -328,6 +357,18 @@ impl VectorQuery {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[napi]
|
||||
pub fn order_by(&mut self, ordering: Option<Vec<ColumnOrdering>>) -> napi::Result<()> {
|
||||
let ordering = ordering.map(|ordering| {
|
||||
ordering
|
||||
.into_iter()
|
||||
.map(LanceDbColumnOrdering::from)
|
||||
.collect()
|
||||
});
|
||||
self.inner = self.inner.clone().order_by(ordering);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn output_schema(&self) -> napi::Result<Buffer> {
|
||||
let schema = self.inner.output_schema().await.default_error()?;
|
||||
|
||||
253
nodejs/src/scannable.rs
Normal file
253
nodejs/src/scannable.rs
Normal file
@@ -0,0 +1,253 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
//! NodeJS binding for the [`lancedb::data::scannable::Scannable`] trait.
|
||||
//!
|
||||
//! The JS side supplies a `getNextBatch(isStart)` callback that returns the
|
||||
//! next Arrow `RecordBatch` encoded as a self-contained Arrow IPC Stream
|
||||
//! message (schema message + record batch message + EOS marker) wrapped in a
|
||||
//! `Buffer`, or `null` when the stream is exhausted. The Rust side parses
|
||||
//! each buffer with `arrow_ipc::reader::StreamReader`, validates every
|
||||
//! standalone batch stream against the declared schema, and yields decoded
|
||||
//! `RecordBatch`es as a [`SendableRecordBatchStream`].
|
||||
//!
|
||||
//! `isStart` is `true` on the first `getNextBatch` call of each new
|
||||
//! `scan_as_stream` and `false` thereafter. JS uses it to drop any cached
|
||||
//! iterator and re-invoke its factory at scan boundaries, so retries
|
||||
//! triggered by mid-stream failures restart at batch 0.
|
||||
|
||||
use std::io::Cursor;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_array::RecordBatch;
|
||||
use arrow_ipc::reader::StreamReader;
|
||||
use arrow_schema::SchemaRef;
|
||||
use futures::stream::once;
|
||||
use lancedb::arrow::{SendableRecordBatchStream, SimpleRecordBatchStream};
|
||||
use lancedb::data::scannable::Scannable as LanceScannable;
|
||||
use lancedb::ipc::ipc_file_to_schema;
|
||||
use lancedb::{Error, Result as LanceResult};
|
||||
use napi::bindgen_prelude::*;
|
||||
use napi::threadsafe_function::ThreadsafeFunction;
|
||||
use napi_derive::napi;
|
||||
|
||||
/// Threadsafe handle to the JS `getNextBatch` callback. The callback takes a
|
||||
/// single boolean `isStart` (`true` on the first call of each new scan) and
|
||||
/// returns a Promise that resolves to a `Buffer` containing one IPC Stream
|
||||
/// message, or `null` at end-of-stream.
|
||||
type GetNextBatchFn = ThreadsafeFunction<bool, Promise<Option<Buffer>>, bool, Status, false>;
|
||||
|
||||
/// A Rust-side view of a JS-constructed `Scannable`.
|
||||
///
|
||||
/// Held in JS as the return value of the `Scannable` class constructor. When
|
||||
/// passed to a consumer that accepts `impl lancedb::data::scannable::Scannable`,
|
||||
/// the consumer invokes `scan_as_stream()` to pull batches through the JS
|
||||
/// callback.
|
||||
#[napi]
|
||||
pub struct NapiScannable {
|
||||
schema: SchemaRef,
|
||||
num_rows: Option<usize>,
|
||||
rescannable: bool,
|
||||
// `ThreadsafeFunction` is not `Clone`; wrap in `Arc` so the stream
|
||||
// returned by `scan_as_stream` can own a handle independent of `self`.
|
||||
get_next_batch: Arc<GetNextBatchFn>,
|
||||
// Tracks whether a scan has already started; used to enforce one-shot
|
||||
// semantics on non-rescannable sources.
|
||||
scanned: bool,
|
||||
}
|
||||
|
||||
#[napi]
|
||||
impl NapiScannable {
|
||||
/// Construct a new `NapiScannable`.
|
||||
///
|
||||
/// - `schema_buf` — Arrow IPC File buffer carrying only the schema (no batches).
|
||||
/// - `num_rows` — optional row count hint; not validated against the stream.
|
||||
/// - `rescannable` — whether `get_next_batch` may be re-driven after the
|
||||
/// scan completes.
|
||||
/// - `get_next_batch` -- JS callback that yields the next batch as an Arrow
|
||||
/// IPC Stream message wrapped in a `Buffer`, or `null` at EOF. The
|
||||
/// `isStart` argument is `true` on the first call of each new scan;
|
||||
/// JS uses it to discard any cached iterator before pulling.
|
||||
#[napi(constructor)]
|
||||
pub fn new(
|
||||
schema_buf: Buffer,
|
||||
num_rows: Option<i64>,
|
||||
rescannable: bool,
|
||||
get_next_batch: Function<bool, Promise<Option<Buffer>>>,
|
||||
) -> napi::Result<Self> {
|
||||
let schema = ipc_file_to_schema(schema_buf.to_vec())
|
||||
.map_err(|e| napi::Error::from_reason(format!("Invalid schema buffer: {}", e)))?;
|
||||
let num_rows = num_rows
|
||||
.map(|n| {
|
||||
usize::try_from(n)
|
||||
.map_err(|_| napi::Error::from_reason("num_rows must be non-negative"))
|
||||
})
|
||||
.transpose()?;
|
||||
let get_next_batch = Arc::new(get_next_batch.build_threadsafe_function().build()?);
|
||||
Ok(Self {
|
||||
schema,
|
||||
num_rows,
|
||||
rescannable,
|
||||
get_next_batch,
|
||||
scanned: false,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for NapiScannable {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("NapiScannable")
|
||||
.field("schema", &self.schema)
|
||||
.field("num_rows", &self.num_rows)
|
||||
.field("rescannable", &self.rescannable)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl LanceScannable for NapiScannable {
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn scan_as_stream(&mut self) -> SendableRecordBatchStream {
|
||||
let schema = self.schema.clone();
|
||||
|
||||
// One-shot enforcement for non-rescannable sources: return a stream
|
||||
// whose first item is an error.
|
||||
if self.scanned && !self.rescannable {
|
||||
let err_stream = once(async {
|
||||
Err(Error::InvalidInput {
|
||||
message: "Scannable has already been consumed (non-rescannable source)"
|
||||
.to_string(),
|
||||
})
|
||||
});
|
||||
return Box::pin(SimpleRecordBatchStream::new(err_stream, schema));
|
||||
}
|
||||
self.scanned = true;
|
||||
|
||||
let tsfn = Arc::clone(&self.get_next_batch);
|
||||
let declared_schema = schema.clone();
|
||||
|
||||
// State threaded through the unfold. `is_first_pull` starts true so
|
||||
// the first call into JS signals a new-scan boundary; JS uses it to
|
||||
// reset any cached iterator before factory()-ing a fresh one.
|
||||
let initial = State {
|
||||
tsfn,
|
||||
batch_index: 0,
|
||||
declared_schema,
|
||||
errored: false,
|
||||
is_first_pull: true,
|
||||
};
|
||||
|
||||
let stream = futures::stream::unfold(initial, |mut state| async move {
|
||||
if state.errored {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Pull the next IPC Stream buffer from JS. `is_first_pull` is
|
||||
// consumed here and cleared so subsequent pulls continue the
|
||||
// same scan rather than restarting it.
|
||||
let is_start = state.is_first_pull;
|
||||
state.is_first_pull = false;
|
||||
let buf = match pull_next(&state.tsfn, is_start).await {
|
||||
Ok(Some(buf)) => buf,
|
||||
Ok(None) => return None,
|
||||
Err(e) => {
|
||||
state.errored = true;
|
||||
return Some((Err(e), state));
|
||||
}
|
||||
};
|
||||
|
||||
match decode_one_batch(buf.as_ref(), &state.declared_schema) {
|
||||
Ok(batch) => {
|
||||
state.batch_index += 1;
|
||||
Some((Ok(batch), state))
|
||||
}
|
||||
Err(e) => {
|
||||
let tagged = Error::Runtime {
|
||||
message: format!(
|
||||
"[scannable/rust-bridge] failure at batch index {}: {}",
|
||||
state.batch_index, e
|
||||
),
|
||||
};
|
||||
state.errored = true;
|
||||
Some((Err(tagged), state))
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Box::pin(SimpleRecordBatchStream::new(stream, schema))
|
||||
}
|
||||
|
||||
fn num_rows(&self) -> Option<usize> {
|
||||
self.num_rows
|
||||
}
|
||||
|
||||
fn rescannable(&self) -> bool {
|
||||
self.rescannable
|
||||
}
|
||||
}
|
||||
|
||||
struct State {
|
||||
tsfn: Arc<GetNextBatchFn>,
|
||||
batch_index: usize,
|
||||
declared_schema: SchemaRef,
|
||||
errored: bool,
|
||||
/// True for the very first pull of a new scan. Forwarded to JS so the
|
||||
/// callback can drop any cached iterator and call its factory fresh,
|
||||
/// which makes rescannable sources restart at batch 0 even when the
|
||||
/// previous scan ended mid-stream.
|
||||
is_first_pull: bool,
|
||||
}
|
||||
|
||||
/// Invoke the JS callback and await its Promise. `is_start` is forwarded to
|
||||
/// the JS side as the `isStart` argument so it can reset its iterator at the
|
||||
/// scan boundary. Errors on the JS side surface here as rejected promises
|
||||
/// and are tunneled back as `lancedb::Error::Runtime`.
|
||||
async fn pull_next(tsfn: &GetNextBatchFn, is_start: bool) -> LanceResult<Option<Buffer>> {
|
||||
let promise = tsfn
|
||||
.call_async(is_start)
|
||||
.await
|
||||
.map_err(|e| Error::Runtime {
|
||||
message: format!(
|
||||
"[scannable/js-factory] napi error status={}, reason={}",
|
||||
e.status, e.reason
|
||||
),
|
||||
})?;
|
||||
promise.await.map_err(|e| Error::Runtime {
|
||||
message: format!(
|
||||
"[scannable/js-iterator] napi error status={}, reason={}",
|
||||
e.status, e.reason
|
||||
),
|
||||
})
|
||||
}
|
||||
|
||||
/// Decode one IPC Stream buffer (schema + batch + EOS) into a `RecordBatch`.
|
||||
/// Each buffer is a standalone IPC stream, so every decoded stream schema must
|
||||
/// match the one declared at construction.
|
||||
fn decode_one_batch(buf: &[u8], declared: &SchemaRef) -> LanceResult<RecordBatch> {
|
||||
let reader = StreamReader::try_new(Cursor::new(buf), None).map_err(|e| Error::Runtime {
|
||||
message: format!("failed to open IPC stream reader: {}", e),
|
||||
})?;
|
||||
|
||||
let actual = reader.schema();
|
||||
if actual.as_ref() != declared.as_ref() {
|
||||
return Err(Error::InvalidInput {
|
||||
message: format!(
|
||||
"declared schema does not match stream schema: declared={:?} actual={:?}",
|
||||
declared, actual
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
let mut iter = reader;
|
||||
let batch = iter
|
||||
.next()
|
||||
.ok_or_else(|| Error::Runtime {
|
||||
message: "IPC stream contained schema but no record batch".to_string(),
|
||||
})?
|
||||
.map_err(|e| Error::Runtime {
|
||||
message: format!("failed to decode record batch: {}", e),
|
||||
})?;
|
||||
Ok(batch)
|
||||
}
|
||||
@@ -9,6 +9,7 @@ use lancedb::table::{
|
||||
OptimizeAction, OptimizeOptions, Table as LanceDbTable,
|
||||
};
|
||||
use napi::bindgen_prelude::*;
|
||||
use napi::threadsafe_function::{ThreadsafeFunction, ThreadsafeFunctionCallMode};
|
||||
use napi_derive::napi;
|
||||
|
||||
use crate::error::NapiErrorExt;
|
||||
@@ -67,8 +68,16 @@ impl Table {
|
||||
schema_to_buffer(&schema)
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn add(&self, buf: Buffer, mode: String) -> napi::Result<AddResult> {
|
||||
#[napi(
|
||||
catch_unwind,
|
||||
ts_args_type = "buf: Buffer, mode: string, progressCallback?: (progress: WriteProgressInfo) => void"
|
||||
)]
|
||||
pub async fn add(
|
||||
&self,
|
||||
buf: Buffer,
|
||||
mode: String,
|
||||
progress_callback: Option<ProgressFn>,
|
||||
) -> napi::Result<AddResult> {
|
||||
let batches = ipc_file_to_batches(buf.to_vec())
|
||||
.map_err(|e| napi::Error::from_reason(format!("Failed to read IPC file: {}", e)))?;
|
||||
let batches = batches
|
||||
@@ -92,6 +101,19 @@ impl Table {
|
||||
return Err(napi::Error::from_reason(format!("Invalid mode: {}", mode)));
|
||||
};
|
||||
|
||||
if let Some(tsfn) = progress_callback {
|
||||
op = op.progress(move |p| {
|
||||
// NonBlocking: dispatch onto the JS event loop without
|
||||
// blocking the writer thread. With napi-rs's default
|
||||
// unbounded queue, events are not dropped — a slow JS
|
||||
// callback will just queue them.
|
||||
tsfn.call(
|
||||
WriteProgressInfo::from(p),
|
||||
ThreadsafeFunctionCallMode::NonBlocking,
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
let res = op.execute().await.default_error()?;
|
||||
Ok(res.into())
|
||||
}
|
||||
@@ -344,6 +366,31 @@ impl Table {
|
||||
Ok(res.into())
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn set_unenforced_primary_key(&self, columns: Vec<String>) -> napi::Result<()> {
|
||||
self.inner_ref()?
|
||||
.set_unenforced_primary_key(columns)
|
||||
.await
|
||||
.default_error()
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn set_lsm_write_spec(&self, spec: LsmWriteSpec) -> napi::Result<()> {
|
||||
let native_spec = lancedb::table::LsmWriteSpec::try_from(spec)?;
|
||||
self.inner_ref()?
|
||||
.set_lsm_write_spec(native_spec)
|
||||
.await
|
||||
.default_error()
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn unset_lsm_write_spec(&self) -> napi::Result<()> {
|
||||
self.inner_ref()?
|
||||
.unset_lsm_write_spec()
|
||||
.await
|
||||
.default_error()
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn version(&self) -> napi::Result<i64> {
|
||||
self.inner_ref()?
|
||||
@@ -538,6 +585,63 @@ impl From<lancedb::index::IndexConfig> for IndexConfig {
|
||||
}
|
||||
}
|
||||
|
||||
/// Specification selecting Lance's MemWAL LSM-style write path for
|
||||
/// `mergeInsert`.
|
||||
///
|
||||
/// `specType` must be `"bucket"`, `"identity"`, or `"unsharded"`. For
|
||||
/// `"bucket"`, `column` and `numBuckets` are required; for `"identity"`,
|
||||
/// `column` is required.
|
||||
#[napi(object)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct LsmWriteSpec {
|
||||
/// One of `"bucket"`, `"identity"`, or `"unsharded"`.
|
||||
pub spec_type: String,
|
||||
/// Bucket and identity variants: the sharding column.
|
||||
pub column: Option<String>,
|
||||
/// Bucket variant: the number of buckets, in `[1, 1024]`.
|
||||
pub num_buckets: Option<u32>,
|
||||
/// Names of indexes the MemWAL should keep up to date during writes.
|
||||
pub maintained_indexes: Option<Vec<String>>,
|
||||
/// Default `ShardWriter` configuration recorded in the MemWAL index.
|
||||
pub writer_config_defaults: Option<HashMap<String, String>>,
|
||||
}
|
||||
|
||||
impl TryFrom<LsmWriteSpec> for lancedb::table::LsmWriteSpec {
|
||||
type Error = napi::Error;
|
||||
|
||||
fn try_from(value: LsmWriteSpec) -> napi::Result<Self> {
|
||||
let maintained = value.maintained_indexes.unwrap_or_default();
|
||||
let writer_config_defaults = value.writer_config_defaults.unwrap_or_default();
|
||||
let spec = match value.spec_type.as_str() {
|
||||
"bucket" => {
|
||||
let column = value.column.ok_or_else(|| {
|
||||
napi::Error::from_reason("LsmWriteSpec bucket requires `column`")
|
||||
})?;
|
||||
let num_buckets = value.num_buckets.ok_or_else(|| {
|
||||
napi::Error::from_reason("LsmWriteSpec bucket requires `numBuckets`")
|
||||
})?;
|
||||
Self::bucket(column, num_buckets)
|
||||
}
|
||||
"identity" => {
|
||||
let column = value.column.ok_or_else(|| {
|
||||
napi::Error::from_reason("LsmWriteSpec identity requires `column`")
|
||||
})?;
|
||||
Self::identity(column)
|
||||
}
|
||||
"unsharded" => Self::unsharded(),
|
||||
other => {
|
||||
return Err(napi::Error::from_reason(format!(
|
||||
"LsmWriteSpec `specType` must be 'bucket', 'identity', or 'unsharded', got '{}'",
|
||||
other
|
||||
)));
|
||||
}
|
||||
};
|
||||
Ok(spec
|
||||
.with_maintained_indexes(maintained)
|
||||
.with_writer_config_defaults(writer_config_defaults))
|
||||
}
|
||||
}
|
||||
|
||||
/// Statistics about a compaction operation.
|
||||
#[napi(object)]
|
||||
#[derive(Clone, Debug)]
|
||||
@@ -572,6 +676,44 @@ pub struct OptimizeStats {
|
||||
pub prune: RemovalStats,
|
||||
}
|
||||
|
||||
/// Progress snapshot for a write operation, delivered to the JS callback
|
||||
/// passed to `Table.add`.
|
||||
#[napi(object)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct WriteProgressInfo {
|
||||
/// Number of rows written so far.
|
||||
pub output_rows: i64,
|
||||
/// Number of bytes written so far.
|
||||
pub output_bytes: i64,
|
||||
/// Total rows expected, if the input source reports it.
|
||||
/// Always set on the final callback (where `done` is `true`).
|
||||
pub total_rows: Option<i64>,
|
||||
/// Wall-clock seconds since monitoring started.
|
||||
pub elapsed_seconds: f64,
|
||||
/// Number of parallel write tasks currently in flight.
|
||||
pub active_tasks: i64,
|
||||
/// Total number of parallel write tasks (the write parallelism).
|
||||
pub total_tasks: i64,
|
||||
/// `true` for the final callback; `false` otherwise.
|
||||
pub done: bool,
|
||||
}
|
||||
|
||||
impl From<&lancedb::table::write_progress::WriteProgress> for WriteProgressInfo {
|
||||
fn from(p: &lancedb::table::write_progress::WriteProgress) -> Self {
|
||||
Self {
|
||||
output_rows: p.output_rows() as i64,
|
||||
output_bytes: p.output_bytes() as i64,
|
||||
total_rows: p.total_rows().map(|n| n as i64),
|
||||
elapsed_seconds: p.elapsed().as_secs_f64(),
|
||||
active_tasks: p.active_tasks() as i64,
|
||||
total_tasks: p.total_tasks() as i64,
|
||||
done: p.done(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type ProgressFn = ThreadsafeFunction<WriteProgressInfo, (), WriteProgressInfo, Status, false>;
|
||||
|
||||
/// A definition of a column alteration. The alteration changes the column at
|
||||
/// `path` to have the new name `name`, to be nullable if `nullable` is true,
|
||||
/// and to have the data type `data_type`. At least one of `rename` or `nullable`
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.31.0-beta.11"
|
||||
current_version = "0.32.1-beta.0"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -4,16 +4,26 @@ code is in the `src/` directory and the Python bindings are in the `lancedb/` di
|
||||
|
||||
Common commands:
|
||||
|
||||
* Bootstrap dev env: `uv run --extra tests --extra dev maturin develop --extras tests,dev`
|
||||
* Build: `make develop`
|
||||
* Format: `make format`
|
||||
* Lint: `make check`
|
||||
* Fix lints: `make fix`
|
||||
* Test: `make test`
|
||||
* Doc test: `make doctest`
|
||||
* Test: `uv run --extra tests pytest python/tests -vv --durations=10 -m "not slow and not s3_test"`
|
||||
* Run specific test: `uv run --extra tests pytest python/tests/<test_file>.py::<test_name> -q`
|
||||
* Doc test: `uv run --extra tests pytest --doctest-modules python/lancedb`
|
||||
|
||||
Use the uv-managed environment declared by `uv.lock` for Python validation. Do
|
||||
not treat system `python`, global `pytest`, or missing editable-install errors
|
||||
as final blockers; bootstrap or enter the uv environment instead. `make test`
|
||||
and `make doctest` assume the development environment is already prepared.
|
||||
|
||||
Before committing changes, run lints and then formatting.
|
||||
|
||||
When you change the Rust code, you will need to recompile the Python bindings: `make develop`.
|
||||
When you change the Rust code, PyO3 binding code, or see a missing/stale
|
||||
`lancedb._lancedb`, recompile the Python bindings with
|
||||
`uv run --extra tests --extra dev maturin develop --extras tests,dev` before
|
||||
running tests.
|
||||
|
||||
When you export new types from Rust to Python, you must manually update `python/lancedb/_lancedb.pyi`
|
||||
with the corresponding type hints. You can run `pyright` to check for type errors in the Python code.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.31.0-beta.11"
|
||||
version = "0.32.1-beta.0"
|
||||
publish = false
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
@@ -19,6 +19,7 @@ arrow = { version = "58.0.0", features = ["pyarrow"] }
|
||||
async-trait = "0.1"
|
||||
bytes = "1"
|
||||
lancedb = { path = "../rust/lancedb", default-features = false }
|
||||
datafusion-common.workspace = true
|
||||
lance-core.workspace = true
|
||||
lance-namespace.workspace = true
|
||||
lance-namespace-impls.workspace = true
|
||||
|
||||
@@ -51,7 +51,7 @@ class PyExpr:
|
||||
def to_sql(self) -> str: ...
|
||||
|
||||
def expr_col(name: str) -> PyExpr: ...
|
||||
def expr_lit(value: Union[bool, int, float, str]) -> PyExpr: ...
|
||||
def expr_lit(value: Union[bool, int, float, str, bytes]) -> PyExpr: ...
|
||||
def expr_func(name: str, args: List[PyExpr]) -> PyExpr: ...
|
||||
|
||||
class Session:
|
||||
@@ -217,6 +217,9 @@ class Table:
|
||||
async def uri(self) -> str: ...
|
||||
async def initial_storage_options(self) -> Optional[Dict[str, str]]: ...
|
||||
async def latest_storage_options(self) -> Optional[Dict[str, str]]: ...
|
||||
async def set_unenforced_primary_key(self, columns: List[str]) -> None: ...
|
||||
async def set_lsm_write_spec(self, spec: LsmWriteSpec) -> None: ...
|
||||
async def unset_lsm_write_spec(self) -> None: ...
|
||||
@property
|
||||
def tags(self) -> Tags: ...
|
||||
def query(self) -> Query: ...
|
||||
@@ -255,6 +258,11 @@ class RecordBatchStream:
|
||||
def __aiter__(self) -> "RecordBatchStream": ...
|
||||
async def __anext__(self) -> pa.RecordBatch: ...
|
||||
|
||||
class ColumnOrdering(TypedDict):
|
||||
column_name: str
|
||||
ascending: bool
|
||||
nulls_first: bool
|
||||
|
||||
class Query:
|
||||
def where(self, filter: str): ...
|
||||
def where_expr(self, expr: PyExpr): ...
|
||||
@@ -268,6 +276,7 @@ class Query:
|
||||
def postfilter(self): ...
|
||||
def nearest_to(self, query_vec: pa.Array) -> VectorQuery: ...
|
||||
def nearest_to_text(self, query: dict) -> FTSQuery: ...
|
||||
def order_by(self, ordering: Optional[List[ColumnOrdering]]): ...
|
||||
async def output_schema(self) -> pa.Schema: ...
|
||||
async def execute(
|
||||
self, max_batch_length: Optional[int], timeout: Optional[timedelta]
|
||||
@@ -296,6 +305,7 @@ class FTSQuery:
|
||||
def get_query(self) -> str: ...
|
||||
def add_query_vector(self, query_vec: pa.Array) -> None: ...
|
||||
def nearest_to(self, query_vec: pa.Array) -> HybridQuery: ...
|
||||
def order_by(self, ordering: Optional[List[ColumnOrdering]]): ...
|
||||
async def output_schema(self) -> pa.Schema: ...
|
||||
async def execute(
|
||||
self, max_batch_length: Optional[int], timeout: Optional[timedelta]
|
||||
@@ -321,6 +331,7 @@ class VectorQuery:
|
||||
def maximum_nprobes(self, maximum_nprobes: int): ...
|
||||
def bypass_vector_index(self): ...
|
||||
def nearest_to_text(self, query: dict) -> HybridQuery: ...
|
||||
def order_by(self, ordering: Optional[List[ColumnOrdering]]): ...
|
||||
def to_query_request(self) -> PyQueryRequest: ...
|
||||
|
||||
class HybridQuery:
|
||||
@@ -339,6 +350,7 @@ class HybridQuery:
|
||||
def minimum_nprobes(self, minimum_nprobes: int): ...
|
||||
def maximum_nprobes(self, maximum_nprobes: int): ...
|
||||
def bypass_vector_index(self): ...
|
||||
def order_by(self, ordering: Optional[List[ColumnOrdering]]): ...
|
||||
def to_vector_query(self) -> VectorQuery: ...
|
||||
def to_fts_query(self) -> FTSQuery: ...
|
||||
def get_limit(self) -> int: ...
|
||||
@@ -368,6 +380,7 @@ class PyQueryRequest:
|
||||
bypass_vector_index: Optional[bool]
|
||||
postfilter: Optional[bool]
|
||||
norm: Optional[str]
|
||||
order_by: Optional[List[ColumnOrdering]]
|
||||
|
||||
class CompactionStats:
|
||||
fragments_removed: int
|
||||
@@ -408,6 +421,37 @@ class MergeResult:
|
||||
num_deleted_rows: int
|
||||
num_attempts: int
|
||||
|
||||
class LsmWriteSpec:
|
||||
"""Specification selecting Lance's MemWAL LSM-style write path for
|
||||
`merge_insert`."""
|
||||
|
||||
@staticmethod
|
||||
def bucket(column: str, num_buckets: int) -> "LsmWriteSpec": ...
|
||||
@staticmethod
|
||||
def identity(column: str) -> "LsmWriteSpec": ...
|
||||
@staticmethod
|
||||
def unsharded() -> "LsmWriteSpec": ...
|
||||
def with_maintained_indexes(self, indexes: List[str]) -> "LsmWriteSpec":
|
||||
"""Return a copy of this spec asking the MemWAL to keep the named
|
||||
indexes up to date as rows are appended."""
|
||||
...
|
||||
def with_writer_config_defaults(self, defaults: Dict[str, str]) -> "LsmWriteSpec":
|
||||
"""Return a copy of this spec recording the given default
|
||||
`ShardWriter` configuration in the MemWAL index."""
|
||||
...
|
||||
@property
|
||||
def spec_type(self) -> str:
|
||||
"""One of 'bucket', 'identity', or 'unsharded'."""
|
||||
...
|
||||
@property
|
||||
def column(self) -> Optional[str]: ...
|
||||
@property
|
||||
def num_buckets(self) -> Optional[int]: ...
|
||||
@property
|
||||
def maintained_indexes(self) -> List[str]: ...
|
||||
@property
|
||||
def writer_config_defaults(self) -> Dict[str, str]: ...
|
||||
|
||||
class AddColumnsResult:
|
||||
version: int
|
||||
|
||||
|
||||
@@ -63,7 +63,7 @@ def _coerce(value: "ExprLike") -> "Expr":
|
||||
|
||||
|
||||
# Type alias used in annotations.
|
||||
ExprLike = Union["Expr", bool, int, float, str]
|
||||
ExprLike = Union["Expr", bool, int, float, str, bytes]
|
||||
|
||||
|
||||
class Expr:
|
||||
@@ -261,13 +261,13 @@ def col(name: str) -> Expr:
|
||||
return Expr(expr_col(name))
|
||||
|
||||
|
||||
def lit(value: Union[bool, int, float, str]) -> Expr:
|
||||
def lit(value: Union[bool, int, float, str, bytes]) -> Expr:
|
||||
"""Create a literal (constant) value expression.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
value:
|
||||
A Python ``bool``, ``int``, ``float``, or ``str``.
|
||||
A Python ``bool``, ``int``, ``float``, ``str``, or ``bytes``.
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
@@ -6,22 +6,44 @@
|
||||
from typing import Optional
|
||||
|
||||
|
||||
_CREATE_NAMESPACE_MODES = frozenset({"create", "exist_ok", "overwrite"})
|
||||
_DROP_NAMESPACE_MODES = frozenset({"SKIP", "FAIL"})
|
||||
_DROP_NAMESPACE_BEHAVIORS = frozenset({"RESTRICT", "CASCADE"})
|
||||
|
||||
|
||||
def _normalize_create_namespace_mode(mode: Optional[str]) -> Optional[str]:
|
||||
"""Normalize create namespace mode to lowercase (API expects lowercase)."""
|
||||
if mode is None:
|
||||
return None
|
||||
return mode.lower()
|
||||
normalized = mode.lower()
|
||||
if normalized not in _CREATE_NAMESPACE_MODES:
|
||||
raise ValueError(
|
||||
f"Invalid create namespace mode {mode!r}: "
|
||||
f"expected one of 'create', 'exist_ok', 'overwrite'"
|
||||
)
|
||||
return normalized
|
||||
|
||||
|
||||
def _normalize_drop_namespace_mode(mode: Optional[str]) -> Optional[str]:
|
||||
"""Normalize drop namespace mode to uppercase (API expects uppercase)."""
|
||||
if mode is None:
|
||||
return None
|
||||
return mode.upper()
|
||||
normalized = mode.upper()
|
||||
if normalized not in _DROP_NAMESPACE_MODES:
|
||||
raise ValueError(
|
||||
f"Invalid drop namespace mode {mode!r}: expected one of 'skip', 'fail'"
|
||||
)
|
||||
return normalized
|
||||
|
||||
|
||||
def _normalize_drop_namespace_behavior(behavior: Optional[str]) -> Optional[str]:
|
||||
"""Normalize drop namespace behavior to uppercase (API expects uppercase)."""
|
||||
if behavior is None:
|
||||
return None
|
||||
return behavior.upper()
|
||||
normalized = behavior.upper()
|
||||
if normalized not in _DROP_NAMESPACE_BEHAVIORS:
|
||||
raise ValueError(
|
||||
f"Invalid drop namespace behavior {behavior!r}: "
|
||||
f"expected one of 'restrict', 'cascade'"
|
||||
)
|
||||
return normalized
|
||||
|
||||
@@ -968,22 +968,32 @@ class Permutation:
|
||||
new.transform_fn = transform
|
||||
return new
|
||||
|
||||
def take_offsets(self, offsets: list[int]) -> Any:
|
||||
"""
|
||||
Take rows from the permutation by offset
|
||||
|
||||
The returned value is passed through the permutation's current transform,
|
||||
so `with_format` and `with_transform` affect this method in the same way
|
||||
they affect iteration.
|
||||
"""
|
||||
|
||||
async def do_take_offsets():
|
||||
return await self.reader.take_offsets(offsets, selection=self.selection)
|
||||
|
||||
batch = LOOP.run(do_take_offsets())
|
||||
return self.transform_fn(batch)
|
||||
|
||||
def __getitem__(self, index: int) -> Any:
|
||||
"""
|
||||
Returns a single row from the permutation by offset
|
||||
"""
|
||||
return self.__getitems__([index])
|
||||
return self.take_offsets([index])
|
||||
|
||||
def __getitems__(self, indices: list[int]) -> Any:
|
||||
"""
|
||||
Returns rows from the permutation by offset
|
||||
"""
|
||||
|
||||
async def do_getitems():
|
||||
return await self.reader.take_offsets(indices, selection=self.selection)
|
||||
|
||||
batch = LOOP.run(do_getitems())
|
||||
return self.transform_fn(batch)
|
||||
return self.take_offsets(indices)
|
||||
|
||||
@deprecated(details="Use with_skip instead")
|
||||
def skip(self, skip: int) -> "Permutation":
|
||||
|
||||
@@ -92,6 +92,12 @@ def ensure_vector_query(
|
||||
return val
|
||||
|
||||
|
||||
class ColumnOrdering(pydantic.BaseModel):
|
||||
column_name: str
|
||||
ascending: bool = True
|
||||
nulls_first: bool = False
|
||||
|
||||
|
||||
class FullTextQueryType(str, Enum):
|
||||
MATCH = "match"
|
||||
MATCH_PHRASE = "match_phrase"
|
||||
@@ -504,6 +510,8 @@ class Query(pydantic.BaseModel):
|
||||
# Bypass the vector index and use a brute force search
|
||||
bypass_vector_index: Optional[bool] = None
|
||||
|
||||
order_by: Optional[List[ColumnOrdering]] = None
|
||||
|
||||
@classmethod
|
||||
def from_inner(cls, req: PyQueryRequest) -> Self:
|
||||
query = cls()
|
||||
@@ -524,6 +532,8 @@ class Query(pydantic.BaseModel):
|
||||
query.refine_factor = req.refine_factor
|
||||
query.bypass_vector_index = req.bypass_vector_index
|
||||
query.postfilter = req.postfilter
|
||||
if req.order_by is not None:
|
||||
query.order_by = [ColumnOrdering(**o) for o in req.order_by]
|
||||
if req.full_text_search is not None:
|
||||
query.full_text_query = FullTextSearchQuery(
|
||||
columns=None,
|
||||
@@ -572,9 +582,22 @@ class LanceQueryBuilder(ABC):
|
||||
If "auto", the query type is inferred based on the query.
|
||||
vector_column_name: str
|
||||
The name of the vector column to use for vector search.
|
||||
ordering_field_name: Optional[str]
|
||||
.. deprecated:: 0.27.0
|
||||
Use ``order_by()`` method instead.
|
||||
fts_columns: Optional[Union[str, List[str]]]
|
||||
The columns to search in for full text search.
|
||||
fast_search: bool
|
||||
Skip flat search of unindexed data.
|
||||
"""
|
||||
if ordering_field_name is not None:
|
||||
import warnings
|
||||
|
||||
warnings.warn(
|
||||
"ordering_field_name is deprecated, use .order_by() method instead.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
# Check hybrid search first as it supports empty query pattern
|
||||
if query_type == "hybrid":
|
||||
# hybrid fts and vector query
|
||||
@@ -671,6 +694,7 @@ class LanceQueryBuilder(ABC):
|
||||
self._text = None
|
||||
self._ef = None
|
||||
self._bypass_vector_index = None
|
||||
self._order_by = None
|
||||
|
||||
@deprecation.deprecated(
|
||||
deprecated_in="0.3.1",
|
||||
@@ -694,6 +718,7 @@ class LanceQueryBuilder(ABC):
|
||||
flatten: Optional[Union[int, bool]] = None,
|
||||
*,
|
||||
timeout: Optional[timedelta] = None,
|
||||
**kwargs,
|
||||
) -> "pd.DataFrame":
|
||||
"""
|
||||
Execute the query and return the results as a pandas DataFrame.
|
||||
@@ -711,9 +736,12 @@ class LanceQueryBuilder(ABC):
|
||||
timeout: Optional[timedelta]
|
||||
The maximum time to wait for the query to complete.
|
||||
If None, wait indefinitely.
|
||||
**kwargs
|
||||
Forwarded to pyarrow.Table.to_pandas after query execution and
|
||||
optional flattening.
|
||||
"""
|
||||
tbl = flatten_columns(self.to_arrow(timeout=timeout), flatten)
|
||||
return tbl.to_pandas()
|
||||
return tbl.to_pandas(**kwargs)
|
||||
|
||||
@abstractmethod
|
||||
def to_arrow(self, *, timeout: Optional[timedelta] = None) -> pa.Table:
|
||||
@@ -947,6 +975,24 @@ class LanceQueryBuilder(ABC):
|
||||
""" # noqa: E501
|
||||
return self._table._explain_plan(self.to_query_object(), verbose=verbose)
|
||||
|
||||
def order_by(self, ordering: Optional[List[ColumnOrdering]]) -> Self:
|
||||
"""
|
||||
Set the ordering for the results.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
ordering: Optional[List[ColumnOrdering]]
|
||||
The ordering to use for the results. If None, then the default ordering
|
||||
will be used.
|
||||
|
||||
Returns
|
||||
-------
|
||||
LanceQueryBuilder
|
||||
The LanceQueryBuilder object.
|
||||
"""
|
||||
self._order_by = ordering
|
||||
return self
|
||||
|
||||
def analyze_plan(self) -> str:
|
||||
"""
|
||||
Run the query and return its execution plan with runtime metrics.
|
||||
@@ -1314,6 +1360,7 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
fast_search=self._fast_search,
|
||||
ef=self._ef,
|
||||
bypass_vector_index=self._bypass_vector_index,
|
||||
order_by=self._order_by,
|
||||
)
|
||||
|
||||
def to_batches(
|
||||
@@ -1465,7 +1512,9 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
||||
super().__init__(table)
|
||||
self._query = query
|
||||
self._phrase_query = False
|
||||
self.ordering_field_name = ordering_field_name
|
||||
# Deprecated compatibility parameter. Native FTS ordering is now
|
||||
# configured through order_by(); LanceQueryBuilder.create emits the warning.
|
||||
_ = ordering_field_name
|
||||
self._reranker = None
|
||||
self._fast_search = fast_search
|
||||
if isinstance(fts_columns, str):
|
||||
@@ -1514,6 +1563,7 @@ class LanceFtsQueryBuilder(LanceQueryBuilder):
|
||||
),
|
||||
offset=self._offset,
|
||||
fast_search=self._fast_search,
|
||||
order_by=self._order_by,
|
||||
)
|
||||
|
||||
def output_schema(self) -> pa.Schema:
|
||||
@@ -1579,6 +1629,7 @@ class LanceEmptyQueryBuilder(LanceQueryBuilder):
|
||||
limit=self._limit,
|
||||
with_row_id=self._with_row_id,
|
||||
offset=self._offset,
|
||||
order_by=self._order_by,
|
||||
)
|
||||
|
||||
def output_schema(self) -> pa.Schema:
|
||||
@@ -2305,6 +2356,7 @@ class AsyncQueryBase(object):
|
||||
self,
|
||||
flatten: Optional[Union[int, bool]] = None,
|
||||
timeout: Optional[timedelta] = None,
|
||||
**kwargs,
|
||||
) -> "pd.DataFrame":
|
||||
"""
|
||||
Execute the query and collect the results into a pandas DataFrame.
|
||||
@@ -2337,10 +2389,13 @@ class AsyncQueryBase(object):
|
||||
The maximum time to wait for the query to complete.
|
||||
If not specified, no timeout is applied. If the query does not
|
||||
complete within the specified time, an error will be raised.
|
||||
**kwargs
|
||||
Forwarded to pyarrow.Table.to_pandas after query execution and
|
||||
optional flattening.
|
||||
"""
|
||||
return (
|
||||
flatten_columns(await self.to_arrow(timeout=timeout), flatten)
|
||||
).to_pandas()
|
||||
).to_pandas(**kwargs)
|
||||
|
||||
async def to_polars(
|
||||
self,
|
||||
@@ -2502,6 +2557,27 @@ class AsyncStandardQuery(AsyncQueryBase):
|
||||
self._inner.offset(offset)
|
||||
return self
|
||||
|
||||
def order_by(self, ordering: Optional[List[ColumnOrdering]]) -> Self:
|
||||
"""
|
||||
Set the ordering for the results.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
ordering: Optional[List[ColumnOrdering]]
|
||||
The ordering to use for the results. If None, then the default ordering
|
||||
will be used.
|
||||
"""
|
||||
if ordering is None:
|
||||
self._inner.order_by(None)
|
||||
else:
|
||||
self._inner.order_by(
|
||||
[
|
||||
o.model_dump() if hasattr(o, "model_dump") else o.dict()
|
||||
for o in ordering
|
||||
]
|
||||
)
|
||||
return self
|
||||
|
||||
def fast_search(self) -> Self:
|
||||
"""
|
||||
Skip searching un-indexed data.
|
||||
@@ -3321,6 +3397,7 @@ class BaseQueryBuilder(object):
|
||||
self,
|
||||
flatten: Optional[Union[int, bool]] = None,
|
||||
timeout: Optional[timedelta] = None,
|
||||
**kwargs,
|
||||
) -> "pd.DataFrame":
|
||||
"""
|
||||
Execute the query and collect the results into a pandas DataFrame.
|
||||
@@ -3353,8 +3430,11 @@ class BaseQueryBuilder(object):
|
||||
The maximum time to wait for the query to complete.
|
||||
If not specified, no timeout is applied. If the query does not
|
||||
complete within the specified time, an error will be raised.
|
||||
**kwargs
|
||||
Forwarded to pyarrow.Table.to_pandas after query execution and
|
||||
optional flattening.
|
||||
"""
|
||||
return LOOP.run(self._inner.to_pandas(flatten, timeout))
|
||||
return LOOP.run(self._inner.to_pandas(flatten, timeout, **kwargs))
|
||||
|
||||
def to_polars(
|
||||
self,
|
||||
|
||||
@@ -14,6 +14,7 @@ from lancedb._lancedb import (
|
||||
DeleteResult,
|
||||
DropColumnsResult,
|
||||
IndexConfig,
|
||||
LsmWriteSpec,
|
||||
MergeResult,
|
||||
UpdateResult,
|
||||
)
|
||||
@@ -39,7 +40,7 @@ from lancedb.embeddings import EmbeddingFunctionRegistry
|
||||
from lancedb.table import _normalize_progress
|
||||
|
||||
from ..query import LanceVectorQueryBuilder, LanceQueryBuilder, LanceTakeQueryBuilder
|
||||
from ..table import AsyncTable, IndexStatistics, Query, Table, Tags
|
||||
from ..table import AsyncTable, BlobMode, IndexStatistics, Query, Table, Tags
|
||||
from ..types import BaseTokenizerType
|
||||
|
||||
|
||||
@@ -100,7 +101,7 @@ class RemoteTable(Table):
|
||||
"""to_arrow() is not yet supported on LanceDB cloud."""
|
||||
raise NotImplementedError("to_arrow() is not yet supported on LanceDB cloud.")
|
||||
|
||||
def to_pandas(self):
|
||||
def to_pandas(self, blob_mode: BlobMode = "lazy", **kwargs):
|
||||
"""to_pandas() is not yet supported on LanceDB cloud."""
|
||||
raise NotImplementedError("to_pandas() is not yet supported on LanceDB cloud.")
|
||||
|
||||
@@ -655,6 +656,18 @@ class RemoteTable(Table):
|
||||
def drop_columns(self, columns: Iterable[str]) -> DropColumnsResult:
|
||||
return LOOP.run(self._table.drop_columns(columns))
|
||||
|
||||
def set_unenforced_primary_key(self, columns: Union[str, Iterable[str]]) -> None:
|
||||
"""Not supported on LanceDB Cloud."""
|
||||
return LOOP.run(self._table.set_unenforced_primary_key(columns))
|
||||
|
||||
def set_lsm_write_spec(self, spec: "LsmWriteSpec") -> None:
|
||||
"""Not supported on LanceDB Cloud."""
|
||||
return LOOP.run(self._table.set_lsm_write_spec(spec))
|
||||
|
||||
def unset_lsm_write_spec(self) -> None:
|
||||
"""Not supported on LanceDB Cloud."""
|
||||
return LOOP.run(self._table.unset_lsm_write_spec())
|
||||
|
||||
def drop_index(self, index_name: str):
|
||||
return LOOP.run(self._table.drop_index(index_name))
|
||||
|
||||
|
||||
@@ -87,6 +87,8 @@ from .util import (
|
||||
)
|
||||
from .index import lang_mapping
|
||||
|
||||
BlobMode = Literal["lazy", "bytes", "descriptions"]
|
||||
|
||||
_MODEL_BACKED_TOKENIZER_PREFIXES = ("jieba", "lindera")
|
||||
_MODEL_BACKED_TOKENIZER_ERRORS = (
|
||||
"unknown base tokenizer",
|
||||
@@ -154,6 +156,7 @@ if TYPE_CHECKING:
|
||||
AlterColumnsResult,
|
||||
DeleteResult,
|
||||
DropColumnsResult,
|
||||
LsmWriteSpec,
|
||||
MergeResult,
|
||||
UpdateResult,
|
||||
)
|
||||
@@ -759,14 +762,22 @@ class Table(ABC):
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def to_pandas(self) -> "pandas.DataFrame":
|
||||
def to_pandas(self, blob_mode: BlobMode = "lazy", **kwargs) -> "pandas.DataFrame":
|
||||
"""Return the table as a pandas DataFrame.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
blob_mode: str, default "lazy"
|
||||
Controls how blob columns are returned for backends that support
|
||||
Lance blob-aware pandas conversion.
|
||||
**kwargs
|
||||
Forwarded to PyArrow / Lance pandas conversion.
|
||||
|
||||
Returns
|
||||
-------
|
||||
pd.DataFrame
|
||||
"""
|
||||
return self.to_arrow().to_pandas()
|
||||
return self.to_arrow().to_pandas(**kwargs)
|
||||
|
||||
@abstractmethod
|
||||
def to_arrow(self) -> pa.Table:
|
||||
@@ -2182,14 +2193,27 @@ class LanceTable(Table):
|
||||
"""Return the first n rows of the table."""
|
||||
return LOOP.run(self._table.head(n))
|
||||
|
||||
def to_pandas(self) -> "pd.DataFrame":
|
||||
def to_pandas(self, blob_mode: BlobMode = "lazy", **kwargs) -> "pd.DataFrame":
|
||||
"""Return the table as a pandas DataFrame.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
blob_mode: str, default "lazy"
|
||||
Controls how Lance blob columns are returned.
|
||||
**kwargs
|
||||
Forwarded to Lance pandas conversion.
|
||||
|
||||
Returns
|
||||
-------
|
||||
pd.DataFrame
|
||||
"""
|
||||
return self.to_arrow().to_pandas()
|
||||
if blob_mode == "lazy" and (
|
||||
self._namespace_client is not None
|
||||
or get_uri_scheme(self._dataset_path) == "memory"
|
||||
):
|
||||
return self.to_arrow().to_pandas(**kwargs)
|
||||
|
||||
return self.to_lance().to_pandas(blob_mode=blob_mode, **kwargs)
|
||||
|
||||
def to_arrow(self) -> pa.Table:
|
||||
"""Return the table as a pyarrow Table.
|
||||
@@ -2518,11 +2542,6 @@ class LanceTable(Table):
|
||||
"at a time. To search over multiple text fields, create a "
|
||||
"separate FTS index for each field."
|
||||
)
|
||||
if "." in field_names:
|
||||
raise ValueError(
|
||||
"Native FTS indexes can only be created on top-level fields. "
|
||||
f"Received nested field path: {field_names!r}."
|
||||
)
|
||||
|
||||
if tokenizer_name is None:
|
||||
tokenizer_configs = {
|
||||
@@ -3263,6 +3282,21 @@ class LanceTable(Table):
|
||||
def drop_columns(self, columns: Iterable[str]) -> DropColumnsResult:
|
||||
return LOOP.run(self._table.drop_columns(columns))
|
||||
|
||||
def set_unenforced_primary_key(self, columns: Union[str, Iterable[str]]) -> None:
|
||||
"""Set the unenforced primary key. See
|
||||
[`AsyncTable.set_unenforced_primary_key`][lancedb.AsyncTable.set_unenforced_primary_key]."""
|
||||
return LOOP.run(self._table.set_unenforced_primary_key(columns))
|
||||
|
||||
def set_lsm_write_spec(self, spec: "LsmWriteSpec") -> None:
|
||||
"""Install an LsmWriteSpec. See
|
||||
[`AsyncTable.set_lsm_write_spec`][lancedb.AsyncTable.set_lsm_write_spec]."""
|
||||
return LOOP.run(self._table.set_lsm_write_spec(spec))
|
||||
|
||||
def unset_lsm_write_spec(self) -> None:
|
||||
"""Remove the LsmWriteSpec. See
|
||||
[`AsyncTable.unset_lsm_write_spec`][lancedb.AsyncTable.unset_lsm_write_spec]."""
|
||||
return LOOP.run(self._table.unset_lsm_write_spec())
|
||||
|
||||
def uses_v2_manifest_paths(self) -> bool:
|
||||
"""
|
||||
Check if the table is using the new v2 manifest paths.
|
||||
@@ -3808,6 +3842,69 @@ class AsyncTable:
|
||||
Any attempt to use the table after it has been closed will raise an error."""
|
||||
return self._inner.close()
|
||||
|
||||
async def set_unenforced_primary_key(
|
||||
self, columns: Union[str, Iterable[str]]
|
||||
) -> None:
|
||||
"""Set the unenforced primary key for this table to the given
|
||||
ordered list of columns.
|
||||
|
||||
"Unenforced" means LanceDB does not check uniqueness on writes; the
|
||||
columns are recorded in the schema as the primary key so that
|
||||
features such as `merge_insert` can use them. Calling this again
|
||||
replaces any previously-set primary key.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
columns : str or Iterable[str]
|
||||
Either a single column name (single-column key) or an ordered
|
||||
iterable of column names (composite key). Each column dtype
|
||||
must be one of: int32, int64, utf8, large_utf8, binary,
|
||||
large_binary, fixed_size_binary.
|
||||
"""
|
||||
if isinstance(columns, str):
|
||||
columns = [columns]
|
||||
else:
|
||||
columns = list(columns)
|
||||
await self._inner.set_unenforced_primary_key(columns)
|
||||
|
||||
async def set_lsm_write_spec(self, spec: "LsmWriteSpec") -> None:
|
||||
"""Install an LsmWriteSpec on this table.
|
||||
|
||||
The spec selects Lance's MemWAL LSM-style write path for future
|
||||
`merge_insert` calls. ``LsmWriteSpec`` chooses one of three sharding
|
||||
strategies:
|
||||
|
||||
- ``LsmWriteSpec.bucket(column, num_buckets)`` — hash-bucket writes by
|
||||
the single-column unenforced primary key.
|
||||
- ``LsmWriteSpec.identity(column)`` — shard by the raw value of a
|
||||
scalar column.
|
||||
- ``LsmWriteSpec.unsharded()`` — route every write to a single shard.
|
||||
|
||||
All variants require the table to have an unenforced primary key set
|
||||
via [`set_unenforced_primary_key`]; bucket sharding additionally
|
||||
requires it to be the single column being bucketed.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
spec : LsmWriteSpec
|
||||
The sharding spec to install.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from lancedb._lancedb import LsmWriteSpec
|
||||
>>> # table.set_unenforced_primary_key("id")
|
||||
>>> # table.set_lsm_write_spec(LsmWriteSpec.bucket("id", 16))
|
||||
"""
|
||||
await self._inner.set_lsm_write_spec(spec)
|
||||
|
||||
async def unset_lsm_write_spec(self) -> None:
|
||||
"""Remove the LsmWriteSpec from this table.
|
||||
|
||||
Reverts to the standard `merge_insert` write path. Errors if no spec
|
||||
is currently set.
|
||||
"""
|
||||
await self._inner.unset_lsm_write_spec()
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
"""The name of the table."""
|
||||
@@ -3866,14 +3963,39 @@ class AsyncTable:
|
||||
"""
|
||||
return AsyncQuery(self._inner.query())
|
||||
|
||||
async def to_pandas(self) -> "pd.DataFrame":
|
||||
async def _to_lance(self, **kwargs) -> lance.LanceDataset:
|
||||
try:
|
||||
import lance
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"The lance library is required to use this function. "
|
||||
"Please install with `pip install pylance`."
|
||||
)
|
||||
|
||||
return lance.dataset(
|
||||
await self.uri(),
|
||||
version=await self.version(),
|
||||
storage_options=await self.latest_storage_options(),
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
async def to_pandas(self, blob_mode: BlobMode = "lazy", **kwargs) -> "pd.DataFrame":
|
||||
"""Return the table as a pandas DataFrame.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
blob_mode: str, default "lazy"
|
||||
Controls how Lance blob columns are returned.
|
||||
**kwargs
|
||||
Forwarded to PyArrow / Lance pandas conversion.
|
||||
|
||||
Returns
|
||||
-------
|
||||
pd.DataFrame
|
||||
"""
|
||||
return (await self.to_arrow()).to_pandas()
|
||||
if blob_mode == "lazy":
|
||||
return (await self.to_arrow()).to_pandas(**kwargs)
|
||||
return (await self._to_lance()).to_pandas(blob_mode=blob_mode, **kwargs)
|
||||
|
||||
async def to_arrow(self) -> pa.Table:
|
||||
"""Return the table as a pyarrow Table.
|
||||
@@ -4512,6 +4634,8 @@ class AsyncTable:
|
||||
async_query = async_query.fast_search()
|
||||
if query.with_row_id:
|
||||
async_query = async_query.with_row_id()
|
||||
if query.order_by:
|
||||
async_query = async_query.order_by(query.order_by)
|
||||
|
||||
if query.vector:
|
||||
async_query = async_query.nearest_to(query.vector).distance_range(
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
segmenter:
|
||||
mode: "normal"
|
||||
dictionary:
|
||||
path: "./python/tests/models/lindera/ipadic/main"
|
||||
dictionary: "./python/tests/models/lindera/ipadic/main"
|
||||
|
||||
Binary file not shown.
@@ -914,6 +914,29 @@ def test_local_namespace_operations(tmp_path):
|
||||
assert db.list_namespaces().namespaces == []
|
||||
|
||||
|
||||
def test_create_namespace_invalid_mode_raises(tmp_path):
|
||||
"""Unrecognized create namespace modes raise a clear error."""
|
||||
db = lancedb.connect(tmp_path)
|
||||
with pytest.raises(ValueError, match="Invalid create namespace mode"):
|
||||
db.create_namespace(["child"], mode="frobnicate")
|
||||
|
||||
|
||||
def test_drop_namespace_invalid_mode_raises(tmp_path):
|
||||
"""Unrecognized drop namespace modes raise a clear error."""
|
||||
db = lancedb.connect(tmp_path)
|
||||
db.create_namespace(["child"])
|
||||
with pytest.raises(ValueError, match="Invalid drop namespace mode"):
|
||||
db.drop_namespace(["child"], mode="frobnicate")
|
||||
|
||||
|
||||
def test_drop_namespace_invalid_behavior_raises(tmp_path):
|
||||
"""Unrecognized drop namespace behaviors raise a clear error."""
|
||||
db = lancedb.connect(tmp_path)
|
||||
db.create_namespace(["child"])
|
||||
with pytest.raises(ValueError, match="Invalid drop namespace behavior"):
|
||||
db.drop_namespace(["child"], behavior="frobnicate")
|
||||
|
||||
|
||||
def test_clone_table_latest_version(tmp_path):
|
||||
"""Test cloning a table with the latest version (default behavior)"""
|
||||
import os
|
||||
|
||||
@@ -29,6 +29,7 @@ from lancedb.query import (
|
||||
MultiMatchQuery,
|
||||
PhraseQuery,
|
||||
BooleanQuery,
|
||||
ColumnOrdering,
|
||||
Occur,
|
||||
LanceFtsQueryBuilder,
|
||||
)
|
||||
@@ -116,8 +117,7 @@ def lindera_ipadic(language_model_home):
|
||||
config_path.write_text(
|
||||
"segmenter:\n"
|
||||
' mode: "normal"\n'
|
||||
" dictionary:\n"
|
||||
f' path: "{extracted_model.resolve().as_posix()}"\n',
|
||||
f' dictionary: "{extracted_model.resolve().as_posix()}"\n',
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
@@ -500,6 +500,36 @@ async def test_search_fts_specify_column_async(async_table):
|
||||
pass
|
||||
|
||||
|
||||
def test_search_order_by_descending(table):
|
||||
table.create_fts_index("text")
|
||||
rows = (
|
||||
table.search("puppy")
|
||||
.order_by([ColumnOrdering(column_name="count", ascending=False)])
|
||||
.limit(20)
|
||||
.select(["text", "count"])
|
||||
.to_list()
|
||||
)
|
||||
|
||||
for r in rows:
|
||||
assert "puppy" in r["text"]
|
||||
assert sorted(rows, key=lambda x: x["count"], reverse=True) == rows
|
||||
|
||||
|
||||
def test_search_order_by_ascending(table):
|
||||
table.create_fts_index("text")
|
||||
rows = (
|
||||
table.search("puppy")
|
||||
.order_by([ColumnOrdering(column_name="count", ascending=True)])
|
||||
.limit(20)
|
||||
.select(["text", "count"])
|
||||
.to_list()
|
||||
)
|
||||
|
||||
for r in rows:
|
||||
assert "puppy" in r["text"]
|
||||
assert sorted(rows, key=lambda x: x["count"]) == rows
|
||||
|
||||
|
||||
def test_create_index_from_table(tmp_path, table):
|
||||
table.create_fts_index("text")
|
||||
df = table.search("puppy").limit(5).select(["text"]).to_pandas()
|
||||
@@ -533,8 +563,19 @@ def test_create_index_multiple_columns(tmp_path, table):
|
||||
|
||||
|
||||
def test_nested_schema(tmp_path, table):
|
||||
with pytest.raises(ValueError, match="top-level fields"):
|
||||
table.create_fts_index("nested.text")
|
||||
table.create_fts_index("nested.text")
|
||||
indices = table.list_indices()
|
||||
assert len(indices) == 1
|
||||
assert indices[0].index_type == "FTS"
|
||||
assert indices[0].columns == ["nested.text"]
|
||||
|
||||
results = (
|
||||
table.search("puppy", query_type="fts", fts_columns="nested.text")
|
||||
.limit(5)
|
||||
.to_list()
|
||||
)
|
||||
assert len(results) > 0
|
||||
assert all("puppy" in row["nested"]["text"] for row in results)
|
||||
|
||||
|
||||
def test_search_index_with_filter(table):
|
||||
|
||||
149
python/python/tests/test_lsm_write_spec.py
Normal file
149
python/python/tests/test_lsm_write_spec.py
Normal file
@@ -0,0 +1,149 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
"""Tests for installing and clearing an LsmWriteSpec via
|
||||
`Table.set_lsm_write_spec` / `Table.unset_lsm_write_spec`.
|
||||
"""
|
||||
|
||||
from datetime import timedelta
|
||||
|
||||
import lancedb
|
||||
import pyarrow as pa
|
||||
import pytest
|
||||
from lancedb._lancedb import LsmWriteSpec
|
||||
|
||||
SCHEMA = pa.schema(
|
||||
[
|
||||
pa.field("id", pa.utf8(), nullable=False),
|
||||
pa.field("v", pa.int32(), nullable=False),
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def _batch(ids, vs):
|
||||
return pa.RecordBatch.from_arrays(
|
||||
[pa.array(ids, type=pa.utf8()), pa.array(vs, type=pa.int32())],
|
||||
schema=SCHEMA,
|
||||
)
|
||||
|
||||
|
||||
def _reader(ids, vs):
|
||||
return pa.RecordBatchReader.from_batches(SCHEMA, [_batch(ids, vs)])
|
||||
|
||||
|
||||
def _make_table(tmp_path):
|
||||
db = lancedb.connect(tmp_path, read_consistency_interval=timedelta(seconds=0))
|
||||
table = db.create_table("t", _reader(["seed"], [0]))
|
||||
return db, table
|
||||
|
||||
|
||||
def test_set_lsm_write_spec_validates(tmp_path):
|
||||
_db, table = _make_table(tmp_path)
|
||||
|
||||
# No PK set yet.
|
||||
with pytest.raises(Exception, match="primary key"):
|
||||
table.set_lsm_write_spec(LsmWriteSpec.bucket("id", 4))
|
||||
|
||||
table.set_unenforced_primary_key("id")
|
||||
|
||||
# Column mismatch.
|
||||
with pytest.raises(Exception, match="match"):
|
||||
table.set_lsm_write_spec(LsmWriteSpec.bucket("v", 4))
|
||||
|
||||
# Out-of-range num_buckets.
|
||||
with pytest.raises(Exception, match="num_buckets"):
|
||||
table.set_lsm_write_spec(LsmWriteSpec.bucket("id", 0))
|
||||
with pytest.raises(Exception, match="num_buckets"):
|
||||
table.set_lsm_write_spec(LsmWriteSpec.bucket("id", 1025))
|
||||
|
||||
# Happy path then mutation rejected.
|
||||
table.set_lsm_write_spec(LsmWriteSpec.bucket("id", 4))
|
||||
with pytest.raises(Exception, match="mutation"):
|
||||
table.set_lsm_write_spec(LsmWriteSpec.bucket("id", 8))
|
||||
|
||||
|
||||
def test_unset_lsm_write_spec(tmp_path):
|
||||
_db, table = _make_table(tmp_path)
|
||||
|
||||
# unset errors when no spec is set.
|
||||
with pytest.raises(Exception, match="no LSM write spec"):
|
||||
table.unset_lsm_write_spec()
|
||||
|
||||
# Install a spec, then remove it; afterwards a fresh spec can be set.
|
||||
table.set_unenforced_primary_key("id")
|
||||
table.set_lsm_write_spec(LsmWriteSpec.bucket("id", 4))
|
||||
table.unset_lsm_write_spec()
|
||||
# A second unset errors — there is no spec left to remove.
|
||||
with pytest.raises(Exception, match="no LSM write spec"):
|
||||
table.unset_lsm_write_spec()
|
||||
table.set_lsm_write_spec(LsmWriteSpec.bucket("id", 8))
|
||||
|
||||
|
||||
def test_set_unsharded_spec(tmp_path):
|
||||
_db, table = _make_table(tmp_path)
|
||||
# Lance MemWAL still requires a primary key on the dataset; Unsharded
|
||||
# just skips per-row hashing.
|
||||
table.set_unenforced_primary_key("id")
|
||||
table.set_lsm_write_spec(LsmWriteSpec.unsharded())
|
||||
table.unset_lsm_write_spec()
|
||||
|
||||
|
||||
def test_lsm_write_spec_repr():
|
||||
s = LsmWriteSpec.bucket("id", 4)
|
||||
assert s.spec_type == "bucket"
|
||||
assert s.column == "id"
|
||||
assert s.num_buckets == 4
|
||||
assert s.maintained_indexes == []
|
||||
assert "bucket" in repr(s)
|
||||
assert "id" in repr(s)
|
||||
assert "4" in repr(s)
|
||||
|
||||
u = LsmWriteSpec.unsharded()
|
||||
assert u.spec_type == "unsharded"
|
||||
assert u.column is None
|
||||
assert u.num_buckets is None
|
||||
assert "unsharded" in repr(u)
|
||||
|
||||
|
||||
def test_lsm_write_spec_with_maintained_indexes():
|
||||
s = LsmWriteSpec.bucket("id", 4).with_maintained_indexes(["idx_a", "idx_b"])
|
||||
assert s.maintained_indexes == ["idx_a", "idx_b"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_set_unset_lsm_write_spec(tmp_path):
|
||||
db = await lancedb.connect_async(
|
||||
tmp_path, read_consistency_interval=timedelta(seconds=0)
|
||||
)
|
||||
table = await db.create_table(
|
||||
"t",
|
||||
pa.RecordBatchReader.from_batches(SCHEMA, [_batch(["seed"], [0])]),
|
||||
)
|
||||
|
||||
await table.set_unenforced_primary_key("id")
|
||||
await table.set_lsm_write_spec(LsmWriteSpec.bucket("id", 4))
|
||||
await table.unset_lsm_write_spec()
|
||||
# A second unset errors.
|
||||
with pytest.raises(Exception, match="no LSM write spec"):
|
||||
await table.unset_lsm_write_spec()
|
||||
|
||||
|
||||
def test_set_identity_spec(tmp_path):
|
||||
_db, table = _make_table(tmp_path)
|
||||
# Identity sharding still requires an unenforced primary key on the
|
||||
# table; it shards by the raw value of the given column.
|
||||
table.set_unenforced_primary_key("id")
|
||||
table.set_lsm_write_spec(LsmWriteSpec.identity("v"))
|
||||
table.unset_lsm_write_spec()
|
||||
|
||||
|
||||
def test_lsm_write_spec_identity_and_writer_config_defaults():
|
||||
s = LsmWriteSpec.identity("v")
|
||||
assert s.spec_type == "identity"
|
||||
assert s.column == "v"
|
||||
assert s.num_buckets is None
|
||||
assert "identity" in repr(s)
|
||||
|
||||
s = s.with_writer_config_defaults({"durable_write": "false"})
|
||||
assert s.writer_config_defaults == {"durable_write": "false"}
|
||||
assert "durable_write" in repr(s)
|
||||
@@ -1080,3 +1080,29 @@ def test_getitems_invalid_offset(some_permutation: Permutation):
|
||||
"""Test __getitems__ with an out-of-range offset raises an error."""
|
||||
with pytest.raises(Exception):
|
||||
some_permutation.__getitems__([999999])
|
||||
|
||||
|
||||
def test_take_offsets(some_permutation: Permutation):
|
||||
result = some_permutation.take_offsets([0, 1, 2])
|
||||
|
||||
assert isinstance(result, list)
|
||||
assert "id" in result[0]
|
||||
assert "value" in result[0]
|
||||
assert len(result) == 3
|
||||
|
||||
|
||||
def test_take_offsets_empty_identity_permutation(mem_db):
|
||||
tbl = mem_db.create_table(
|
||||
"test_table", pa.table({"id": range(10), "value": range(10)})
|
||||
)
|
||||
permutation = Permutation.identity(tbl)
|
||||
|
||||
result = permutation.take_offsets([])
|
||||
|
||||
assert result == []
|
||||
|
||||
|
||||
def test_take_offsets_empty_permutation(some_permutation: Permutation):
|
||||
result = some_permutation.take_offsets([])
|
||||
|
||||
assert result == []
|
||||
|
||||
79
python/python/tests/test_primary_key.py
Normal file
79
python/python/tests/test_primary_key.py
Normal file
@@ -0,0 +1,79 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
"""Tests for Table.set_unenforced_primary_key."""
|
||||
|
||||
from datetime import timedelta
|
||||
|
||||
import lancedb
|
||||
import pyarrow as pa
|
||||
import pytest
|
||||
|
||||
|
||||
def _empty_table(path, schema):
|
||||
db = lancedb.connect(path, read_consistency_interval=timedelta(seconds=0))
|
||||
return db.create_table("t", schema=schema)
|
||||
|
||||
|
||||
def test_set_unenforced_primary_key_accepts_string_or_one_element_list(tmp_path):
|
||||
schema = pa.schema([pa.field("id", pa.int64(), nullable=False)])
|
||||
|
||||
# Bare string.
|
||||
table = _empty_table(tmp_path / "s", schema)
|
||||
table.set_unenforced_primary_key("id")
|
||||
|
||||
# One-element list.
|
||||
table = _empty_table(tmp_path / "l", schema)
|
||||
table.set_unenforced_primary_key(["id"])
|
||||
|
||||
|
||||
def test_set_unenforced_primary_key_rejects_compound_and_empty(tmp_path):
|
||||
table = _empty_table(
|
||||
tmp_path,
|
||||
pa.schema(
|
||||
[
|
||||
pa.field("a", pa.utf8(), nullable=False),
|
||||
pa.field("b", pa.int64(), nullable=False),
|
||||
]
|
||||
),
|
||||
)
|
||||
# Compound keys are not supported.
|
||||
with pytest.raises(Exception, match="compound"):
|
||||
table.set_unenforced_primary_key(["a", "b"])
|
||||
# Empty input.
|
||||
with pytest.raises(Exception, match="required"):
|
||||
table.set_unenforced_primary_key([])
|
||||
|
||||
|
||||
def test_set_unenforced_primary_key_is_immutable(tmp_path):
|
||||
table = _empty_table(
|
||||
tmp_path,
|
||||
pa.schema(
|
||||
[
|
||||
pa.field("a", pa.utf8(), nullable=False),
|
||||
pa.field("b", pa.int64(), nullable=False),
|
||||
]
|
||||
),
|
||||
)
|
||||
table.set_unenforced_primary_key("a")
|
||||
# The primary key cannot be changed or re-set once installed.
|
||||
with pytest.raises(Exception, match="already set"):
|
||||
table.set_unenforced_primary_key("b")
|
||||
with pytest.raises(Exception, match="already set"):
|
||||
table.set_unenforced_primary_key("a")
|
||||
|
||||
|
||||
def test_set_unenforced_primary_key_validates(tmp_path):
|
||||
table = _empty_table(
|
||||
tmp_path / "t", pa.schema([pa.field("id", pa.utf8(), nullable=False)])
|
||||
)
|
||||
# Unknown column.
|
||||
with pytest.raises(Exception, match="not found"):
|
||||
table.set_unenforced_primary_key("nonexistent")
|
||||
|
||||
# Unsupported dtype (Float32 not in the supported set).
|
||||
bad = _empty_table(
|
||||
tmp_path / "bad", pa.schema([pa.field("id", pa.float32(), nullable=False)])
|
||||
)
|
||||
with pytest.raises(Exception, match="not supported"):
|
||||
bad.set_unenforced_primary_key("id")
|
||||
@@ -25,6 +25,7 @@ from lancedb.query import (
|
||||
AsyncHybridQuery,
|
||||
AsyncQueryBase,
|
||||
AsyncVectorQuery,
|
||||
ColumnOrdering,
|
||||
LanceVectorQueryBuilder,
|
||||
MatchQuery,
|
||||
PhraseQuery,
|
||||
@@ -164,6 +165,87 @@ def test_offset(table):
|
||||
assert len(results_with_offset.to_pandas()) == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_query_to_pandas_kwargs(table, table_async):
|
||||
sync_df = (
|
||||
LanceVectorQueryBuilder(table, [0, 0], "vector")
|
||||
.select(["id"])
|
||||
.limit(1)
|
||||
.to_pandas(split_blocks=True)
|
||||
)
|
||||
assert sync_df["id"].tolist() == [1]
|
||||
|
||||
async_df = await (
|
||||
table_async.query().select(["id"]).limit(2).to_pandas(split_blocks=True)
|
||||
)
|
||||
assert async_df["id"].tolist() == [1, 2]
|
||||
|
||||
|
||||
def test_order_by_plain_query(mem_db):
|
||||
table = mem_db.create_table(
|
||||
"test_order_by",
|
||||
pa.table(
|
||||
{
|
||||
"group": [1, 1, 1, 2],
|
||||
"score": [None, 1.0, 1.0, 0.5],
|
||||
"name": ["z", "b", "a", "c"],
|
||||
}
|
||||
),
|
||||
)
|
||||
|
||||
res = (
|
||||
table.search()
|
||||
.order_by(
|
||||
[
|
||||
ColumnOrdering(column_name="group", ascending=True, nulls_first=False),
|
||||
ColumnOrdering(column_name="score", ascending=True, nulls_first=True),
|
||||
ColumnOrdering(column_name="name", ascending=True, nulls_first=False),
|
||||
]
|
||||
)
|
||||
.to_arrow()
|
||||
)
|
||||
|
||||
assert res.select(["group", "score", "name"]).to_pylist() == [
|
||||
{"group": 1, "score": None, "name": "z"},
|
||||
{"group": 1, "score": 1.0, "name": "a"},
|
||||
{"group": 1, "score": 1.0, "name": "b"},
|
||||
{"group": 2, "score": 0.5, "name": "c"},
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_order_by_async_query(mem_db_async: AsyncConnection):
|
||||
table = await mem_db_async.create_table(
|
||||
"test_order_by_async",
|
||||
pa.table(
|
||||
{
|
||||
"group": [1, 1, 1, 2],
|
||||
"score": [None, 1.0, 1.0, 0.5],
|
||||
"name": ["z", "b", "a", "c"],
|
||||
}
|
||||
),
|
||||
)
|
||||
|
||||
res = await (
|
||||
table.query()
|
||||
.order_by(
|
||||
[
|
||||
ColumnOrdering(column_name="group", ascending=True, nulls_first=False),
|
||||
ColumnOrdering(column_name="score", ascending=True, nulls_first=True),
|
||||
ColumnOrdering(column_name="name", ascending=True, nulls_first=False),
|
||||
]
|
||||
)
|
||||
.to_arrow()
|
||||
)
|
||||
|
||||
assert res.select(["group", "score", "name"]).to_pylist() == [
|
||||
{"group": 1, "score": None, "name": "z"},
|
||||
{"group": 1, "score": 1.0, "name": "a"},
|
||||
{"group": 1, "score": 1.0, "name": "b"},
|
||||
{"group": 2, "score": 0.5, "name": "c"},
|
||||
]
|
||||
|
||||
|
||||
def test_query_builder(table):
|
||||
rs = (
|
||||
LanceVectorQueryBuilder(table, [0, 0], "vector")
|
||||
|
||||
@@ -16,6 +16,7 @@ from packaging.version import Version
|
||||
|
||||
import lancedb
|
||||
from lancedb.conftest import MockTextEmbeddingFunction
|
||||
from lancedb.query import ColumnOrdering
|
||||
from lancedb.remote import ClientConfig
|
||||
from lancedb.remote.errors import HttpError, RetryError
|
||||
import pytest
|
||||
@@ -268,6 +269,25 @@ def test_table_unimplemented_functions():
|
||||
table.to_pandas()
|
||||
|
||||
|
||||
def test_table_to_pandas_not_supported():
|
||||
def handler(request):
|
||||
if request.path == "/v1/table/test/create/?mode=create":
|
||||
request.send_response(200)
|
||||
request.send_header("Content-Type", "application/json")
|
||||
request.end_headers()
|
||||
request.wfile.write(b"{}")
|
||||
else:
|
||||
request.send_response(404)
|
||||
request.end_headers()
|
||||
|
||||
with mock_lancedb_connection(handler) as db:
|
||||
table = db.create_table("test", [{"id": 1}])
|
||||
with pytest.raises(NotImplementedError):
|
||||
table.to_pandas()
|
||||
with pytest.raises(NotImplementedError):
|
||||
table.to_pandas(blob_mode="bytes", split_blocks=True)
|
||||
|
||||
|
||||
def test_table_add_in_threadpool():
|
||||
def handler(request):
|
||||
if request.path == "/v1/table/test/insert/":
|
||||
@@ -660,6 +680,18 @@ def test_query_sync_maximal():
|
||||
"ef": None,
|
||||
"filter": "id > 0",
|
||||
"columns": ["id", "name"],
|
||||
"order_by": [
|
||||
{
|
||||
"column_name": "score",
|
||||
"ascending": False,
|
||||
"nulls_first": True,
|
||||
},
|
||||
{
|
||||
"column_name": "id",
|
||||
"ascending": True,
|
||||
"nulls_first": False,
|
||||
},
|
||||
],
|
||||
"vector_column": "vector2",
|
||||
"fast_search": True,
|
||||
"with_row_id": True,
|
||||
@@ -677,6 +709,14 @@ def test_query_sync_maximal():
|
||||
.refine_factor(10)
|
||||
.nprobes(5)
|
||||
.where("id > 0", prefilter=True)
|
||||
.order_by(
|
||||
[
|
||||
ColumnOrdering(
|
||||
column_name="score", ascending=False, nulls_first=True
|
||||
),
|
||||
ColumnOrdering(column_name="id", ascending=True, nulls_first=False),
|
||||
]
|
||||
)
|
||||
.with_row_id(True)
|
||||
.select(["id", "name"])
|
||||
.to_list()
|
||||
|
||||
@@ -47,6 +47,85 @@ def test_basic(mem_db: DBConnection):
|
||||
assert table.to_arrow() == expected_data
|
||||
|
||||
|
||||
def test_table_to_pandas_default_matches_arrow(tmp_db: DBConnection):
|
||||
pd = pytest.importorskip("pandas")
|
||||
data = pa.table({"id": [1, 2], "text": ["one", "two"]})
|
||||
table = tmp_db.create_table("test_to_pandas_old_call", data=data)
|
||||
|
||||
expected = data.to_pandas()
|
||||
pd.testing.assert_frame_equal(table.to_pandas(), expected)
|
||||
|
||||
|
||||
def test_table_to_pandas_blob_bytes(tmp_db: DBConnection):
|
||||
pytest.importorskip("lance")
|
||||
data = pa.table(
|
||||
{
|
||||
"id": pa.array([1, 2], pa.int64()),
|
||||
"blob": pa.array([b"hello", b"world"], pa.large_binary()),
|
||||
},
|
||||
schema=pa.schema(
|
||||
[
|
||||
pa.field("id", pa.int64()),
|
||||
pa.field(
|
||||
"blob", pa.large_binary(), metadata={"lance-encoding:blob": "true"}
|
||||
),
|
||||
]
|
||||
),
|
||||
)
|
||||
table = tmp_db.create_table("test_to_pandas_blob_bytes", data=data)
|
||||
|
||||
df = table.to_pandas(blob_mode="bytes")
|
||||
|
||||
assert df["blob"].tolist() == [b"hello", b"world"]
|
||||
|
||||
|
||||
def test_table_to_pandas_kwargs(tmp_db: DBConnection):
|
||||
pd = pytest.importorskip("pandas")
|
||||
data = pa.table({"id": pa.array([1, 2], pa.int64())})
|
||||
table = tmp_db.create_table("test_to_pandas_kwargs", data=data)
|
||||
|
||||
df = table.to_pandas(types_mapper=pd.ArrowDtype)
|
||||
|
||||
assert str(df["id"].dtype) == "int64[pyarrow]"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_table_to_pandas_blob_bytes(tmp_db_async: AsyncConnection):
|
||||
pytest.importorskip("lance")
|
||||
data = pa.table(
|
||||
{
|
||||
"id": pa.array([1, 2], pa.int64()),
|
||||
"blob": pa.array([b"hello", b"world"], pa.large_binary()),
|
||||
},
|
||||
schema=pa.schema(
|
||||
[
|
||||
pa.field("id", pa.int64()),
|
||||
pa.field(
|
||||
"blob", pa.large_binary(), metadata={"lance-encoding:blob": "true"}
|
||||
),
|
||||
]
|
||||
),
|
||||
)
|
||||
table = await tmp_db_async.create_table(
|
||||
"test_async_to_pandas_blob_bytes", data=data
|
||||
)
|
||||
|
||||
df = await table.to_pandas(blob_mode="bytes")
|
||||
|
||||
assert df["blob"].tolist() == [b"hello", b"world"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_table_to_pandas_kwargs(tmp_db_async: AsyncConnection):
|
||||
pd = pytest.importorskip("pandas")
|
||||
data = pa.table({"id": pa.array([1, 2], pa.int64())})
|
||||
table = await tmp_db_async.create_table("test_async_to_pandas_kwargs", data=data)
|
||||
|
||||
df = await table.to_pandas(types_mapper=pd.ArrowDtype)
|
||||
|
||||
assert str(df["id"].dtype) == "int64[pyarrow]"
|
||||
|
||||
|
||||
def test_create_table_infers_large_int_vectors(mem_db: DBConnection):
|
||||
data = [{"vector": [0, 300]}]
|
||||
|
||||
@@ -1811,6 +1890,55 @@ def test_create_scalar_index(mem_db: DBConnection):
|
||||
assert scalar_index.name == "custom_y_index"
|
||||
|
||||
|
||||
def test_create_index_nested_field_paths(mem_db: DBConnection):
|
||||
schema = pa.schema(
|
||||
[
|
||||
pa.field("metadata", pa.struct([pa.field("user_id", pa.int32())])),
|
||||
pa.field(
|
||||
"image",
|
||||
pa.struct([pa.field("embedding", pa.list_(pa.float32(), 2))]),
|
||||
),
|
||||
]
|
||||
)
|
||||
data = pa.Table.from_pylist(
|
||||
[
|
||||
{
|
||||
"metadata": {"user_id": i},
|
||||
"image": {"embedding": [float(i), float(i + 1)]},
|
||||
}
|
||||
for i in range(256)
|
||||
],
|
||||
schema=schema,
|
||||
)
|
||||
table = mem_db.create_table("nested_index_paths", data=data)
|
||||
|
||||
table.create_scalar_index("metadata.user_id", name="metadata_user_id_idx")
|
||||
table.create_index(
|
||||
vector_column_name="image.embedding",
|
||||
num_partitions=1,
|
||||
num_sub_vectors=1,
|
||||
name="image_embedding_idx",
|
||||
)
|
||||
|
||||
indices = sorted(table.list_indices(), key=lambda idx: idx.name)
|
||||
assert [(idx.name, idx.index_type, idx.columns) for idx in indices] == [
|
||||
("image_embedding_idx", "IvfPq", ["image.embedding"]),
|
||||
("metadata_user_id_idx", "BTree", ["metadata.user_id"]),
|
||||
]
|
||||
|
||||
vector_results = (
|
||||
table.search([0.0, 1.0], vector_column_name="image.embedding")
|
||||
.limit(1)
|
||||
.to_list()
|
||||
)
|
||||
assert len(vector_results) == 1
|
||||
assert vector_results[0]["metadata"]["user_id"] == 0
|
||||
|
||||
filtered_results = table.search().where("metadata.user_id = 42").limit(1).to_list()
|
||||
assert len(filtered_results) == 1
|
||||
assert filtered_results[0]["metadata"]["user_id"] == 42
|
||||
|
||||
|
||||
def test_empty_query(mem_db: DBConnection):
|
||||
table = mem_db.create_table(
|
||||
"my_table",
|
||||
|
||||
@@ -395,12 +395,17 @@ impl Connection {
|
||||
future_into_py(py, async move {
|
||||
use lance_namespace::models::CreateNamespaceRequest;
|
||||
// Mode is now a string field
|
||||
let mode_str = mode.and_then(|m| match m.to_lowercase().as_str() {
|
||||
"create" => Some("Create".to_string()),
|
||||
"exist_ok" => Some("ExistOk".to_string()),
|
||||
"overwrite" => Some("Overwrite".to_string()),
|
||||
_ => None,
|
||||
});
|
||||
let mode_str = mode
|
||||
.map(|m| match m.to_lowercase().as_str() {
|
||||
"create" => Ok("Create".to_string()),
|
||||
"exist_ok" => Ok("ExistOk".to_string()),
|
||||
"overwrite" => Ok("Overwrite".to_string()),
|
||||
_ => Err(PyValueError::new_err(format!(
|
||||
"Invalid mode {:?}: expected one of 'create', 'exist_ok', 'overwrite'",
|
||||
m
|
||||
))),
|
||||
})
|
||||
.transpose()?;
|
||||
let request = CreateNamespaceRequest {
|
||||
id: Some(namespace_path),
|
||||
mode: mode_str,
|
||||
@@ -428,16 +433,26 @@ impl Connection {
|
||||
future_into_py(py, async move {
|
||||
use lance_namespace::models::DropNamespaceRequest;
|
||||
// Mode and Behavior are now string fields
|
||||
let mode_str = mode.and_then(|m| match m.to_uppercase().as_str() {
|
||||
"SKIP" => Some("Skip".to_string()),
|
||||
"FAIL" => Some("Fail".to_string()),
|
||||
_ => None,
|
||||
});
|
||||
let behavior_str = behavior.and_then(|b| match b.to_uppercase().as_str() {
|
||||
"RESTRICT" => Some("Restrict".to_string()),
|
||||
"CASCADE" => Some("Cascade".to_string()),
|
||||
_ => None,
|
||||
});
|
||||
let mode_str = mode
|
||||
.map(|m| match m.to_uppercase().as_str() {
|
||||
"SKIP" => Ok("Skip".to_string()),
|
||||
"FAIL" => Ok("Fail".to_string()),
|
||||
_ => Err(PyValueError::new_err(format!(
|
||||
"Invalid mode {:?}: expected one of 'skip', 'fail'",
|
||||
m
|
||||
))),
|
||||
})
|
||||
.transpose()?;
|
||||
let behavior_str = behavior
|
||||
.map(|b| match b.to_uppercase().as_str() {
|
||||
"RESTRICT" => Ok("Restrict".to_string()),
|
||||
"CASCADE" => Ok("Cascade".to_string()),
|
||||
_ => Err(PyValueError::new_err(format!(
|
||||
"Invalid behavior {:?}: expected one of 'restrict', 'cascade'",
|
||||
b
|
||||
))),
|
||||
})
|
||||
.transpose()?;
|
||||
let request = DropNamespaceRequest {
|
||||
id: Some(namespace_path),
|
||||
mode: mode_str,
|
||||
|
||||
@@ -8,7 +8,9 @@
|
||||
//! DataFusion [`Expr`] nodes, bypassing SQL string parsing.
|
||||
|
||||
use arrow::{datatypes::DataType, pyarrow::PyArrowType};
|
||||
use datafusion_common::ScalarValue;
|
||||
use lancedb::expr::{DfExpr, col as ldb_col, contains, expr_cast, lit as df_lit, lower, upper};
|
||||
use pyo3::types::PyBytes;
|
||||
use pyo3::{Bound, PyAny, PyResult, exceptions::PyValueError, prelude::*, pyfunction};
|
||||
|
||||
/// A type-safe DataFusion expression.
|
||||
@@ -141,7 +143,7 @@ pub fn expr_col(name: &str) -> PyExpr {
|
||||
|
||||
/// Create a literal value expression.
|
||||
///
|
||||
/// Supported Python types: `bool`, `int`, `float`, `str`.
|
||||
/// Supported Python types: `bool`, `int`, `float`, `str`, `bytes`.
|
||||
#[pyfunction]
|
||||
pub fn expr_lit(value: Bound<'_, PyAny>) -> PyResult<PyExpr> {
|
||||
// bool must be checked before int because bool is a subclass of int in Python
|
||||
@@ -157,8 +159,12 @@ pub fn expr_lit(value: Bound<'_, PyAny>) -> PyResult<PyExpr> {
|
||||
if let Ok(s) = value.extract::<String>() {
|
||||
return Ok(PyExpr(df_lit(s)));
|
||||
}
|
||||
if value.is_instance_of::<PyBytes>() {
|
||||
let bytes = value.extract::<Vec<u8>>()?;
|
||||
return Ok(PyExpr(df_lit(ScalarValue::Binary(Some(bytes)))));
|
||||
}
|
||||
Err(PyValueError::new_err(format!(
|
||||
"unsupported literal type: {}. Supported: bool, int, float, str",
|
||||
"unsupported literal type: {}. Supported: bool, int, float, str, bytes",
|
||||
value.get_type().name()?
|
||||
)))
|
||||
}
|
||||
|
||||
@@ -15,8 +15,8 @@ use pyo3::{
|
||||
use query::{FTSQuery, HybridQuery, Query, VectorQuery};
|
||||
use session::Session;
|
||||
use table::{
|
||||
AddColumnsResult, AddResult, AlterColumnsResult, DeleteResult, DropColumnsResult, MergeResult,
|
||||
Table, UpdateResult,
|
||||
AddColumnsResult, AddResult, AlterColumnsResult, DeleteResult, DropColumnsResult, LsmWriteSpec,
|
||||
MergeResult, Table, UpdateResult,
|
||||
};
|
||||
|
||||
pub mod arrow;
|
||||
@@ -52,6 +52,7 @@ pub fn _lancedb(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||
m.add_class::<AlterColumnsResult>()?;
|
||||
m.add_class::<AddResult>()?;
|
||||
m.add_class::<MergeResult>()?;
|
||||
m.add_class::<LsmWriteSpec>()?;
|
||||
m.add_class::<DeleteResult>()?;
|
||||
m.add_class::<DropColumnsResult>()?;
|
||||
m.add_class::<UpdateResult>()?;
|
||||
|
||||
@@ -23,7 +23,7 @@ use lancedb::query::QueryBase;
|
||||
use lancedb::query::QueryExecutionOptions;
|
||||
use lancedb::query::QueryFilter;
|
||||
use lancedb::query::{
|
||||
ExecutableQuery, Query as LanceDbQuery, Select, TakeQuery as LanceDbTakeQuery,
|
||||
ColumnOrdering, ExecutableQuery, Query as LanceDbQuery, Select, TakeQuery as LanceDbTakeQuery,
|
||||
VectorQuery as LanceDbVectorQuery,
|
||||
};
|
||||
use lancedb::table::AnyQuery;
|
||||
@@ -207,6 +207,48 @@ impl<'py> IntoPyObject<'py> for PyLanceDB<FtsQuery> {
|
||||
#[derive(Clone)]
|
||||
pub struct PyQueryVectors(Vec<Arc<dyn Array>>);
|
||||
|
||||
#[derive(Clone, FromPyObject)]
|
||||
#[pyo3(from_item_all)]
|
||||
pub struct PyColumnOrdering {
|
||||
pub column_name: String,
|
||||
pub ascending: bool,
|
||||
pub nulls_first: bool,
|
||||
}
|
||||
|
||||
impl From<ColumnOrdering> for PyColumnOrdering {
|
||||
fn from(ordering: ColumnOrdering) -> Self {
|
||||
Self {
|
||||
column_name: ordering.column_name,
|
||||
ascending: ordering.ascending,
|
||||
nulls_first: ordering.nulls_first,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<PyColumnOrdering> for ColumnOrdering {
|
||||
fn from(ordering: PyColumnOrdering) -> Self {
|
||||
Self {
|
||||
column_name: ordering.column_name,
|
||||
ascending: ordering.ascending,
|
||||
nulls_first: ordering.nulls_first,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'py> IntoPyObject<'py> for PyColumnOrdering {
|
||||
type Target = PyDict;
|
||||
type Output = Bound<'py, Self::Target>;
|
||||
type Error = PyErr;
|
||||
|
||||
fn into_pyobject(self, py: pyo3::Python<'py>) -> PyResult<Self::Output> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("column_name", self.column_name)?;
|
||||
dict.set_item("ascending", self.ascending)?;
|
||||
dict.set_item("nulls_first", self.nulls_first)?;
|
||||
Ok(dict)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'py> IntoPyObject<'py> for PyQueryVectors {
|
||||
type Target = PyList;
|
||||
type Output = Bound<'py, Self::Target>;
|
||||
@@ -246,6 +288,7 @@ pub struct PyQueryRequest {
|
||||
pub bypass_vector_index: Option<bool>,
|
||||
pub postfilter: Option<bool>,
|
||||
pub norm: Option<String>,
|
||||
pub order_by: Option<Vec<PyColumnOrdering>>,
|
||||
}
|
||||
|
||||
impl From<AnyQuery> for PyQueryRequest {
|
||||
@@ -273,6 +316,9 @@ impl From<AnyQuery> for PyQueryRequest {
|
||||
bypass_vector_index: None,
|
||||
postfilter: None,
|
||||
norm: None,
|
||||
order_by: query_request
|
||||
.order_by
|
||||
.map(|order_by| order_by.into_iter().map(PyColumnOrdering::from).collect()),
|
||||
},
|
||||
AnyQuery::VectorQuery(vector_query) => Self {
|
||||
limit: vector_query.base.limit,
|
||||
@@ -297,6 +343,10 @@ impl From<AnyQuery> for PyQueryRequest {
|
||||
bypass_vector_index: Some(!vector_query.use_index),
|
||||
postfilter: Some(!vector_query.base.prefilter),
|
||||
norm: vector_query.base.norm.map(|n| n.to_string()),
|
||||
order_by: vector_query
|
||||
.base
|
||||
.order_by
|
||||
.map(|order_by| order_by.into_iter().map(PyColumnOrdering::from).collect()),
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -475,6 +525,13 @@ impl Query {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn order_by(&mut self, ordering: Option<Vec<PyColumnOrdering>>) -> PyResult<()> {
|
||||
let ordering =
|
||||
ordering.map(|ordering| ordering.into_iter().map(ColumnOrdering::from).collect());
|
||||
self.inner = self.inner.clone().order_by(ordering);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[pyo3(signature = ())]
|
||||
pub fn output_schema(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.inner.clone();
|
||||
@@ -647,6 +704,13 @@ impl FTSQuery {
|
||||
self.inner = self.inner.clone().offset(offset as usize);
|
||||
}
|
||||
|
||||
pub fn order_by(&mut self, ordering: Option<Vec<PyColumnOrdering>>) -> PyResult<()> {
|
||||
let ordering =
|
||||
ordering.map(|ordering| ordering.into_iter().map(ColumnOrdering::from).collect());
|
||||
self.inner = self.inner.clone().order_by(ordering);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn fast_search(&mut self) {
|
||||
self.inner = self.inner.clone().fast_search();
|
||||
}
|
||||
@@ -782,6 +846,13 @@ impl VectorQuery {
|
||||
self.inner = self.inner.clone().offset(offset as usize);
|
||||
}
|
||||
|
||||
pub fn order_by(&mut self, ordering: Option<Vec<PyColumnOrdering>>) -> PyResult<()> {
|
||||
let ordering =
|
||||
ordering.map(|ordering| ordering.into_iter().map(ColumnOrdering::from).collect());
|
||||
self.inner = self.inner.clone().order_by(ordering);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn fast_search(&mut self) {
|
||||
self.inner = self.inner.clone().fast_search();
|
||||
}
|
||||
@@ -954,6 +1025,12 @@ impl HybridQuery {
|
||||
self.inner_fts.offset(offset);
|
||||
}
|
||||
|
||||
pub fn order_by(&mut self, ordering: Option<Vec<PyColumnOrdering>>) -> PyResult<()> {
|
||||
self.inner_vec.order_by(ordering.clone())?;
|
||||
self.inner_fts.order_by(ordering)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn fast_search(&mut self) {
|
||||
self.inner_vec.fast_search();
|
||||
self.inner_fts.fast_search();
|
||||
|
||||
@@ -171,6 +171,141 @@ impl From<lancedb::table::MergeResult> for MergeResult {
|
||||
}
|
||||
}
|
||||
|
||||
/// Specification selecting Lance's MemWAL LSM-style write path for
|
||||
/// `merge_insert`.
|
||||
///
|
||||
/// Constructed via the `bucket(...)`, `identity(...)`, or `unsharded()`
|
||||
/// classmethods, then optionally chain `with_maintained_indexes(...)` and
|
||||
/// `with_writer_config_defaults(...)`.
|
||||
#[pyclass(from_py_object)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct LsmWriteSpec {
|
||||
inner: lancedb::table::LsmWriteSpec,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl LsmWriteSpec {
|
||||
/// Hash-bucket sharding by the unenforced primary key column.
|
||||
#[staticmethod]
|
||||
pub fn bucket(column: String, num_buckets: u32) -> Self {
|
||||
Self {
|
||||
inner: lancedb::table::LsmWriteSpec::bucket(column, num_buckets),
|
||||
}
|
||||
}
|
||||
|
||||
/// Identity sharding — shard by the raw value of `column`.
|
||||
#[staticmethod]
|
||||
pub fn identity(column: String) -> Self {
|
||||
Self {
|
||||
inner: lancedb::table::LsmWriteSpec::identity(column),
|
||||
}
|
||||
}
|
||||
|
||||
/// No sharding — every `merge_insert` call writes to a single
|
||||
/// MemWAL shard.
|
||||
#[staticmethod]
|
||||
pub fn unsharded() -> Self {
|
||||
Self {
|
||||
inner: lancedb::table::LsmWriteSpec::unsharded(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Replace the list of indexes the MemWAL should keep up to date as
|
||||
/// rows are appended. Each name must reference an index that
|
||||
/// already exists on the table at the time `set_lsm_write_spec`
|
||||
/// is called.
|
||||
pub fn with_maintained_indexes(&self, indexes: Vec<String>) -> Self {
|
||||
Self {
|
||||
inner: self.inner.clone().with_maintained_indexes(indexes),
|
||||
}
|
||||
}
|
||||
|
||||
/// Replace the default `ShardWriter` configuration recorded in the
|
||||
/// MemWAL index, so every writer starts from the same defaults.
|
||||
pub fn with_writer_config_defaults(&self, defaults: HashMap<String, String>) -> Self {
|
||||
Self {
|
||||
inner: self.inner.clone().with_writer_config_defaults(defaults),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn __repr__(&self) -> String {
|
||||
match &self.inner {
|
||||
lancedb::table::LsmWriteSpec::Bucket {
|
||||
column,
|
||||
num_buckets,
|
||||
maintained_indexes,
|
||||
writer_config_defaults,
|
||||
} => format!(
|
||||
"LsmWriteSpec.bucket(column={:?}, num_buckets={}, maintained_indexes={:?}, writer_config_defaults={:?})",
|
||||
column, num_buckets, maintained_indexes, writer_config_defaults,
|
||||
),
|
||||
lancedb::table::LsmWriteSpec::Identity {
|
||||
column,
|
||||
maintained_indexes,
|
||||
writer_config_defaults,
|
||||
} => format!(
|
||||
"LsmWriteSpec.identity(column={:?}, maintained_indexes={:?}, writer_config_defaults={:?})",
|
||||
column, maintained_indexes, writer_config_defaults,
|
||||
),
|
||||
lancedb::table::LsmWriteSpec::Unsharded {
|
||||
maintained_indexes,
|
||||
writer_config_defaults,
|
||||
} => format!(
|
||||
"LsmWriteSpec.unsharded(maintained_indexes={:?}, writer_config_defaults={:?})",
|
||||
maintained_indexes, writer_config_defaults,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
/// Discriminator string identifying the variant ("bucket", "identity",
|
||||
/// or "unsharded").
|
||||
#[getter]
|
||||
pub fn spec_type(&self) -> &'static str {
|
||||
match &self.inner {
|
||||
lancedb::table::LsmWriteSpec::Bucket { .. } => "bucket",
|
||||
lancedb::table::LsmWriteSpec::Identity { .. } => "identity",
|
||||
lancedb::table::LsmWriteSpec::Unsharded { .. } => "unsharded",
|
||||
}
|
||||
}
|
||||
|
||||
/// Bucket and identity variants: the sharding column. `None` for unsharded.
|
||||
#[getter]
|
||||
pub fn column(&self) -> Option<String> {
|
||||
match &self.inner {
|
||||
lancedb::table::LsmWriteSpec::Bucket { column, .. }
|
||||
| lancedb::table::LsmWriteSpec::Identity { column, .. } => Some(column.clone()),
|
||||
lancedb::table::LsmWriteSpec::Unsharded { .. } => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Bucket variant only: the number of buckets.
|
||||
#[getter]
|
||||
pub fn num_buckets(&self) -> Option<u32> {
|
||||
match &self.inner {
|
||||
lancedb::table::LsmWriteSpec::Bucket { num_buckets, .. } => Some(*num_buckets),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Names of indexes the MemWAL should keep up to date during writes.
|
||||
#[getter]
|
||||
pub fn maintained_indexes(&self) -> Vec<String> {
|
||||
self.inner.maintained_indexes().to_vec()
|
||||
}
|
||||
|
||||
/// Default `ShardWriter` configuration recorded by this spec.
|
||||
#[getter]
|
||||
pub fn writer_config_defaults(&self) -> HashMap<String, String> {
|
||||
self.inner.writer_config_defaults().clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<LsmWriteSpec> for lancedb::table::LsmWriteSpec {
|
||||
fn from(spec: LsmWriteSpec) -> Self {
|
||||
spec.inner
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass(get_all, from_py_object)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct AddColumnsResult {
|
||||
@@ -805,6 +940,37 @@ impl Table {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn set_unenforced_primary_key<'a>(
|
||||
self_: PyRef<'a, Self>,
|
||||
columns: Vec<String>,
|
||||
) -> PyResult<Bound<'a, PyAny>> {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
inner
|
||||
.set_unenforced_primary_key(columns)
|
||||
.await
|
||||
.infer_error()
|
||||
})
|
||||
}
|
||||
|
||||
pub fn set_lsm_write_spec<'a>(
|
||||
self_: PyRef<'a, Self>,
|
||||
spec: LsmWriteSpec,
|
||||
) -> PyResult<Bound<'a, PyAny>> {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
let native_spec = lancedb::table::LsmWriteSpec::from(spec);
|
||||
future_into_py(self_.py(), async move {
|
||||
inner.set_lsm_write_spec(native_spec).await.infer_error()
|
||||
})
|
||||
}
|
||||
|
||||
pub fn unset_lsm_write_spec(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
inner.unset_lsm_write_spec().await.infer_error()
|
||||
})
|
||||
}
|
||||
|
||||
pub fn uses_v2_manifest_paths(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
|
||||
@@ -33,6 +33,14 @@ class TestExprConstruction:
|
||||
e = lit(True)
|
||||
assert isinstance(e, Expr)
|
||||
|
||||
def test_lit_bytes(self):
|
||||
e = lit(b"\xde\xad\xbe\xef")
|
||||
assert isinstance(e, Expr)
|
||||
|
||||
def test_lit_bytes_empty(self):
|
||||
e = lit(b"")
|
||||
assert isinstance(e, Expr)
|
||||
|
||||
def test_lit_unsupported_type_raises(self):
|
||||
with pytest.raises(Exception):
|
||||
lit([1, 2, 3])
|
||||
@@ -135,6 +143,43 @@ class TestExprOperators:
|
||||
assert e.to_sql() == "(name = 'alice')"
|
||||
|
||||
|
||||
class TestExprBytesLiteral:
|
||||
def test_bytes_to_sql(self):
|
||||
e = lit(b"\xde\xad\xbe\xef")
|
||||
assert e.to_sql() == "X'DEADBEEF'"
|
||||
|
||||
def test_empty_bytes_to_sql(self):
|
||||
e = lit(b"")
|
||||
assert e.to_sql() == "X''"
|
||||
|
||||
def test_bytes_repr(self):
|
||||
e = lit(b"\x01\x02")
|
||||
assert repr(e) == "Expr(X'0102')"
|
||||
|
||||
def test_bytes_equality_expr_sql(self):
|
||||
e = col("data") == lit(b"\xca\xfe")
|
||||
assert e.to_sql() == "(data = X'CAFE')"
|
||||
|
||||
def test_bytes_ne_expr_sql(self):
|
||||
e = col("data") != lit(b"\xff")
|
||||
assert e.to_sql() == "(data <> X'FF')"
|
||||
|
||||
def test_bytes_compound_expr_sql(self):
|
||||
e = (col("data") == lit(b"\x01")) & (col("id") > lit(5))
|
||||
assert e.to_sql() == "((data = X'01') AND (id > 5))"
|
||||
|
||||
def test_bytes_in_function_call(self):
|
||||
# Regression test: binary literals inside scalar function calls
|
||||
# used to fail because DataFusion's unparser does not support Binary
|
||||
# scalars. Now handled via a placeholder-substitution rewrite.
|
||||
e = func("contains", col("data"), lit(b"\xff"))
|
||||
assert e.to_sql() == "contains(data, X'FF')"
|
||||
|
||||
def test_bytes_in_not(self):
|
||||
e = ~(col("data") == lit(b"\xff"))
|
||||
assert e.to_sql() == "NOT (data = X'FF')"
|
||||
|
||||
|
||||
class TestExprStringMethods:
|
||||
def test_lower(self):
|
||||
e = col("name").lower()
|
||||
@@ -385,3 +430,44 @@ class TestColNamingIntegration:
|
||||
)
|
||||
assert "upper_name" in result.schema.names
|
||||
assert sorted(result["upper_name"].to_pylist()) == ["ALICE", "BOB", "CHARLIE"]
|
||||
|
||||
|
||||
# ── bytes / binary column integration tests ───────────────────────────────────
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def binary_table(tmp_path):
|
||||
db = lancedb.connect(str(tmp_path))
|
||||
data = pa.table(
|
||||
{
|
||||
"id": [1, 2, 3],
|
||||
"payload": pa.array(
|
||||
[b"\x01\x02", b"\xca\xfe", b"\xff\x00"],
|
||||
type=pa.binary(),
|
||||
),
|
||||
}
|
||||
)
|
||||
return db.create_table("binary_test", data)
|
||||
|
||||
|
||||
class TestExprBytesIntegration:
|
||||
def test_binary_equality_filter(self, binary_table):
|
||||
result = (
|
||||
binary_table.search().where(col("payload") == lit(b"\xca\xfe")).to_arrow()
|
||||
)
|
||||
assert result.num_rows == 1
|
||||
assert result["id"][0].as_py() == 2
|
||||
|
||||
def test_binary_ne_filter(self, binary_table):
|
||||
result = (
|
||||
binary_table.search().where(col("payload") != lit(b"\x01\x02")).to_arrow()
|
||||
)
|
||||
assert result.num_rows == 2
|
||||
|
||||
def test_binary_compound_filter(self, binary_table):
|
||||
result = (
|
||||
binary_table.search()
|
||||
.where((col("payload") == lit(b"\x01\x02")) | (col("id") == lit(3)))
|
||||
.to_arrow()
|
||||
)
|
||||
assert result.num_rows == 2
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.28.0-beta.11"
|
||||
version = "0.29.1-beta.0"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user