Mirror of https://github.com/lancedb/lancedb.git
Synced 2025-12-23 05:19:58 +00:00

Compare commits: myriel/doc...python-v0. (149 commits)
Commits:

4f7b24d1a9
f9540724b7
aeac9c7644
6ddd271627
f0d7520bdf
7ef8bafd51
aed4a7c98e
273ba18426
8b94308cf2
0b7b27481e
e1f9b011f8
d664b8739f
20bec61ecb
45255be42c
93c2cf2f59
9d29c83f81
2a6143b5bd
b2242886e0
199904ab35
1fa888615f
40967f3baa
0bfc7de32c
d43880a585
59a886958b
c36f6746d1
25ce6d311f
92a4e46f9f
845641c480
d96404c635
02d31ee412
308623577d
8ee3ae378f
3372a2aae0
4cfcd95320
a70ff04bc9
a9daa18be9
3f2e3986e9
bf55feb9b6
8f8e06a2da
03eab0f091
143184c0ae
dadb042978
5a19cf15a6
3dcec724b7
86a6bb9fcb
b59d1007d3
56a16b1728
b7afed9beb
5cbbaa2e4a
1b6bd2498e
285da9db1d
ad8306c96b
3594538509
917aabd077
5ec12c9971
d0ce489b21
d7e02c8181
70958f6366
1ac745eb18
1357fe8aa1
0d78929893
9e2a68541e
1aa0fd16e7
fec2a05629
79a1cd60ee
88807a59a4
e0e7e01ea8
a416ebc11d
f941054baf
1a81c46505
82b25a71e9
13c613d45f
e07389a36c
e7e9e80b1d
247fb58400
504bdc471c
d617cdef4a
356d7046fd
48e5caabda
d6cc68f671
55eacfa685
222e3264ab
13505026cb
b0800b4b71
1befebf614
1ab60fae7f
e921c90c1b
05a4ea646a
ebbeeff4e0
407ca53f92
ff71d7e552
2261eb95a0
5b397e410b
b5a39bffec
5e1e9add07
97e9938dfe
1d4b92e01e
4c9fc3044b
0ebc8d45a8
f7d78c3420
6ea6884260
b1d791a299
8da74dcb37
3c7419b392
e612686fdb
e77d57a5b6
9391ad1450
79960b254e
d19c64e29b
06d5612443
45f96f4151
f744b785f8
2e3f745820
683aaed716
48f7b20daa
4dd399ca29
e6f1da31dc
a9ea785b15
cc38453391
47747287b6
0847e666a0
981f8427e6
f6846004ca
faf8973624
fabe37274f
6839ac3509
b88422e515
8d60685ede
04285a4a4e
d4a41b5663
adc3daa462
acbfa6c012
d602e9f98c
ad09234d59
0c34ffb252
d9f333d828
bb809abd4b
c87530f7a3
1eb1beecd6
ce550e6c45
d3bae1f3a3
dcf53c4506
941eada703
ed640a76d9
296205ef96
16beaaa656
4ff87b1f4a
0532ef2358
dcf7334c1f
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.21.2"
+current_version = "0.22.3-beta.5"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
     (?P<minor>0|[1-9]\\d*)\\.
.github/actions/create-failure-issue/action.yml (vendored, new file, +45)
@@ -0,0 +1,45 @@
name: Create Failure Issue
description: Creates a GitHub issue if any jobs in the workflow failed

inputs:
  job-results:
    description: 'JSON string of job results from needs context'
    required: true
  workflow-name:
    description: 'Name of the workflow'
    required: true

runs:
  using: composite
  steps:
    - name: Check for failures and create issue
      shell: bash
      env:
        JOB_RESULTS: ${{ inputs.job-results }}
        WORKFLOW_NAME: ${{ inputs.workflow-name }}
        RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
        GH_TOKEN: ${{ github.token }}
      run: |
        # Check if any job failed
        if echo "$JOB_RESULTS" | jq -e 'to_entries | any(.value.result == "failure")' > /dev/null; then
          echo "Detected job failures, creating issue..."

          # Extract failed job names
          FAILED_JOBS=$(echo "$JOB_RESULTS" | jq -r 'to_entries | map(select(.value.result == "failure")) | map(.key) | join(", ")')

          # Create issue with workflow name, failed jobs, and run URL
          gh issue create \
            --title "$WORKFLOW_NAME Failed ($FAILED_JOBS)" \
            --body "The workflow **$WORKFLOW_NAME** failed during execution.

        **Failed jobs:** $FAILED_JOBS

        **Run URL:** $RUN_URL

        Please investigate the failed jobs and address any issues." \
            --label "ci"

          echo "Issue created successfully"
        else
          echo "No job failures detected, skipping issue creation"
        fi
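Aside: the jq expression above is compact; here is a minimal Python sketch of the same failure check, assuming a `needs`-style JSON payload (the job names and results below are made-up examples, not from this repository):

```python
import json

# Hypothetical example of the JSON passed in via ${{ toJSON(needs) }}.
job_results = json.loads(
    '{"build-lancedb": {"result": "success"}, "publish": {"result": "failure"}}'
)

# Equivalent of: to_entries | any(.value.result == "failure")
failed = [name for name, job in job_results.items() if job.get("result") == "failure"]

if failed:
    # Equivalent of the jq join(", ") used to build the issue title.
    print(f"Would create issue: Workflow Failed ({', '.join(failed)})")
else:
    print("No job failures detected, skipping issue creation")
```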
.github/workflows/cargo-publish.yml (vendored, +14)
@@ -38,3 +38,17 @@ jobs:
       - name: Publish the package
         run: |
           cargo publish -p lancedb --all-features --token ${{ steps.auth.outputs.token }}
+  report-failure:
+    name: Report Workflow Failure
+    runs-on: ubuntu-latest
+    needs: [build]
+    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
+    permissions:
+      contents: read
+      issues: write
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/create-failure-issue
+        with:
+          job-results: ${{ toJSON(needs) }}
+          workflow-name: ${{ github.workflow }}
.github/workflows/codex-update-lance-dependency.yml (vendored, new file, +107)
@@ -0,0 +1,107 @@
name: Codex Update Lance Dependency

on:
  workflow_call:
    inputs:
      tag:
        description: "Tag name from Lance"
        required: true
        type: string
  workflow_dispatch:
    inputs:
      tag:
        description: "Tag name from Lance"
        required: true
        type: string

permissions:
  contents: write
  pull-requests: write
  actions: read

jobs:
  update:
    runs-on: ubuntu-latest
    steps:
      - name: Show inputs
        run: |
          echo "tag = ${{ inputs.tag }}"

      - name: Checkout Repo LanceDB
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          persist-credentials: true

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: 20

      - name: Install Codex CLI
        run: npm install -g @openai/codex

      - name: Install Rust toolchain
        uses: dtolnay/rust-toolchain@stable
        with:
          toolchain: stable
          components: clippy, rustfmt

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y protobuf-compiler libssl-dev

      - name: Install cargo-info
        run: cargo install cargo-info

      - name: Install Python dependencies
        run: python3 -m pip install --upgrade pip packaging

      - name: Configure git user
        run: |
          git config user.name "lancedb automation"
          git config user.email "robot@lancedb.com"

      - name: Configure Codex authentication
        env:
          CODEX_TOKEN_B64: ${{ secrets.CODEX_TOKEN }}
        run: |
          if [ -z "${CODEX_TOKEN_B64}" ]; then
            echo "Repository secret CODEX_TOKEN is not defined; skipping Codex execution."
            exit 1
          fi
          mkdir -p ~/.codex
          echo "${CODEX_TOKEN_B64}" | base64 --decode > ~/.codex/auth.json

      - name: Run Codex to update Lance dependency
        env:
          TAG: ${{ inputs.tag }}
          GITHUB_TOKEN: ${{ secrets.ROBOT_TOKEN }}
          GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
        run: |
          set -euo pipefail
          VERSION="${TAG#refs/tags/}"
          VERSION="${VERSION#v}"
          BRANCH_NAME="codex/update-lance-${VERSION//[^a-zA-Z0-9]/-}"
          cat <<EOF >/tmp/codex-prompt.txt
          You are running inside the lancedb repository on a GitHub Actions runner. Update the Lance dependency to version ${VERSION} and prepare a pull request for maintainers to review.

          Follow these steps exactly:
          1. Use script "ci/set_lance_version.py" to update Lance dependencies. The script already refreshes Cargo metadata, so allow it to finish even if it takes time.
          2. Run "cargo clippy --workspace --tests --all-features -- -D warnings". If diagnostics appear, fix them yourself and rerun clippy until it exits cleanly. Do not skip any warnings.
          3. After clippy succeeds, run "cargo fmt --all" to format the workspace.
          4. Ensure the repository is clean except for intentional changes. Inspect "git status --short" and "git diff" to confirm the dependency update and any required fixes.
          5. Create and switch to a new branch named "${BRANCH_NAME}" (replace any duplicated hyphens if necessary).
          6. Stage all relevant files with "git add -A". Commit using the message "chore: update lance dependency to v${VERSION}".
          7. Push the branch to origin. If the branch already exists, force-push your changes.
          8. The env var "GH_TOKEN" is available; use the "gh" CLI for GitHub operations such as creating the pull request.
          9. Create a pull request targeting "main" with title "chore: update lance dependency to v${VERSION}". In the body, summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}).
          10. After creating the PR, display the PR URL, "git status --short", and a concise summary of the commands run and their results.

          Constraints:
          - Use bash commands; avoid modifying GitHub workflow files other than through the scripted task above.
          - Do not merge the PR.
          - If any command fails, diagnose and fix the issue instead of aborting.
          EOF
          codex --config shell_environment_policy.ignore_default_excludes=true exec --dangerously-bypass-approvals-and-sandbox "$(cat /tmp/codex-prompt.txt)"
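Aside: the bash parameter expansions that derive `VERSION` and `BRANCH_NAME` above are easy to misread; here is a hedged Python sketch of the same derivation (the sample tag is made up):

```python
import re

# Hypothetical triggering tag, as delivered in the TAG environment variable.
tag = "refs/tags/v0.39.0-beta.2"

# Mirrors: VERSION="${TAG#refs/tags/}" then VERSION="${VERSION#v}"
version = tag.removeprefix("refs/tags/").removeprefix("v")

# Mirrors: BRANCH_NAME="codex/update-lance-${VERSION//[^a-zA-Z0-9]/-}"
branch_name = "codex/update-lance-" + re.sub(r"[^a-zA-Z0-9]", "-", version)

print(version)      # 0.39.0-beta.2
print(branch_name)  # codex/update-lance-0-39-0-beta-2
```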
.github/workflows/docs.yml (vendored, 14 changed lines)
@@ -56,22 +56,12 @@ jobs:
         with:
           node-version: 20
           cache: 'npm'
-          cache-dependency-path: node/package-lock.json
+          cache-dependency-path: docs/package-lock.json
       - name: Install node dependencies
-        working-directory: node
+        working-directory: nodejs
         run: |
           sudo apt update
           sudo apt install -y protobuf-compiler libssl-dev
-      - name: Build node
-        working-directory: node
-        run: |
-          npm ci
-          npm run build
-          npm run tsc
-      - name: Create markdown files
-        working-directory: node
-        run: |
-          npx typedoc --plugin typedoc-plugin-markdown --out ../docs/src/javascript src/index.ts
       - name: Build docs
         working-directory: docs
         run: |
.github/workflows/docs_test.yml (vendored, 51 changed lines)
@@ -24,7 +24,8 @@ env:
 jobs:
   test-python:
     name: Test doc python code
-    runs-on: ubuntu-24.04
+    runs-on: warp-ubuntu-2204-x64-8x
+    timeout-minutes: 60
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -58,51 +59,3 @@ jobs:
       run: |
         cd docs/test/python
         for d in *; do cd "$d"; echo "$d".py; python "$d".py; cd ..; done
-  test-node:
-    name: Test doc nodejs code
-    runs-on: ubuntu-24.04
-    timeout-minutes: 60
-    strategy:
-      fail-fast: false
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          lfs: true
-      - name: Print CPU capabilities
-        run: cat /proc/cpuinfo
-      - name: Set up Node
-        uses: actions/setup-node@v4
-        with:
-          node-version: 20
-      - name: Install protobuf
-        run: |
-          sudo apt update
-          sudo apt install -y protobuf-compiler
-      - name: Install dependecies needed for ubuntu
-        run: |
-          sudo apt install -y libssl-dev
-          rustup update && rustup default
-      - name: Rust cache
-        uses: swatinem/rust-cache@v2
-      - name: Install node dependencies
-        run: |
-          sudo swapoff -a
-          sudo fallocate -l 8G /swapfile
-          sudo chmod 600 /swapfile
-          sudo mkswap /swapfile
-          sudo swapon /swapfile
-          sudo swapon --show
-          cd node
-          npm ci
-          npm run build-release
-          cd ../docs
-          npm install
-      - name: Test
-        env:
-          LANCEDB_URI: ${{ secrets.LANCEDB_URI }}
-          LANCEDB_DEV_API_KEY: ${{ secrets.LANCEDB_DEV_API_KEY }}
-        run: |
-          cd docs
-          npm t
.github/workflows/java-publish.yml (vendored, 15 changed lines)
@@ -43,7 +43,6 @@ jobs:
      - uses: Swatinem/rust-cache@v2
      - uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          toolchain: "1.81.0"
          cache-workspaces: "./java/core/lancedb-jni"
      # Disable full debug symbol generation to speed up CI build and keep memory down
      # "1" means line tables only, which is useful for panic tracebacks.
@@ -112,3 +111,17 @@
       env:
         SONATYPE_USER: ${{ secrets.SONATYPE_USER }}
         SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }}
+  report-failure:
+    name: Report Workflow Failure
+    runs-on: ubuntu-latest
+    needs: [linux-arm64, linux-x86, macos-arm64]
+    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
+    permissions:
+      contents: read
+      issues: write
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/create-failure-issue
+        with:
+          job-results: ${{ toJSON(needs) }}
+          workflow-name: ${{ github.workflow }}
.github/workflows/nodejs.yml (vendored, 7 changed lines)
@@ -6,6 +6,7 @@ on:
       - main
   pull_request:
     paths:
       - Cargo.toml
       - nodejs/**
       - .github/workflows/nodejs.yml
+      - docker-compose.yml
@@ -79,7 +80,7 @@ jobs:
         with:
           node-version: ${{ matrix.node-version }}
           cache: 'npm'
-          cache-dependency-path: node/package-lock.json
+          cache-dependency-path: nodejs/package-lock.json
       - uses: Swatinem/rust-cache@v2
       - name: Install dependencies
         run: |
@@ -116,7 +117,7 @@
           set -e
           npm ci
           npm run docs
-          if ! git diff --exit-code -- . ':(exclude)Cargo.lock'; then
+          if ! git diff --exit-code -- ../ ':(exclude)Cargo.lock'; then
            echo "Docs need to be updated"
            echo "Run 'npm run docs', fix any warnings, and commit the changes."
            exit 1
@@ -137,7 +138,7 @@
         with:
           node-version: 20
           cache: 'npm'
-          cache-dependency-path: node/package-lock.json
+          cache-dependency-path: nodejs/package-lock.json
       - uses: Swatinem/rust-cache@v2
       - name: Install dependencies
         run: |
.github/workflows/npm-publish.yml (vendored, +14)
@@ -365,3 +365,17 @@ jobs:
             ARGS="$ARGS --tag preview"
           fi
           npm publish $ARGS
+  report-failure:
+    name: Report Workflow Failure
+    runs-on: ubuntu-latest
+    needs: [build-lancedb, test-lancedb, publish]
+    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
+    permissions:
+      contents: read
+      issues: write
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/create-failure-issue
+        with:
+          job-results: ${{ toJSON(needs) }}
+          workflow-name: ${{ github.workflow }}
.github/workflows/pypi-publish.yml (vendored, 18 changed lines)
@@ -56,7 +56,7 @@ jobs:
       pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
       fury_token: ${{ secrets.FURY_TOKEN }}
   mac:
-    timeout-minutes: 60
+    timeout-minutes: 90
     runs-on: ${{ matrix.config.runner }}
     strategy:
       matrix:
@@ -64,7 +64,7 @@
         - target: x86_64-apple-darwin
          runner: macos-13
         - target: aarch64-apple-darwin
-          runner: macos-14
+          runner: warp-macos-14-arm64-6x
     env:
       MACOSX_DEPLOYMENT_TARGET: 10.15
     steps:
@@ -173,3 +173,17 @@
       generate_release_notes: false
       name: Python LanceDB v${{ steps.extract_version.outputs.version }}
       body: ${{ steps.python_release_notes.outputs.changelog }}
+  report-failure:
+    name: Report Workflow Failure
+    runs-on: ubuntu-latest
+    needs: [linux, mac, windows]
+    permissions:
+      contents: read
+      issues: write
+    if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/create-failure-issue
+        with:
+          job-results: ${{ toJSON(needs) }}
+          workflow-name: ${{ github.workflow }}
.github/workflows/python.yml (vendored, +1)
@@ -6,6 +6,7 @@ on:
       - main
   pull_request:
     paths:
       - Cargo.toml
       - python/**
       - .github/workflows/python.yml
.github/workflows/rust.yml (vendored, 13 changed lines)
@@ -96,6 +96,7 @@ jobs:
       # Need up-to-date compilers for kernels
       CC: clang-18
       CXX: clang++-18
+      GH_TOKEN: ${{ secrets.SOPHON_READ_TOKEN }}
     steps:
       - uses: actions/checkout@v4
         with:
@@ -117,15 +118,17 @@
          sudo chmod 600 /swapfile
          sudo mkswap /swapfile
          sudo swapon /swapfile
      - name: Start S3 integration test environment
        working-directory: .
        run: docker compose up --detach --wait
      - name: Build
        run: cargo build --all-features --tests --locked --examples
      - name: Run tests
        run: cargo test --all-features --locked
      - name: Run feature tests
        run: make -C ./lancedb feature-tests
      - name: Run examples
        run: cargo run --example simple --locked
      - name: Run remote tests
        # Running this requires access to secrets, so skip if this is
        # a PR from a fork.
        if: github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork
        run: make -C ./lancedb remote-tests

  macos:
    timeout-minutes: 30
.github/workflows/trigger-vectordb-recipes.yml (vendored, deleted, -26)
@@ -1,26 +0,0 @@
-name: Trigger vectordb-recipers workflow
-on:
-  push:
-    branches: [ main ]
-  pull_request:
-    paths:
-      - .github/workflows/trigger-vectordb-recipes.yml
-  workflow_dispatch:
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Trigger vectordb-recipes workflow
-        uses: actions/github-script@v6
-        with:
-          github-token: ${{ secrets.VECTORDB_RECIPES_ACTION_TOKEN }}
-          script: |
-            const result = await github.rest.actions.createWorkflowDispatch({
-              owner: 'lancedb',
-              repo: 'vectordb-recipes',
-              workflow_id: 'examples-test.yml',
-              ref: 'main'
-            });
-            console.log(result);
.gitignore (vendored, -3)
@@ -31,9 +31,6 @@ python/dist
 *.node
 **/node_modules
 **/.DS_Store
-node/dist
-node/examples/**/package-lock.json
-node/examples/**/dist
 nodejs/lancedb/native*
 dist
AGENTS.md (new file, +101)
@@ -0,0 +1,101 @@
LanceDB is a database designed for retrieval, including vector, full-text, and hybrid search.
It is a wrapper around Lance. There are two backends: local (in-process, like SQLite) and
remote (against LanceDB Cloud).

The core of LanceDB is written in Rust. There are bindings in Python, TypeScript, and Java.

Project layout:

* `rust/lancedb`: The LanceDB core Rust implementation.
* `python`: The Python bindings, using PyO3.
* `nodejs`: The TypeScript bindings, using napi-rs.
* `java`: The Java bindings.

Common commands:

* Check for compiler errors: `cargo check --quiet --features remote --tests --examples`
* Run tests: `cargo test --quiet --features remote --tests`
* Run specific test: `cargo test --quiet --features remote -p <package_name> --test <test_name>`
* Lint: `cargo clippy --quiet --features remote --tests --examples`
* Format: `cargo fmt --all`

Before committing changes, run formatting.

## Coding tips

* When writing Rust doctests for things that require a connection or table reference,
  write them as a function instead of a fully executable test. This allows type checking
  to run but avoids needing a full test environment. For example:
  ```rust
  /// ```
  /// use lance_index::scalar::FullTextSearchQuery;
  /// use lancedb::query::{QueryBase, ExecutableQuery};
  ///
  /// # use lancedb::Table;
  /// # async fn query(table: &Table) -> Result<(), Box<dyn std::error::Error>> {
  /// let results = table.query()
  ///     .full_text_search(FullTextSearchQuery::new("hello world".into()))
  ///     .execute()
  ///     .await?;
  /// # Ok(())
  /// # }
  /// ```
  ```

## Example plan: adding a new method on Table

Adding a new method involves first adding it to the Rust core, then exposing it
in the Python and TypeScript bindings. There are both local and remote tables.
Remote tables are implemented via an HTTP API and require the `remote` cargo
feature flag to be enabled. Python has both sync and async methods.

Rust core changes:

1. Add method on `Table` struct in `rust/lancedb/src/table.rs` (calls `BaseTable` trait).
2. Add method to `BaseTable` trait in `rust/lancedb/src/table.rs`.
3. Implement new trait method on `NativeTable` in `rust/lancedb/src/table.rs`.
   * Test with unit test in `rust/lancedb/src/table.rs`.
4. Implement new trait method on `RemoteTable` in `rust/lancedb/src/remote/table.rs`.
   * Test with unit test in `rust/lancedb/src/remote/table.rs` against mocked endpoint.

Python bindings changes:

1. Add PyO3 method binding in `python/src/table.rs`. Run `make develop` to compile bindings.
2. Add types for PyO3 method in `python/python/lancedb/_lancedb.pyi`.
3. Add method to `AsyncTable` class in `python/python/lancedb/table.py`.
4. Add abstract method to `Table` abstract base class in `python/python/lancedb/table.py`.
5. Add concrete sync method to `LanceTable` class in `python/python/lancedb/table.py`.
   * Should use `LOOP.run()` to call the corresponding `AsyncTable` method.
6. Add concrete sync method to `RemoteTable` class in `python/python/lancedb/remote/table.py`.
7. Add unit test in `python/tests/test_table.py`.

TypeScript bindings changes:

1. Add napi-rs method binding on `Table` in `nodejs/src/table.rs`.
2. Run `npm run build` to generate TypeScript definitions.
3. Add typescript method on abstract class `Table` in `nodejs/src/table.ts`.
4. Add concrete method on `LocalTable` class in `nodejs/src/native_table.ts`.
   * Note: despite the name, this class is also used for remote tables.
5. Add test in `nodejs/__test__/table.test.ts`.
6. Run `npm run docs` to generate TypeScript documentation.

## Review Guidelines

Please consider the following when reviewing code contributions.

### Rust API design

* Design public APIs so they can be evolved easily in the future without breaking
  changes. Often this means using builder patterns or options structs instead of
  long argument lists.
* For public APIs, prefer inputs that use `Into<T>` or `AsRef<T>` traits to allow
  more flexible inputs. For example, use `name: Into<String>` instead of `name: String`,
  so we don't have to write `func("my_string".to_string())`.

### Testing

* Ensure all new public APIs have documentation and examples.
* Ensure that all bugfixes and features have corresponding tests. **We do not merge
  code without tests.**

### Documentation

* New features must include updates to the Rust documentation comments. Link to
  relevant structs and methods to increase the value of documentation.
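Aside: step 5 of the Python plan in AGENTS.md (wrapping the async method with `LOOP.run()`) is the least obvious part. Below is a hedged, minimal sketch of that sync-over-async pattern; the `do_something` method and the `_Loop` helper are made-up stand-ins, and the real `LOOP` in the lancedb codebase may differ in detail:

```python
import asyncio

# Simplified stand-in for lancedb's event-loop helper; the real LOOP
# runs coroutines on a dedicated background loop.
class _Loop:
    def run(self, coro):
        return asyncio.run(coro)

LOOP = _Loop()

class AsyncTable:
    async def do_something(self, arg: str) -> str:  # hypothetical new method
        return f"did {arg}"

class LanceTable:
    def __init__(self, async_table: AsyncTable):
        self._table = async_table

    def do_something(self, arg: str) -> str:
        # Sync facade: delegate to the corresponding AsyncTable method.
        return LOOP.run(self._table.do_something(arg))

print(LanceTable(AsyncTable()).do_something("x"))  # did x
```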
CLAUDE.md (deleted, -24)
@@ -1,24 +0,0 @@
-LanceDB is a database designed for retrieval, including vector, full-text, and hybrid search.
-It is a wrapper around Lance. There are two backends: local (in-process like SQLite) and
-remote (against LanceDB Cloud).
-
-The core of LanceDB is written in Rust. There are bindings in Python, Typescript, and Java.
-
-Project layout:
-
-* `rust/lancedb`: The LanceDB core Rust implementation.
-* `python`: The Python bindings, using PyO3.
-* `nodejs`: The Typescript bindings, using napi-rs
-* `java`: The Java bindings
-
-(`rust/ffi` and `node/` are for a deprecated package. You can ignore them.)
-
-Common commands:
-
-* Check for compiler errors: `cargo check --features remote --tests --examples`
-* Run tests: `cargo test --features remote --tests`
-* Run specific test: `cargo test --features remote -p <package_name> --test <test_name>`
-* Lint: `cargo clippy --features remote --tests --examples`
-* Format: `cargo fmt --all`
-
-Before committing changes, run formatting.
Cargo.lock (generated, 2540 changed lines): file diff suppressed because it is too large.
Cargo.toml (72 changed lines)
@@ -1,10 +1,5 @@
 [workspace]
-members = [
-    "rust/lancedb",
-    "nodejs",
-    "python",
-    "java/core/lancedb-jni",
-]
+members = ["rust/lancedb", "nodejs", "python", "java/core/lancedb-jni"]
 # Python package needs to be built by maturin.
 exclude = ["python"]
 resolver = "2"
@@ -20,32 +15,37 @@ categories = ["database-implementations"]
 rust-version = "1.78.0"

 [workspace.dependencies]
-lance = { "version" = "=0.32.1", "features" = [
-    "dynamodb",
-], "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
-lance-io = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
-lance-index = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
-lance-linalg = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
-lance-table = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
-lance-testing = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
-lance-datafusion = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
-lance-encoding = { "version" = "=0.32.1", "tag" = "v0.32.1-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+lance = { "version" = "=0.39.0", default-features = false }
+lance-core = "=0.39.0"
+lance-datagen = "=0.39.0"
+lance-file = "=0.39.0"
+lance-io = { "version" = "=0.39.0", default-features = false }
+lance-index = "=0.39.0"
+lance-linalg = "=0.39.0"
+lance-namespace = "=0.39.0"
+lance-namespace-impls = { "version" = "=0.39.0", "features" = ["dir-aws", "dir-gcp", "dir-azure", "dir-oss", "rest"] }
+lance-table = "=0.39.0"
+lance-testing = "=0.39.0"
+lance-datafusion = "=0.39.0"
+lance-encoding = "=0.39.0"
+lance-arrow = "=0.39.0"
 ahash = "0.8"
 # Note that this one does not include pyarrow
-arrow = { version = "55.1", optional = false }
-arrow-array = "55.1"
-arrow-data = "55.1"
-arrow-ipc = "55.1"
-arrow-ord = "55.1"
-arrow-schema = "55.1"
-arrow-arith = "55.1"
-arrow-cast = "55.1"
+arrow = { version = "56.2", optional = false }
+arrow-array = "56.2"
+arrow-data = "56.2"
+arrow-ipc = "56.2"
+arrow-ord = "56.2"
+arrow-schema = "56.2"
+arrow-select = "56.2"
+arrow-cast = "56.2"
 async-trait = "0"
-datafusion = { version = "48.0", default-features = false }
-datafusion-catalog = "48.0"
-datafusion-common = { version = "48.0", default-features = false }
-datafusion-execution = "48.0"
-datafusion-expr = "48.0"
-datafusion-physical-plan = "48.0"
+datafusion = { version = "50.1", default-features = false }
+datafusion-catalog = "50.1"
+datafusion-common = { version = "50.1", default-features = false }
+datafusion-execution = "50.1"
+datafusion-expr = "50.1"
+datafusion-physical-plan = "50.1"
 env_logger = "0.11"
 half = { "version" = "2.6.0", default-features = false, features = [
     "num-traits",
@@ -55,19 +55,11 @@ log = "0.4"
 moka = { version = "0.12", features = ["future"] }
 object_store = "0.12.0"
 pin-project = "1.0.7"
-rand = "0.9"
 snafu = "0.8"
 url = "2"
 num-traits = "0.2"
+rand = "0.9"
 regex = "1.10"
 lazy_static = "1"
 semver = "1.0.25"
-# Temporary pins to work around downstream issues
-# https://github.com/apache/arrow-rs/commit/2fddf85afcd20110ce783ed5b4cdeb82293da30b
-chrono = "=0.4.41"
-# https://github.com/RustCrypto/formats/issues/1684
-base64ct = "=1.6.0"
-# Workaround for: https://github.com/eira-fransham/crunchy/issues/13
-crunchy = "=0.2.2"
-# Workaround for: https://github.com/Lokathor/bytemuck/issues/306
-bytemuck_derive = ">=1.8.1, <1.9.0"
+chrono = "0.4"
ci/create_lancedb_test_connection.sh (new executable file, +4)
@@ -0,0 +1,4 @@
#!/usr/bin/env bash

export RUST_LOG=info
exec ./lancedb server --port 0 --sql-port 0 --data-dir "${1}"
ci/run_with_docker_compose.sh (new executable file, +18)
@@ -0,0 +1,18 @@
#!/usr/bin/env bash

#
# A script for running the given command together with a docker compose environment.
#

# Bring down the docker setup once the command is done running.
tear_down() {
    docker compose -p fixture down
}
trap tear_down EXIT

set +xe

# Clean up any existing docker setup and bring up a new one.
docker compose -p fixture up --detach --wait || exit 1

"${@}"
ci/run_with_test_connection.sh (new executable file, +68)
@@ -0,0 +1,68 @@
#!/usr/bin/env bash

#
# A script for running the given command together with the lancedb cli.
#

die() {
    echo "${1}"
    exit 1
}

check_command_exists() {
    command="${1}"
    which ${command} &> /dev/null || \
        die "Unable to locate command: ${command}. Did you install it?"
}

if [[ ! -e ./lancedb ]]; then
    if [[ -v SOPHON_READ_TOKEN ]]; then
        INPUT="lancedb-linux-x64"
        gh release \
            --repo lancedb/lancedb \
            download ci-support-binaries \
            --pattern "${INPUT}" \
            || die "failed to fetch cli."
        check_command_exists openssl
        openssl enc -aes-256-cbc \
            -d -pbkdf2 \
            -pass "env:SOPHON_READ_TOKEN" \
            -in "${INPUT}" \
            -out ./lancedb-linux-x64.tar.gz \
            || die "openssl failed"
        TARGET="${INPUT}.tar.gz"
    else
        ARCH="x64"
        UNAME=$(uname -m)
        if [[ $OSTYPE == 'darwin'* ]]; then
            if [[ $UNAME == 'arm64' ]]; then
                ARCH='arm64'
            fi
            OSTYPE="macos"
        elif [[ $OSTYPE == 'linux'* ]]; then
            if [[ $UNAME == 'aarch64' ]]; then
                ARCH='arm64'
            fi
            OSTYPE="linux"
        else
            die "unknown OSTYPE: $OSTYPE"
        fi

        check_command_exists gh
        TARGET="lancedb-${OSTYPE}-${ARCH}.tar.gz"
        gh release \
            --repo lancedb/sophon \
            download lancedb-cli-v0.0.3 \
            --pattern "${TARGET}" \
            || die "failed to fetch cli."
    fi

    check_command_exists tar
    tar xvf "${TARGET}" || die "tar failed."
    [[ -e ./lancedb ]] || die "failed to extract lancedb."
fi

SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
export CREATE_LANCEDB_TEST_CONNECTION_SCRIPT="${SCRIPT_DIR}/create_lancedb_test_connection.sh"

"${@}"
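Aside: the OS/arch detection in this script reduces to a small mapping. Here is a hedged Python sketch of the same logic, under the assumption that only macOS/Linux on x64/arm64 are supported, as in the script:

```python
import platform
import sys

def cli_artifact_name() -> str:
    # Mirrors the OSTYPE / uname -m checks in run_with_test_connection.sh.
    machine = platform.machine()  # e.g. "x86_64", "arm64", "aarch64"
    arch = "arm64" if machine in ("arm64", "aarch64") else "x64"
    if sys.platform == "darwin":
        os_name = "macos"
    elif sys.platform.startswith("linux"):
        os_name = "linux"
    else:
        raise RuntimeError(f"unknown platform: {sys.platform}")
    return f"lancedb-{os_name}-{arch}.tar.gz"

print(cli_artifact_name())  # e.g. lancedb-linux-x64.tar.gz
```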
ci/set_lance_version.py
@@ -1,4 +1,5 @@
 import argparse
+import re
 import sys
 import json
 
@@ -18,8 +19,12 @@ def run_command(command: str) -> str:
 
 def get_latest_stable_version() -> str:
     version_line = run_command("cargo info lance | grep '^version:'")
-    version = version_line.split(" ")[1].strip()
-    return version
+    # Example output: "version: 0.35.0 (latest 0.37.0)"
+    match = re.search(r'\(latest ([0-9.]+)\)', version_line)
+    if match:
+        return match.group(1)
+    # Fallback: use the first version after 'version:'
+    return version_line.split("version:")[1].split()[0].strip()
 
 
 def get_latest_preview_version() -> str:
@@ -50,10 +55,56 @@ def extract_features(line: str) -> list:
     match = re.search(r'"features"\s*=\s*\[\s*(.*?)\s*\]', line, re.DOTALL)
     if match:
         features_str = match.group(1)
-        return [f.strip('"') for f in features_str.split(",") if len(f) > 0]
+        return [f.strip().strip('"') for f in features_str.split(",") if f.strip()]
     return []
 
 
+def extract_default_features(line: str) -> bool:
+    """
+    Checks if default-features = false is present in a line in Cargo.toml.
+    Example: 'lance = { "version" = "=0.29.0", default-features = false, "features" = ["dynamodb"] }'
+    Returns: True if default-features = false is present, False otherwise
+    """
+    import re
+
+    match = re.search(r'default-features\s*=\s*false', line)
+    return match is not None
+
+
+def dict_to_toml_line(package_name: str, config: dict) -> str:
+    """
+    Converts a configuration dictionary to a TOML dependency line.
+    Dictionary insertion order is preserved (Python 3.7+), so the caller
+    controls the order of fields in the output.
+
+    Args:
+        package_name: The name of the package (e.g., "lance", "lance-io")
+        config: Dictionary with keys like "version", "path", "git", "tag", "features", "default-features"
+            The order of keys in this dict determines the order in the output.
+
+    Returns:
+        A properly formatted TOML line with a trailing newline
+    """
+    # If only version is specified, use simple format
+    if len(config) == 1 and "version" in config:
+        return f'{package_name} = "{config["version"]}"\n'
+
+    # Otherwise, use inline table format
+    parts = []
+    for key, value in config.items():
+        if key == "default-features" and not value:
+            parts.append("default-features = false")
+        elif key == "features":
+            parts.append(f'"features" = {json.dumps(value)}')
+        elif isinstance(value, str):
+            parts.append(f'"{key}" = "{value}"')
+        else:
+            # This shouldn't happen with our current usage
+            parts.append(f'"{key}" = {json.dumps(value)}')
+
+    return f'{package_name} = {{ {", ".join(parts)} }}\n'
+
+
 def update_cargo_toml(line_updater):
     """
     Updates the Cargo.toml file by applying the line_updater function to each line.
@@ -67,20 +118,27 @@ def update_cargo_toml(line_updater):
     is_parsing_lance_line = False
     for line in lines:
         if line.startswith("lance"):
-            # Update the line using the provided function
-            if line.strip().endswith("}"):
+            # Check if this is a single-line or multi-line entry
+            # Single-line entries either:
+            #   1. End with } (complete inline table)
+            #   2. End with " (simple version string)
+            # Multi-line entries start with { but don't end with }
+            if line.strip().endswith("}") or line.strip().endswith('"'):
+                # Single-line entry - process immediately
                 new_lines.append(line_updater(line))
-            else:
+            elif "{" in line and not line.strip().endswith("}"):
                 # Multi-line entry - start accumulating
                 lance_line = line
                 is_parsing_lance_line = True
+            else:
+                # Single-line entry without quotes or braces (shouldn't happen but handle it)
+                new_lines.append(line_updater(line))
         elif is_parsing_lance_line:
             lance_line += line
             if line.strip().endswith("}"):
                 new_lines.append(line_updater(lance_line))
                 lance_line = ""
                 is_parsing_lance_line = False
-            else:
-                print("doesn't end with }:", line)
         else:
             # Keep the line unchanged
             new_lines.append(line)
@@ -92,18 +150,25 @@ def update_cargo_toml(line_updater):
 def set_stable_version(version: str):
     """
     Sets lines to
-    lance = { "version" = "=0.29.0", "features" = ["dynamodb"] }
-    lance-io = "=0.29.0"
+    lance = { "version" = "=0.29.0", default-features = false, "features" = ["dynamodb"] }
+    lance-io = { "version" = "=0.29.0", default-features = false }
     ...
     """
 
     def line_updater(line: str) -> str:
         package_name = line.split("=", maxsplit=1)[0].strip()
 
+        # Build config in desired order: version, default-features, features
+        config = {"version": f"={version}"}
+
+        if extract_default_features(line):
+            config["default-features"] = False
+
         features = extract_features(line)
         if features:
-            return f'{package_name} = {{ "version" = "={version}", "features" = {json.dumps(features)} }}\n'
-        else:
-            return f'{package_name} = "={version}"\n'
+            config["features"] = features
+
+        return dict_to_toml_line(package_name, config)
 
     update_cargo_toml(line_updater)
 
@@ -111,19 +176,27 @@ def set_stable_version(version: str):
 def set_preview_version(version: str):
     """
     Sets lines to
-    lance = { "version" = "=0.29.0", "features" = ["dynamodb"], tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
-    lance-io = { version = "=0.29.0", tag = "v0.29.0-beta.2", git="https://github.com/lancedb/lance.git" }
+    lance = { "version" = "=0.29.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.29.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
+    lance-io = { "version" = "=0.29.0", default-features = false, "tag" = "v0.29.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
     ...
     """
 
     def line_updater(line: str) -> str:
         package_name = line.split("=", maxsplit=1)[0].strip()
+        # Build config in desired order: version, default-features, features, tag, git
+        config = {"version": f"={version}"}
+
+        if extract_default_features(line):
+            config["default-features"] = False
+
         features = extract_features(line)
         base_version = version.split("-")[0]  # Get the base version without beta suffix
         if features:
-            return f'{package_name} = {{ "version" = "={base_version}", "features" = {json.dumps(features)}, "tag" = "v{version}", "git" = "https://github.com/lancedb/lance.git" }}\n'
-        else:
-            return f'{package_name} = {{ "version" = "={base_version}", "tag" = "v{version}", "git" = "https://github.com/lancedb/lance.git" }}\n'
+            config["features"] = features
+
+        config["tag"] = f"v{version}"
+        config["git"] = "https://github.com/lancedb/lance.git"
+
+        return dict_to_toml_line(package_name, config)
 
     update_cargo_toml(line_updater)
 
@@ -131,18 +204,25 @@ def set_preview_version(version: str):
 def set_local_version():
     """
     Sets lines to
-    lance = { path = "../lance/rust/lance", features = ["dynamodb"] }
-    lance-io = { path = "../lance/rust/lance-io" }
+    lance = { "path" = "../lance/rust/lance", default-features = false, "features" = ["dynamodb"] }
+    lance-io = { "path" = "../lance/rust/lance-io", default-features = false }
     ...
    """
 
    def line_updater(line: str) -> str:
        package_name = line.split("=", maxsplit=1)[0].strip()
 
+        # Build config in desired order: path, default-features, features
+        config = {"path": f"../lance/rust/{package_name}"}
+
+        if extract_default_features(line):
+            config["default-features"] = False
+
        features = extract_features(line)
        if features:
-            return f'{package_name} = {{ "path" = "../lance/rust/{package_name}", "features" = {json.dumps(features)} }}\n'
-        else:
-            return f'{package_name} = {{ "path" = "../lance/rust/{package_name}" }}\n'
+            config["features"] = features
+
+        return dict_to_toml_line(package_name, config)
 
    update_cargo_toml(line_updater)
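Aside: to make the new helpers concrete, here is a hedged usage sketch of the `get_latest_stable_version` regex and the expected `dict_to_toml_line` output; the `cargo info` output line below is a made-up sample in the format the script's comment describes:

```python
import re

# Sample "cargo info lance" version line, per the comment in the script.
version_line = "version: 0.35.0 (latest 0.37.0)"
match = re.search(r'\(latest ([0-9.]+)\)', version_line)
print(match.group(1))                                 # -> 0.37.0

# Fallback branch when there is no "(latest ...)" suffix:
plain = "version: 0.39.0"
print(plain.split("version:")[1].split()[0].strip())  # -> 0.39.0

# Expected dict_to_toml_line output for a typical config dict:
#   dict_to_toml_line("lance-io", {"version": "=0.39.0", "default-features": False})
#   -> 'lance-io = { "version" = "=0.39.0", default-features = false }\n'
```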
@@ -15,16 +15,13 @@ cargo metadata --quiet > /dev/null
 pushd nodejs || exit 1
 npm install --package-lock-only --silent
 popd
-pushd node || exit 1
-npm install --package-lock-only --silent
-popd
 
 if git diff --quiet --exit-code; then
     echo "No lockfile changes to commit; skipping amend."
 elif $AMEND; then
-    git add Cargo.lock nodejs/package-lock.json node/package-lock.json
+    git add Cargo.lock nodejs/package-lock.json
     git commit --amend --no-edit
 else
-    git add Cargo.lock nodejs/package-lock.json node/package-lock.json
+    git add Cargo.lock nodejs/package-lock.json
     git commit -m "Update lockfiles"
 fi
docs/mkdocs.yml (275 changed lines)
@@ -70,6 +70,23 @@ plugins:
   - mkdocs-jupyter
   - render_swagger:
       allow_arbitrary_locations: true
+  - redirects:
+      redirect_maps:
+        # Redirect the home page and other top-level markdown files. This enables maximum SEO benefit;
+        # other sub-pages are handled by the injected JS in overrides/partials/header.html.
+        'index.md': 'https://lancedb.com/docs/'
+        'guides/tables.md': 'https://lancedb.com/docs/tables/'
+        'ann_indexes.md': 'https://lancedb.com/docs/indexing/'
+        'basic.md': 'https://lancedb.com/docs/quickstart/'
+        'faq.md': 'https://lancedb.com/docs/faq/'
+        'embeddings/understanding_embeddings.md': 'https://lancedb.com/docs/embedding/'
+        'integrations.md': 'https://lancedb.com/docs/integrations/'
+        'examples.md': 'https://lancedb.com/docs/tutorials/'
+        'concepts/vector_search.md': 'https://lancedb.com/docs/search/vector-search/'
+        'troubleshooting.md': 'https://lancedb.com/docs/troubleshooting/'
+        'guides/storage.md': 'https://lancedb.com/docs/storage/integrations'

 markdown_extensions:
   - admonition
@@ -103,6 +120,264 @@ markdown_extensions:
       permalink: ""

 nav:
   - Home:
       - LanceDB: index.md
       - 🏃🏼♂️ Quick start: basic.md
       - 📚 Concepts:
           - Vector search: concepts/vector_search.md
           - Indexing:
               - IVFPQ: concepts/index_ivfpq.md
               - HNSW: concepts/index_hnsw.md
           - Storage: concepts/storage.md
           - Data management: concepts/data_management.md
       - 🔨 Guides:
           - Working with tables: guides/tables.md
           - Building a vector index: ann_indexes.md
           - Vector Search: search.md
           - Full-text search (native): fts.md
           - Full-text search (tantivy-based): fts_tantivy.md
           - Building a scalar index: guides/scalar_index.md
           - Hybrid search:
               - Overview: hybrid_search/hybrid_search.md
               - Comparing Rerankers: hybrid_search/eval.md
               - Airbnb financial data example: notebooks/hybrid_search.ipynb
           - Late interaction with MultiVector search:
               - Overview: guides/multi-vector.md
               - Example: notebooks/Multivector_on_LanceDB.ipynb
           - RAG:
               - Vanilla RAG: rag/vanilla_rag.md
               - Multi-head RAG: rag/multi_head_rag.md
               - Corrective RAG: rag/corrective_rag.md
               - Agentic RAG: rag/agentic_rag.md
               - Graph RAG: rag/graph_rag.md
               - Self RAG: rag/self_rag.md
               - Adaptive RAG: rag/adaptive_rag.md
               - SFR RAG: rag/sfr_rag.md
               - Advanced Techniques:
                   - HyDE: rag/advanced_techniques/hyde.md
                   - FLARE: rag/advanced_techniques/flare.md
           - Reranking:
               - Quickstart: reranking/index.md
               - Cohere Reranker: reranking/cohere.md
               - Linear Combination Reranker: reranking/linear_combination.md
               - Reciprocal Rank Fusion Reranker: reranking/rrf.md
               - Cross Encoder Reranker: reranking/cross_encoder.md
               - ColBERT Reranker: reranking/colbert.md
               - Jina Reranker: reranking/jina.md
               - OpenAI Reranker: reranking/openai.md
               - AnswerDotAi Rerankers: reranking/answerdotai.md
               - Voyage AI Rerankers: reranking/voyageai.md
               - Building Custom Rerankers: reranking/custom_reranker.md
               - Example: notebooks/lancedb_reranking.ipynb
           - Filtering: sql.md
           - Versioning & Reproducibility:
               - sync API: notebooks/reproducibility.ipynb
               - async API: notebooks/reproducibility_async.ipynb
           - Configuring Storage: guides/storage.md
           - Migration Guide: migration.md
           - Tuning retrieval performance:
               - Choosing right query type: guides/tuning_retrievers/1_query_types.md
               - Reranking: guides/tuning_retrievers/2_reranking.md
               - Embedding fine-tuning: guides/tuning_retrievers/3_embed_tuning.md
       - 🧬 Managing embeddings:
           - Understand Embeddings: embeddings/understanding_embeddings.md
           - Get Started: embeddings/index.md
           - Embedding functions: embeddings/embedding_functions.md
           - Available models:
               - Overview: embeddings/default_embedding_functions.md
               - Text Embedding Functions:
                   - Sentence Transformers: embeddings/available_embedding_models/text_embedding_functions/sentence_transformers.md
                   - Huggingface Embedding Models: embeddings/available_embedding_models/text_embedding_functions/huggingface_embedding.md
                   - Ollama Embeddings: embeddings/available_embedding_models/text_embedding_functions/ollama_embedding.md
                   - OpenAI Embeddings: embeddings/available_embedding_models/text_embedding_functions/openai_embedding.md
                   - Instructor Embeddings: embeddings/available_embedding_models/text_embedding_functions/instructor_embedding.md
                   - Gemini Embeddings: embeddings/available_embedding_models/text_embedding_functions/gemini_embedding.md
                   - Cohere Embeddings: embeddings/available_embedding_models/text_embedding_functions/cohere_embedding.md
                   - Jina Embeddings: embeddings/available_embedding_models/text_embedding_functions/jina_embedding.md
                   - AWS Bedrock Text Embedding Functions: embeddings/available_embedding_models/text_embedding_functions/aws_bedrock_embedding.md
                   - IBM watsonx.ai Embeddings: embeddings/available_embedding_models/text_embedding_functions/ibm_watsonx_ai_embedding.md
                   - Voyage AI Embeddings: embeddings/available_embedding_models/text_embedding_functions/voyageai_embedding.md
               - Multimodal Embedding Functions:
                   - OpenClip embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/openclip_embedding.md
                   - Imagebind embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/imagebind_embedding.md
                   - Jina Embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/jina_multimodal_embedding.md
           - User-defined embedding functions: embeddings/custom_embedding_function.md
           - Variables and secrets: embeddings/variables_and_secrets.md
           - "Example: Multi-lingual semantic search": notebooks/multi_lingual_example.ipynb
           - "Example: MultiModal CLIP Embeddings": notebooks/DisappearingEmbeddingFunction.ipynb
       - 🔌 Integrations:
           - Tools and data formats: integrations/index.md
           - Pandas and PyArrow: python/pandas_and_pyarrow.md
           - Polars: python/polars_arrow.md
           - DuckDB: python/duckdb.md
           - Datafusion: python/datafusion.md
           - LangChain:
               - LangChain 🔗: integrations/langchain.md
               - LangChain demo: notebooks/langchain_demo.ipynb
               - LangChain JS/TS 🔗: https://js.langchain.com/docs/integrations/vectorstores/lancedb
           - LlamaIndex 🦙:
               - LlamaIndex docs: integrations/llamaIndex.md
               - LlamaIndex demo: notebooks/llamaIndex_demo.ipynb
           - Pydantic: python/pydantic.md
           - Voxel51: integrations/voxel51.md
           - PromptTools: integrations/prompttools.md
           - dlt: integrations/dlt.md
           - phidata: integrations/phidata.md
           - Genkit: integrations/genkit.md
       - 🎯 Examples:
           - Overview: examples/index.md
           - 🐍 Python:
               - Overview: examples/examples_python.md
               - Build From Scratch: examples/python_examples/build_from_scratch.md
               - Multimodal: examples/python_examples/multimodal.md
               - Rag: examples/python_examples/rag.md
               - Vector Search: examples/python_examples/vector_search.md
               - Chatbot: examples/python_examples/chatbot.md
               - Evaluation: examples/python_examples/evaluations.md
               - AI Agent: examples/python_examples/aiagent.md
               - Recommender System: examples/python_examples/recommendersystem.md
               - Miscellaneous:
                   - Serverless QA Bot with S3 and Lambda: examples/serverless_lancedb_with_s3_and_lambda.md
                   - Serverless QA Bot with Modal: examples/serverless_qa_bot_with_modal_and_langchain.md
           - 👾 JavaScript:
               - Overview: examples/examples_js.md
               - Serverless Website Chatbot: examples/serverless_website_chatbot.md
               - YouTube Transcript Search: examples/youtube_transcript_bot_with_nodejs.md
               - TransformersJS Embedding Search: examples/transformerjs_embedding_search_nodejs.md
           - 🦀 Rust:
               - Overview: examples/examples_rust.md
       - 📓 Studies:
           - ↗Improve retrievers with hybrid search and reranking: https://blog.lancedb.com/hybrid-search-and-reranking-report/
       - 💭 FAQs: faq.md
       - 🔍 Troubleshooting: troubleshooting.md
       - ⚙️ API reference:
           - 🐍 Python: python/python.md
           - 👾 JavaScript (vectordb): javascript/modules.md
           - 👾 JavaScript (lancedb): js/globals.md
           - 🦀 Rust: https://docs.rs/lancedb/latest/lancedb/

   - Quick start: basic.md
   - Concepts:
       - Vector search: concepts/vector_search.md
       - Indexing:
           - IVFPQ: concepts/index_ivfpq.md
           - HNSW: concepts/index_hnsw.md
       - Storage: concepts/storage.md
       - Data management: concepts/data_management.md
   - Guides:
       - Working with tables: guides/tables.md
       - Working with SQL: guides/sql_querying.md
       - Building an ANN index: ann_indexes.md
       - Vector Search: search.md
       - Full-text search (native): fts.md
       - Full-text search (tantivy-based): fts_tantivy.md
       - Building a scalar index: guides/scalar_index.md
       - Hybrid search:
           - Overview: hybrid_search/hybrid_search.md
           - Comparing Rerankers: hybrid_search/eval.md
           - Airbnb financial data example: notebooks/hybrid_search.ipynb
       - Late interaction with MultiVector search:
           - Overview: guides/multi-vector.md
           - Document search Example: notebooks/Multivector_on_LanceDB.ipynb
       - RAG:
           - Vanilla RAG: rag/vanilla_rag.md
           - Multi-head RAG: rag/multi_head_rag.md
           - Corrective RAG: rag/corrective_rag.md
           - Agentic RAG: rag/agentic_rag.md
           - Graph RAG: rag/graph_rag.md
           - Self RAG: rag/self_rag.md
           - Adaptive RAG: rag/adaptive_rag.md
           - SFR RAG: rag/sfr_rag.md
           - Advanced Techniques:
               - HyDE: rag/advanced_techniques/hyde.md
               - FLARE: rag/advanced_techniques/flare.md
       - Reranking:
           - Quickstart: reranking/index.md
           - Cohere Reranker: reranking/cohere.md
           - Linear Combination Reranker: reranking/linear_combination.md
           - Reciprocal Rank Fusion Reranker: reranking/rrf.md
           - Cross Encoder Reranker: reranking/cross_encoder.md
           - ColBERT Reranker: reranking/colbert.md
           - Jina Reranker: reranking/jina.md
           - OpenAI Reranker: reranking/openai.md
           - AnswerDotAi Rerankers: reranking/answerdotai.md
           - Building Custom Rerankers: reranking/custom_reranker.md
           - Example: notebooks/lancedb_reranking.ipynb
       - Filtering: sql.md
       - Versioning & Reproducibility:
           - sync API: notebooks/reproducibility.ipynb
           - async API: notebooks/reproducibility_async.ipynb
       - Configuring Storage: guides/storage.md
       - Migration Guide: migration.md
       - Tuning retrieval performance:
           - Choosing right query type: guides/tuning_retrievers/1_query_types.md
           - Reranking: guides/tuning_retrievers/2_reranking.md
           - Embedding fine-tuning: guides/tuning_retrievers/3_embed_tuning.md
   - Managing Embeddings:
       - Understand Embeddings: embeddings/understanding_embeddings.md
       - Get Started: embeddings/index.md
       - Embedding functions: embeddings/embedding_functions.md
       - Available models:
           - Overview: embeddings/default_embedding_functions.md
           - Text Embedding Functions:
               - Sentence Transformers: embeddings/available_embedding_models/text_embedding_functions/sentence_transformers.md
               - Huggingface Embedding Models: embeddings/available_embedding_models/text_embedding_functions/huggingface_embedding.md
               - Ollama Embeddings: embeddings/available_embedding_models/text_embedding_functions/ollama_embedding.md
               - OpenAI Embeddings: embeddings/available_embedding_models/text_embedding_functions/openai_embedding.md
               - Instructor Embeddings: embeddings/available_embedding_models/text_embedding_functions/instructor_embedding.md
               - Gemini Embeddings: embeddings/available_embedding_models/text_embedding_functions/gemini_embedding.md
               - Cohere Embeddings: embeddings/available_embedding_models/text_embedding_functions/cohere_embedding.md
               - Jina Embeddings: embeddings/available_embedding_models/text_embedding_functions/jina_embedding.md
               - AWS Bedrock Text Embedding Functions: embeddings/available_embedding_models/text_embedding_functions/aws_bedrock_embedding.md
               - IBM watsonx.ai Embeddings: embeddings/available_embedding_models/text_embedding_functions/ibm_watsonx_ai_embedding.md
           - Multimodal Embedding Functions:
               - OpenClip embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/openclip_embedding.md
               - Imagebind embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/imagebind_embedding.md
               - Jina Embeddings: embeddings/available_embedding_models/multimodal_embedding_functions/jina_multimodal_embedding.md
       - User-defined embedding functions: embeddings/custom_embedding_function.md
       - Variables and secrets: embeddings/variables_and_secrets.md
       - "Example: Multi-lingual semantic search": notebooks/multi_lingual_example.ipynb
       - "Example: MultiModal CLIP Embeddings": notebooks/DisappearingEmbeddingFunction.ipynb
   - Integrations:
       - Overview: integrations/index.md
       - Pandas and PyArrow: python/pandas_and_pyarrow.md
       - Polars: python/polars_arrow.md
       - DuckDB: python/duckdb.md
       - Datafusion: python/datafusion.md
       - LangChain 🦜️🔗↗: integrations/langchain.md
       - LangChain.js 🦜️🔗↗: https://js.langchain.com/docs/integrations/vectorstores/lancedb
       - LlamaIndex 🦙↗: integrations/llamaIndex.md
       - Pydantic: python/pydantic.md
       - Voxel51: integrations/voxel51.md
       - PromptTools: integrations/prompttools.md
       - dlt: integrations/dlt.md
       - phidata: integrations/phidata.md
       - Genkit: integrations/genkit.md
   - Examples:
       - examples/index.md
       - 🐍 Python:
           - Overview: examples/examples_python.md
           - Build From Scratch: examples/python_examples/build_from_scratch.md
           - Multimodal: examples/python_examples/multimodal.md
           - Rag: examples/python_examples/rag.md
           - Vector Search: examples/python_examples/vector_search.md
           - Chatbot: examples/python_examples/chatbot.md
           - Evaluation: examples/python_examples/evaluations.md
           - AI Agent: examples/python_examples/aiagent.md
           - Recommender System: examples/python_examples/recommendersystem.md
           - Miscellaneous:
               - Serverless QA Bot with S3 and Lambda: examples/serverless_lancedb_with_s3_and_lambda.md
               - Serverless QA Bot with Modal: examples/serverless_qa_bot_with_modal_and_langchain.md
       - 👾 JavaScript:
           - Overview: examples/examples_js.md
           - Serverless Website Chatbot: examples/serverless_website_chatbot.md
           - YouTube Transcript Search: examples/youtube_transcript_bot_with_nodejs.md
           - TransformersJS Embedding Search: examples/transformerjs_embedding_search_nodejs.md
       - 🦀 Rust:
           - Overview: examples/examples_rust.md
   - Studies:
       - studies/overview.md
       - ↗Improve retrievers with hybrid search and reranking: https://blog.lancedb.com/hybrid-search-and-reranking-report/
   - API reference:
       - Overview: api_reference.md
       - Python: python/python.md
@@ -19,7 +19,13 @@
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
-->

<div id="deprecation-banner" style="background-color: #f8d7da; color: #721c24; padding: 1em; text-align: center;">
  <p style="margin: 0; font-size: 1.1em;">
    <strong>This documentation site is deprecated.</strong>
    Please visit our new documentation site at <a href="https://lancedb.com/docs" style="color: #721c24; text-decoration: underline;">
    lancedb.com/docs</a> for the latest information.
  </p>
</div>
{% set class = "md-header" %}
{% if "navigation.tabs.sticky" in features %}
  {% set class = class ~ " md-header--shadow md-header--lifted" %}

@@ -150,9 +156,9 @@

<div style="margin-left: 10px; margin-right: 5px;">
  <a href="https://discord.com/invite/zMM32dvNtd" target="_blank" rel="noopener noreferrer">
<svg fill="#FFFFFF" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 50 50" width="25px" height="25px"><path d="M 41.625 10.769531 C 37.644531 7.566406 31.347656 7.023438 31.078125 7.003906 C 30.660156 6.96875 30.261719 7.203125 30.089844 7.589844 C 30.074219 7.613281 29.9375 7.929688 29.785156 8.421875 C 32.417969 8.867188 35.652344 9.761719 38.578125 11.578125 C 39.046875 11.867188 39.191406 12.484375 38.902344 12.953125 C 38.710938 13.261719 38.386719 13.429688 38.050781 13.429688 C 37.871094 13.429688 37.6875 13.378906 37.523438 13.277344 C 32.492188 10.15625 26.210938 10 25 10 C 23.789063 10 17.503906 10.15625 12.476563 13.277344 C 12.007813 13.570313 11.390625 13.425781 11.101563 12.957031 C 10.808594 12.484375 10.953125 11.871094 11.421875 11.578125 C 14.347656 9.765625 17.582031 8.867188 20.214844 8.425781 C 20.0625 7.929688 19.925781 7.617188 19.914063 7.589844 C 19.738281 7.203125 19.34375 6.960938 18.921875 7.003906 C 18.652344 7.023438 12.355469 7.566406 8.320313 10.8125 C 6.214844 12.761719 2 24.152344 2 34 C 2 34.175781 2.046875 34.34375 2.132813 34.496094 C 5.039063 39.605469 12.972656 40.941406 14.78125 41 C 14.789063 41 14.800781 41 14.8125 41 C 15.132813 41 15.433594 40.847656 15.621094 40.589844 L 17.449219 38.074219 C 12.515625 36.800781 9.996094 34.636719 9.851563 34.507813 C 9.4375 34.144531 9.398438 33.511719 9.765625 33.097656 C 10.128906 32.683594 10.761719 32.644531 11.175781 33.007813 C 11.234375 33.0625 15.875 37 25 37 C 34.140625 37 38.78125 33.046875 38.828125 33.007813 C 39.242188 32.648438 39.871094 32.683594 40.238281 33.101563 C 40.601563 33.515625 40.5625 34.144531 40.148438 34.507813 C 40.003906 34.636719 37.484375 36.800781 32.550781 38.074219 L 34.378906 40.589844 C 34.566406 40.847656 34.867188 41 35.1875 41 C 35.199219 41 35.210938 41 35.21875 41 C 37.027344 40.941406 44.960938 39.605469 47.867188 34.496094 C 47.953125 34.34375 48 34.175781 48 34 C 48 24.152344 43.785156 12.761719 41.625 10.769531 Z M 18.5 30 C 16.566406 30 15 28.210938 15 26 C 15 23.789063 16.566406 22 18.5 22 C 20.433594 22 22 23.789063 22 26 C 22 28.210938 20.433594 30 18.5 30 Z M 31.5 30 C 29.566406 30 28 28.210938 28 26 C 28 23.789063 29.566406 22 31.5 22 C 33.433594 22 35 23.789063 35 26 C 35 28.210938 33.433594 30 31.5 30 Z"/></svg>
  </a>
</div>
<div style="margin-left: 5px; margin-right: 5px;">
  <a href="https://twitter.com/lancedb" target="_blank" rel="noopener noreferrer">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0,0,256,256" width="25px" height="25px" fill-rule="nonzero"><g fill-opacity="0" fill="#ffffff" fill-rule="nonzero" stroke="none" stroke-width="1" stroke-linecap="butt" stroke-linejoin="miter" stroke-miterlimit="10" stroke-dasharray="" stroke-dashoffset="0" font-family="none" font-weight="none" font-size="none" text-anchor="none" style="mix-blend-mode: normal"><path d="M0,256v-256h256v256z" id="bgRectangle"></path></g><g fill="#ffffff" fill-rule="nonzero" stroke="none" stroke-width="1" stroke-linecap="butt" stroke-linejoin="miter" stroke-miterlimit="10" stroke-dasharray="" stroke-dashoffset="0" font-family="none" font-weight="none" font-size="none" text-anchor="none" style="mix-blend-mode: normal"><g transform="scale(4,4)"><path d="M57,17.114c-1.32,1.973 -2.991,3.707 -4.916,5.097c0.018,0.423 0.028,0.847 0.028,1.274c0,13.013 -9.902,28.018 -28.016,28.018c-5.562,0 -12.81,-1.948 -15.095,-4.423c0.772,0.092 1.556,0.138 2.35,0.138c4.615,0 8.861,-1.575 12.23,-4.216c-4.309,-0.079 -7.946,-2.928 -9.199,-6.84c1.96,0.308 4.447,-0.17 4.447,-0.17c0,0 -7.7,-1.322 -7.899,-9.779c2.226,1.291 4.46,1.231 4.46,1.231c0,0 -4.441,-2.734 -4.379,-8.195c0.037,-3.221 1.331,-4.953 1.331,-4.953c8.414,10.361 20.298,10.29 20.298,10.29c0,0 -0.255,-1.471 -0.255,-2.243c0,-5.437 4.408,-9.847 9.847,-9.847c2.832,0 5.391,1.196 7.187,3.111c2.245,-0.443 4.353,-1.263 6.255,-2.391c-0.859,3.44 -4.329,5.448 -4.329,5.448c0,0 2.969,-0.329 5.655,-1.55z"></path></g></g></svg>

@@ -173,4 +179,77 @@
{% include "partials/tabs.html" %}
{% endif %}
{% endif %}
</header>

<script>
(function() {
  function checkPathAndRedirect() {
    var banner = document.getElementById('deprecation-banner');

    if (document.querySelector('meta[http-equiv="refresh"]')) {
      return; // The redirects plugin is already handling this page.
    }

    var currentPath = window.location.pathname;

    var cleanPath = currentPath.endsWith('/') && currentPath.length > 1
      ? currentPath.slice(0, -1)
      : currentPath;

    // These are the ONLY paths that should remain on the old site
    var apiPaths = [
      '/lancedb/python',
      '/lancedb/javascript',
      '/lancedb/js',
      '/lancedb/api_reference'
    ];

    var isApiPage = apiPaths.some(function(apiPath) {
      return cleanPath.startsWith(apiPath);
    });

    if (isApiPage) {
      if (banner) {
        banner.style.display = 'none';
      }
    } else {
      if (banner) {
        banner.style.display = 'block';
      }

      // Add a noindex meta tag so search engines stop indexing the old docs.
      var noindexMeta = document.createElement('meta');
      noindexMeta.setAttribute('name', 'robots');
      noindexMeta.setAttribute('content', 'noindex, follow');
      document.head.appendChild(noindexMeta);

      // Add a canonical link pointing to the new docs so they receive the SEO credit.
      var canonicalLink = document.createElement('link');
      canonicalLink.setAttribute('rel', 'canonical');
      canonicalLink.setAttribute('href', 'https://lancedb.com/docs');
      document.head.appendChild(canonicalLink);

      window.location.replace('https://lancedb.com/docs');
    }
  }

  // Run the check as soon as the document is ready so the initial page load
  // is caught and redirected.
  if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', checkPathAndRedirect);
  } else {
    checkPathAndRedirect();
  }

  // Use an interval to handle subsequent navigation clicks.
  var lastPath = window.location.pathname;
  setInterval(function() {
    if (window.location.pathname !== lastPath) {
      lastPath = window.location.pathname;
      checkPathAndRedirect();
    }
  }, 2000); // poll every 2 seconds so the user can briefly see what's happening

})();
</script>
@@ -5,3 +5,4 @@ mkdocstrings[python]==0.25.2
griffe
mkdocs-render-swagger-plugin
pydantic
mkdocs-redirects
@@ -13,7 +13,7 @@ The following concepts are important to keep in mind:
- Data is versioned, with each insert operation creating a new version of the dataset and an update to the manifest that tracks versions via metadata

!!! note
    1. First, each version contains metadata and just the new/updated data in your transaction. So if you have 100 versions, they aren't 100 duplicates of the same data. However, they do have 100x the metadata overhead of a single version, which can result in slower queries.
    2. Second, these versions exist to keep LanceDB scalable and consistent. We do not immediately blow away old versions when creating new ones because other clients might be in the middle of querying the old version. It's important to retain older versions for as long as they might be queried.

## What are fragments?

@@ -37,6 +37,10 @@ Depending on the use case and dataset, optimal compaction will have different re
- It’s always better to use *batch* inserts rather than adding 1 row at a time (to avoid overly small fragments). If single-row inserts are unavoidable, run compaction on a regular basis to merge them into larger fragments, as shown in the sketch after this note.
- Keep the number of fragments under 100, which is suitable for most use cases (for *really* large datasets of >500M rows, more fragments might be needed)

!!! note

    LanceDB Cloud/Enterprise supports [auto-compaction](https://docs.lancedb.com/enterprise/architecture/architecture#write-path) which automatically optimizes fragments in the background as data changes.
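
A minimal TypeScript sketch of a manual compaction pass, assuming an already-opened `table` handle from the `@lancedb/lancedb` client (`optimize()` merges small fragments and updates indices in one call):

```ts
// Hedged sketch: merge small fragments created by many single-row inserts.
// Assumes `table` is an open LanceDB table handle.
await table.optimize();
```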

## Deletion

Although Lance allows you to delete rows from a dataset, it does not actually delete the data immediately. It simply marks the row as deleted in the `DataFile` that represents a fragment. For a given version of the dataset, each fragment can have up to one deletion file (if no rows were ever deleted from that fragment, it will not have a deletion file). This is important to keep in mind because it means that the data is still there, and can be recovered if needed, as long as that version still exists based on your backup policy.
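
For example, a delete only writes a deletion file for the affected fragments rather than rewriting data (a minimal sketch, assuming an open `table` with an `id` column):

```ts
// Hedged sketch: rows matching the predicate are marked deleted, not rewritten.
await table.delete("id = 5");
```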

@@ -50,13 +54,9 @@ Reindexing is the process of updating the index to account for new data, keeping

Both LanceDB OSS and Cloud support reindexing, but the process (at least for now) is different for each, depending on the type of index.

When a reindex job is triggered in the background, the entire data is reindexed, but in the interim as new queries come in, LanceDB will combine results from the existing index with exhaustive kNN search on the new data. This is done to ensure that you're still searching on all your data, but it does come at a performance cost. The more data you add without reindexing, the more noticeable the latency impact (due to exhaustive search) becomes.
In LanceDB OSS, re-indexing happens synchronously when you call either `create_index` or `optimize` on a table. In LanceDB Cloud, re-indexing happens asynchronously as you add and update data in your table.

### Vector reindex
By default, queries will search new data even if it has yet to be indexed. This is done using brute-force methods, such as kNN for vector search, and combined with the fast index search results. This ensures that you're always searching over all your data, but it does come at a performance cost. Without reindexing, adding more data to a table will make queries slower and more expensive. This behavior can be disabled by setting the [fast_search](https://lancedb.github.io/lancedb/python/python/#lancedb.query.AsyncQuery.fast_search) parameter, which instructs the query to ignore un-indexed data.

* LanceDB Cloud supports incremental reindexing, where a background process will trigger a new index build for you automatically when new data is added to a dataset
* LanceDB Cloud/Enterprise supports [automatic incremental reindexing](https://docs.lancedb.com/core#vector-index) for vector, scalar, and FTS indices, where a background process will trigger a new index build for you automatically when new data is added or modified in a dataset
* LanceDB OSS requires you to manually trigger a reindex operation (see the sketch below) -- we are working on adding incremental reindexing to LanceDB OSS as well
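
A minimal sketch of the manual path in LanceDB OSS (assuming an open `table` with an existing index; `newRows` is assumed data):

```ts
// Hedged sketch: after adding a batch of rows, fold un-indexed data into
// existing indices by running optimize().
await table.add(newRows);
await table.optimize();
```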

### FTS reindex

FTS reindexing is supported in both LanceDB OSS and Cloud, but the index must be rebuilt manually once a significant amount of new data has been added and needs to be reindexed. We [updated](https://github.com/lancedb/lancedb/pull/762) Tantivy's default heap size from 128MB to 1GB in LanceDB, which makes reindexing up to 10x faster than the default settings.
@@ -0,0 +1,97 @@
# VoyageAI Embeddings: Multimodal

VoyageAI embeddings can also be used to embed both text and image data. Only some of the models support image data; you can check the list
under [https://docs.voyageai.com/docs/multimodal-embeddings](https://docs.voyageai.com/docs/multimodal-embeddings)

Supported parameters (to be passed in the `create` method) are:

| Parameter | Type | Default Value | Description |
|---|---|-------------------------|-------------------------------------------|
| `name` | `str` | `"voyage-multimodal-3"` | The model ID of the VoyageAI model to use |

Usage Example:

```python
import base64
import os
from io import BytesIO

import requests
import lancedb
from lancedb.pydantic import LanceModel, Vector
from lancedb.embeddings import get_registry
import pandas as pd

os.environ['VOYAGE_API_KEY'] = 'YOUR_VOYAGE_API_KEY'

db = lancedb.connect(".lancedb")
func = get_registry().get("voyageai").create(name="voyage-multimodal-3")


def image_to_base64(image_bytes: bytes):
    buffered = BytesIO(image_bytes)
    img_str = base64.b64encode(buffered.getvalue())
    return img_str.decode("utf-8")


class Images(LanceModel):
    label: str
    image_uri: str = func.SourceField()  # image uri as the source
    image_bytes: str = func.SourceField()  # image bytes base64 encoded as the source
    vector: Vector(func.ndims()) = func.VectorField()  # vector column
    vec_from_bytes: Vector(func.ndims()) = func.VectorField()  # another vector column


if "images" in db.table_names():
    db.drop_table("images")
table = db.create_table("images", schema=Images)
labels = ["cat", "cat", "dog", "dog", "horse", "horse"]
uris = [
    "http://farm1.staticflickr.com/53/167798175_7c7845bbbd_z.jpg",
    "http://farm1.staticflickr.com/134/332220238_da527d8140_z.jpg",
    "http://farm9.staticflickr.com/8387/8602747737_2e5c2a45d4_z.jpg",
    "http://farm5.staticflickr.com/4092/5017326486_1f46057f5f_z.jpg",
    "http://farm9.staticflickr.com/8216/8434969557_d37882c42d_z.jpg",
    "http://farm6.staticflickr.com/5142/5835678453_4f3a4edb45_z.jpg",
]
# get each uri as bytes
images_bytes = [image_to_base64(requests.get(uri).content) for uri in uris]
table.add(
    pd.DataFrame({"label": labels, "image_uri": uris, "image_bytes": images_bytes})
)
```
Now we can search with text against both the default vector column and the custom vector column:
```python
# text search against the default vector column
actual = table.search("man's best friend").limit(1).to_pydantic(Images)[0]
print(actual.label)  # prints "dog"

# text search against the custom vector column
frombytes = (
    table.search("man's best friend", vector_column_name="vec_from_bytes")
    .limit(1)
    .to_pydantic(Images)[0]
)
print(frombytes.label)
```
Because we're using a multimodal embedding function, we can also search using images:

```python
from PIL import Image  # needed to open the query image

# image search against the default vector column
query_image_uri = "http://farm1.staticflickr.com/200/467715466_ed4a31801f_z.jpg"
image_bytes = requests.get(query_image_uri).content
query_image = Image.open(BytesIO(image_bytes))
actual = table.search(query_image).limit(1).to_pydantic(Images)[0]
print(actual.label == "dog")

# image search using the custom vector column
other = (
    table.search(query_image, vector_column_name="vec_from_bytes")
    .limit(1)
    .to_pydantic(Images)[0]
)
print(other.label)
```
@@ -397,117 +397,6 @@ For **read-only access**, LanceDB will need a policy such as:
}
```

#### DynamoDB Commit Store for concurrent writes

By default, S3 does not support concurrent writes. Having two or more processes
writing to the same table at the same time can lead to data corruption. This is
because S3, unlike other object stores, does not have any atomic put or copy
operation.

To enable concurrent writes, you can configure LanceDB to use a DynamoDB table
as a commit store. This table will be used to coordinate writes between
different processes. To enable this feature, you must modify your connection
URI to use the `s3+ddb` scheme and add a query parameter `ddbTableName` with the
name of the table to use.

=== "Python"

    === "Sync API"

        ```python
        import lancedb
        db = lancedb.connect(
            "s3+ddb://bucket/path?ddbTableName=my-dynamodb-table",
        )
        ```
    === "Async API"

        ```python
        import lancedb
        async_db = await lancedb.connect_async(
            "s3+ddb://bucket/path?ddbTableName=my-dynamodb-table",
        )
        ```

=== "JavaScript"

    ```javascript
    const lancedb = require("lancedb");

    const db = await lancedb.connect(
        "s3+ddb://bucket/path?ddbTableName=my-dynamodb-table",
    );
    ```

The DynamoDB table must be created with the following schema:

- Hash key: `base_uri` (string)
- Range key: `version` (number)

You can create this programmatically with:

=== "Python"

    <!-- skip-test -->
    ```python
    import boto3

    dynamodb = boto3.client("dynamodb")
    table = dynamodb.create_table(
        TableName=table_name,
        KeySchema=[
            {"AttributeName": "base_uri", "KeyType": "HASH"},
            {"AttributeName": "version", "KeyType": "RANGE"},
        ],
        AttributeDefinitions=[
            {"AttributeName": "base_uri", "AttributeType": "S"},
            {"AttributeName": "version", "AttributeType": "N"},
        ],
        ProvisionedThroughput={"ReadCapacityUnits": 1, "WriteCapacityUnits": 1},
    )
    ```

=== "JavaScript"

    <!-- skip-test -->
    ```javascript
    import {
      CreateTableCommand,
      DynamoDBClient,
    } from "@aws-sdk/client-dynamodb";

    const dynamodb = new DynamoDBClient({
      region: CONFIG.awsRegion,
      credentials: {
        accessKeyId: CONFIG.awsAccessKeyId,
        secretAccessKey: CONFIG.awsSecretAccessKey,
      },
      endpoint: CONFIG.awsEndpoint,
    });
    const command = new CreateTableCommand({
      TableName: table_name,
      AttributeDefinitions: [
        {
          AttributeName: "base_uri",
          AttributeType: "S",
        },
        {
          AttributeName: "version",
          AttributeType: "N",
        },
      ],
      KeySchema: [
        { AttributeName: "base_uri", KeyType: "HASH" },
        { AttributeName: "version", KeyType: "RANGE" },
      ],
      ProvisionedThroughput: {
        ReadCapacityUnits: 1,
        WriteCapacityUnits: 1,
      },
    });
    // send the command with the client created above
    await dynamodb.send(command);
    ```


#### S3-compatible stores

@@ -25,6 +25,51 @@ the underlying connection has been closed.

## Methods

### cloneTable()

```ts
abstract cloneTable(
  targetTableName,
  sourceUri,
  options?): Promise<Table>
```

Clone a table from a source table.

A shallow clone creates a new table that shares the underlying data files
with the source table but has its own independent manifest. This allows
both the source and cloned tables to evolve independently while initially
sharing the same data, deletion, and index files.

#### Parameters

* **targetTableName**: `string`
    The name of the target table to create.

* **sourceUri**: `string`
    The URI of the source table to clone from.

* **options?**
    Clone options.

* **options.isShallow?**: `boolean`
    Whether to perform a shallow clone (defaults to true).

* **options.sourceTag?**: `string`
    The tag of the source table to clone.

* **options.sourceVersion?**: `number`
    The version of the source table to clone.

* **options.targetNamespace?**: `string`[]
    The namespace for the target table (defaults to root namespace).

#### Returns

`Promise`<[`Table`](Table.md)>
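
A hedged usage sketch (the `db` handle, the source URI, and the `"v1"` tag below are illustrative assumptions, not values from this page):

```ts
// Hedged sketch: shallow-clone a table from a source table URI at a tag.
const clone = await db.cloneTable("my_table_clone", "s3://bucket/source.lance", {
  isShallow: true,
  sourceTag: "v1",
});
```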

***

### close()

```ts

@@ -45,6 +90,8 @@ Any attempt to use the connection after it is closed will result in an error.

### createEmptyTable()

#### createEmptyTable(name, schema, options)

```ts
abstract createEmptyTable(
  name,

@@ -54,7 +101,7 @@ abstract createEmptyTable(

Creates a new empty Table

#### Parameters
##### Parameters

* **name**: `string`
    The name of the table.

@@ -63,8 +110,39 @@ Creates a new empty Table
    The schema of the table

* **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
    Additional options (backwards compatibility)

#### Returns
##### Returns

`Promise`<[`Table`](Table.md)>

#### createEmptyTable(name, schema, namespace, options)

```ts
abstract createEmptyTable(
  name,
  schema,
  namespace?,
  options?): Promise<Table>
```

Creates a new empty Table

##### Parameters

* **name**: `string`
    The name of the table.

* **schema**: [`SchemaLike`](../type-aliases/SchemaLike.md)
    The schema of the table

* **namespace?**: `string`[]
    The namespace to create the table in (defaults to root namespace)

* **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
    Additional options

##### Returns

`Promise`<[`Table`](Table.md)>

@@ -72,10 +150,10 @@ Creates a new empty Table

### createTable()

#### createTable(options)
#### createTable(options, namespace)

```ts
abstract createTable(options): Promise<Table>
abstract createTable(options, namespace?): Promise<Table>
```

Creates a new Table and initializes it with new data.

@@ -85,6 +163,9 @@ Creates a new Table and initialize it with new data.
* **options**: `object` & `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
    The options object.

* **namespace?**: `string`[]
    The namespace to create the table in (defaults to root namespace)

##### Returns

`Promise`<[`Table`](Table.md)>

@@ -110,6 +191,38 @@ Creates a new Table and initialize it with new data.
    to be inserted into the table

* **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
    Additional options (backwards compatibility)

##### Returns

`Promise`<[`Table`](Table.md)>

#### createTable(name, data, namespace, options)

```ts
abstract createTable(
  name,
  data,
  namespace?,
  options?): Promise<Table>
```

Creates a new Table and initializes it with new data.

##### Parameters

* **name**: `string`
    The name of the table.

* **data**: [`TableLike`](../type-aliases/TableLike.md) \| `Record`<`string`, `unknown`>[]
    Non-empty Array of Records to be inserted into the table

* **namespace?**: `string`[]
    The namespace to create the table in (defaults to root namespace)

* **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
    Additional options

##### Returns

@@ -134,11 +247,16 @@ Return a brief description of the connection

### dropAllTables()

```ts
abstract dropAllTables(): Promise<void>
abstract dropAllTables(namespace?): Promise<void>
```

Drop all tables in the database.

#### Parameters

* **namespace?**: `string`[]
    The namespace to drop tables from (defaults to root namespace).

#### Returns

`Promise`<`void`>

@@ -148,7 +266,7 @@ Drop all tables in the database.

### dropTable()

```ts
abstract dropTable(name): Promise<void>
abstract dropTable(name, namespace?): Promise<void>
```

Drop an existing table.

@@ -158,6 +276,9 @@ Drop an existing table.
* **name**: `string`
    The name of the table to drop.

* **namespace?**: `string`[]
    The namespace of the table (defaults to root namespace).

#### Returns

`Promise`<`void`>

@@ -181,7 +302,10 @@ Return true if the connection has not been closed

### openTable()

```ts
abstract openTable(name, options?): Promise<Table>
abstract openTable(
  name,
  namespace?,
  options?): Promise<Table>
```

Open a table in the database.

@@ -191,7 +315,11 @@ Open a table in the database.
* **name**: `string`
    The name of the table

* **namespace?**: `string`[]
    The namespace of the table (defaults to root namespace)

* **options?**: `Partial`<[`OpenTableOptions`](../interfaces/OpenTableOptions.md)>
    Additional options

#### Returns

@@ -201,6 +329,8 @@ Open a table in the database.

### tableNames()

#### tableNames(options)

```ts
abstract tableNames(options?): Promise<string[]>
```

@@ -209,12 +339,35 @@ List all the table names in this database.

Tables will be returned in lexicographical order.

#### Parameters
##### Parameters

* **options?**: `Partial`<[`TableNamesOptions`](../interfaces/TableNamesOptions.md)>
    Options to control the paging / start point (backwards compatibility)

##### Returns

`Promise`<`string`[]>

#### tableNames(namespace, options)

```ts
abstract tableNames(namespace?, options?): Promise<string[]>
```

List all the table names in this database.

Tables will be returned in lexicographical order.

##### Parameters

* **namespace?**: `string`[]
    The namespace to list tables from (defaults to root namespace)

* **options?**: `Partial`<[`TableNamesOptions`](../interfaces/TableNamesOptions.md)>
    Options to control the paging / start point

#### Returns
##### Returns

`Promise`<`string`[]>
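
A hedged sketch of the namespace-aware overloads (the `["prod"]` namespace and the table name are illustrative assumptions):

```ts
// Hedged sketch: list and open tables inside a child namespace.
const names = await db.tableNames(["prod"]);
const users = await db.openTable("users", ["prod"]);
```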

docs/src/js/classes/HeaderProvider.md
@@ -0,0 +1,85 @@
[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / HeaderProvider

# Class: `abstract` HeaderProvider

Abstract base class for providing custom headers for each request.

Users can implement this interface to provide dynamic headers for various purposes
such as authentication (OAuth tokens, API keys), request tracking (correlation IDs),
custom metadata, or any other header-based requirements. The provider is called
before each request to ensure fresh header values are always used.

## Examples

Simple JWT token provider:
```typescript
class JWTProvider extends HeaderProvider {
  constructor(private token: string) {
    super();
  }

  getHeaders(): Record<string, string> {
    return { authorization: `Bearer ${this.token}` };
  }
}
```

Provider with request tracking:
```typescript
class RequestTrackingProvider extends HeaderProvider {
  constructor(private sessionId: string) {
    super();
  }

  getHeaders(): Record<string, string> {
    return {
      "X-Session-Id": this.sessionId,
      "X-Request-Id": `req-${Date.now()}`
    };
  }
}
```

## Extended by

- [`StaticHeaderProvider`](StaticHeaderProvider.md)
- [`OAuthHeaderProvider`](OAuthHeaderProvider.md)

## Constructors

### new HeaderProvider()

```ts
new HeaderProvider(): HeaderProvider
```

#### Returns

[`HeaderProvider`](HeaderProvider.md)

## Methods

### getHeaders()

```ts
abstract getHeaders(): Record<string, string>
```

Get the latest headers to be added to requests.

This method is called before each request to the remote LanceDB server.
Implementations should return headers that will be merged with existing headers.

#### Returns

`Record`<`string`, `string`>

Dictionary of header names to values to add to the request.

#### Throws

If unable to fetch headers, the exception will be propagated and the request will fail.
@@ -194,6 +194,37 @@ currently is also a memory intensive operation.

***

### ivfRq()

```ts
static ivfRq(options?): Index
```

Create an IvfRq index

IVF-RQ compresses vectors using RabitQ quantization and organizes them into
IVF partitions.

In RabitQ quantization, each dimension is quantized into a small number of bits.
The `num_bits` parameter controls this compression, providing a tradeoff between
index size (and thus search speed) and index accuracy.

The partitioning process is called IVF, and the `num_partitions` parameter controls
how many groups to create.

Note that training an IVF-RQ index on a large dataset is a slow operation and
currently is also a memory intensive operation.

#### Parameters

* **options?**: `Partial`<[`IvfRqOptions`](../interfaces/IvfRqOptions.md)>

#### Returns

[`Index`](Index.md)
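
A hedged creation sketch (assuming an open `table` with a `vector` column; the `IvfRqOptions` field name below is an assumption drawn from the parameter discussion above, so check the interface docs):

```ts
import { Index } from "@lancedb/lancedb";

// Hedged sketch: build an IVF-RQ index on the "vector" column.
await table.createIndex("vector", {
  config: Index.ivfRq({ numPartitions: 256 }),
});
```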

***

### labelList()

```ts

@@ -52,6 +52,30 @@ the merge result

***

### useIndex()

```ts
useIndex(useIndex): MergeInsertBuilder
```

Controls whether to use indexes for the merge operation.

When set to `true` (the default), the operation will use an index if available
on the join key for improved performance. When set to `false`, it forces a full
table scan even if an index exists. This can be useful for benchmarking or when
the query optimizer chooses a suboptimal path.

#### Parameters

* **useIndex**: `boolean`
    Whether to use indices for the merge operation. Defaults to `true`.

#### Returns

[`MergeInsertBuilder`](MergeInsertBuilder.md)
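
A hedged usage sketch (assuming a table keyed on `id` and an array `newRows` of records to upsert):

```ts
// Hedged sketch: force a full-table scan during the merge, e.g. for benchmarking.
await table
  .mergeInsert("id")
  .useIndex(false)
  .whenMatchedUpdateAll()
  .whenNotMatchedInsertAll()
  .execute(newRows);
```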

***

### whenMatchedUpdateAll()

```ts

docs/src/js/classes/NativeJsHeaderProvider.md
@@ -0,0 +1,29 @@
[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / NativeJsHeaderProvider

# Class: NativeJsHeaderProvider

JavaScript HeaderProvider implementation that wraps a JavaScript callback.
This is the only native header provider - all header provider implementations
should provide a JavaScript function that returns headers.

## Constructors

### new NativeJsHeaderProvider()

```ts
new NativeJsHeaderProvider(getHeadersCallback): NativeJsHeaderProvider
```

Create a new JsHeaderProvider from a JavaScript callback

#### Parameters

* **getHeadersCallback**

#### Returns

[`NativeJsHeaderProvider`](NativeJsHeaderProvider.md)
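
A hedged construction sketch (the callback shape is an assumption: a function returning a `Record<string, string>` of headers, matching `HeaderProvider.getHeaders`):

```ts
// Hedged sketch: wrap a plain function as a native header provider.
const provider = new NativeJsHeaderProvider(() => ({
  authorization: "Bearer my-token",
}));
```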

docs/src/js/classes/OAuthHeaderProvider.md
@@ -0,0 +1,108 @@
[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / OAuthHeaderProvider

# Class: OAuthHeaderProvider

Example implementation: OAuth token provider with automatic refresh.

This is an example implementation showing how to manage OAuth tokens
with automatic refresh when they expire.

## Example

```typescript
async function fetchToken(): Promise<TokenResponse> {
  const response = await fetch("https://oauth.example.com/token", {
    method: "POST",
    body: JSON.stringify({
      grant_type: "client_credentials",
      client_id: "your-client-id",
      client_secret: "your-client-secret"
    }),
    headers: { "Content-Type": "application/json" }
  });
  const data = await response.json();
  return {
    accessToken: data.access_token,
    expiresIn: data.expires_in
  };
}

const provider = new OAuthHeaderProvider(fetchToken);
const headers = provider.getHeaders();
// Returns: {"authorization": "Bearer <your-token>"}
```

## Extends

- [`HeaderProvider`](HeaderProvider.md)

## Constructors

### new OAuthHeaderProvider()

```ts
new OAuthHeaderProvider(tokenFetcher, refreshBufferSeconds): OAuthHeaderProvider
```

Initialize the OAuth provider.

#### Parameters

* **tokenFetcher**
    Function to fetch new tokens. Should return an object with `accessToken` and optionally `expiresIn`.

* **refreshBufferSeconds**: `number` = `300`
    Seconds before expiry to refresh the token. Default 300 (5 minutes).

#### Returns

[`OAuthHeaderProvider`](OAuthHeaderProvider.md)

#### Overrides

[`HeaderProvider`](HeaderProvider.md).[`constructor`](HeaderProvider.md#constructors)

## Methods

### getHeaders()

```ts
getHeaders(): Record<string, string>
```

Get OAuth headers, refreshing the token if needed.
Note: This is synchronous for now as the Rust implementation expects sync.
In a real implementation, this would need to handle async properly.

#### Returns

`Record`<`string`, `string`>

Headers with Bearer token authorization.

#### Throws

If unable to fetch or refresh the token.

#### Overrides

[`HeaderProvider`](HeaderProvider.md).[`getHeaders`](HeaderProvider.md#getheaders)

***

### refreshToken()

```ts
refreshToken(): Promise<void>
```

Manually refresh the token.
Call this before using `getHeaders()` to ensure a token is available.

#### Returns

`Promise`<`void`>
docs/src/js/classes/PermutationBuilder.md
@@ -0,0 +1,250 @@
[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / PermutationBuilder

# Class: PermutationBuilder

A PermutationBuilder for creating data permutations with splits, shuffling, and filtering.

This class provides a TypeScript wrapper around the native Rust PermutationBuilder,
offering methods to configure data splits, shuffling, and filtering before executing
the permutation to create a new table.

## Methods

### execute()

```ts
execute(): Promise<Table>
```

Execute the permutation and create the destination table.

#### Returns

`Promise`<[`Table`](Table.md)>

A Promise that resolves to the new Table instance

#### Example

```ts
const permutationTable = await builder.execute();
console.log(`Created table: ${permutationTable.name}`);
```

***

### filter()

```ts
filter(filter): PermutationBuilder
```

Configure filtering for the permutation.

#### Parameters

* **filter**: `string`
    SQL filter expression

#### Returns

[`PermutationBuilder`](PermutationBuilder.md)

A new PermutationBuilder instance

#### Example

```ts
builder.filter("age > 18 AND status = 'active'");
```

***

### persist()

```ts
persist(connection, tableName): PermutationBuilder
```

Configure the permutation to be persisted.

#### Parameters

* **connection**: [`Connection`](Connection.md)
    The connection to persist the permutation to

* **tableName**: `string`
    The name of the table to create

#### Returns

[`PermutationBuilder`](PermutationBuilder.md)

A new PermutationBuilder instance

#### Example

```ts
builder.persist(connection, "permutation_table");
```

***

### shuffle()

```ts
shuffle(options): PermutationBuilder
```

Configure shuffling for the permutation.

#### Parameters

* **options**: [`ShuffleOptions`](../interfaces/ShuffleOptions.md)
    Configuration for shuffling

#### Returns

[`PermutationBuilder`](PermutationBuilder.md)

A new PermutationBuilder instance

#### Example

```ts
// Basic shuffle
builder.shuffle({ seed: 42 });

// Shuffle with clump size
builder.shuffle({ seed: 42, clumpSize: 10 });
```

***

### splitCalculated()

```ts
splitCalculated(options): PermutationBuilder
```

Configure calculated splits for the permutation.

#### Parameters

* **options**: [`SplitCalculatedOptions`](../interfaces/SplitCalculatedOptions.md)
    Configuration for calculated splitting

#### Returns

[`PermutationBuilder`](PermutationBuilder.md)

A new PermutationBuilder instance

#### Example

```ts
builder.splitCalculated("user_id % 3");
```

***

### splitHash()

```ts
splitHash(options): PermutationBuilder
```

Configure hash-based splits for the permutation.

#### Parameters

* **options**: [`SplitHashOptions`](../interfaces/SplitHashOptions.md)
    Configuration for hash-based splitting

#### Returns

[`PermutationBuilder`](PermutationBuilder.md)

A new PermutationBuilder instance

#### Example

```ts
builder.splitHash({
  columns: ["user_id"],
  splitWeights: [70, 30],
  discardWeight: 0
});
```

***

### splitRandom()

```ts
splitRandom(options): PermutationBuilder
```

Configure random splits for the permutation.

#### Parameters

* **options**: [`SplitRandomOptions`](../interfaces/SplitRandomOptions.md)
    Configuration for random splitting

#### Returns

[`PermutationBuilder`](PermutationBuilder.md)

A new PermutationBuilder instance

#### Example

```ts
// Split by ratios
builder.splitRandom({ ratios: [0.7, 0.3], seed: 42 });

// Split by counts
builder.splitRandom({ counts: [1000, 500], seed: 42 });

// Split with fixed size
builder.splitRandom({ fixed: 100, seed: 42 });
```

***

### splitSequential()

```ts
splitSequential(options): PermutationBuilder
```

Configure sequential splits for the permutation.

#### Parameters

* **options**: [`SplitSequentialOptions`](../interfaces/SplitSequentialOptions.md)
    Configuration for sequential splitting

#### Returns

[`PermutationBuilder`](PermutationBuilder.md)

A new PermutationBuilder instance

#### Example

```ts
// Split by ratios
builder.splitSequential({ ratios: [0.8, 0.2] });

// Split by counts
builder.splitSequential({ counts: [800, 200] });

// Split with fixed size
builder.splitSequential({ fixed: 1000 });
```
@@ -14,7 +14,7 @@ A builder for LanceDB queries.

## Extends

- [`QueryBase`](QueryBase.md)<`NativeQuery`>
- `StandardQueryBase`<`NativeQuery`>

## Properties

@@ -26,7 +26,7 @@ protected inner: Query | Promise<Query>;

#### Inherited from

[`QueryBase`](QueryBase.md).[`inner`](QueryBase.md#inner)
`StandardQueryBase.inner`

## Methods

@@ -73,14 +73,14 @@ AnalyzeExec verbose=true, metrics=[]

#### Inherited from

[`QueryBase`](QueryBase.md).[`analyzePlan`](QueryBase.md#analyzeplan)
`StandardQueryBase.analyzePlan`

***

### execute()

```ts
protected execute(options?): RecordBatchIterator
protected execute(options?): AsyncGenerator<RecordBatch<any>, void, unknown>
```

Execute the query and return the results as an

@@ -91,7 +91,7 @@ Execute the query and return the results as an

#### Returns

[`RecordBatchIterator`](RecordBatchIterator.md)
`AsyncGenerator`<`RecordBatch`<`any`>, `void`, `unknown`>

#### See

@@ -107,7 +107,7 @@ single query)

#### Inherited from

[`QueryBase`](QueryBase.md).[`execute`](QueryBase.md#execute)
`StandardQueryBase.execute`

***

@@ -143,7 +143,7 @@ const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();

#### Inherited from

[`QueryBase`](QueryBase.md).[`explainPlan`](QueryBase.md#explainplan)
`StandardQueryBase.explainPlan`

***

@@ -164,7 +164,7 @@ Use [Table#optimize](Table.md#optimize) to index all un-indexed data.

#### Inherited from

[`QueryBase`](QueryBase.md).[`fastSearch`](QueryBase.md#fastsearch)
`StandardQueryBase.fastSearch`

***

@@ -194,7 +194,7 @@ Use `where` instead

#### Inherited from

[`QueryBase`](QueryBase.md).[`filter`](QueryBase.md#filter)
`StandardQueryBase.filter`

***

@@ -216,7 +216,7 @@ fullTextSearch(query, options?): this

#### Inherited from

[`QueryBase`](QueryBase.md).[`fullTextSearch`](QueryBase.md#fulltextsearch)
`StandardQueryBase.fullTextSearch`

***

@@ -241,7 +241,7 @@ called then every valid row from the table will be returned.

#### Inherited from

[`QueryBase`](QueryBase.md).[`limit`](QueryBase.md#limit)
`StandardQueryBase.limit`

***

@@ -325,6 +325,10 @@ nearestToText(query, columns?): Query
offset(offset): this
```

Set the number of rows to skip before returning results.

This is useful for pagination.

#### Parameters

* **offset**: `number`

@@ -335,7 +339,30 @@ offset(offset): this

#### Inherited from

[`QueryBase`](QueryBase.md).[`offset`](QueryBase.md#offset)
`StandardQueryBase.offset`

***

### outputSchema()

```ts
outputSchema(): Promise<Schema<any>>
```

Returns the schema of the output that will be returned by this query.

This can be used to inspect the types and names of the columns that will be
returned by the query before executing it.

#### Returns

`Promise`<`Schema`<`any`>>

An Arrow Schema describing the output columns.
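
A hedged inspection sketch (assuming an open `table`):

```ts
// Hedged sketch: inspect the output columns before running the query.
const schema = await table.query().select(["id"]).outputSchema();
console.log(schema.fields.map((f) => f.name)); // e.g. ["id"]
```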

#### Inherited from

`StandardQueryBase.outputSchema`

***

@@ -388,7 +415,7 @@ object insertion order is easy to get wrong and `Map` is more foolproof.

#### Inherited from

[`QueryBase`](QueryBase.md).[`select`](QueryBase.md#select)
`StandardQueryBase.select`

***

@@ -410,7 +437,7 @@ Collect the results as an array of objects.

#### Inherited from

[`QueryBase`](QueryBase.md).[`toArray`](QueryBase.md#toarray)
`StandardQueryBase.toArray`

***

@@ -436,7 +463,7 @@ ArrowTable.

#### Inherited from

[`QueryBase`](QueryBase.md).[`toArrow`](QueryBase.md#toarrow)
`StandardQueryBase.toArrow`

***

@@ -471,7 +498,7 @@ on the filter column(s).

#### Inherited from

[`QueryBase`](QueryBase.md).[`where`](QueryBase.md#where)
`StandardQueryBase.where`

***

@@ -493,4 +520,4 @@ order to perform hybrid search.

#### Inherited from

[`QueryBase`](QueryBase.md).[`withRowId`](QueryBase.md#withrowid)
`StandardQueryBase.withRowId`
@@ -15,12 +15,11 @@ Common methods supported by all query types

## Extended by

- [`Query`](Query.md)
- [`VectorQuery`](VectorQuery.md)
- [`TakeQuery`](TakeQuery.md)

## Type Parameters

• **NativeQueryType** *extends* `NativeQuery` \| `NativeVectorQuery`
• **NativeQueryType** *extends* `NativeQuery` \| `NativeVectorQuery` \| `NativeTakeQuery`

## Implements

@@ -82,7 +81,7 @@ AnalyzeExec verbose=true, metrics=[]

### execute()

```ts
protected execute(options?): RecordBatchIterator
protected execute(options?): AsyncGenerator<RecordBatch<any>, void, unknown>
```

Execute the query and return the results as an

@@ -93,7 +92,7 @@ Execute the query and return the results as an

#### Returns

[`RecordBatchIterator`](RecordBatchIterator.md)
`AsyncGenerator`<`RecordBatch`<`any`>, `void`, `unknown`>

#### See

@@ -141,101 +140,22 @@ const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();

***

### fastSearch()
### outputSchema()

```ts
fastSearch(): this
outputSchema(): Promise<Schema<any>>
```

Skip searching un-indexed data. This can make search faster, but will miss
any data that is not yet indexed.
Returns the schema of the output that will be returned by this query.

Use [Table#optimize](Table.md#optimize) to index all un-indexed data.
This can be used to inspect the types and names of the columns that will be
returned by the query before executing it.

#### Returns

`this`
`Promise`<`Schema`<`any`>>

***

### ~~filter()~~

```ts
filter(predicate): this
```

A filter statement to be applied to this query.

#### Parameters

* **predicate**: `string`

#### Returns

`this`

#### See

where

#### Deprecated

Use `where` instead

***

### fullTextSearch()

```ts
fullTextSearch(query, options?): this
```

#### Parameters

* **query**: `string` \| [`FullTextQuery`](../interfaces/FullTextQuery.md)

* **options?**: `Partial`<[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)>

#### Returns

`this`

***

### limit()

```ts
limit(limit): this
```

Set the maximum number of results to return.

By default, a plain search has no limit. If this method is not
called then every valid row from the table will be returned.

#### Parameters

* **limit**: `number`

#### Returns

`this`

***

### offset()

```ts
offset(offset): this
```

#### Parameters

* **offset**: `number`

#### Returns

`this`
An Arrow Schema describing the output columns.

***

@@ -328,37 +248,6 @@ ArrowTable.

***

### where()

```ts
where(predicate): this
```

A filter statement to be applied to this query.

The filter should be supplied as an SQL query string. For example:

```ts
x > 10
y > 0 AND y < 100
x > 5 OR y = 'test'
```

Filtering performance can often be improved by creating a scalar index
on the filter column(s).

***

### withRowId()

```ts
@@ -1,43 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / RecordBatchIterator

# Class: RecordBatchIterator

## Implements

- `AsyncIterator`<`RecordBatch`>

## Constructors

### new RecordBatchIterator()

```ts
new RecordBatchIterator(promise?): RecordBatchIterator
```

#### Parameters

* **promise?**: `Promise`<`RecordBatchIterator`>

#### Returns

[`RecordBatchIterator`](RecordBatchIterator.md)

## Methods

### next()

```ts
next(): Promise<IteratorResult<RecordBatch<any>, any>>
```

#### Returns

`Promise`<`IteratorResult`<`RecordBatch`<`any`>, `any`>>

#### Implementation of

`AsyncIterator.next`
@@ -9,7 +9,8 @@
|
||||
A session for managing caches and object stores across LanceDB operations.
|
||||
|
||||
Sessions allow you to configure cache sizes for index and metadata caches,
|
||||
which can significantly impact performance for large datasets.
|
||||
which can significantly impact memory use and performance. They can
|
||||
also be re-used across multiple connections to share the same cache state.
|
||||
|
||||
## Constructors
|
||||
|
||||
@@ -24,8 +25,11 @@ Create a new session with custom cache sizes.
|
||||
# Parameters
|
||||
|
||||
- `index_cache_size_bytes`: The size of the index cache in bytes.
|
||||
Index data is stored in memory in this cache to speed up queries.
|
||||
Defaults to 6GB if not specified.
|
||||
- `metadata_cache_size_bytes`: The size of the metadata cache in bytes.
|
||||
The metadata cache stores file metadata and schema information in memory.
|
||||
This cache improves scan and write performance.
|
||||
Defaults to 1GB if not specified.
|
||||
|
||||
#### Parameters
|
||||
|
||||
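
A hedged sketch of putting these cache sizes to use; it assumes the constructor takes the two byte counts in the order documented above, which this diff does not confirm:

```ts
// Hypothetical sketch: the exact constructor signature may differ.
const session = new Session(
  6 * 1024 * 1024 * 1024, // index cache: 6GB (the documented default)
  1 * 1024 * 1024 * 1024, // metadata cache: 1GB (the documented default)
);
// Sessions can be re-used across connections to share cache state.
const db = await connect("./.lancedb", {}, session);
```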
docs/src/js/classes/StaticHeaderProvider.md (new file)
@@ -0,0 +1,70 @@
[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / StaticHeaderProvider

# Class: StaticHeaderProvider

Example implementation: A simple header provider that returns static headers.

This is an example implementation showing how to create a HeaderProvider
for cases where headers don't change during the session.

## Example

```typescript
const provider = new StaticHeaderProvider({
  authorization: "Bearer my-token",
  "X-Custom-Header": "custom-value"
});
const headers = provider.getHeaders();
// Returns: {authorization: 'Bearer my-token', 'X-Custom-Header': 'custom-value'}
```

## Extends

- [`HeaderProvider`](HeaderProvider.md)

## Constructors

### new StaticHeaderProvider()

```ts
new StaticHeaderProvider(headers): StaticHeaderProvider
```

Initialize with static headers.

#### Parameters

* **headers**: `Record`<`string`, `string`>
  Headers to return for every request.

#### Returns

[`StaticHeaderProvider`](StaticHeaderProvider.md)

#### Overrides

[`HeaderProvider`](HeaderProvider.md).[`constructor`](HeaderProvider.md#constructors)

## Methods

### getHeaders()

```ts
getHeaders(): Record<string, string>
```

Return the static headers.

#### Returns

`Record`<`string`, `string`>

Copy of the static headers.

#### Overrides

[`HeaderProvider`](HeaderProvider.md).[`getHeaders`](HeaderProvider.md#getheaders)
@@ -674,6 +674,48 @@ console.log(tags); // { "v1": { version: 1, manifestSize: ... } }

***

### takeOffsets()

```ts
abstract takeOffsets(offsets): TakeQuery
```

Create a query that returns a subset of the rows in the table.

#### Parameters

* **offsets**: `number`[]
  The offsets of the rows to return.

#### Returns

[`TakeQuery`](TakeQuery.md)

A builder that can be used to parameterize the query.

***

### takeRowIds()

```ts
abstract takeRowIds(rowIds): TakeQuery
```

Create a query that returns a subset of the rows in the table.

#### Parameters

* **rowIds**: `number`[]
  The row ids of the rows to return.

#### Returns

[`TakeQuery`](TakeQuery.md)

A builder that can be used to parameterize the query.
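
A minimal sketch of both take queries; the row ids and offsets shown are illustrative:

```ts
// Hedged sketch: fetch specific rows by position and by stable row id.
const byOffset = await table.takeOffsets([0, 5, 9]).toArray();
const byRowId = await table.takeRowIds([12, 42]).toArray();
```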
***

### toArrow()

```ts

docs/src/js/classes/TakeQuery.md (new file)
@@ -0,0 +1,288 @@
[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / TakeQuery

# Class: TakeQuery

A query that returns a subset of the rows in the table.

## Extends

- [`QueryBase`](QueryBase.md)<`NativeTakeQuery`>

## Properties

### inner

```ts
protected inner: TakeQuery | Promise<TakeQuery>;
```

#### Inherited from

[`QueryBase`](QueryBase.md).[`inner`](QueryBase.md#inner)

## Methods

### analyzePlan()

```ts
analyzePlan(): Promise<string>
```

Executes the query and returns the physical query plan annotated with runtime metrics.

This is useful for debugging and performance analysis, as it shows how the query was executed
and includes metrics such as elapsed time, rows processed, and I/O statistics.

#### Returns

`Promise`<`string`>

A query execution plan with runtime metrics for each step.

#### Example

```ts
import * as lancedb from "@lancedb/lancedb"

const db = await lancedb.connect("./.lancedb");
const table = await db.createTable("my_table", [
  { vector: [1.1, 0.9], id: "1" },
]);

const plan = await table.query().nearestTo([0.5, 0.2]).analyzePlan();

Example output (with runtime metrics inlined):
AnalyzeExec verbose=true, metrics=[]
  ProjectionExec: expr=[id@3 as id, vector@0 as vector, _distance@2 as _distance], metrics=[output_rows=1, elapsed_compute=3.292µs]
    Take: columns="vector, _rowid, _distance, (id)", metrics=[output_rows=1, elapsed_compute=66.001µs, batches_processed=1, bytes_read=8, iops=1, requests=1]
      CoalesceBatchesExec: target_batch_size=1024, metrics=[output_rows=1, elapsed_compute=3.333µs]
        GlobalLimitExec: skip=0, fetch=10, metrics=[output_rows=1, elapsed_compute=167ns]
          FilterExec: _distance@2 IS NOT NULL, metrics=[output_rows=1, elapsed_compute=8.542µs]
            SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], metrics=[output_rows=1, elapsed_compute=63.25µs, row_replacements=1]
              KNNVectorDistance: metric=l2, metrics=[output_rows=1, elapsed_compute=114.333µs, output_batches=1]
                LanceScan: uri=/path/to/data, projection=[vector], row_id=true, row_addr=false, ordered=false, metrics=[output_rows=1, elapsed_compute=103.626µs, bytes_read=549, iops=2, requests=2]
```

#### Inherited from

[`QueryBase`](QueryBase.md).[`analyzePlan`](QueryBase.md#analyzeplan)

***

### execute()

```ts
protected execute(options?): AsyncGenerator<RecordBatch<any>, void, unknown>
```

Execute the query and return the results as an AsyncIterator of RecordBatch.

#### Parameters

* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)>

#### Returns

`AsyncGenerator`<`RecordBatch`<`any`>, `void`, `unknown`>

#### See

- AsyncIterator
- RecordBatch

By default, LanceDb will use many threads to calculate results and, when
the result set is large, multiple batches will be processed at one time.
This readahead is limited however and backpressure will be applied if this
stream is consumed slowly (this constrains the maximum memory used by a
single query).

#### Inherited from

[`QueryBase`](QueryBase.md).[`execute`](QueryBase.md#execute)

***

### explainPlan()

```ts
explainPlan(verbose): Promise<string>
```

Generates an explanation of the query execution plan.

#### Parameters

* **verbose**: `boolean` = `false`
  If true, provides a more detailed explanation. Defaults to false.

#### Returns

`Promise`<`string`>

A Promise that resolves to a string containing the query execution plan explanation.

#### Example

```ts
import * as lancedb from "@lancedb/lancedb"
const db = await lancedb.connect("./.lancedb");
const table = await db.createTable("my_table", [
  { vector: [1.1, 0.9], id: "1" },
]);
const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();
```

#### Inherited from

[`QueryBase`](QueryBase.md).[`explainPlan`](QueryBase.md#explainplan)

***

### outputSchema()

```ts
outputSchema(): Promise<Schema<any>>
```

Returns the schema of the output that will be returned by this query.

This can be used to inspect the types and names of the columns that will be
returned by the query before executing it.

#### Returns

`Promise`<`Schema`<`any`>>

An Arrow Schema describing the output columns.

#### Inherited from

[`QueryBase`](QueryBase.md).[`outputSchema`](QueryBase.md#outputschema)

***

### select()

```ts
select(columns): this
```

Return only the specified columns.

By default a query will return all columns from the table. However, this can have
a very significant impact on latency. LanceDb stores data in a columnar fashion. This
means we can finely tune our I/O to select exactly the columns we need.

As a best practice you should always limit queries to the columns that you need. If you
pass in an array of column names then only those columns will be returned.

You can also use this method to create new "dynamic" columns based on your existing columns.
For example, you may not care about "a" or "b" but instead simply want "a + b". This is often
seen in the SELECT clause of an SQL query (e.g. `SELECT a+b FROM my_table`).

To create dynamic columns you can pass in a Map<string, string>. A column will be returned
for each entry in the map. The key provides the name of the column. The value is
an SQL string used to specify how the column is calculated.

For example, an SQL query might state `SELECT a + b AS combined, c`. The equivalent
input to this method would be:

#### Parameters

* **columns**: `string` \| `string`[] \| `Record`<`string`, `string`> \| `Map`<`string`, `string`>

#### Returns

`this`

#### Example

```ts
new Map([["combined", "a + b"], ["c", "c"]])
```

Columns will always be returned in the order given, even if that order is different than
the order used when adding the data.

Note that you can pass in a `Record<string, string>` (e.g. an object literal). This method
uses `Object.entries` which should preserve the insertion order of the object. However,
object insertion order is easy to get wrong and `Map` is more foolproof.
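
A hedged sketch of wiring that `Map` into a query; the table and column names are illustrative:

```ts
// Hedged sketch: project one dynamic column and one existing column.
const rows = await table
  .query()
  .select(new Map([["combined", "a + b"], ["c", "c"]]))
  .toArray();
```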
#### Inherited from

[`QueryBase`](QueryBase.md).[`select`](QueryBase.md#select)

***

### toArray()

```ts
toArray(options?): Promise<any[]>
```

Collect the results as an array of objects.

#### Parameters

* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)>

#### Returns

`Promise`<`any`[]>

#### Inherited from

[`QueryBase`](QueryBase.md).[`toArray`](QueryBase.md#toarray)

***

### toArrow()

```ts
toArrow(options?): Promise<Table<any>>
```

Collect the results as an Arrow Table.

#### Parameters

* **options?**: `Partial`<[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)>

#### Returns

`Promise`<`Table`<`any`>>

#### See

ArrowTable.

#### Inherited from

[`QueryBase`](QueryBase.md).[`toArrow`](QueryBase.md#toarrow)

***

### withRowId()

```ts
withRowId(): this
```

Whether to return the row id in the results.

This column can be used to match results between different queries. For
example, to match results from a full text search and a vector search in
order to perform hybrid search.

#### Returns

`this`

#### Inherited from

[`QueryBase`](QueryBase.md).[`withRowId`](QueryBase.md#withrowid)
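
A hedged sketch of the hybrid-search matching described above; it assumes the row id is surfaced as a `_rowid` column, which this diff does not state:

```ts
// Hedged sketch: intersect FTS and vector results on the row id column.
const ftsHits = await table.query().fullTextSearch("puppy").withRowId().toArray();
const vecHits = await table.query().nearestTo([0.5, 0.2]).withRowId().toArray();
const ftsIds = new Set(ftsHits.map((r) => r._rowid));
const inBoth = vecHits.filter((r) => ftsIds.has(r._rowid));
```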
@@ -16,7 +16,7 @@ This builder can be reused to execute the query many times.

## Extends

- [`QueryBase`](QueryBase.md)<`NativeVectorQuery`>
- `StandardQueryBase`<`NativeVectorQuery`>

## Properties

@@ -28,7 +28,7 @@ protected inner: VectorQuery | Promise<VectorQuery>;

#### Inherited from

[`QueryBase`](QueryBase.md).[`inner`](QueryBase.md#inner)
`StandardQueryBase.inner`

## Methods

@@ -91,7 +91,7 @@ AnalyzeExec verbose=true, metrics=[]

#### Inherited from

[`QueryBase`](QueryBase.md).[`analyzePlan`](QueryBase.md#analyzeplan)
`StandardQueryBase.analyzePlan`

***

@@ -221,7 +221,7 @@ also increase the latency of your query. The default value is 1.5*limit.

### execute()

```ts
protected execute(options?): RecordBatchIterator
protected execute(options?): AsyncGenerator<RecordBatch<any>, void, unknown>
```

Execute the query and return the results as an

@@ -232,7 +232,7 @@ Execute the query and return the results as an

#### Returns

[`RecordBatchIterator`](RecordBatchIterator.md)
`AsyncGenerator`<`RecordBatch`<`any`>, `void`, `unknown`>

#### See

@@ -248,7 +248,7 @@ single query)

#### Inherited from

[`QueryBase`](QueryBase.md).[`execute`](QueryBase.md#execute)
`StandardQueryBase.execute`

***

@@ -284,7 +284,7 @@ const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();

#### Inherited from

[`QueryBase`](QueryBase.md).[`explainPlan`](QueryBase.md#explainplan)
`StandardQueryBase.explainPlan`

***

@@ -305,7 +305,7 @@ Use [Table#optimize](Table.md#optimize) to index all un-indexed data.

#### Inherited from

[`QueryBase`](QueryBase.md).[`fastSearch`](QueryBase.md#fastsearch)
`StandardQueryBase.fastSearch`

***

@@ -335,7 +335,7 @@ Use `where` instead

#### Inherited from

[`QueryBase`](QueryBase.md).[`filter`](QueryBase.md#filter)
`StandardQueryBase.filter`

***

@@ -357,7 +357,7 @@ fullTextSearch(query, options?): this

#### Inherited from

[`QueryBase`](QueryBase.md).[`fullTextSearch`](QueryBase.md#fulltextsearch)
`StandardQueryBase.fullTextSearch`

***

@@ -382,7 +382,7 @@ called then every valid row from the table will be returned.

#### Inherited from

[`QueryBase`](QueryBase.md).[`limit`](QueryBase.md#limit)
`StandardQueryBase.limit`

***

@@ -480,6 +480,10 @@ the minimum and maximum to the same value.

offset(offset): this
```

Set the number of rows to skip before returning results.

This is useful for pagination.

#### Parameters

* **offset**: `number`

@@ -490,7 +494,30 @@ offset(offset): this

#### Inherited from

[`QueryBase`](QueryBase.md).[`offset`](QueryBase.md#offset)
`StandardQueryBase.offset`

***

### outputSchema()

```ts
outputSchema(): Promise<Schema<any>>
```

Returns the schema of the output that will be returned by this query.

This can be used to inspect the types and names of the columns that will be
returned by the query before executing it.

#### Returns

`Promise`<`Schema`<`any`>>

An Arrow Schema describing the output columns.

#### Inherited from

`StandardQueryBase.outputSchema`

***

@@ -637,7 +664,7 @@ object insertion order is easy to get wrong and `Map` is more foolproof.

#### Inherited from

[`QueryBase`](QueryBase.md).[`select`](QueryBase.md#select)
`StandardQueryBase.select`

***

@@ -659,7 +686,7 @@ Collect the results as an array of objects.

#### Inherited from

[`QueryBase`](QueryBase.md).[`toArray`](QueryBase.md#toarray)
`StandardQueryBase.toArray`

***

@@ -685,7 +712,7 @@ ArrowTable.

#### Inherited from

[`QueryBase`](QueryBase.md).[`toArrow`](QueryBase.md#toarrow)
`StandardQueryBase.toArrow`

***

@@ -720,7 +747,7 @@ on the filter column(s).

#### Inherited from

[`QueryBase`](QueryBase.md).[`where`](QueryBase.md#where)
`StandardQueryBase.where`

***

@@ -742,4 +769,4 @@ order to perform hybrid search.

#### Inherited from

[`QueryBase`](QueryBase.md).[`withRowId`](QueryBase.md#withrowid)
`StandardQueryBase.withRowId`
docs/src/js/functions/RecordBatchIterator.md (new file)
@@ -0,0 +1,19 @@

[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / RecordBatchIterator

# Function: RecordBatchIterator()

```ts
function RecordBatchIterator(promisedInner): AsyncGenerator<RecordBatch<any>, void, unknown>
```

## Parameters

* **promisedInner**: `Promise`<`RecordBatchIterator`>

## Returns

`AsyncGenerator`<`RecordBatch`<`any`>, `void`, `unknown`>
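
For orientation, a small self-contained sketch of consuming an `AsyncGenerator<RecordBatch>` such as the one returned here:

```ts
import type { RecordBatch } from "apache-arrow";

// Hedged sketch: `batches` stands for any AsyncGenerator<RecordBatch>.
async function countRows(
  batches: AsyncGenerator<RecordBatch>,
): Promise<number> {
  let total = 0;
  for await (const batch of batches) {
    total += batch.numRows; // each batch carries a chunk of the result set
  }
  return total;
}
```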
@@ -6,13 +6,14 @@

# Function: connect()

## connect(uri, options, session)
## connect(uri, options, session, headerProvider)

```ts
function connect(
  uri,
  options?,
  session?): Promise<Connection>
  session?,
  headerProvider?): Promise<Connection>
```

Connect to a LanceDB instance at the given URI.

@@ -34,6 +35,8 @@ Accepted formats:

* **session?**: [`Session`](../classes/Session.md)

* **headerProvider?**: [`HeaderProvider`](../classes/HeaderProvider.md) \| () => `Record`<`string`, `string`> \| () => `Promise`<`Record`<`string`, `string`>>

### Returns

`Promise`<[`Connection`](../classes/Connection.md)>

@@ -55,6 +58,18 @@ const conn = await connect(
});
```

Using with a header provider for per-request authentication:
```ts
const provider = new StaticHeaderProvider({
  "X-API-Key": "my-key"
});
const conn = await connectWithHeaderProvider(
  "db://host:port",
  options,
  provider
);
```

## connect(options)

```ts
@@ -13,7 +13,7 @@ function makeArrowTable(
  metadata?): ArrowTable
```

An enhanced version of the makeTable function from Apache Arrow
An enhanced version of the apache-arrow makeTable function from Apache Arrow
that supports nested fields and embeddings columns.

(typically you do not need to call this function. It will be called automatically
docs/src/js/functions/permutationBuilder.md (new file)
@@ -0,0 +1,34 @@

[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / permutationBuilder

# Function: permutationBuilder()

```ts
function permutationBuilder(table): PermutationBuilder
```

Create a permutation builder for the given table.

## Parameters

* **table**: [`Table`](../classes/Table.md)
  The source table to create a permutation from

## Returns

[`PermutationBuilder`](../classes/PermutationBuilder.md)

A PermutationBuilder instance

## Example

```ts
const builder = permutationBuilder(sourceTable, "training_data")
  .splitRandom({ ratios: [0.8, 0.2], seed: 42 })
  .shuffle({ seed: 123 });

const trainingTable = await builder.execute();
```
@@ -20,19 +20,24 @@

- [BooleanQuery](classes/BooleanQuery.md)
- [BoostQuery](classes/BoostQuery.md)
- [Connection](classes/Connection.md)
- [HeaderProvider](classes/HeaderProvider.md)
- [Index](classes/Index.md)
- [MakeArrowTableOptions](classes/MakeArrowTableOptions.md)
- [MatchQuery](classes/MatchQuery.md)
- [MergeInsertBuilder](classes/MergeInsertBuilder.md)
- [MultiMatchQuery](classes/MultiMatchQuery.md)
- [NativeJsHeaderProvider](classes/NativeJsHeaderProvider.md)
- [OAuthHeaderProvider](classes/OAuthHeaderProvider.md)
- [PermutationBuilder](classes/PermutationBuilder.md)
- [PhraseQuery](classes/PhraseQuery.md)
- [Query](classes/Query.md)
- [QueryBase](classes/QueryBase.md)
- [RecordBatchIterator](classes/RecordBatchIterator.md)
- [Session](classes/Session.md)
- [StaticHeaderProvider](classes/StaticHeaderProvider.md)
- [Table](classes/Table.md)
- [TagContents](classes/TagContents.md)
- [Tags](classes/Tags.md)
- [TakeQuery](classes/TakeQuery.md)
- [VectorColumnOptions](classes/VectorColumnOptions.md)
- [VectorQuery](classes/VectorQuery.md)

@@ -63,6 +68,7 @@

- [IndexStatistics](interfaces/IndexStatistics.md)
- [IvfFlatOptions](interfaces/IvfFlatOptions.md)
- [IvfPqOptions](interfaces/IvfPqOptions.md)
- [IvfRqOptions](interfaces/IvfRqOptions.md)
- [MergeResult](interfaces/MergeResult.md)
- [OpenTableOptions](interfaces/OpenTableOptions.md)
- [OptimizeOptions](interfaces/OptimizeOptions.md)

@@ -70,9 +76,16 @@

- [QueryExecutionOptions](interfaces/QueryExecutionOptions.md)
- [RemovalStats](interfaces/RemovalStats.md)
- [RetryConfig](interfaces/RetryConfig.md)
- [ShuffleOptions](interfaces/ShuffleOptions.md)
- [SplitCalculatedOptions](interfaces/SplitCalculatedOptions.md)
- [SplitHashOptions](interfaces/SplitHashOptions.md)
- [SplitRandomOptions](interfaces/SplitRandomOptions.md)
- [SplitSequentialOptions](interfaces/SplitSequentialOptions.md)
- [TableNamesOptions](interfaces/TableNamesOptions.md)
- [TableStatistics](interfaces/TableStatistics.md)
- [TimeoutConfig](interfaces/TimeoutConfig.md)
- [TlsConfig](interfaces/TlsConfig.md)
- [TokenResponse](interfaces/TokenResponse.md)
- [UpdateOptions](interfaces/UpdateOptions.md)
- [UpdateResult](interfaces/UpdateResult.md)
- [Version](interfaces/Version.md)

@@ -92,6 +105,8 @@

## Functions

- [RecordBatchIterator](functions/RecordBatchIterator.md)
- [connect](functions/connect.md)
- [makeArrowTable](functions/makeArrowTable.md)
- [packBits](functions/packBits.md)
- [permutationBuilder](functions/permutationBuilder.md)
@@ -16,6 +16,14 @@ optional extraHeaders: Record<string, string>;

***

### idDelimiter?

```ts
optional idDelimiter: string;
```

***

### retryConfig?

```ts

@@ -32,6 +40,14 @@ optional timeoutConfig: TimeoutConfig;

***

### tlsConfig?

```ts
optional tlsConfig: TlsConfig;
```

***

### userAgent?

```ts
@@ -26,6 +26,18 @@ will be used to determine the most useful kind of index to create.

***

### name?

```ts
optional name: string;
```

Optional custom name for the index.

If not provided, a default name will be generated based on the column name.

***

### replace?

```ts

@@ -42,8 +54,27 @@ The default is true

***

### train?

```ts
optional train: boolean;
```

Whether to train the index with existing data.

If true (default), the index will be trained with existing data in the table.
If false, the index will be created empty and populated as new data is added.

Note: This option is only supported for scalar indices. Vector indices always train.
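
A hedged sketch of creating an untrained scalar index with this option; the column name and the `Index.bitmap` config are illustrative choices, not mandated by this diff:

```ts
// Hedged sketch: start with an empty index; it fills as new data arrives.
await table.createIndex("category", {
  config: Index.bitmap(),
  train: false,
});
```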
***

### waitTimeoutSeconds?

```ts
optional waitTimeoutSeconds: number;
```

Timeout in seconds to wait for index creation to complete.

If not specified, the method will return immediately after starting the index creation.

docs/src/js/interfaces/IvfRqOptions.md (new file)
@@ -0,0 +1,101 @@
[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / IvfRqOptions

# Interface: IvfRqOptions

## Properties

### distanceType?

```ts
optional distanceType: "l2" | "cosine" | "dot";
```

Distance type to use to build the index.

Default value is "l2".

This is used when training the index to calculate the IVF partitions
(vectors are grouped in partitions with similar vectors according to this
distance type) and during quantization.

The distance type used to train an index MUST match the distance type used
to search the index. Failure to do so will yield inaccurate results.

The following distance types are available:

"l2" - Euclidean distance.
"cosine" - Cosine distance.
"dot" - Dot product.

***

### maxIterations?

```ts
optional maxIterations: number;
```

Max iterations to train IVF kmeans.

When training an IVF index we use kmeans to calculate the partitions. This parameter
controls how many iterations of kmeans to run.

The default value is 50.

***

### numBits?

```ts
optional numBits: number;
```

Number of bits per dimension for residual quantization.

This value controls how much each residual component is compressed. The more
bits, the more accurate the index will be, but the slower the search. Typical values
are small integers; the default is 1 bit per dimension.

***

### numPartitions?

```ts
optional numPartitions: number;
```

The number of IVF partitions to create.

This value should generally scale with the number of rows in the dataset.
By default the number of partitions is the square root of the number of
rows.

If this value is too large then the first part of the search (picking the
right partition) will be slow. If this value is too small then the second
part of the search (searching within a partition) will be slow.

***

### sampleRate?

```ts
optional sampleRate: number;
```

The number of vectors, per partition, to sample when training IVF kmeans.

When an IVF index is trained, we need to calculate partitions. These are groups
of vectors that are similar to each other. To do this we use an algorithm called kmeans.

Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
random sample of the data. This parameter controls the size of the sample. The total
number of vectors used to train the index is `sample_rate * num_partitions`.

Increasing this value might improve the quality of the index but in most cases the
default should be sufficient.

The default value is 256.
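
A hedged sketch of passing these options when building an index; it assumes an `Index.ivfRq` factory mirroring the existing `Index.ivfPq` one, which this diff does not show:

```ts
// Hypothetical sketch: the factory name and chosen values are assumptions.
await table.createIndex("vector", {
  config: Index.ivfRq({
    distanceType: "cosine",
    numPartitions: 128,
    numBits: 1,
    sampleRate: 256,
  }),
});
```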
docs/src/js/interfaces/ShuffleOptions.md (new file)
@@ -0,0 +1,23 @@
[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / ShuffleOptions

# Interface: ShuffleOptions

## Properties

### clumpSize?

```ts
optional clumpSize: number;
```

***

### seed?

```ts
optional seed: number;
```

docs/src/js/interfaces/SplitCalculatedOptions.md (new file)
@@ -0,0 +1,23 @@

[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / SplitCalculatedOptions

# Interface: SplitCalculatedOptions

## Properties

### calculation

```ts
calculation: string;
```

***

### splitNames?

```ts
optional splitNames: string[];
```

docs/src/js/interfaces/SplitHashOptions.md (new file)
@@ -0,0 +1,39 @@

[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / SplitHashOptions

# Interface: SplitHashOptions

## Properties

### columns

```ts
columns: string[];
```

***

### discardWeight?

```ts
optional discardWeight: number;
```

***

### splitNames?

```ts
optional splitNames: string[];
```

***

### splitWeights

```ts
splitWeights: number[];
```

docs/src/js/interfaces/SplitRandomOptions.md (new file)
@@ -0,0 +1,47 @@

[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / SplitRandomOptions

# Interface: SplitRandomOptions

## Properties

### counts?

```ts
optional counts: number[];
```

***

### fixed?

```ts
optional fixed: number;
```

***

### ratios?

```ts
optional ratios: number[];
```

***

### seed?

```ts
optional seed: number;
```

***

### splitNames?

```ts
optional splitNames: string[];
```

docs/src/js/interfaces/SplitSequentialOptions.md (new file)
@@ -0,0 +1,39 @@

[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / SplitSequentialOptions

# Interface: SplitSequentialOptions

## Properties

### counts?

```ts
optional counts: number[];
```

***

### fixed?

```ts
optional fixed: number;
```

***

### ratios?

```ts
optional ratios: number[];
```

***

### splitNames?

```ts
optional splitNames: string[];
```
@@ -44,3 +44,17 @@ optional readTimeout: number;

The timeout for reading data from the server in seconds. Default is 300
seconds (5 minutes). This can also be set via the environment variable
`LANCE_CLIENT_READ_TIMEOUT`, as an integer number of seconds.

***

### timeout?

```ts
optional timeout: number;
```

The overall timeout for the entire request in seconds. This includes
connection, send, and read time. If the entire request doesn't complete
within this time, it will fail. Default is None (no overall timeout).
This can also be set via the environment variable `LANCE_CLIENT_TIMEOUT`,
as an integer number of seconds.
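
A hedged sketch of setting both timeouts on a remote connection; the `clientConfig` nesting is an assumption based on the surrounding interfaces, not stated in this diff:

```ts
// Hedged sketch: per-phase read timeout plus an overall request timeout.
const conn = await connect("db://my-db", {
  clientConfig: {
    timeoutConfig: { readTimeout: 300, timeout: 600 },
  },
});
```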
docs/src/js/interfaces/TlsConfig.md (new file)
@@ -0,0 +1,49 @@
[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / TlsConfig

# Interface: TlsConfig

TLS/mTLS configuration for the remote HTTP client.

## Properties

### assertHostname?

```ts
optional assertHostname: boolean;
```

Whether to verify the hostname in the server's certificate.

***

### certFile?

```ts
optional certFile: string;
```

Path to the client certificate file (PEM format) for mTLS authentication.

***

### keyFile?

```ts
optional keyFile: string;
```

Path to the client private key file (PEM format) for mTLS authentication.

***

### sslCaCert?

```ts
optional sslCaCert: string;
```

Path to the CA certificate file (PEM format) for server verification.
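
A hedged sketch of a full mTLS configuration; all paths are illustrative, and the `clientConfig.tlsConfig` nesting is an assumption based on the interfaces listed above:

```ts
// Hedged sketch: client cert + key for mTLS, CA cert for server verification.
const conn = await connect("db://my-db", {
  clientConfig: {
    tlsConfig: {
      sslCaCert: "/etc/ssl/ca.pem",
      certFile: "/etc/ssl/client.pem",
      keyFile: "/etc/ssl/client.key",
      assertHostname: true,
    },
  },
});
```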
docs/src/js/interfaces/TokenResponse.md (new file)
@@ -0,0 +1,25 @@
[**@lancedb/lancedb**](../README.md) • **Docs**

***

[@lancedb/lancedb](../globals.md) / TokenResponse

# Interface: TokenResponse

Token response from OAuth provider.

## Properties

### accessToken

```ts
accessToken: string;
```

***

### expiresIn?

```ts
optional expiresIn: number;
```

@@ -15,7 +15,7 @@ publish = false
crate-type = ["cdylib"]

[dependencies]
lancedb = { path = "../../../rust/lancedb" }
lancedb = { path = "../../../rust/lancedb", default-features = false }
lance = { workspace = true }
arrow = { workspace = true, features = ["ffi"] }
arrow-schema.workspace = true

@@ -25,3 +25,6 @@ snafu.workspace = true
lazy_static.workspace = true
serde = { version = "^1" }
serde_json = { version = "1" }

[features]
default = ["lancedb/default"]

@@ -51,8 +51,11 @@ pub enum Error {
    DatasetAlreadyExists { uri: String, location: Location },
    #[snafu(display("Table '{name}' already exists"))]
    TableAlreadyExists { name: String },
    #[snafu(display("Table '{name}' was not found"))]
    TableNotFound { name: String },
    #[snafu(display("Table '{name}' was not found: {source}"))]
    TableNotFound {
        name: String,
        source: Box<dyn std::error::Error + Send + Sync>,
    },
    #[snafu(display("Invalid table name '{name}': {reason}"))]
    InvalidTableName { name: String, reason: String },
    #[snafu(display("Embedding function '{name}' was not found: {reason}, {location}"))]

@@ -191,7 +194,7 @@ impl From<lancedb::Error> for Error {
                message,
                location: std::panic::Location::caller().to_snafu_location(),
            },
            lancedb::Error::TableNotFound { name } => Self::TableNotFound { name },
            lancedb::Error::TableNotFound { name, source } => Self::TableNotFound { name, source },
            lancedb::Error::TableAlreadyExists { name } => Self::TableAlreadyExists { name },
            lancedb::Error::EmbeddingFunctionNotFound { name, reason } => {
                Self::EmbeddingFunctionNotFound {

@@ -16,6 +16,7 @@ pub trait JNIEnvExt {
    fn get_integers(&mut self, obj: &JObject) -> Result<Vec<i32>>;

    /// Get strings from Java List<String> object.
    #[allow(dead_code)]
    fn get_strings(&mut self, obj: &JObject) -> Result<Vec<String>>;

    /// Get strings from Java String[] object.

@@ -6,6 +6,7 @@ use jni::JNIEnv;

use crate::Result;

#[allow(dead_code)]
pub trait FromJObject<T> {
    fn extract(&self) -> Result<T>;
}

@@ -39,6 +40,7 @@ impl FromJObject<f64> for JObject<'_> {
    }
}

#[allow(dead_code)]
pub trait FromJString {
    fn extract(&self, env: &mut JNIEnv) -> Result<String>;
}

@@ -66,6 +68,7 @@ pub trait JMapExt {
    fn get_f64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f64>>;
}

#[allow(dead_code)]
fn get_map_value<T>(env: &mut JNIEnv, map: &JMap, key: &str) -> Result<Option<T>>
where
    for<'a> JObject<'a>: FromJObject<T>,

@@ -8,7 +8,7 @@
<parent>
  <groupId>com.lancedb</groupId>
  <artifactId>lancedb-parent</artifactId>
  <version>0.21.2-final.0</version>
  <version>0.22.3-beta.5</version>
  <relativePath>../pom.xml</relativePath>
</parent>

@@ -8,7 +8,7 @@
<parent>
  <groupId>com.lancedb</groupId>
  <artifactId>lancedb-parent</artifactId>
  <version>0.21.2-final.0</version>
  <version>0.22.3-beta.5</version>
  <relativePath>../pom.xml</relativePath>
</parent>

@@ -6,7 +6,7 @@

<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.21.2-final.0</version>
<version>0.22.3-beta.5</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>
nodejs/AGENTS.md (new file)
@@ -0,0 +1,13 @@
These are the typescript bindings of LanceDB.
The core Rust library is in the `../rust/lancedb` directory, the rust binding
code is in the `src/` directory and the typescript bindings are in
the `lancedb/` directory.

Whenever you change the Rust code, you will need to recompile: `npm run build`.

Common commands:
* Build: `npm run build`
* Lint: `npm run lint`
* Fix lints: `npm run lint-fix`
* Test: `npm test`
* Run single test file: `npm test __test__/arrow.test.ts`

@@ -1,13 +0,0 @@
These are the typescript bindings of LanceDB.
The core Rust library is in the `../rust/lancedb` directory, the rust binding
code is in the `src/` directory and the typescript bindings are in
the `lancedb/` directory.

Whenever you change the Rust code, you will need to recompile: `npm run build`.

Common commands:
* Build: `npm run build`
* Lint: `npm run lint`
* Fix lints: `npm run lint-fix`
* Test: `npm test`
* Run single test file: `npm test __test__/arrow.test.ts`

nodejs/CLAUDE.md (new symbolic link)
@@ -0,0 +1 @@
AGENTS.md
@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.21.2"
version = "0.22.3-beta.5"
license.workspace = true
description.workspace = true
repository.workspace = true

@@ -18,7 +18,7 @@ arrow-array.workspace = true
arrow-schema.workspace = true
env_logger.workspace = true
futures.workspace = true
lancedb = { path = "../rust/lancedb" }
lancedb = { path = "../rust/lancedb", default-features = false }
napi = { version = "2.16.8", default-features = false, features = [
  "napi9",
  "async"

@@ -36,6 +36,6 @@ aws-lc-rs = "=1.13.0"
napi-build = "2.1"

[features]
default = ["remote"]
default = ["remote", "lancedb/default"]
fp16kernels = ["lancedb/fp16kernels"]
remote = ["lancedb/remote"]
@@ -1,17 +1,5 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

import {
  Bool,
  Field,
  Int32,
  List,
  Schema,
  Struct,
  Uint8,
  Utf8,
} from "apache-arrow";

import * as arrow15 from "apache-arrow-15";
import * as arrow16 from "apache-arrow-16";
import * as arrow17 from "apache-arrow-17";

@@ -25,11 +13,9 @@ import {
  fromTableToBuffer,
  makeArrowTable,
  makeEmptyTable,
  tableFromIPC,
} from "../lancedb/arrow";
import {
  EmbeddingFunction,
  FieldOptions,
  FunctionOptions,
} from "../lancedb/embedding/embedding_function";
import { EmbeddingFunctionConfig } from "../lancedb/embedding/registry";

@@ -1008,5 +994,64 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      expect(result).toEqual(null);
    });
  });

  describe("boolean null handling", function () {
    it("should handle null values in nullable boolean fields", () => {
      const { makeArrowTable } = require("../lancedb/arrow");
      const schema = new Schema([new Field("test", new arrow.Bool(), true)]);

      // Test with all null values
      const data = [{ test: null }];
      const table = makeArrowTable(data, { schema });

      expect(table.numRows).toBe(1);
      expect(table.schema.names).toEqual(["test"]);
      expect(table.getChild("test")!.get(0)).toBeNull();
    });

    it("should handle mixed null and non-null boolean values", () => {
      const { makeArrowTable } = require("../lancedb/arrow");
      const schema = new Schema([new Field("test", new Bool(), true)]);

      // Test with mixed values
      const data = [{ test: true }, { test: null }, { test: false }];
      const table = makeArrowTable(data, { schema });

      expect(table.numRows).toBe(3);
      expect(table.getChild("test")!.get(0)).toBe(true);
      expect(table.getChild("test")!.get(1)).toBeNull();
      expect(table.getChild("test")!.get(2)).toBe(false);
    });
  });

  // Test for the undefined values bug fix
  describe("undefined values handling", () => {
    it("should handle mixed undefined and actual values", () => {
      const schema = new Schema([
        new Field("text", new Utf8(), true), // nullable
        new Field("number", new Int32(), true), // nullable
        new Field("bool", new Bool(), true), // nullable
      ]);

      const data = [
        { text: undefined, number: 42, bool: true },
        { text: "hello", number: undefined, bool: false },
        { text: "world", number: 123, bool: undefined },
      ];
      const table = makeArrowTable(data, { schema });

      const result = table.toArray();
      expect(result).toHaveLength(3);
      expect(result[0].text).toBe(null);
      expect(result[0].number).toBe(42);
      expect(result[0].bool).toBe(true);
      expect(result[1].text).toBe("hello");
      expect(result[1].number).toBe(null);
      expect(result[1].bool).toBe(false);
      expect(result[2].text).toBe("world");
      expect(result[2].number).toBe(123);
      expect(result[2].bool).toBe(null);
    });
  });
},
);
@@ -203,3 +203,106 @@ describe("given a connection", () => {
    });
  });
});

describe("clone table functionality", () => {
  let tmpDir: tmp.DirResult;
  let db: Connection;
  beforeEach(async () => {
    tmpDir = tmp.dirSync({ unsafeCleanup: true });
    db = await connect(tmpDir.name);
  });
  afterEach(() => tmpDir.removeCallback());

  it("should clone a table with latest version (default behavior)", async () => {
    // Create source table with some data
    const data = [
      { id: 1, text: "hello", vector: [1.0, 2.0] },
      { id: 2, text: "world", vector: [3.0, 4.0] },
    ];
    const sourceTable = await db.createTable("source", data);

    // Add more data to create a new version
    const moreData = [{ id: 3, text: "test", vector: [5.0, 6.0] }];
    await sourceTable.add(moreData);

    // Clone the table (should get latest version with 3 rows)
    const sourceUri = `${tmpDir.name}/source.lance`;
    const clonedTable = await db.cloneTable("cloned", sourceUri);

    // Verify cloned table has all 3 rows
    expect(await clonedTable.countRows()).toBe(3);
    expect((await db.tableNames()).includes("cloned")).toBe(true);
  });

  it("should clone a table from a specific version", async () => {
    // Create source table with initial data
    const data = [
      { id: 1, text: "hello", vector: [1.0, 2.0] },
      { id: 2, text: "world", vector: [3.0, 4.0] },
    ];
    const sourceTable = await db.createTable("source", data);

    // Get the initial version
    const initialVersion = await sourceTable.version();

    // Add more data to create a new version
    const moreData = [{ id: 3, text: "test", vector: [5.0, 6.0] }];
    await sourceTable.add(moreData);

    // Verify source now has 3 rows
    expect(await sourceTable.countRows()).toBe(3);

    // Clone from the initial version (should have only 2 rows)
    const sourceUri = `${tmpDir.name}/source.lance`;
    const clonedTable = await db.cloneTable("cloned", sourceUri, {
      sourceVersion: initialVersion,
    });

    // Verify cloned table has only the initial 2 rows
    expect(await clonedTable.countRows()).toBe(2);
  });

  it("should clone a table from a tagged version", async () => {
    // Create source table with initial data
    const data = [
      { id: 1, text: "hello", vector: [1.0, 2.0] },
      { id: 2, text: "world", vector: [3.0, 4.0] },
    ];
    const sourceTable = await db.createTable("source", data);

    // Create a tag for the current version
    const tags = await sourceTable.tags();
    await tags.create("v1.0", await sourceTable.version());

    // Add more data after the tag
    const moreData = [{ id: 3, text: "test", vector: [5.0, 6.0] }];
    await sourceTable.add(moreData);

    // Verify source now has 3 rows
    expect(await sourceTable.countRows()).toBe(3);

    // Clone from the tagged version (should have only 2 rows)
    const sourceUri = `${tmpDir.name}/source.lance`;
    const clonedTable = await db.cloneTable("cloned", sourceUri, {
      sourceTag: "v1.0",
    });

    // Verify cloned table has only the tagged version's 2 rows
    expect(await clonedTable.countRows()).toBe(2);
  });

  it("should fail when attempting deep clone", async () => {
    // Create source table with some data
    const data = [
      { id: 1, text: "hello", vector: [1.0, 2.0] },
      { id: 2, text: "world", vector: [3.0, 4.0] },
    ];
    await db.createTable("source", data);

    // Try to create a deep clone (should fail)
    const sourceUri = `${tmpDir.name}/source.lance`;
    await expect(
      db.cloneTable("cloned", sourceUri, { isShallow: false }),
    ).rejects.toThrow("Deep clone is not yet implemented");
  });
});
@@ -256,6 +256,60 @@ describe("embedding functions", () => {
    expect(actual).toHaveProperty("text");
  });

  it("should handle undefined vector field with embedding function correctly", async () => {
    @register("undefined_test")
    class MockEmbeddingFunction extends EmbeddingFunction<string> {
      ndims() {
        return 3;
      }
      embeddingDataType(): Float {
        return new Float32();
      }
      async computeQueryEmbeddings(_data: string) {
        return [1, 2, 3];
      }
      async computeSourceEmbeddings(data: string[]) {
        return Array.from({ length: data.length }).fill([
          1, 2, 3,
        ]) as number[][];
      }
    }
    const func = getRegistry()
      .get<MockEmbeddingFunction>("undefined_test")!
      .create();
    const schema = new Schema([
      new Field("text", new Utf8(), true),
      new Field(
        "vector",
        new FixedSizeList(3, new Field("item", new Float32(), true)),
        true,
      ),
    ]);

    const db = await connect(tmpDir.name);
    const table = await db.createEmptyTable("test_undefined", schema, {
      embeddingFunction: {
        function: func,
        sourceColumn: "text",
        vectorColumn: "vector",
      },
    });

    // Test that undefined, null, and omitted vector fields all work
    await table.add([{ text: "test1", vector: undefined }]);
    await table.add([{ text: "test2", vector: null }]);
    await table.add([{ text: "test3" }]);

    const rows = await table.query().toArray();
    expect(rows.length).toBe(3);

    // All rows should have vectors computed by the embedding function
    for (const row of rows) {
      expect(row.vector).toBeDefined();
      expect(JSON.parse(JSON.stringify(row.vector))).toEqual([1, 2, 3]);
    }
  });

  test.each([new Float16(), new Float32(), new Float64()])(
    "should be able to provide manual embeddings with multiple float datatype",
    async (floatType) => {
nodejs/__test__/permutation.test.ts (new file)
@@ -0,0 +1,371 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

import * as tmp from "tmp";
import { Table, connect, permutationBuilder } from "../lancedb";
import { makeArrowTable } from "../lancedb/arrow";

describe("PermutationBuilder", () => {
  let tmpDir: tmp.DirResult;
  let table: Table;

  beforeEach(async () => {
    tmpDir = tmp.dirSync({ unsafeCleanup: true });
    const db = await connect(tmpDir.name);

    // Create test data
    const data = makeArrowTable(
      [
        { id: 1, value: 10 },
        { id: 2, value: 20 },
        { id: 3, value: 30 },
        { id: 4, value: 40 },
        { id: 5, value: 50 },
        { id: 6, value: 60 },
        { id: 7, value: 70 },
        { id: 8, value: 80 },
        { id: 9, value: 90 },
        { id: 10, value: 100 },
      ],
      { vectorColumns: {} },
    );

    table = await db.createTable("test_table", data);
  });

  afterEach(() => {
    tmpDir.removeCallback();
  });

  test("should create permutation builder", () => {
    const builder = permutationBuilder(table);
    expect(builder).toBeDefined();
  });

  test("should execute basic permutation", async () => {
    const builder = permutationBuilder(table);
    const permutationTable = await builder.execute();

    expect(permutationTable).toBeDefined();

    const rowCount = await permutationTable.countRows();
    expect(rowCount).toBe(10);
  });

  test("should create permutation with random splits", async () => {
    const builder = permutationBuilder(table).splitRandom({
      ratios: [1.0],
      seed: 42,
    });

    const permutationTable = await builder.execute();
    const rowCount = await permutationTable.countRows();
    expect(rowCount).toBe(10);
  });

  test("should create permutation with percentage splits", async () => {
    const builder = permutationBuilder(table).splitRandom({
      ratios: [0.3, 0.7],
      seed: 42,
    });

    const permutationTable = await builder.execute();
    const rowCount = await permutationTable.countRows();
    expect(rowCount).toBe(10);

    // Check split distribution
    const split0Count = await permutationTable.countRows("split_id = 0");
    const split1Count = await permutationTable.countRows("split_id = 1");

    expect(split0Count).toBeGreaterThan(0);
    expect(split1Count).toBeGreaterThan(0);
    expect(split0Count + split1Count).toBe(10);
  });

  test("should create permutation with count splits", async () => {
    const builder = permutationBuilder(table).splitRandom({
      counts: [3, 7],
      seed: 42,
    });

    const permutationTable = await builder.execute();
    const rowCount = await permutationTable.countRows();
    expect(rowCount).toBe(10);

    // Check split distribution
    const split0Count = await permutationTable.countRows("split_id = 0");
    const split1Count = await permutationTable.countRows("split_id = 1");

    expect(split0Count).toBe(3);
    expect(split1Count).toBe(7);
  });

  test("should create permutation with hash splits", async () => {
    const builder = permutationBuilder(table).splitHash({
      columns: ["id"],
      splitWeights: [50, 50],
      discardWeight: 0,
    });

    const permutationTable = await builder.execute();
    const rowCount = await permutationTable.countRows();
    expect(rowCount).toBe(10);

    // Check that splits exist
    const split0Count = await permutationTable.countRows("split_id = 0");
    const split1Count = await permutationTable.countRows("split_id = 1");

    expect(split0Count).toBeGreaterThan(0);
    expect(split1Count).toBeGreaterThan(0);
    expect(split0Count + split1Count).toBe(10);
  });

  test("should create permutation with sequential splits", async () => {
    const builder = permutationBuilder(table).splitSequential({
      ratios: [0.5, 0.5],
    });

    const permutationTable = await builder.execute();
    const rowCount = await permutationTable.countRows();
    expect(rowCount).toBe(10);

    // Check split distribution - sequential should give exactly 5 and 5
    const split0Count = await permutationTable.countRows("split_id = 0");
    const split1Count = await permutationTable.countRows("split_id = 1");

    expect(split0Count).toBe(5);
    expect(split1Count).toBe(5);
  });

  test("should create permutation with calculated splits", async () => {
    const builder = permutationBuilder(table).splitCalculated({
      calculation: "id % 2",
    });

    const permutationTable = await builder.execute();
    const rowCount = await permutationTable.countRows();
    expect(rowCount).toBe(10);

    // Check split distribution
    const split0Count = await permutationTable.countRows("split_id = 0");
    const split1Count = await permutationTable.countRows("split_id = 1");

    expect(split0Count).toBeGreaterThan(0);
    expect(split1Count).toBeGreaterThan(0);
    expect(split0Count + split1Count).toBe(10);
  });

  test("should create permutation with shuffle", async () => {
    const builder = permutationBuilder(table).shuffle({
      seed: 42,
    });

    const permutationTable = await builder.execute();
    const rowCount = await permutationTable.countRows();
    expect(rowCount).toBe(10);
  });

  test("should create permutation with shuffle and clump size", async () => {
    const builder = permutationBuilder(table).shuffle({
      seed: 42,
      clumpSize: 2,
    });

    const permutationTable = await builder.execute();
    const rowCount = await permutationTable.countRows();
    expect(rowCount).toBe(10);
  });

  test("should create permutation with filter", async () => {
    const builder = permutationBuilder(table).filter("value > 50");

    const permutationTable = await builder.execute();
    const rowCount = await permutationTable.countRows();
    expect(rowCount).toBe(5); // Values 60, 70, 80, 90, 100
  });

  test("should chain multiple operations", async () => {
    const builder = permutationBuilder(table)
      .filter("value <= 80")
      .splitRandom({ ratios: [0.5, 0.5], seed: 42 })
      .shuffle({ seed: 123 });

    const permutationTable = await builder.execute();
    const rowCount = await permutationTable.countRows();
    expect(rowCount).toBe(8); // Values 10, 20, 30, 40, 50, 60, 70, 80

    // Check split distribution
    const split0Count = await permutationTable.countRows("split_id = 0");
    const split1Count = await permutationTable.countRows("split_id = 1");

    expect(split0Count).toBeGreaterThan(0);
    expect(split1Count).toBeGreaterThan(0);
    expect(split0Count + split1Count).toBe(8);
  });

  test("should throw error for invalid split arguments", () => {
    const builder = permutationBuilder(table);

    // Test no arguments provided
    expect(() => builder.splitRandom({})).toThrow(
      "Exactly one of 'ratios', 'counts', or 'fixed' must be provided",
    );

    // Test multiple arguments provided
    expect(() =>
      builder.splitRandom({ ratios: [0.5, 0.5], counts: [3, 7], seed: 42 }),
    ).toThrow("Exactly one of 'ratios', 'counts', or 'fixed' must be provided");
  });

  test("should throw error when builder is consumed", async () => {
    const builder = permutationBuilder(table);

    // Execute once
    await builder.execute();

    // Should throw error on second execution
    await expect(builder.execute()).rejects.toThrow("Builder already consumed");
  });

  test("should accept custom split names with random splits", async () => {
    const builder = permutationBuilder(table).splitRandom({
      ratios: [0.3, 0.7],
      seed: 42,
      splitNames: ["train", "test"],
    });

    const permutationTable = await builder.execute();
    const rowCount = await permutationTable.countRows();
    expect(rowCount).toBe(10);

    // Split names are provided but split_id is still numeric (0, 1, etc.)
    // The names are metadata that can be used by higher-level APIs
    const split0Count = await permutationTable.countRows("split_id = 0");
    const split1Count = await permutationTable.countRows("split_id = 1");

    expect(split0Count).toBeGreaterThan(0);
    expect(split1Count).toBeGreaterThan(0);
    expect(split0Count + split1Count).toBe(10);
  });

  test("should accept custom split names with hash splits", async () => {
|
||||
const builder = permutationBuilder(table).splitHash({
|
||||
columns: ["id"],
|
||||
splitWeights: [50, 50],
|
||||
discardWeight: 0,
|
||||
splitNames: ["set_a", "set_b"],
|
||||
});
|
||||
|
||||
const permutationTable = await builder.execute();
|
||||
const rowCount = await permutationTable.countRows();
|
||||
expect(rowCount).toBe(10);
|
||||
|
||||
// Split names are provided but split_id is still numeric
|
||||
const split0Count = await permutationTable.countRows("split_id = 0");
|
||||
const split1Count = await permutationTable.countRows("split_id = 1");
|
||||
|
||||
expect(split0Count).toBeGreaterThan(0);
|
||||
expect(split1Count).toBeGreaterThan(0);
|
||||
expect(split0Count + split1Count).toBe(10);
|
||||
});
|
||||
|
||||
test("should accept custom split names with sequential splits", async () => {
|
||||
const builder = permutationBuilder(table).splitSequential({
|
||||
ratios: [0.5, 0.5],
|
||||
splitNames: ["first", "second"],
|
||||
});
|
||||
|
||||
const permutationTable = await builder.execute();
|
||||
const rowCount = await permutationTable.countRows();
|
||||
expect(rowCount).toBe(10);
|
||||
|
||||
// Split names are provided but split_id is still numeric
|
||||
const split0Count = await permutationTable.countRows("split_id = 0");
|
||||
const split1Count = await permutationTable.countRows("split_id = 1");
|
||||
|
||||
expect(split0Count).toBe(5);
|
||||
expect(split1Count).toBe(5);
|
||||
});
|
||||
|
||||
test("should accept custom split names with calculated splits", async () => {
|
||||
const builder = permutationBuilder(table).splitCalculated({
|
||||
calculation: "id % 2",
|
||||
splitNames: ["even", "odd"],
|
||||
});
|
||||
|
||||
const permutationTable = await builder.execute();
|
||||
const rowCount = await permutationTable.countRows();
|
||||
expect(rowCount).toBe(10);
|
||||
|
||||
// Split names are provided but split_id is still numeric
|
||||
const split0Count = await permutationTable.countRows("split_id = 0");
|
||||
const split1Count = await permutationTable.countRows("split_id = 1");
|
||||
|
||||
expect(split0Count).toBeGreaterThan(0);
|
||||
expect(split1Count).toBeGreaterThan(0);
|
||||
expect(split0Count + split1Count).toBe(10);
|
||||
});
|
||||
|
||||
test("should persist permutation to a new table", async () => {
|
||||
const db = await connect(tmpDir.name);
|
||||
const builder = permutationBuilder(table)
|
||||
.splitRandom({
|
||||
ratios: [0.7, 0.3],
|
||||
seed: 42,
|
||||
splitNames: ["train", "validation"],
|
||||
})
|
||||
.persist(db, "my_permutation");
|
||||
|
||||
// Execute the builder which will persist the table
|
||||
const permutationTable = await builder.execute();
|
||||
|
||||
// Verify the persisted table exists and can be opened
|
||||
const persistedTable = await db.openTable("my_permutation");
|
||||
expect(persistedTable).toBeDefined();
|
||||
|
||||
// Verify the persisted table has the correct number of rows
|
||||
const rowCount = await persistedTable.countRows();
|
||||
expect(rowCount).toBe(10);
|
||||
|
||||
// Verify splits exist (numeric split_id values)
|
||||
const split0Count = await persistedTable.countRows("split_id = 0");
|
||||
const split1Count = await persistedTable.countRows("split_id = 1");
|
||||
|
||||
expect(split0Count).toBeGreaterThan(0);
|
||||
expect(split1Count).toBeGreaterThan(0);
|
||||
expect(split0Count + split1Count).toBe(10);
|
||||
|
||||
// Verify the table returned by execute is the same as the persisted one
|
||||
const executedRowCount = await permutationTable.countRows();
|
||||
expect(executedRowCount).toBe(10);
|
||||
});
|
||||
|
||||
test("should persist permutation with multiple operations", async () => {
|
||||
const db = await connect(tmpDir.name);
|
||||
const builder = permutationBuilder(table)
|
||||
.filter("value > 30")
|
||||
.splitRandom({ ratios: [0.5, 0.5], seed: 123, splitNames: ["a", "b"] })
|
||||
.shuffle({ seed: 456 })
|
||||
.persist(db, "filtered_permutation");
|
||||
|
||||
// Execute the builder
|
||||
const permutationTable = await builder.execute();
|
||||
|
||||
// Verify the persisted table
|
||||
const persistedTable = await db.openTable("filtered_permutation");
|
||||
const rowCount = await persistedTable.countRows();
|
||||
expect(rowCount).toBe(7); // Values 40, 50, 60, 70, 80, 90, 100
|
||||
|
||||
// Verify splits exist (numeric split_id values)
|
||||
const split0Count = await persistedTable.countRows("split_id = 0");
|
||||
const split1Count = await persistedTable.countRows("split_id = 1");
|
||||
|
||||
expect(split0Count).toBeGreaterThan(0);
|
||||
expect(split1Count).toBeGreaterThan(0);
|
||||
expect(split0Count + split1Count).toBe(7);
|
||||
|
||||
// Verify the executed table matches
|
||||
const executedRowCount = await permutationTable.countRows();
|
||||
expect(executedRowCount).toBe(7);
|
||||
});
|
||||
});
|
||||
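Taken together, these tests sketch a typical dataset-splitting workflow. A minimal end-to-end example, assuming only the permutationBuilder API exercised above (splitRandom, shuffle, persist, execute) and a hypothetical table name:

// Build a reproducible 80/20 train/test permutation and persist it.
// Assumes `db` is an open connection and `table` an existing Table,
// as in the setup of the tests above.
const splits = await permutationBuilder(table)
  .splitRandom({ ratios: [0.8, 0.2], seed: 7, splitNames: ["train", "test"] })
  .shuffle({ seed: 7 })
  .persist(db, "my_table_splits")
  .execute();

// split_id is numeric: 0 = "train", 1 = "test" (the names are metadata only).
const trainCount = await splits.countRows("split_id = 0");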
111 nodejs/__test__/query.test.ts Normal file
@@ -0,0 +1,111 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

import * as tmp from "tmp";

import { type Table, connect } from "../lancedb";
import {
  Field,
  FixedSizeList,
  Float32,
  Int64,
  Schema,
  Utf8,
  makeArrowTable,
} from "../lancedb/arrow";
import { Index } from "../lancedb/indices";

describe("Query outputSchema", () => {
  let tmpDir: tmp.DirResult;
  let table: Table;

  beforeEach(async () => {
    tmpDir = tmp.dirSync({ unsafeCleanup: true });
    const db = await connect(tmpDir.name);

    // Create table with explicit schema to ensure proper types
    const schema = new Schema([
      new Field("a", new Int64(), true),
      new Field("text", new Utf8(), true),
      new Field(
        "vec",
        new FixedSizeList(2, new Field("item", new Float32())),
        true,
      ),
    ]);

    const data = makeArrowTable(
      [
        { a: 1n, text: "foo", vec: [1, 2] },
        { a: 2n, text: "bar", vec: [3, 4] },
        { a: 3n, text: "baz", vec: [5, 6] },
      ],
      { schema },
    );
    table = await db.createTable("test", data);
  });

  afterEach(() => {
    tmpDir.removeCallback();
  });

  it("should return schema for plain query", async () => {
    const schema = await table.query().outputSchema();

    expect(schema.fields.length).toBe(3);
    expect(schema.fields.map((f) => f.name)).toEqual(["a", "text", "vec"]);
    expect(schema.fields[0].type.toString()).toBe("Int64");
    expect(schema.fields[1].type.toString()).toBe("Utf8");
  });

  it("should return schema with dynamic projection", async () => {
    const schema = await table.query().select({ bl: "a * 2" }).outputSchema();

    expect(schema.fields.length).toBe(1);
    expect(schema.fields[0].name).toBe("bl");
    expect(schema.fields[0].type.toString()).toBe("Int64");
  });

  it("should return schema for vector search with _distance column", async () => {
    const schema = await table
      .vectorSearch([1, 2])
      .select(["a"])
      .outputSchema();

    expect(schema.fields.length).toBe(2);
    expect(schema.fields.map((f) => f.name)).toEqual(["a", "_distance"]);
    expect(schema.fields[0].type.toString()).toBe("Int64");
    expect(schema.fields[1].type.toString()).toBe("Float32");
  });

  it("should return schema for FTS search", async () => {
    await table.createIndex("text", { config: Index.fts() });

    const schema = await table
      .search("foo", "fts")
      .select(["a"])
      .outputSchema();

    // FTS search includes _score column in addition to selected columns
    expect(schema.fields.length).toBe(2);
    expect(schema.fields.map((f) => f.name)).toContain("a");
    expect(schema.fields.map((f) => f.name)).toContain("_score");
    const aField = schema.fields.find((f) => f.name === "a");
    expect(aField?.type.toString()).toBe("Int64");
  });

  it("should return schema for take query", async () => {
    const schema = await table.takeOffsets([0]).select(["text"]).outputSchema();

    expect(schema.fields.length).toBe(1);
    expect(schema.fields[0].name).toBe("text");
    expect(schema.fields[0].type.toString()).toBe("Utf8");
  });

  it("should return full schema when no select is specified", async () => {
    const schema = await table.query().outputSchema();

    // Should return all columns
    expect(schema.fields.length).toBe(3);
  });
});
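The outputSchema API above makes it possible to inspect a query's result schema without executing it. A small sketch, using only the calls shown in these tests (the alias name is hypothetical):

// Validate a projection up front instead of discovering schema errors
// after a potentially expensive scan.
const schema = await table.query().select({ doubled: "a * 2" }).outputSchema();
if (schema.fields.some((f) => f.name === "doubled")) {
  const rows = await table.query().select({ doubled: "a * 2" }).toArray();
}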
@@ -3,7 +3,49 @@

import * as http from "http";
import { RequestListener } from "http";
import { Connection, ConnectionOptions, connect } from "../lancedb";
import {
  ClientConfig,
  Connection,
  ConnectionOptions,
  TlsConfig,
  connect,
} from "../lancedb";
import {
  HeaderProvider,
  OAuthHeaderProvider,
  StaticHeaderProvider,
} from "../lancedb/header";

// Test-only header providers
class CustomProvider extends HeaderProvider {
  getHeaders(): Record<string, string> {
    return { "X-Custom": "custom-value" };
  }
}

class ErrorProvider extends HeaderProvider {
  private errorMessage: string;
  public callCount: number = 0;

  constructor(errorMessage: string = "Test error") {
    super();
    this.errorMessage = errorMessage;
  }

  getHeaders(): Record<string, string> {
    this.callCount++;
    throw new Error(this.errorMessage);
  }
}

class ConcurrentProvider extends HeaderProvider {
  private counter: number = 0;

  getHeaders(): Record<string, string> {
    this.counter++;
    return { "X-Request-Id": String(this.counter) };
  }
}

async function withMockDatabase(
  listener: RequestListener,
@@ -148,4 +190,431 @@ describe("remote connection", () => {
      },
    );
  });

  describe("TlsConfig", () => {
    it("should create TlsConfig with all fields", () => {
      const tlsConfig: TlsConfig = {
        certFile: "/path/to/cert.pem",
        keyFile: "/path/to/key.pem",
        sslCaCert: "/path/to/ca.pem",
        assertHostname: false,
      };

      expect(tlsConfig.certFile).toBe("/path/to/cert.pem");
      expect(tlsConfig.keyFile).toBe("/path/to/key.pem");
      expect(tlsConfig.sslCaCert).toBe("/path/to/ca.pem");
      expect(tlsConfig.assertHostname).toBe(false);
    });

    it("should create TlsConfig with partial fields", () => {
      const tlsConfig: TlsConfig = {
        certFile: "/path/to/cert.pem",
        keyFile: "/path/to/key.pem",
      };

      expect(tlsConfig.certFile).toBe("/path/to/cert.pem");
      expect(tlsConfig.keyFile).toBe("/path/to/key.pem");
      expect(tlsConfig.sslCaCert).toBeUndefined();
      expect(tlsConfig.assertHostname).toBeUndefined();
    });

    it("should create ClientConfig with TlsConfig", () => {
      const tlsConfig: TlsConfig = {
        certFile: "/path/to/cert.pem",
        keyFile: "/path/to/key.pem",
        sslCaCert: "/path/to/ca.pem",
        assertHostname: true,
      };

      const clientConfig: ClientConfig = {
        userAgent: "test-agent",
        tlsConfig: tlsConfig,
      };

      expect(clientConfig.userAgent).toBe("test-agent");
      expect(clientConfig.tlsConfig).toBeDefined();
      expect(clientConfig.tlsConfig?.certFile).toBe("/path/to/cert.pem");
      expect(clientConfig.tlsConfig?.keyFile).toBe("/path/to/key.pem");
      expect(clientConfig.tlsConfig?.sslCaCert).toBe("/path/to/ca.pem");
      expect(clientConfig.tlsConfig?.assertHostname).toBe(true);
    });

    it("should handle empty TlsConfig", () => {
      const tlsConfig: TlsConfig = {};

      expect(tlsConfig.certFile).toBeUndefined();
      expect(tlsConfig.keyFile).toBeUndefined();
      expect(tlsConfig.sslCaCert).toBeUndefined();
      expect(tlsConfig.assertHostname).toBeUndefined();
    });

    it("should accept TlsConfig in connection options", () => {
      const tlsConfig: TlsConfig = {
        certFile: "/path/to/cert.pem",
        keyFile: "/path/to/key.pem",
        sslCaCert: "/path/to/ca.pem",
        assertHostname: false,
      };

      // Just verify that the ClientConfig accepts the TlsConfig
      const clientConfig: ClientConfig = {
        tlsConfig: tlsConfig,
      };

      const connectionOptions: ConnectionOptions = {
        apiKey: "fake",
        clientConfig: clientConfig,
      };

      // Verify the configuration structure is correct
      expect(connectionOptions.clientConfig).toBeDefined();
      expect(connectionOptions.clientConfig?.tlsConfig).toBeDefined();
      expect(connectionOptions.clientConfig?.tlsConfig?.certFile).toBe(
        "/path/to/cert.pem",
      );
    });
  });

  describe("header providers", () => {
    it("should work with StaticHeaderProvider", async () => {
      const provider = new StaticHeaderProvider({
        authorization: "Bearer test-token",
        "X-Custom": "value",
      });

      const headers = provider.getHeaders();
      expect(headers).toEqual({
        authorization: "Bearer test-token",
        "X-Custom": "value",
      });

      // Test that it returns a copy
      headers["X-Modified"] = "modified";
      const headers2 = provider.getHeaders();
      expect(headers2).not.toHaveProperty("X-Modified");
    });

    it("should pass headers from StaticHeaderProvider to requests", async () => {
      const provider = new StaticHeaderProvider({
        "X-Custom-Auth": "secret-token",
        "X-Request-Source": "test-suite",
      });

      await withMockDatabase(
        (req, res) => {
          expect(req.headers["x-custom-auth"]).toEqual("secret-token");
          expect(req.headers["x-request-source"]).toEqual("test-suite");

          const body = JSON.stringify({ tables: [] });
          res.writeHead(200, { "Content-Type": "application/json" }).end(body);
        },
        async () => {
          // Use actual header provider mechanism instead of extraHeaders
          const conn = await connect(
            "db://dev",
            {
              apiKey: "fake",
              hostOverride: "http://localhost:8000",
            },
            undefined, // session
            provider, // headerProvider
          );

          const tableNames = await conn.tableNames();
          expect(tableNames).toEqual([]);
        },
      );
    });

    it("should work with CustomProvider", () => {
      const provider = new CustomProvider();
      const headers = provider.getHeaders();
      expect(headers).toEqual({ "X-Custom": "custom-value" });
    });

    it("should handle ErrorProvider errors", () => {
      const provider = new ErrorProvider("Authentication failed");

      expect(() => provider.getHeaders()).toThrow("Authentication failed");
      expect(provider.callCount).toBe(1);

      // Test that error is thrown each time
      expect(() => provider.getHeaders()).toThrow("Authentication failed");
      expect(provider.callCount).toBe(2);
    });

    it("should work with ConcurrentProvider", () => {
      const provider = new ConcurrentProvider();

      const headers1 = provider.getHeaders();
      const headers2 = provider.getHeaders();
      const headers3 = provider.getHeaders();

      expect(headers1).toEqual({ "X-Request-Id": "1" });
      expect(headers2).toEqual({ "X-Request-Id": "2" });
      expect(headers3).toEqual({ "X-Request-Id": "3" });
    });

    describe("OAuthHeaderProvider", () => {
      it("should initialize correctly", () => {
        const fetcher = () => ({
          accessToken: "token123",
          expiresIn: 3600,
        });

        const provider = new OAuthHeaderProvider(fetcher);
        expect(provider).toBeInstanceOf(HeaderProvider);
      });

      it("should fetch token on first use", async () => {
        let callCount = 0;
        const fetcher = () => {
          callCount++;
          return {
            accessToken: "token123",
            expiresIn: 3600,
          };
        };

        const provider = new OAuthHeaderProvider(fetcher);

        // Need to manually refresh first due to sync limitation
        await provider.refreshToken();

        const headers = provider.getHeaders();
        expect(headers).toEqual({ authorization: "Bearer token123" });
        expect(callCount).toBe(1);

        // Second call should not fetch again
        const headers2 = provider.getHeaders();
        expect(headers2).toEqual({ authorization: "Bearer token123" });
        expect(callCount).toBe(1);
      });

      it("should handle tokens without expiry", async () => {
        const fetcher = () => ({
          accessToken: "permanent_token",
        });

        const provider = new OAuthHeaderProvider(fetcher);
        await provider.refreshToken();

        const headers = provider.getHeaders();
        expect(headers).toEqual({ authorization: "Bearer permanent_token" });
      });

      it("should throw error when access_token is missing", async () => {
        const fetcher = () =>
          ({
            expiresIn: 3600,
          }) as { accessToken?: string; expiresIn?: number };

        const provider = new OAuthHeaderProvider(
          fetcher as () => {
            accessToken: string;
            expiresIn?: number;
          },
        );

        await expect(provider.refreshToken()).rejects.toThrow(
          "Token fetcher did not return 'accessToken'",
        );
      });

      it("should handle async token fetchers", async () => {
        const fetcher = async () => {
          // Simulate async operation
          await new Promise((resolve) => setTimeout(resolve, 10));
          return {
            accessToken: "async_token",
            expiresIn: 3600,
          };
        };

        const provider = new OAuthHeaderProvider(fetcher);
        await provider.refreshToken();

        const headers = provider.getHeaders();
        expect(headers).toEqual({ authorization: "Bearer async_token" });
      });
    });

    it("should merge header provider headers with extra headers", async () => {
      const provider = new StaticHeaderProvider({
        "X-From-Provider": "provider-value",
      });

      await withMockDatabase(
        (req, res) => {
          expect(req.headers["x-from-provider"]).toEqual("provider-value");
          expect(req.headers["x-extra-header"]).toEqual("extra-value");

          const body = JSON.stringify({ tables: [] });
          res.writeHead(200, { "Content-Type": "application/json" }).end(body);
        },
        async () => {
          // Use header provider with additional extraHeaders
          const conn = await connect(
            "db://dev",
            {
              apiKey: "fake",
              hostOverride: "http://localhost:8000",
              clientConfig: {
                extraHeaders: {
                  "X-Extra-Header": "extra-value",
                },
              },
            },
            undefined, // session
            provider, // headerProvider
          );

          const tableNames = await conn.tableNames();
          expect(tableNames).toEqual([]);
        },
      );
    });
  });

  describe("header provider integration", () => {
    it("should work with TypeScript StaticHeaderProvider", async () => {
      let requestCount = 0;

      await withMockDatabase(
        (req, res) => {
          requestCount++;

          // Check headers are present on each request
          expect(req.headers["authorization"]).toEqual("Bearer test-token-123");
          expect(req.headers["x-custom"]).toEqual("custom-value");

          // Return different responses based on the endpoint
          if (req.url === "/v1/table/test_table/describe/") {
            const body = JSON.stringify({
              name: "test_table",
              schema: { fields: [] },
            });
            res
              .writeHead(200, { "Content-Type": "application/json" })
              .end(body);
          } else {
            const body = JSON.stringify({ tables: ["test_table"] });
            res
              .writeHead(200, { "Content-Type": "application/json" })
              .end(body);
          }
        },
        async () => {
          // Create provider with static headers
          const provider = new StaticHeaderProvider({
            authorization: "Bearer test-token-123",
            "X-Custom": "custom-value",
          });

          // Connect with the provider
          const conn = await connect(
            "db://dev",
            {
              apiKey: "fake",
              hostOverride: "http://localhost:8000",
            },
            undefined, // session
            provider, // headerProvider
          );

          // Make multiple requests to verify headers are sent each time
          const tables1 = await conn.tableNames();
          expect(tables1).toEqual(["test_table"]);

          const tables2 = await conn.tableNames();
          expect(tables2).toEqual(["test_table"]);

          // Verify headers were sent with each request
          expect(requestCount).toBeGreaterThanOrEqual(2);
        },
      );
    });

    it("should work with JavaScript function provider", async () => {
      let requestId = 0;

      await withMockDatabase(
        (req, res) => {
          // Check dynamic header is present
          expect(req.headers["x-request-id"]).toBeDefined();
          expect(req.headers["x-request-id"]).toMatch(/^req-\d+$/);

          const body = JSON.stringify({ tables: [] });
          res.writeHead(200, { "Content-Type": "application/json" }).end(body);
        },
        async () => {
          // Create a JavaScript function that returns dynamic headers
          const getHeaders = async () => {
            requestId++;
            return {
              "X-Request-Id": `req-${requestId}`,
              "X-Timestamp": new Date().toISOString(),
            };
          };

          // Connect with the function directly
          const conn = await connect(
            "db://dev",
            {
              apiKey: "fake",
              hostOverride: "http://localhost:8000",
            },
            undefined, // session
            getHeaders, // headerProvider
          );

          // Make requests - each should have different headers
          const tables = await conn.tableNames();
          expect(tables).toEqual([]);
        },
      );
    });

    it("should support OAuth-like token refresh pattern", async () => {
      let tokenVersion = 0;

      await withMockDatabase(
        (req, res) => {
          // Verify authorization header
          const authHeader = req.headers["authorization"];
          expect(authHeader).toBeDefined();
          expect(authHeader).toMatch(/^Bearer token-v\d+$/);

          const body = JSON.stringify({ tables: [] });
          res.writeHead(200, { "Content-Type": "application/json" }).end(body);
        },
        async () => {
          // Simulate OAuth token fetcher
          const fetchToken = async () => {
            tokenVersion++;
            return {
              authorization: `Bearer token-v${tokenVersion}`,
            };
          };

          // Connect with the function directly
          const conn = await connect(
            "db://dev",
            {
              apiKey: "fake",
              hostOverride: "http://localhost:8000",
            },
            undefined, // session
            fetchToken, // headerProvider
          );

          // Each request will fetch a new token
          await conn.tableNames();

          // Token should be different on next request
          await conn.tableNames();
        },
      );
    });
  });
});
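For callers, the pattern these tests exercise is: implement HeaderProvider (or pass a plain async function) and hand it to connect as the fourth argument. A minimal sketch, assuming the same connect signature used in the tests; the token source is hypothetical:

// A provider that re-reads a token from the environment on every request.
class EnvTokenProvider extends HeaderProvider {
  getHeaders(): Record<string, string> {
    return { authorization: `Bearer ${process.env.LANCEDB_TOKEN ?? ""}` };
  }
}

const conn = await connect(
  "db://dev",
  { apiKey: "fake", hostOverride: "http://localhost:8000" },
  undefined, // session
  new EnvTokenProvider(), // headerProvider
);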
184 nodejs/__test__/sanitize.test.ts Normal file
@@ -0,0 +1,184 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

import * as arrow from "../lancedb/arrow";
import { sanitizeField, sanitizeType } from "../lancedb/sanitize";

describe("sanitize", function () {
  describe("sanitizeType function", function () {
    it("should handle type objects", function () {
      const type = new arrow.Int32();
      const result = sanitizeType(type);

      expect(result.typeId).toBe(arrow.Type.Int);
      expect((result as arrow.Int).bitWidth).toBe(32);
      expect((result as arrow.Int).isSigned).toBe(true);

      const floatType = {
        typeId: 3, // Type.Float = 3
        precision: 2,
        toString: () => "Float",
        isFloat: true,
        isFixedWidth: true,
      };

      const floatResult = sanitizeType(floatType);
      expect(floatResult).toBeInstanceOf(arrow.DataType);
      expect(floatResult.typeId).toBe(arrow.Type.Float);

      const floatResult2 = sanitizeType({ ...floatType, typeId: () => 3 });
      expect(floatResult2).toBeInstanceOf(arrow.DataType);
      expect(floatResult2.typeId).toBe(arrow.Type.Float);
    });

    const allTypeNameTestCases = [
      ["null", new arrow.Null()],
      ["binary", new arrow.Binary()],
      ["utf8", new arrow.Utf8()],
      ["bool", new arrow.Bool()],
      ["int8", new arrow.Int8()],
      ["int16", new arrow.Int16()],
      ["int32", new arrow.Int32()],
      ["int64", new arrow.Int64()],
      ["uint8", new arrow.Uint8()],
      ["uint16", new arrow.Uint16()],
      ["uint32", new arrow.Uint32()],
      ["uint64", new arrow.Uint64()],
      ["float16", new arrow.Float16()],
      ["float32", new arrow.Float32()],
      ["float64", new arrow.Float64()],
      ["datemillisecond", new arrow.DateMillisecond()],
      ["dateday", new arrow.DateDay()],
      ["timenanosecond", new arrow.TimeNanosecond()],
      ["timemicrosecond", new arrow.TimeMicrosecond()],
      ["timemillisecond", new arrow.TimeMillisecond()],
      ["timesecond", new arrow.TimeSecond()],
      ["intervaldaytime", new arrow.IntervalDayTime()],
      ["intervalyearmonth", new arrow.IntervalYearMonth()],
      ["durationnanosecond", new arrow.DurationNanosecond()],
      ["durationmicrosecond", new arrow.DurationMicrosecond()],
      ["durationmillisecond", new arrow.DurationMillisecond()],
      ["durationsecond", new arrow.DurationSecond()],
    ] as const;

    it.each(allTypeNameTestCases)(
      'should map type name "%s" to %s',
      function (name, expected) {
        const result = sanitizeType(name);
        expect(result).toBeInstanceOf(expected.constructor);
      },
    );

    const caseVariationTestCases = [
      ["NULL", new arrow.Null()],
      ["Utf8", new arrow.Utf8()],
      ["FLOAT32", new arrow.Float32()],
      ["DaTedAy", new arrow.DateDay()],
    ] as const;

    it.each(caseVariationTestCases)(
      'should be case insensitive for type name "%s" mapped to %s',
      function (name, expected) {
        const result = sanitizeType(name);
        expect(result).toBeInstanceOf(expected.constructor);
      },
    );

    it("should throw error for unrecognized type name", function () {
      expect(() => sanitizeType("invalid_type")).toThrow(
        "Unrecognized type name in schema: invalid_type",
      );
    });
  });

  describe("sanitizeField function", function () {
    it("should handle field with string type name", function () {
      const field = sanitizeField({
        name: "string_field",
        type: "utf8",
        nullable: true,
        metadata: new Map([["key", "value"]]),
      });

      expect(field).toBeInstanceOf(arrow.Field);
      expect(field.name).toBe("string_field");
      expect(field.type).toBeInstanceOf(arrow.Utf8);
      expect(field.nullable).toBe(true);
      expect(field.metadata?.get("key")).toBe("value");
    });

    it("should handle field with type object", function () {
      const floatType = {
        typeId: 3, // Float
        precision: 32,
      };

      const field = sanitizeField({
        name: "float_field",
        type: floatType,
        nullable: false,
      });

      expect(field).toBeInstanceOf(arrow.Field);
      expect(field.name).toBe("float_field");
      expect(field.type).toBeInstanceOf(arrow.DataType);
      expect(field.type.typeId).toBe(arrow.Type.Float);
      expect((field.type as arrow.Float64).precision).toBe(32);
      expect(field.nullable).toBe(false);
    });

    it("should handle field with direct Type instance", function () {
      const field = sanitizeField({
        name: "bool_field",
        type: new arrow.Bool(),
        nullable: true,
      });

      expect(field).toBeInstanceOf(arrow.Field);
      expect(field.name).toBe("bool_field");
      expect(field.type).toBeInstanceOf(arrow.Bool);
      expect(field.nullable).toBe(true);
    });

    it("should throw error for invalid field object", function () {
      expect(() =>
        sanitizeField({
          type: "int32",
          nullable: true,
        }),
      ).toThrow(
        "The field passed in is missing a `type`/`name`/`nullable` property",
      );

      // Invalid type
      expect(() =>
        sanitizeField({
          name: "invalid",
          type: { invalid: true },
          nullable: true,
        }),
      ).toThrow("Expected a Type to have a typeId property");

      // Invalid nullable
      expect(() =>
        sanitizeField({
          name: "invalid_nullable",
          type: "int32",
          nullable: "not a boolean",
        }),
      ).toThrow("The field passed in had a non-boolean `nullable` property");
    });

    it("should report error for invalid type name", function () {
      expect(() =>
        sanitizeField({
          name: "invalid_field",
          type: "invalid_type",
          nullable: true,
        }),
      ).toThrow(
        "Unable to sanitize type for field: invalid_field due to error: Error: Unrecognized type name in schema: invalid_type",
      );
    });
  });
});
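In practice these helpers let callers describe schemas with plain objects or type-name strings instead of constructing apache-arrow classes directly. A short sketch, using only inputs the tests above accept; the field name is hypothetical:

// "float32" (case-insensitive) is resolved to an arrow.Float32 instance.
const scoreField = sanitizeField({
  name: "score",
  type: "float32",
  nullable: true,
});
console.log(scoreField.type.toString()); // Float32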
@@ -10,7 +10,13 @@ import * as arrow16 from "apache-arrow-16";
import * as arrow17 from "apache-arrow-17";
import * as arrow18 from "apache-arrow-18";

import { MatchQuery, PhraseQuery, Table, connect } from "../lancedb";
import {
  Connection,
  MatchQuery,
  PhraseQuery,
  Table,
  connect,
} from "../lancedb";
import {
  Table as ArrowTable,
  Field,
@@ -21,6 +27,8 @@ import {
  Int64,
  List,
  Schema,
  SchemaLike,
  Type,
  Uint8,
  Utf8,
  makeArrowTable,
@@ -39,7 +47,6 @@ import {
  Operator,
  instanceOfFullTextQuery,
} from "../lancedb/query";
import exp = require("constants");

describe.each([arrow15, arrow16, arrow17, arrow18])(
  "Given a table",
@@ -212,8 +219,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      },
    );

    // TODO: https://github.com/lancedb/lancedb/issues/1832
    it.skip("should be able to omit nullable fields", async () => {
    it("should be able to omit nullable fields", async () => {
      const db = await connect(tmpDir.name);
      const schema = new arrow.Schema([
        new arrow.Field(
@@ -237,23 +243,36 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      await table.add([data3]);

      let res = await table.query().limit(10).toArray();
      const resVector = res.map((r) => r.get("vector").toArray());
      const resVector = res.map((r) =>
        r.vector ? Array.from(r.vector) : null,
      );
      expect(resVector).toEqual([null, data2.vector, data3.vector]);
      const resItem = res.map((r) => r.get("item").toArray());
      const resItem = res.map((r) => r.item);
      expect(resItem).toEqual(["foo", null, "bar"]);
      const resPrice = res.map((r) => r.get("price").toArray());
      const resPrice = res.map((r) => r.price);
      expect(resPrice).toEqual([10.0, 2.0, 3.0]);

      const data4 = { item: "foo" };
      // We can't omit a column if it's not nullable
      await expect(table.add([data4])).rejects.toThrow("Invalid user input");
      await expect(table.add([data4])).rejects.toThrow(
        "Append with different schema",
      );

      // But we can alter columns to make them nullable
      await table.alterColumns([{ path: "price", nullable: true }]);
      await table.add([data4]);

      res = (await table.query().limit(10).toArray()).map((r) => r.toJSON());
      expect(res).toEqual([data1, data2, data3, data4]);
      res = (await table.query().limit(10).toArray()).map((r) => ({
        ...r.toJSON(),
        vector: r.vector ? Array.from(r.vector) : null,
      }));
      // Rust fills missing nullable fields with null
      expect(res).toEqual([
        { ...data1, vector: null },
        { ...data2, item: null },
        data3,
        { ...data4, price: null, vector: null },
      ]);
    });

    it("should be able to insert nullable data for non-nullable fields", async () => {
@@ -287,6 +306,12 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      expect(res2[1].id).toEqual(data2.id);
    });

    it("should support take queries", async () => {
      await table.add([{ id: 1 }, { id: 2 }, { id: 3 }]);
      const res = await table.takeOffsets([1, 2]).toArrow();
      expect(res.getChild("id")?.toJSON()).toEqual([2, 3]);
    });

    it("should return the table as an instance of an arrow table", async () => {
      const arrowTbl = await table.toArrow();
      expect(arrowTbl).toBeInstanceOf(ArrowTable);
@@ -325,6 +350,43 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
      const table = await db.createTable("my_table", data);
      expect(await table.countRows()).toEqual(2);
    });

    it("should allow undefined and omitted nullable vector fields", async () => {
      // Test for the bug: can't pass undefined or omit vector column
      const db = await connect("memory://");
      const schema = new arrow.Schema([
        new arrow.Field("id", new arrow.Int32(), true),
        new arrow.Field(
          "vector",
          new arrow.FixedSizeList(
            32,
            new arrow.Field("item", new arrow.Float32(), true),
          ),
          true, // nullable = true
        ),
      ]);
      const table = await db.createEmptyTable("test_table", schema);

      // Should not throw error for undefined value
      await table.add([{ id: 0, vector: undefined }]);

      // Should not throw error for omitted field
      await table.add([{ id: 1 }]);

      // Should still work for null
      await table.add([{ id: 2, vector: null }]);

      // Should still work for actual vector
      const testVector = new Array(32).fill(0.5);
      await table.add([{ id: 3, vector: testVector }]);
      expect(await table.countRows()).toEqual(4);

      const res = await table.query().limit(10).toArray();
      const resVector = res.map((r) =>
        r.vector ? Array.from(r.vector) : null,
      );
      expect(resVector).toEqual([null, null, null, testVector]);
    });
  },
);

@@ -482,6 +544,32 @@ describe("merge insert", () => {
        .execute(newData, { timeoutMs: 0 }),
    ).rejects.toThrow("merge insert timed out");
  });

  test("useIndex", async () => {
    const newData = [
      { a: 2, b: "x" },
      { a: 4, b: "z" },
    ];

    // Test with useIndex(true) - should work fine
    const result1 = await table
      .mergeInsert("a")
      .whenNotMatchedInsertAll()
      .useIndex(true)
      .execute(newData);

    expect(result1.numInsertedRows).toBe(1); // Only a=4 should be inserted

    // Test with useIndex(false) - should also work fine
    const newData2 = [{ a: 5, b: "w" }];
    const result2 = await table
      .mergeInsert("a")
      .whenNotMatchedInsertAll()
      .useIndex(false)
      .execute(newData2);

    expect(result2.numInsertedRows).toBe(1); // a=5 should be inserted
  });
});

describe("When creating an index", () => {
@@ -557,7 +645,7 @@ describe("When creating an index", () => {

    // test offset
    rst = await tbl.query().limit(2).offset(1).nearestTo(queryVec).toArrow();
    expect(rst.numRows).toBe(1);
    expect(rst.numRows).toBe(2);

    // test nprobes
    rst = await tbl.query().nearestTo(queryVec).limit(2).nprobes(50).toArrow();
@@ -696,7 +784,7 @@ describe("When creating an index", () => {

    // test offset
    rst = await tbl.query().limit(2).offset(1).nearestTo(queryVec).toArrow();
    expect(rst.numRows).toBe(1);
    expect(rst.numRows).toBe(2);

    // test ef
    rst = await tbl.query().limit(2).nearestTo(queryVec).ef(100).toArrow();
@@ -773,6 +861,15 @@ describe("When creating an index", () => {
    });
  });

  it("should be able to create IVF_RQ", async () => {
    await tbl.createIndex("vec", {
      config: Index.ivfRq({
        numPartitions: 10,
        numBits: 1,
      }),
    });
  });

  it("should allow me to replace (or not) an existing index", async () => {
    await tbl.createIndex("id");
    // Default is replace=true
@@ -851,6 +948,40 @@ describe("When creating an index", () => {
    expect(stats).toBeUndefined();
  });

  test("should support name and train parameters", async () => {
    // Test with custom name
    await tbl.createIndex("vec", {
      config: Index.ivfPq({ numPartitions: 4 }),
      name: "my_custom_vector_index",
    });

    const indices = await tbl.listIndices();
    expect(indices).toHaveLength(1);
    expect(indices[0].name).toBe("my_custom_vector_index");

    // Test scalar index with train=false
    await tbl.createIndex("id", {
      config: Index.btree(),
      name: "btree_empty",
      train: false,
    });

    const allIndices = await tbl.listIndices();
    expect(allIndices).toHaveLength(2);
    expect(allIndices.some((idx) => idx.name === "btree_empty")).toBe(true);

    // Test with both name and train=true (use tags column)
    await tbl.createIndex("tags", {
      config: Index.labelList(),
      name: "tags_trained",
      train: true,
    });

    const finalIndices = await tbl.listIndices();
    expect(finalIndices).toHaveLength(3);
    expect(finalIndices.some((idx) => idx.name === "tags_trained")).toBe(true);
  });

  test("create ivf_flat with binary vectors", async () => {
    const db = await connect(tmpDir.name);
    const binarySchema = new Schema([
@@ -1389,7 +1520,9 @@ describe("when optimizing a dataset", () => {

  it("delete unverified", async () => {
    const version = await table.version();
    const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${version - 1}.manifest`;
    const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${
      version - 1
    }.manifest`;
    fs.rmSync(versionFile);

    let stats = await table.optimize({ deleteUnverified: false });
@@ -1903,3 +2036,52 @@ describe("column name options", () => {
    expect(results2.length).toBe(10);
  });
});

describe("when creating an empty table", () => {
  let con: Connection;
  beforeEach(async () => {
    const tmpDir = tmp.dirSync({ unsafeCleanup: true });
    con = await connect(tmpDir.name);
  });
  afterEach(() => {
    con.close();
  });

  it("can create an empty table from an arrow Schema", async () => {
    const schema = new Schema([
      new Field("id", new Int64()),
      new Field("vector", new Float64()),
    ]);
    const table = await con.createEmptyTable("test", schema);
    const actualSchema = await table.schema();
    expect(actualSchema.fields[0].type.typeId).toBe(Type.Int);
    expect((actualSchema.fields[0].type as Int64).bitWidth).toBe(64);
    expect(actualSchema.fields[1].type.typeId).toBe(Type.Float);
    expect((actualSchema.fields[1].type as Float64).precision).toBe(2);
  });

  it("can create an empty table from schema that specifies field types by name", async () => {
    const schemaLike = {
      fields: [
        {
          name: "id",
          type: "int64",
          nullable: true,
        },
        {
          name: "vector",
          type: "float64",
          nullable: true,
        },
      ],
      metadata: new Map(),
      names: ["id", "vector"],
    } satisfies SchemaLike;
    const table = await con.createEmptyTable("test", schemaLike);
    const actualSchema = await table.schema();
    expect(actualSchema.fields[0].type.typeId).toBe(Type.Int);
    expect((actualSchema.fields[0].type as Int64).bitWidth).toBe(64);
    expect(actualSchema.fields[1].type.typeId).toBe(Type.Float);
    expect((actualSchema.fields[1].type as Float64).precision).toBe(2);
  });
});

@@ -48,6 +48,7 @@
  "noUnreachableSuper": "error",
  "noUnsafeFinally": "error",
  "noUnsafeOptionalChaining": "error",
  "noUnusedImports": "error",
  "noUnusedLabels": "error",
  "noUnusedVariables": "warn",
  "useIsNan": "error",

@@ -12,7 +12,7 @@ test("ann index examples", async () => {
  // --8<-- [start:ingest]
  const db = await lancedb.connect(databaseDir);

  const data = Array.from({ length: 5_000 }, (_, i) => ({
  const data = Array.from({ length: 1_000 }, (_, i) => ({
    vector: Array(128).fill(i),
    id: `${i}`,
    content: "",
@@ -24,8 +24,8 @@ test("ann index examples", async () => {
  });
  await table.createIndex("vector", {
    config: lancedb.Index.ivfPq({
      numPartitions: 10,
      numSubVectors: 16,
      numPartitions: 30,
      numSubVectors: 8,
    }),
  });
  // --8<-- [end:ingest]

@@ -41,7 +41,6 @@ import {
  vectorFromArray as badVectorFromArray,
  makeBuilder,
  makeData,
  makeTable,
} from "apache-arrow";
import { Buffers } from "apache-arrow/data";
import { type EmbeddingFunction } from "./embedding/embedding_function";
@@ -74,7 +73,7 @@ export type FieldLike =
  | {
      type: string;
      name: string;
      nullable?: boolean;
      nullable: boolean;
      metadata?: Map<string, string>;
    };

@@ -279,7 +278,7 @@ export class MakeArrowTableOptions {
}

/**
 * An enhanced version of the {@link makeTable} function from Apache Arrow
 * An enhanced version of the apache-arrow makeTable function from Apache Arrow
 * that supports nested fields and embeddings columns.
 *
 * (typically you do not need to call this function. It will be called automatically
@@ -512,7 +511,11 @@ function* rowPathsAndValues(
    if (isObject(value)) {
      yield* rowPathsAndValues(value, [...basePath, key]);
    } else {
      yield [[...basePath, key], value];
      // Skip undefined values - they should be treated the same as missing fields
      // for embedding function purposes
      if (value !== undefined) {
        yield [[...basePath, key], value];
      }
    }
  }
}
@@ -701,7 +704,7 @@ function transposeData(
        }
        return current;
      });
      return makeVector(values, field.type);
      return makeVector(values, field.type, undefined, field.nullable);
    }
  }

@@ -748,9 +751,30 @@ function makeVector(
  values: unknown[],
  type?: DataType,
  stringAsDictionary?: boolean,
  nullable?: boolean,
  // biome-ignore lint/suspicious/noExplicitAny: skip
): Vector<any> {
  if (type !== undefined) {
    // Convert undefined values to null for nullable fields
    if (nullable) {
      values = values.map((v) => (v === undefined ? null : v));
    }

    // workaround for: https://github.com/apache/arrow-js/issues/68
    if (DataType.isBool(type)) {
      const hasNonNullValue = values.some((v) => v !== null && v !== undefined);
      if (!hasNonNullValue) {
        const nullBitmap = new Uint8Array(Math.ceil(values.length / 8));
        const data = makeData({
          type: type,
          length: values.length,
          nullCount: values.length,
          nullBitmap,
        });
        return arrowMakeVector(data);
      }
    }

    // No need for inference, let Arrow create it
    if (type instanceof Int) {
    if (DataType.isInt(type) && type.bitWidth === 64) {
@@ -875,7 +899,12 @@ async function applyEmbeddingsFromMetadata(
  for (const field of schema.fields) {
    if (!(field.name in columns)) {
      const nullValues = new Array(table.numRows).fill(null);
      columns[field.name] = makeVector(nullValues, field.type);
      columns[field.name] = makeVector(
        nullValues,
        field.type,
        undefined,
        field.nullable,
      );
    }
  }

@@ -939,7 +968,12 @@ async function applyEmbeddings<T>(
  } else if (schema != null) {
    const destField = schema.fields.find((f) => f.name === destColumn);
    if (destField != null) {
      newColumns[destColumn] = makeVector([], destField.type);
      newColumns[destColumn] = makeVector(
        [],
        destField.type,
        undefined,
        destField.nullable,
      );
    } else {
      throw new Error(
        `Attempt to apply embeddings to an empty table failed because schema was missing embedding column '${destColumn}'`,
@@ -1251,19 +1285,36 @@ function validateSchemaEmbeddings(
    if (isFixedSizeList(field.type)) {
      field = sanitizeField(field);
      if (data.length !== 0 && data?.[0]?.[field.name] === undefined) {
        // Check if there's an embedding function registered for this field
        let hasEmbeddingFunction = false;

        // Check schema metadata for embedding functions
        if (schema.metadata.has("embedding_functions")) {
          const embeddings = JSON.parse(
            schema.metadata.get("embedding_functions")!,
          );
          if (
            // biome-ignore lint/suspicious/noExplicitAny: we don't know the type of `f`
            embeddings.find((f: any) => f["vectorColumn"] === field.name) ===
              undefined
          ) {
          // biome-ignore lint/suspicious/noExplicitAny: we don't know the type of `f`
          if (embeddings.find((f: any) => f["vectorColumn"] === field.name)) {
            hasEmbeddingFunction = true;
          }
        }

        // Check passed embedding function parameter
        if (embeddings && embeddings.vectorColumn === field.name) {
          hasEmbeddingFunction = true;
        }

        // If the field is nullable AND there's no embedding function, allow undefined/omitted values
        if (field.nullable && !hasEmbeddingFunction) {
          fields.push(field);
        } else {
          // Either not nullable OR has embedding function - require explicit values
          if (hasEmbeddingFunction) {
            // Don't add to missingEmbeddingFields since this is expected to be filled by embedding function
            fields.push(field);
          } else {
            missingEmbeddingFields.push(field);
          }
        } else {
          missingEmbeddingFields.push(field);
        }
      } else {
        fields.push(field);

@@ -3,7 +3,6 @@

import {
  Data,
  Schema,
  SchemaLike,
  TableLike,
  fromTableToStreamBuffer,
@@ -159,17 +158,33 @@ export abstract class Connection {
   *
   * Tables will be returned in lexicographical order.
   * @param {Partial<TableNamesOptions>} options - options to control the
   *     paging / start point
   *     paging / start point (backwards compatibility)
   *
   */
  abstract tableNames(options?: Partial<TableNamesOptions>): Promise<string[]>;
  /**
   * List all the table names in this database.
   *
   * Tables will be returned in lexicographical order.
   * @param {string[]} namespace - The namespace to list tables from (defaults to root namespace)
   * @param {Partial<TableNamesOptions>} options - options to control the
   *     paging / start point
   *
   */
  abstract tableNames(
    namespace?: string[],
    options?: Partial<TableNamesOptions>,
  ): Promise<string[]>;

  /**
   * Open a table in the database.
   * @param {string} name - The name of the table
   * @param {string[]} namespace - The namespace of the table (defaults to root namespace)
   * @param {Partial<OpenTableOptions>} options - Additional options
   */
  abstract openTable(
    name: string,
    namespace?: string[],
    options?: Partial<OpenTableOptions>,
  ): Promise<Table>;

@@ -178,6 +193,7 @@ export abstract class Connection {
   * @param {object} options - The options object.
   * @param {string} options.name - The name of the table.
   * @param {Data} options.data - Non-empty Array of Records to be inserted into the table
   * @param {string[]} namespace - The namespace to create the table in (defaults to root namespace)
   *
   */
  abstract createTable(
@@ -185,40 +201,99 @@ export abstract class Connection {
      name: string;
      data: Data;
    } & Partial<CreateTableOptions>,
    namespace?: string[],
  ): Promise<Table>;
  /**
   * Creates a new Table and initialize it with new data.
   * @param {string} name - The name of the table.
   * @param {Record<string, unknown>[] | TableLike} data - Non-empty Array of Records
   *     to be inserted into the table
   * @param {Partial<CreateTableOptions>} options - Additional options (backwards compatibility)
   */
  abstract createTable(
    name: string,
    data: Record<string, unknown>[] | TableLike,
    options?: Partial<CreateTableOptions>,
  ): Promise<Table>;
  /**
   * Creates a new Table and initialize it with new data.
   * @param {string} name - The name of the table.
   * @param {Record<string, unknown>[] | TableLike} data - Non-empty Array of Records
   *     to be inserted into the table
   * @param {string[]} namespace - The namespace to create the table in (defaults to root namespace)
   * @param {Partial<CreateTableOptions>} options - Additional options
   */
  abstract createTable(
    name: string,
    data: Record<string, unknown>[] | TableLike,
    namespace?: string[],
    options?: Partial<CreateTableOptions>,
  ): Promise<Table>;

  /**
   * Creates a new empty Table
   * @param {string} name - The name of the table.
   * @param {Schema} schema - The schema of the table
   * @param {Partial<CreateTableOptions>} options - Additional options (backwards compatibility)
   */
  abstract createEmptyTable(
    name: string,
    schema: import("./arrow").SchemaLike,
    options?: Partial<CreateTableOptions>,
  ): Promise<Table>;
  /**
   * Creates a new empty Table
   * @param {string} name - The name of the table.
   * @param {Schema} schema - The schema of the table
   * @param {string[]} namespace - The namespace to create the table in (defaults to root namespace)
   * @param {Partial<CreateTableOptions>} options - Additional options
   */
  abstract createEmptyTable(
    name: string,
    schema: import("./arrow").SchemaLike,
    namespace?: string[],
    options?: Partial<CreateTableOptions>,
  ): Promise<Table>;

  /**
   * Drop an existing table.
   * @param {string} name The name of the table to drop.
   * @param {string[]} namespace The namespace of the table (defaults to root namespace).
   */
  abstract dropTable(name: string): Promise<void>;
  abstract dropTable(name: string, namespace?: string[]): Promise<void>;

  /**
   * Drop all tables in the database.
   * @param {string[]} namespace The namespace to drop tables from (defaults to root namespace).
   */
  abstract dropAllTables(): Promise<void>;
  abstract dropAllTables(namespace?: string[]): Promise<void>;

  /**
   * Clone a table from a source table.
   *
   * A shallow clone creates a new table that shares the underlying data files
   * with the source table but has its own independent manifest. This allows
   * both the source and cloned tables to evolve independently while initially
   * sharing the same data, deletion, and index files.
   *
   * @param {string} targetTableName - The name of the target table to create.
   * @param {string} sourceUri - The URI of the source table to clone from.
   * @param {object} options - Clone options.
   * @param {string[]} options.targetNamespace - The namespace for the target table (defaults to root namespace).
   * @param {number} options.sourceVersion - The version of the source table to clone.
   * @param {string} options.sourceTag - The tag of the source table to clone.
   * @param {boolean} options.isShallow - Whether to perform a shallow clone (defaults to true).
   */
  abstract cloneTable(
    targetTableName: string,
    sourceUri: string,
    options?: {
      targetNamespace?: string[];
      sourceVersion?: number;
      sourceTag?: string;
      isShallow?: boolean;
    },
  ): Promise<Table>;
}

/** @hideconstructor */
|
||||
@@ -243,16 +318,39 @@ export class LocalConnection extends Connection {
|
||||
return this.inner.display();
|
||||
}
|
||||
|
||||
async tableNames(options?: Partial<TableNamesOptions>): Promise<string[]> {
|
||||
return this.inner.tableNames(options?.startAfter, options?.limit);
|
||||
async tableNames(
|
||||
namespaceOrOptions?: string[] | Partial<TableNamesOptions>,
|
||||
options?: Partial<TableNamesOptions>,
|
||||
): Promise<string[]> {
|
||||
// Detect if first argument is namespace array or options object
|
||||
let namespace: string[] | undefined;
|
||||
let tableNamesOptions: Partial<TableNamesOptions> | undefined;
|
||||
|
||||
if (Array.isArray(namespaceOrOptions)) {
|
||||
// First argument is namespace array
|
||||
namespace = namespaceOrOptions;
|
||||
tableNamesOptions = options;
|
||||
} else {
|
||||
// First argument is options object (backwards compatibility)
|
||||
namespace = undefined;
|
||||
tableNamesOptions = namespaceOrOptions;
|
||||
}
|
||||
|
||||
return this.inner.tableNames(
|
||||
namespace ?? [],
|
||||
tableNamesOptions?.startAfter,
|
||||
tableNamesOptions?.limit,
|
||||
);
|
||||
}
|
||||
|
||||
async openTable(
|
||||
name: string,
|
||||
namespace?: string[],
|
||||
options?: Partial<OpenTableOptions>,
|
||||
): Promise<Table> {
|
||||
const innerTable = await this.inner.openTable(
|
||||
name,
|
||||
namespace ?? [],
|
||||
cleanseStorageOptions(options?.storageOptions),
|
||||
options?.indexCacheSize,
|
||||
);
|
||||
@@ -260,6 +358,28 @@ export class LocalConnection extends Connection {
|
||||
return new LocalTable(innerTable);
|
||||
}
|
||||
|
||||
async cloneTable(
|
||||
targetTableName: string,
|
||||
sourceUri: string,
|
||||
options?: {
|
||||
targetNamespace?: string[];
|
||||
sourceVersion?: number;
|
||||
sourceTag?: string;
|
||||
isShallow?: boolean;
|
||||
},
|
||||
): Promise<Table> {
|
||||
const innerTable = await this.inner.cloneTable(
|
||||
targetTableName,
|
||||
sourceUri,
|
||||
options?.targetNamespace ?? [],
|
||||
options?.sourceVersion ?? null,
|
||||
options?.sourceTag ?? null,
|
||||
options?.isShallow ?? true,
|
||||
);
|
||||
|
||||
return new LocalTable(innerTable);
|
||||
}
|
||||
|
||||
private getStorageOptions(
|
||||
options?: Partial<CreateTableOptions>,
|
||||
): Record<string, string> | undefined {
|
||||
@@ -286,14 +406,44 @@ export class LocalConnection extends Connection {
|
||||
nameOrOptions:
|
||||
| string
|
||||
| ({ name: string; data: Data } & Partial<CreateTableOptions>),
|
||||
data?: Record<string, unknown>[] | TableLike,
|
||||
dataOrNamespace?: Record<string, unknown>[] | TableLike | string[],
|
||||
namespaceOrOptions?: string[] | Partial<CreateTableOptions>,
|
||||
options?: Partial<CreateTableOptions>,
|
||||
): Promise<Table> {
|
||||
if (typeof nameOrOptions !== "string" && "name" in nameOrOptions) {
|
||||
const { name, data, ...options } = nameOrOptions;
|
||||
|
||||
return this.createTable(name, data, options);
|
||||
// First overload: createTable(options, namespace?)
|
||||
const { name, data, ...createOptions } = nameOrOptions;
|
||||
const namespace = dataOrNamespace as string[] | undefined;
|
||||
return this._createTableImpl(name, data, namespace, createOptions);
|
||||
}
|
||||
|
||||
// Second overload: createTable(name, data, namespace?, options?)
|
||||
const name = nameOrOptions;
|
||||
const data = dataOrNamespace as Record<string, unknown>[] | TableLike;
|
||||
|
||||
// Detect if third argument is namespace array or options object
|
||||
let namespace: string[] | undefined;
|
||||
let createOptions: Partial<CreateTableOptions> | undefined;
|
||||
|
||||
if (Array.isArray(namespaceOrOptions)) {
|
||||
// Third argument is namespace array
|
||||
namespace = namespaceOrOptions;
|
||||
createOptions = options;
|
||||
} else {
|
||||
// Third argument is options object (backwards compatibility)
|
||||
namespace = undefined;
|
||||
createOptions = namespaceOrOptions;
|
||||
}
|
||||
|
||||
return this._createTableImpl(name, data, namespace, createOptions);
|
||||
}
|
||||
|
||||
private async _createTableImpl(
|
||||
name: string,
|
||||
data: Data,
|
||||
namespace?: string[],
|
||||
options?: Partial<CreateTableOptions>,
|
||||
): Promise<Table> {
|
||||
if (data === undefined) {
|
||||
throw new Error("data is required");
|
||||
}
|
||||
@@ -302,9 +452,10 @@ export class LocalConnection extends Connection {
|
||||
const storageOptions = this.getStorageOptions(options);
|
||||
|
||||
const innerTable = await this.inner.createTable(
|
||||
nameOrOptions,
|
||||
name,
|
||||
buf,
|
||||
mode,
|
||||
namespace ?? [],
|
||||
storageOptions,
|
||||
);
|
||||
|
||||
@@ -314,39 +465,55 @@ export class LocalConnection extends Connection {
|
||||
async createEmptyTable(
|
||||
name: string,
|
||||
schema: import("./arrow").SchemaLike,
|
||||
namespaceOrOptions?: string[] | Partial<CreateTableOptions>,
|
||||
options?: Partial<CreateTableOptions>,
|
||||
): Promise<Table> {
|
||||
let mode: string = options?.mode ?? "create";
|
||||
const existOk = options?.existOk ?? false;
|
||||
// Detect if third argument is namespace array or options object
|
||||
let namespace: string[] | undefined;
|
||||
let createOptions: Partial<CreateTableOptions> | undefined;
|
||||
|
||||
if (Array.isArray(namespaceOrOptions)) {
|
||||
// Third argument is namespace array
|
||||
namespace = namespaceOrOptions;
|
||||
createOptions = options;
|
||||
} else {
|
||||
// Third argument is options object (backwards compatibility)
|
||||
namespace = undefined;
|
||||
createOptions = namespaceOrOptions;
|
||||
}
|
||||
|
||||
let mode: string = createOptions?.mode ?? "create";
|
||||
const existOk = createOptions?.existOk ?? false;
|
||||
|
||||
if (mode === "create" && existOk) {
|
||||
mode = "exist_ok";
|
||||
}
|
||||
let metadata: Map<string, string> | undefined = undefined;
|
||||
if (options?.embeddingFunction !== undefined) {
|
||||
const embeddingFunction = options.embeddingFunction;
|
||||
if (createOptions?.embeddingFunction !== undefined) {
|
||||
const embeddingFunction = createOptions.embeddingFunction;
|
||||
const registry = getRegistry();
|
||||
metadata = registry.getTableMetadata([embeddingFunction]);
|
||||
}
|
||||
|
||||
const storageOptions = this.getStorageOptions(options);
|
||||
const storageOptions = this.getStorageOptions(createOptions);
|
||||
const table = makeEmptyTable(schema, metadata);
|
||||
const buf = await fromTableToBuffer(table);
|
||||
const innerTable = await this.inner.createEmptyTable(
|
||||
name,
|
||||
buf,
|
||||
mode,
|
||||
namespace ?? [],
|
||||
storageOptions,
|
||||
);
|
||||
return new LocalTable(innerTable);
|
||||
}
|
||||
|
||||
async dropTable(name: string): Promise<void> {
|
||||
return this.inner.dropTable(name);
|
||||
async dropTable(name: string, namespace?: string[]): Promise<void> {
|
||||
return this.inner.dropTable(name, namespace ?? []);
|
||||
}
|
||||
|
||||
async dropAllTables(): Promise<void> {
|
||||
return this.inner.dropAllTables();
|
||||
async dropAllTables(namespace?: string[]): Promise<void> {
|
||||
return this.inner.dropAllTables(namespace ?? []);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
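For orientation, here is a minimal sketch of how the namespace-aware surface above might be exercised. The database path, table name, and `prod` namespace are illustrative, and passing an options object in the third position still works for older callers:

```ts
import * as lancedb from "@lancedb/lancedb";

async function main() {
  const db = await lancedb.connect("/tmp/example-db"); // hypothetical path

  // New form: namespace array between the data and the options.
  await db.createTable(
    "vectors",
    [{ id: 1, vector: [0.1, 0.2] }],
    ["prod"], // namespace; omit it (or pass options here) for the old behavior
    { mode: "overwrite" },
  );

  console.log(await db.tableNames(["prod"])); // list within the namespace
  await db.dropTable("vectors", ["prod"]);    // drop within the namespace
}

main();
```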
nodejs/lancedb/header.ts (new file, 253 lines)
@@ -0,0 +1,253 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

/**
 * Header providers for LanceDB remote connections.
 *
 * This module provides a flexible header management framework for LanceDB remote
 * connections, allowing users to implement custom header strategies for
 * authentication, request tracking, custom metadata, or any other header-based
 * requirements.
 *
 * @module header
 */

/**
 * Abstract base class for providing custom headers for each request.
 *
 * Users can implement this interface to provide dynamic headers for various purposes
 * such as authentication (OAuth tokens, API keys), request tracking (correlation IDs),
 * custom metadata, or any other header-based requirements. The provider is called
 * before each request to ensure fresh header values are always used.
 *
 * @example
 * Simple JWT token provider:
 * ```typescript
 * class JWTProvider extends HeaderProvider {
 *   constructor(private token: string) {
 *     super();
 *   }
 *
 *   getHeaders(): Record<string, string> {
 *     return { authorization: `Bearer ${this.token}` };
 *   }
 * }
 * ```
 *
 * @example
 * Provider with request tracking:
 * ```typescript
 * class RequestTrackingProvider extends HeaderProvider {
 *   constructor(private sessionId: string) {
 *     super();
 *   }
 *
 *   getHeaders(): Record<string, string> {
 *     return {
 *       "X-Session-Id": this.sessionId,
 *       "X-Request-Id": `req-${Date.now()}`
 *     };
 *   }
 * }
 * ```
 */
export abstract class HeaderProvider {
  /**
   * Get the latest headers to be added to requests.
   *
   * This method is called before each request to the remote LanceDB server.
   * Implementations should return headers that will be merged with existing headers.
   *
   * @returns Dictionary of header names to values to add to the request.
   * @throws If unable to fetch headers, the exception will be propagated and the request will fail.
   */
  abstract getHeaders(): Record<string, string>;
}

/**
 * Example implementation: A simple header provider that returns static headers.
 *
 * This is an example implementation showing how to create a HeaderProvider
 * for cases where headers don't change during the session.
 *
 * @example
 * ```typescript
 * const provider = new StaticHeaderProvider({
 *   authorization: "Bearer my-token",
 *   "X-Custom-Header": "custom-value"
 * });
 * const headers = provider.getHeaders();
 * // Returns: {authorization: 'Bearer my-token', 'X-Custom-Header': 'custom-value'}
 * ```
 */
export class StaticHeaderProvider extends HeaderProvider {
  private _headers: Record<string, string>;

  /**
   * Initialize with static headers.
   * @param headers - Headers to return for every request.
   */
  constructor(headers: Record<string, string>) {
    super();
    this._headers = { ...headers };
  }

  /**
   * Return the static headers.
   * @returns Copy of the static headers.
   */
  getHeaders(): Record<string, string> {
    return { ...this._headers };
  }
}

/**
 * Token response from OAuth provider.
 * @public
 */
export interface TokenResponse {
  accessToken: string;
  expiresIn?: number;
}

/**
 * Example implementation: OAuth token provider with automatic refresh.
 *
 * This is an example implementation showing how to manage OAuth tokens
 * with automatic refresh when they expire.
 *
 * @example
 * ```typescript
 * async function fetchToken(): Promise<TokenResponse> {
 *   const response = await fetch("https://oauth.example.com/token", {
 *     method: "POST",
 *     body: JSON.stringify({
 *       grant_type: "client_credentials",
 *       client_id: "your-client-id",
 *       client_secret: "your-client-secret"
 *     }),
 *     headers: { "Content-Type": "application/json" }
 *   });
 *   const data = await response.json();
 *   return {
 *     accessToken: data.access_token,
 *     expiresIn: data.expires_in
 *   };
 * }
 *
 * const provider = new OAuthHeaderProvider(fetchToken);
 * const headers = provider.getHeaders();
 * // Returns: {"authorization": "Bearer <your-token>"}
 * ```
 */
export class OAuthHeaderProvider extends HeaderProvider {
  private _tokenFetcher: () => Promise<TokenResponse> | TokenResponse;
  private _refreshBufferSeconds: number;
  private _currentToken: string | null = null;
  private _tokenExpiresAt: number | null = null;
  private _refreshPromise: Promise<void> | null = null;

  /**
   * Initialize the OAuth provider.
   * @param tokenFetcher - Function to fetch new tokens. Should return object with 'accessToken' and optionally 'expiresIn'.
   * @param refreshBufferSeconds - Seconds before expiry to refresh token. Default 300 (5 minutes).
   */
  constructor(
    tokenFetcher: () => Promise<TokenResponse> | TokenResponse,
    refreshBufferSeconds: number = 300,
  ) {
    super();
    this._tokenFetcher = tokenFetcher;
    this._refreshBufferSeconds = refreshBufferSeconds;
  }

  /**
   * Check if token needs refresh.
   */
  private _needsRefresh(): boolean {
    if (this._currentToken === null) {
      return true;
    }

    if (this._tokenExpiresAt === null) {
      // No expiration info, assume token is valid
      return false;
    }

    // Refresh if we're within the buffer time of expiration
    const now = Date.now() / 1000;
    return now >= this._tokenExpiresAt - this._refreshBufferSeconds;
  }

  /**
   * Refresh the token if it's expired or close to expiring.
   */
  private async _refreshTokenIfNeeded(): Promise<void> {
    if (!this._needsRefresh()) {
      return;
    }

    // If refresh is already in progress, wait for it
    if (this._refreshPromise) {
      await this._refreshPromise;
      return;
    }

    // Start refresh
    this._refreshPromise = (async () => {
      try {
        const tokenData = await this._tokenFetcher();

        this._currentToken = tokenData.accessToken;
        if (!this._currentToken) {
          throw new Error("Token fetcher did not return 'accessToken'");
        }

        // Set expiration if provided
        if (tokenData.expiresIn) {
          this._tokenExpiresAt = Date.now() / 1000 + tokenData.expiresIn;
        } else {
          // Token doesn't expire or expiration unknown
          this._tokenExpiresAt = null;
        }
      } finally {
        this._refreshPromise = null;
      }
    })();

    await this._refreshPromise;
  }

  /**
   * Get OAuth headers, refreshing token if needed.
   * Note: This is synchronous for now as the Rust implementation expects sync.
   * In a real implementation, this would need to handle async properly.
   * @returns Headers with Bearer token authorization.
   * @throws If unable to fetch or refresh token.
   */
  getHeaders(): Record<string, string> {
    // For simplicity in this example, we assume the token is already fetched
    // In a real implementation, this would need to handle the async nature properly
    if (!this._currentToken && !this._refreshPromise) {
      // Synchronously trigger refresh - this is a limitation of the current implementation
      throw new Error(
        "Token not initialized. Call refreshToken() first or use async initialization.",
      );
    }

    if (!this._currentToken) {
      throw new Error("Failed to obtain OAuth token");
    }

    return { authorization: `Bearer ${this._currentToken}` };
  }

  /**
   * Manually refresh the token.
   * Call this before using getHeaders() to ensure token is available.
   */
  async refreshToken(): Promise<void> {
    this._currentToken = null; // Force refresh
    await this._refreshTokenIfNeeded();
  }
}
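A sketch of wiring these providers up follows. Since `getHeaders()` is synchronous, the OAuth example primes the token with `refreshToken()` first; the token endpoint and remote URI are placeholders, not real services:

```ts
import { connect, OAuthHeaderProvider, TokenResponse } from "@lancedb/lancedb";

async function fetchToken(): Promise<TokenResponse> {
  // Hypothetical identity provider; substitute your own endpoint.
  const res = await fetch("https://auth.example.com/token", { method: "POST" });
  const data = await res.json();
  return { accessToken: data.access_token, expiresIn: data.expires_in };
}

async function main() {
  const provider = new OAuthHeaderProvider(fetchToken);
  await provider.refreshToken(); // prime the token before first use

  // The connect() overloads below accept the provider directly.
  const db = await connect("db://example-remote", {}, provider);
  console.log(await db.tableNames());
}

main();
```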
@@ -10,9 +10,15 @@ import {
import {
  ConnectionOptions,
  Connection as LanceDbConnection,
+ JsHeaderProvider as NativeJsHeaderProvider,
  Session,
} from "./native.js";

+import { HeaderProvider } from "./header";
+
+// Re-export native header provider for use with connectWithHeaderProvider
+export { JsHeaderProvider as NativeJsHeaderProvider } from "./native.js";
+
export {
  AddColumnsSql,
  ConnectionOptions,
@@ -21,6 +27,7 @@ export {
  ClientConfig,
  TimeoutConfig,
  RetryConfig,
+ TlsConfig,
  OptimizeStats,
  CompactionStats,
  RemovalStats,
@@ -36,6 +43,11 @@ export {
  DeleteResult,
  DropColumnsResult,
  UpdateResult,
+ SplitCalculatedOptions,
+ SplitRandomOptions,
+ SplitHashOptions,
+ SplitSequentialOptions,
+ ShuffleOptions,
} from "./native.js";

export {
@@ -59,6 +71,7 @@ export {
  Query,
  QueryBase,
  VectorQuery,
+ TakeQuery,
  QueryExecutionOptions,
  FullTextSearchOptions,
  RecordBatchIterator,
@@ -77,6 +90,7 @@ export {
  Index,
  IndexOptions,
  IvfPqOptions,
+ IvfRqOptions,
  IvfFlatOptions,
  HnswPqOptions,
  HnswSqOptions,
@@ -92,9 +106,17 @@ export {
  ColumnAlteration,
} from "./table";

+export {
+  HeaderProvider,
+  StaticHeaderProvider,
+  OAuthHeaderProvider,
+  TokenResponse,
+} from "./header";
+
export { MergeInsertBuilder, WriteExecutionOptions } from "./merge";

export * as embedding from "./embedding";
+export { permutationBuilder, PermutationBuilder } from "./permutation";
export * as rerankers from "./rerankers";
export {
  SchemaLike,
@@ -130,11 +152,27 @@ export { IntoSql, packBits } from "./util";
 * {storageOptions: {timeout: "60s"}
 * });
 * ```
+ * @example
+ * Using with a header provider for per-request authentication:
+ * ```ts
+ * const provider = new StaticHeaderProvider({
+ *   "X-API-Key": "my-key"
+ * });
+ * const conn = await connectWithHeaderProvider(
+ *   "db://host:port",
+ *   options,
+ *   provider
+ * );
+ * ```
 */
export async function connect(
  uri: string,
  options?: Partial<ConnectionOptions>,
  session?: Session,
+ headerProvider?:
+   | HeaderProvider
+   | (() => Record<string, string>)
+   | (() => Promise<Record<string, string>>),
): Promise<Connection>;
/**
 * Connect to a LanceDB instance at the given URI.
@@ -168,18 +206,58 @@ export async function connect(
): Promise<Connection>;
export async function connect(
  uriOrOptions: string | (Partial<ConnectionOptions> & { uri: string }),
- options?: Partial<ConnectionOptions>,
+ optionsOrSession?: Partial<ConnectionOptions> | Session,
+ sessionOrHeaderProvider?:
+   | Session
+   | HeaderProvider
+   | (() => Record<string, string>)
+   | (() => Promise<Record<string, string>>),
+ headerProvider?:
+   | HeaderProvider
+   | (() => Record<string, string>)
+   | (() => Promise<Record<string, string>>),
): Promise<Connection> {
  let uri: string | undefined;
  let finalOptions: Partial<ConnectionOptions> = {};
+ let finalHeaderProvider:
+   | HeaderProvider
+   | (() => Record<string, string>)
+   | (() => Promise<Record<string, string>>)
+   | undefined;

  if (typeof uriOrOptions !== "string") {
    // First overload: connect(options)
    const { uri: uri_, ...opts } = uriOrOptions;
    uri = uri_;
    finalOptions = opts;
  } else {
+   // Second overload: connect(uri, options?, session?, headerProvider?)
    uri = uriOrOptions;
-   finalOptions = options || {};
+
+   // Handle optionsOrSession parameter
+   if (optionsOrSession && "inner" in optionsOrSession) {
+     // Second param is session, so no options provided
+     finalOptions = {};
+   } else {
+     // Second param is options
+     finalOptions = (optionsOrSession as Partial<ConnectionOptions>) || {};
+   }
+
+   // Handle sessionOrHeaderProvider parameter
+   if (
+     sessionOrHeaderProvider &&
+     (typeof sessionOrHeaderProvider === "function" ||
+       "getHeaders" in sessionOrHeaderProvider)
+   ) {
+     // Third param is header provider
+     finalHeaderProvider = sessionOrHeaderProvider as
+       | HeaderProvider
+       | (() => Record<string, string>)
+       | (() => Promise<Record<string, string>>);
+   } else {
+     // Third param is session, header provider is fourth param
+     finalHeaderProvider = headerProvider;
+   }
  }

  if (!uri) {
@@ -190,6 +268,26 @@ export async function connect(
  (<ConnectionOptions>finalOptions).storageOptions = cleanseStorageOptions(
    (<ConnectionOptions>finalOptions).storageOptions,
  );
- const nativeConn = await LanceDbConnection.new(uri, finalOptions);
+
+ // Create native header provider if one was provided
+ let nativeProvider: NativeJsHeaderProvider | undefined;
+ if (finalHeaderProvider) {
+   if (typeof finalHeaderProvider === "function") {
+     nativeProvider = new NativeJsHeaderProvider(finalHeaderProvider);
+   } else if (
+     finalHeaderProvider &&
+     typeof finalHeaderProvider.getHeaders === "function"
+   ) {
+     nativeProvider = new NativeJsHeaderProvider(async () =>
+       finalHeaderProvider.getHeaders(),
+     );
+   }
+ }
+
+ const nativeConn = await LanceDbConnection.new(
+   uri,
+   finalOptions,
+   nativeProvider,
+ );
  return new LocalConnection(nativeConn);
}
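Because the detection logic above checks `typeof ... === "function"` before falling back to `getHeaders`, a bare closure also works in place of a `HeaderProvider` instance. A sketch, with a placeholder URI:

```ts
import { connect } from "@lancedb/lancedb";

async function main() {
  const db = await connect("db://example-remote", {}, () => ({
    "X-Request-Id": `req-${Date.now()}`, // fresh value on every request
  }));
  console.log(await db.tableNames());
}

main();
```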
@@ -112,6 +112,77 @@ export interface IvfPqOptions {
  sampleRate?: number;
}

+export interface IvfRqOptions {
+  /**
+   * The number of IVF partitions to create.
+   *
+   * This value should generally scale with the number of rows in the dataset.
+   * By default the number of partitions is the square root of the number of
+   * rows.
+   *
+   * If this value is too large then the first part of the search (picking the
+   * right partition) will be slow. If this value is too small then the second
+   * part of the search (searching within a partition) will be slow.
+   */
+  numPartitions?: number;
+
+  /**
+   * Number of bits per dimension for residual quantization.
+   *
+   * This value controls how much each residual component is compressed. The more
+   * bits, the more accurate the index will be but the slower search. Typical values
+   * are small integers; the default is 1 bit per dimension.
+   */
+  numBits?: number;
+
+  /**
+   * Distance type to use to build the index.
+   *
+   * Default value is "l2".
+   *
+   * This is used when training the index to calculate the IVF partitions
+   * (vectors are grouped in partitions with similar vectors according to this
+   * distance type) and during quantization.
+   *
+   * The distance type used to train an index MUST match the distance type used
+   * to search the index. Failure to do so will yield inaccurate results.
+   *
+   * The following distance types are available:
+   *
+   * "l2" - Euclidean distance.
+   * "cosine" - Cosine distance.
+   * "dot" - Dot product.
+   */
+  distanceType?: "l2" | "cosine" | "dot";
+
+  /**
+   * Max iterations to train IVF kmeans.
+   *
+   * When training an IVF index we use kmeans to calculate the partitions. This parameter
+   * controls how many iterations of kmeans to run.
+   *
+   * The default value is 50.
+   */
+  maxIterations?: number;
+
+  /**
+   * The number of vectors, per partition, to sample when training IVF kmeans.
+   *
+   * When an IVF index is trained, we need to calculate partitions. These are groups
+   * of vectors that are similar to each other. To do this we use an algorithm called kmeans.
+   *
+   * Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
+   * random sample of the data. This parameter controls the size of the sample. The total
+   * number of vectors used to train the index is `sample_rate * num_partitions`.
+   *
+   * Increasing this value might improve the quality of the index but in most cases the
+   * default should be sufficient.
+   *
+   * The default value is 256.
+   */
+  sampleRate?: number;
+}
+
/**
 * Options to create an `HNSW_PQ` index
 */
@@ -523,6 +594,35 @@ export class Index {
        options?.distanceType,
        options?.numPartitions,
        options?.numSubVectors,
        options?.numBits,
        options?.maxIterations,
        options?.sampleRate,
      ),
    );
  }

+ /**
+  * Create an IvfRq index
+  *
+  * IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization
+  * and organizes them into IVF partitions.
+  *
+  * The compression scheme is called RabitQ quantization. Each dimension is quantized into a small number of bits.
+  * The parameters `num_bits` and `num_partitions` control this process, providing a tradeoff
+  * between index size (and thus search speed) and index accuracy.
+  *
+  * The partitioning process is called IVF and the `num_partitions` parameter controls how
+  * many groups to create.
+  *
+  * Note that training an IVF RQ index on a large dataset is a slow operation and
+  * currently is also a memory intensive operation.
+  */
+ static ivfRq(options?: Partial<IvfRqOptions>) {
+   return new Index(
+     LanceDbIndex.ivfRq(
+       options?.distanceType,
+       options?.numPartitions,
+       options?.numBits,
+       options?.maxIterations,
+       options?.sampleRate,
+     ),
@@ -700,5 +800,27 @@ export interface IndexOptions {
   */
  replace?: boolean;

+ /**
+  * Timeout in seconds to wait for index creation to complete.
+  *
+  * If not specified, the method will return immediately after starting the index creation.
+  */
  waitTimeoutSeconds?: number;

+ /**
+  * Optional custom name for the index.
+  *
+  * If not provided, a default name will be generated based on the column name.
+  */
+ name?: string;
+
+ /**
+  * Whether to train the index with existing data.
+  *
+  * If true (default), the index will be trained with existing data in the table.
+  * If false, the index will be created empty and populated as new data is added.
+  *
+  * Note: This option is only supported for scalar indices. Vector indices always train.
+  */
+ train?: boolean;
}
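A sketch of requesting the new IVF-RQ index through `createIndex`, assuming an already-open table `tbl` with a `vector` column; the parameter values are illustrative, not tuned recommendations:

```ts
import { Index } from "@lancedb/lancedb";

await tbl.createIndex("vector", {
  config: Index.ivfRq({
    numPartitions: 256,     // roughly sqrt(row count), per the docs above
    numBits: 1,             // the documented default: 1 bit per dimension
    distanceType: "cosine", // must match the distance type used at search time
  }),
});
```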
@@ -70,6 +70,23 @@ export class MergeInsertBuilder {
      this.#schema,
    );
  }

+ /**
+  * Controls whether to use indexes for the merge operation.
+  *
+  * When set to `true` (the default), the operation will use an index if available
+  * on the join key for improved performance. When set to `false`, it forces a full
+  * table scan even if an index exists. This can be useful for benchmarking or when
+  * the query optimizer chooses a suboptimal path.
+  *
+  * @param useIndex - Whether to use indices for the merge operation. Defaults to `true`.
+  */
+ useIndex(useIndex: boolean): MergeInsertBuilder {
+   return new MergeInsertBuilder(
+     this.#native.useIndex(useIndex),
+     this.#schema,
+   );
+ }
  /**
   * Executes the merge insert operation
   *
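A sketch of the new toggle in a merge-insert chain, assuming a table `tbl` keyed on `id`; `useIndex(false)` forces the full-scan path described above:

```ts
await tbl
  .mergeInsert("id")
  .useIndex(false) // bypass the index, e.g. for benchmarking
  .whenMatchedUpdateAll()
  .whenNotMatchedInsertAll()
  .execute([{ id: 1, vector: [0.1, 0.2] }]);
```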
nodejs/lancedb/permutation.ts (new file, 202 lines)
@@ -0,0 +1,202 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors

import { Connection, LocalConnection } from "./connection.js";
import {
  PermutationBuilder as NativePermutationBuilder,
  Table as NativeTable,
  ShuffleOptions,
  SplitCalculatedOptions,
  SplitHashOptions,
  SplitRandomOptions,
  SplitSequentialOptions,
  permutationBuilder as nativePermutationBuilder,
} from "./native.js";
import { LocalTable, Table } from "./table";

/**
 * A PermutationBuilder for creating data permutations with splits, shuffling, and filtering.
 *
 * This class provides a TypeScript wrapper around the native Rust PermutationBuilder,
 * offering methods to configure data splits, shuffling, and filtering before executing
 * the permutation to create a new table.
 */
export class PermutationBuilder {
  private inner: NativePermutationBuilder;

  /**
   * @hidden
   */
  constructor(inner: NativePermutationBuilder) {
    this.inner = inner;
  }

  /**
   * Configure the permutation to be persisted.
   *
   * @param connection - The connection to persist the permutation to
   * @param tableName - The name of the table to create
   * @returns A new PermutationBuilder instance
   * @example
   * ```ts
   * builder.persist(connection, "permutation_table");
   * ```
   */
  persist(connection: Connection, tableName: string): PermutationBuilder {
    const localConnection = connection as LocalConnection;
    const newInner = this.inner.persist(localConnection.inner, tableName);
    return new PermutationBuilder(newInner);
  }

  /**
   * Configure random splits for the permutation.
   *
   * @param options - Configuration for random splitting
   * @returns A new PermutationBuilder instance
   * @example
   * ```ts
   * // Split by ratios
   * builder.splitRandom({ ratios: [0.7, 0.3], seed: 42 });
   *
   * // Split by counts
   * builder.splitRandom({ counts: [1000, 500], seed: 42 });
   *
   * // Split with fixed size
   * builder.splitRandom({ fixed: 100, seed: 42 });
   * ```
   */
  splitRandom(options: SplitRandomOptions): PermutationBuilder {
    const newInner = this.inner.splitRandom(options);
    return new PermutationBuilder(newInner);
  }

  /**
   * Configure hash-based splits for the permutation.
   *
   * @param options - Configuration for hash-based splitting
   * @returns A new PermutationBuilder instance
   * @example
   * ```ts
   * builder.splitHash({
   *   columns: ["user_id"],
   *   splitWeights: [70, 30],
   *   discardWeight: 0
   * });
   * ```
   */
  splitHash(options: SplitHashOptions): PermutationBuilder {
    const newInner = this.inner.splitHash(options);
    return new PermutationBuilder(newInner);
  }

  /**
   * Configure sequential splits for the permutation.
   *
   * @param options - Configuration for sequential splitting
   * @returns A new PermutationBuilder instance
   * @example
   * ```ts
   * // Split by ratios
   * builder.splitSequential({ ratios: [0.8, 0.2] });
   *
   * // Split by counts
   * builder.splitSequential({ counts: [800, 200] });
   *
   * // Split with fixed size
   * builder.splitSequential({ fixed: 1000 });
   * ```
   */
  splitSequential(options: SplitSequentialOptions): PermutationBuilder {
    const newInner = this.inner.splitSequential(options);
    return new PermutationBuilder(newInner);
  }

  /**
   * Configure calculated splits for the permutation.
   *
   * @param options - Configuration for calculated splitting
   * @returns A new PermutationBuilder instance
   * @example
   * ```ts
   * builder.splitCalculated("user_id % 3");
   * ```
   */
  splitCalculated(options: SplitCalculatedOptions): PermutationBuilder {
    const newInner = this.inner.splitCalculated(options);
    return new PermutationBuilder(newInner);
  }

  /**
   * Configure shuffling for the permutation.
   *
   * @param options - Configuration for shuffling
   * @returns A new PermutationBuilder instance
   * @example
   * ```ts
   * // Basic shuffle
   * builder.shuffle({ seed: 42 });
   *
   * // Shuffle with clump size
   * builder.shuffle({ seed: 42, clumpSize: 10 });
   * ```
   */
  shuffle(options: ShuffleOptions): PermutationBuilder {
    const newInner = this.inner.shuffle(options);
    return new PermutationBuilder(newInner);
  }

  /**
   * Configure filtering for the permutation.
   *
   * @param filter - SQL filter expression
   * @returns A new PermutationBuilder instance
   * @example
   * ```ts
   * builder.filter("age > 18 AND status = 'active'");
   * ```
   */
  filter(filter: string): PermutationBuilder {
    const newInner = this.inner.filter(filter);
    return new PermutationBuilder(newInner);
  }

  /**
   * Execute the permutation and create the destination table.
   *
   * @returns A Promise that resolves to the new Table instance
   * @example
   * ```ts
   * const permutationTable = await builder.execute();
   * console.log(`Created table: ${permutationTable.name}`);
   * ```
   */
  async execute(): Promise<Table> {
    const nativeTable: NativeTable = await this.inner.execute();
    return new LocalTable(nativeTable);
  }
}

/**
 * Create a permutation builder for the given table.
 *
 * @param table - The source table to create a permutation from
 * @returns A PermutationBuilder instance
 * @example
 * ```ts
 * const builder = permutationBuilder(sourceTable)
 *   .splitRandom({ ratios: [0.8, 0.2], seed: 42 })
 *   .shuffle({ seed: 123 });
 *
 * const trainingTable = await builder.execute();
 * ```
 */
export function permutationBuilder(table: Table): PermutationBuilder {
  // Extract the inner native table from the TypeScript wrapper
  const localTable = table as LocalTable;
  // Access inner through type assertion since it's private
  const nativeBuilder = nativePermutationBuilder(
    // biome-ignore lint/suspicious/noExplicitAny: need access to private variable
    (localTable as any).inner,
  );
  return new PermutationBuilder(nativeBuilder);
}
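Putting the builder together end to end; `db` and `sourceTable` are assumed to already exist, and the destination table name is illustrative:

```ts
import { permutationBuilder } from "@lancedb/lancedb";

const trainingTable = await permutationBuilder(sourceTable)
  .filter("status = 'active'")                   // keep only matching rows
  .splitRandom({ ratios: [0.8, 0.2], seed: 42 }) // 80/20 train/test split
  .shuffle({ seed: 123 })                        // deterministic shuffle
  .persist(db, "training_permutation")           // write result as a new table
  .execute();
```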
@@ -15,42 +15,33 @@ import {
  RecordBatchIterator as NativeBatchIterator,
  Query as NativeQuery,
  Table as NativeTable,
+ TakeQuery as NativeTakeQuery,
  VectorQuery as NativeVectorQuery,
} from "./native";
import { Reranker } from "./rerankers";

-export class RecordBatchIterator implements AsyncIterator<RecordBatch> {
- private promisedInner?: Promise<NativeBatchIterator>;
- private inner?: NativeBatchIterator;
+export async function* RecordBatchIterator(
+ promisedInner: Promise<NativeBatchIterator>,
+) {
+ const inner = await promisedInner;

- constructor(promise?: Promise<NativeBatchIterator>) {
-   // TODO: check promise reliably so we dont need to pass two arguments.
-   this.promisedInner = promise;
+ if (inner === undefined) {
+   throw new Error("Invalid iterator state");
  }

- // biome-ignore lint/suspicious/noExplicitAny: skip
- async next(): Promise<IteratorResult<RecordBatch<any>>> {
-   if (this.inner === undefined) {
-     this.inner = await this.promisedInner;
-   }
-   if (this.inner === undefined) {
-     throw new Error("Invalid iterator state state");
-   }
-   const n = await this.inner.next();
-   if (n == null) {
-     return Promise.resolve({ done: true, value: null });
-   }
-   const tbl = tableFromIPC(n);
-   if (tbl.batches.length != 1) {
+ for (let buffer = await inner.next(); buffer; buffer = await inner.next()) {
+   const { batches } = tableFromIPC(buffer);
+
+   if (batches.length !== 1) {
      throw new Error("Expected only one batch");
    }
-   return Promise.resolve({ done: false, value: tbl.batches[0] });
+
+   yield batches[0];
  }
}
/* eslint-enable */

class RecordBatchIterable<
- NativeQueryType extends NativeQuery | NativeVectorQuery,
+ NativeQueryType extends NativeQuery | NativeVectorQuery | NativeTakeQuery,
> implements AsyncIterable<RecordBatch>
{
  private inner: NativeQueryType;
@@ -63,7 +54,7 @@ class RecordBatchIterable<

  // biome-ignore lint/suspicious/noExplicitAny: skip
  [Symbol.asyncIterator](): AsyncIterator<RecordBatch<any>, any, undefined> {
-   return new RecordBatchIterator(
+   return RecordBatchIterator(
      this.inner.execute(this.options?.maxBatchLength, this.options?.timeoutMs),
    );
  }
@@ -107,8 +98,9 @@ export interface FullTextSearchOptions {
 *
 * @hideconstructor
 */
-export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
- implements AsyncIterable<RecordBatch>
+export class QueryBase<
+ NativeQueryType extends NativeQuery | NativeVectorQuery | NativeTakeQuery,
+> implements AsyncIterable<RecordBatch>
{
  /**
   * @hidden
@@ -133,56 +125,6 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
    fn(this.inner);
  }
}
- /**
-  * A filter statement to be applied to this query.
-  *
-  * The filter should be supplied as an SQL query string. For example:
-  * @example
-  * x > 10
-  * y > 0 AND y < 100
-  * x > 5 OR y = 'test'
-  *
-  * Filtering performance can often be improved by creating a scalar index
-  * on the filter column(s).
-  */
- where(predicate: string): this {
-   this.doCall((inner: NativeQueryType) => inner.onlyIf(predicate));
-   return this;
- }
- /**
-  * A filter statement to be applied to this query.
-  * @see where
-  * @deprecated Use `where` instead
-  */
- filter(predicate: string): this {
-   return this.where(predicate);
- }

- fullTextSearch(
-   query: string | FullTextQuery,
-   options?: Partial<FullTextSearchOptions>,
- ): this {
-   let columns: string[] | null = null;
-   if (options) {
-     if (typeof options.columns === "string") {
-       columns = [options.columns];
-     } else if (Array.isArray(options.columns)) {
-       columns = options.columns;
-     }
-   }
-
-   this.doCall((inner: NativeQueryType) => {
-     if (typeof query === "string") {
-       inner.fullTextSearch({
-         query: query,
-         columns: columns,
-       });
-     } else {
-       inner.fullTextSearch({ query: query.inner });
-     }
-   });
-   return this;
- }

  /**
   * Return only the specified columns.
@@ -241,33 +183,6 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
    return this;
  }

- /**
-  * Set the maximum number of results to return.
-  *
-  * By default, a plain search has no limit. If this method is not
-  * called then every valid row from the table will be returned.
-  */
- limit(limit: number): this {
-   this.doCall((inner: NativeQueryType) => inner.limit(limit));
-   return this;
- }
-
- offset(offset: number): this {
-   this.doCall((inner: NativeQueryType) => inner.offset(offset));
-   return this;
- }
-
- /**
-  * Skip searching un-indexed data. This can make search faster, but will miss
-  * any data that is not yet indexed.
-  *
-  * Use {@link Table#optimize} to index all un-indexed data.
-  */
- fastSearch(): this {
-   this.doCall((inner: NativeQueryType) => inner.fastSearch());
-   return this;
- }

  /**
   * Whether to return the row id in the results.
   *
@@ -306,10 +221,8 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
   * single query)
   *
   */
- protected execute(
-   options?: Partial<QueryExecutionOptions>,
- ): RecordBatchIterator {
-   return new RecordBatchIterator(this.nativeExecute(options));
+ protected execute(options?: Partial<QueryExecutionOptions>) {
+   return RecordBatchIterator(this.nativeExecute(options));
  }

  /**
@@ -317,8 +230,7 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
   */
  // biome-ignore lint/suspicious/noExplicitAny: skip
  [Symbol.asyncIterator](): AsyncIterator<RecordBatch<any>> {
-   const promise = this.nativeExecute();
-   return new RecordBatchIterator(promise);
+   return RecordBatchIterator(this.nativeExecute());
  }

  /** Collect the results as an Arrow @see {@link ArrowTable}. */
@@ -401,6 +313,119 @@ export class QueryBase<NativeQueryType extends NativeQuery | NativeVectorQuery>
    return this.inner.analyzePlan();
  }

+ /**
+  * Returns the schema of the output that will be returned by this query.
+  *
+  * This can be used to inspect the types and names of the columns that will be
+  * returned by the query before executing it.
+  *
+  * @returns An Arrow Schema describing the output columns.
+  */
+ async outputSchema(): Promise<import("./arrow").Schema> {
+   let schemaBuffer: Buffer;
+   if (this.inner instanceof Promise) {
+     schemaBuffer = await this.inner.then((inner) => inner.outputSchema());
+   } else {
+     schemaBuffer = await this.inner.outputSchema();
+   }
+   const schema = tableFromIPC(schemaBuffer).schema;
+   return schema;
+ }
}

+export class StandardQueryBase<
+ NativeQueryType extends NativeQuery | NativeVectorQuery,
+>
+ extends QueryBase<NativeQueryType>
+ implements ExecutableQuery
+{
+ constructor(inner: NativeQueryType | Promise<NativeQueryType>) {
+   super(inner);
+ }
+
+ /**
+  * A filter statement to be applied to this query.
+  *
+  * The filter should be supplied as an SQL query string. For example:
+  * @example
+  * x > 10
+  * y > 0 AND y < 100
+  * x > 5 OR y = 'test'
+  *
+  * Filtering performance can often be improved by creating a scalar index
+  * on the filter column(s).
+  */
+ where(predicate: string): this {
+   this.doCall((inner: NativeQueryType) => inner.onlyIf(predicate));
+   return this;
+ }
+ /**
+  * A filter statement to be applied to this query.
+  * @see where
+  * @deprecated Use `where` instead
+  */
+ filter(predicate: string): this {
+   return this.where(predicate);
+ }
+
+ fullTextSearch(
+   query: string | FullTextQuery,
+   options?: Partial<FullTextSearchOptions>,
+ ): this {
+   let columns: string[] | null = null;
+   if (options) {
+     if (typeof options.columns === "string") {
+       columns = [options.columns];
+     } else if (Array.isArray(options.columns)) {
+       columns = options.columns;
+     }
+   }
+
+   this.doCall((inner: NativeQueryType) => {
+     if (typeof query === "string") {
+       inner.fullTextSearch({
+         query: query,
+         columns: columns,
+       });
+     } else {
+       inner.fullTextSearch({ query: query.inner });
+     }
+   });
+   return this;
+ }
+
+ /**
+  * Set the maximum number of results to return.
+  *
+  * By default, a plain search has no limit. If this method is not
+  * called then every valid row from the table will be returned.
+  */
+ limit(limit: number): this {
+   this.doCall((inner: NativeQueryType) => inner.limit(limit));
+   return this;
+ }
+
+ /**
+  * Set the number of rows to skip before returning results.
+  *
+  * This is useful for pagination.
+  */
+ offset(offset: number): this {
+   this.doCall((inner: NativeQueryType) => inner.offset(offset));
+   return this;
+ }
+
+ /**
+  * Skip searching un-indexed data. This can make search faster, but will miss
+  * any data that is not yet indexed.
+  *
+  * Use {@link Table#optimize} to index all un-indexed data.
+  */
+ fastSearch(): this {
+   this.doCall((inner: NativeQueryType) => inner.fastSearch());
+   return this;
+ }
+}

/**
@@ -419,7 +444,7 @@ export interface ExecutableQuery {}
 *
 * @hideconstructor
 */
-export class VectorQuery extends QueryBase<NativeVectorQuery> {
+export class VectorQuery extends StandardQueryBase<NativeVectorQuery> {
  /**
   * @hidden
   */
@@ -679,13 +704,24 @@ export class VectorQuery extends QueryBase<NativeVectorQuery> {
  }
}

+/**
+ * A query that returns a subset of the rows in the table.
+ *
+ * @hideconstructor
+ */
+export class TakeQuery extends QueryBase<NativeTakeQuery> {
+ constructor(inner: NativeTakeQuery) {
+   super(inner);
+ }
+}
+
/** A builder for LanceDB queries.
 *
 * @see {@link Table#query}, {@link Table#search}
 *
 * @hideconstructor
 */
-export class Query extends QueryBase<NativeQuery> {
+export class Query extends StandardQueryBase<NativeQuery> {
  /**
   * @hidden
   */
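A sketch of inspecting a query before running it with the `outputSchema()` helper added above; `tbl` is an assumed open table:

```ts
const query = tbl.query().select(["id", "vector"]).limit(10);

const schema = await query.outputSchema(); // no data is scanned yet
console.log(schema.fields.map((f) => f.name)); // e.g. ["id", "vector"]

const rows = await query.toArray(); // then execute as usual
```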
@@ -326,6 +326,9 @@ export function sanitizeDictionary(typeLike: object) {

// biome-ignore lint/suspicious/noExplicitAny: skip
export function sanitizeType(typeLike: unknown): DataType<any> {
+ if (typeof typeLike === "string") {
+   return dataTypeFromName(typeLike);
+ }
  if (typeof typeLike !== "object" || typeLike === null) {
    throw Error("Expected a Type but object was null/undefined");
  }
@@ -447,7 +450,7 @@ export function sanitizeType(typeLike: unknown): DataType<any> {
    case Type.DurationSecond:
      return new DurationSecond();
    default:
-     throw new Error("Unrecoginized type id in schema: " + typeId);
+     throw new Error("Unrecognized type id in schema: " + typeId);
  }
}

@@ -467,7 +470,15 @@ export function sanitizeField(fieldLike: unknown): Field {
      "The field passed in is missing a `type`/`name`/`nullable` property",
    );
  }
- const type = sanitizeType(fieldLike.type);
+ let type: DataType;
+ try {
+   type = sanitizeType(fieldLike.type);
+ } catch (error: unknown) {
+   throw Error(
+     `Unable to sanitize type for field: ${fieldLike.name} due to error: ${error}`,
+     { cause: error },
+   );
+ }
  const name = fieldLike.name;
  if (!(typeof name === "string")) {
    throw Error("The field passed in had a non-string `name` property");
@@ -581,3 +592,46 @@ function sanitizeData(
    },
  );
}

+const constructorsByTypeName = {
+ null: () => new Null(),
+ binary: () => new Binary(),
+ utf8: () => new Utf8(),
+ bool: () => new Bool(),
+ int8: () => new Int8(),
+ int16: () => new Int16(),
+ int32: () => new Int32(),
+ int64: () => new Int64(),
+ uint8: () => new Uint8(),
+ uint16: () => new Uint16(),
+ uint32: () => new Uint32(),
+ uint64: () => new Uint64(),
+ float16: () => new Float16(),
+ float32: () => new Float32(),
+ float64: () => new Float64(),
+ datemillisecond: () => new DateMillisecond(),
+ dateday: () => new DateDay(),
+ timenanosecond: () => new TimeNanosecond(),
+ timemicrosecond: () => new TimeMicrosecond(),
+ timemillisecond: () => new TimeMillisecond(),
+ timesecond: () => new TimeSecond(),
+ intervaldaytime: () => new IntervalDayTime(),
+ intervalyearmonth: () => new IntervalYearMonth(),
+ durationnanosecond: () => new DurationNanosecond(),
+ durationmicrosecond: () => new DurationMicrosecond(),
+ durationmillisecond: () => new DurationMillisecond(),
+ durationsecond: () => new DurationSecond(),
+} as const;
+
+type MappableTypeName = keyof typeof constructorsByTypeName;
+
+export function dataTypeFromName(typeName: string): DataType {
+ const normalizedTypeName = typeName.toLowerCase() as MappableTypeName;
+ const _constructor = constructorsByTypeName[normalizedTypeName];
+
+ if (!_constructor) {
+   throw new Error("Unrecognized type name in schema: " + typeName);
+ }
+
+ return _constructor();
+}
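A small sketch of the new string path; these helpers live in the package's internal arrow sanitization module, so treat the direct calls as illustrative:

```ts
// Field types may now be given as case-insensitive type names...
const f = sanitizeField({ name: "score", type: "float32", nullable: true });

// ...because sanitizeType("float32") resolves via dataTypeFromName:
const dt = dataTypeFromName("Float32"); // same result as new Float32()
dataTypeFromName("not-a-type");         // throws "Unrecognized type name in schema"
```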
@@ -35,6 +35,7 @@ import {
import {
  FullTextQuery,
  Query,
+ TakeQuery,
  VectorQuery,
  instanceOfFullTextQuery,
} from "./query";
@@ -336,6 +337,20 @@ export abstract class Table {
   */
  abstract query(): Query;

+ /**
+  * Create a query that returns a subset of the rows in the table.
+  * @param offsets The offsets of the rows to return.
+  * @returns A builder that can be used to parameterize the query.
+  */
+ abstract takeOffsets(offsets: number[]): TakeQuery;
+
+ /**
+  * Create a query that returns a subset of the rows in the table.
+  * @param rowIds The row ids of the rows to return.
+  * @returns A builder that can be used to parameterize the query.
+  */
+ abstract takeRowIds(rowIds: number[]): TakeQuery;
+
  /**
   * Create a search query to find the nearest neighbors
   * of the given query
@@ -647,6 +662,8 @@ export class LocalTable extends Table {
      column,
      options?.replace,
      options?.waitTimeoutSeconds,
+     options?.name,
+     options?.train,
    );
  }

@@ -665,6 +682,14 @@ export class LocalTable extends Table {
    await this.inner.waitForIndex(indexNames, timeoutSeconds);
  }

+ takeOffsets(offsets: number[]): TakeQuery {
+   return new TakeQuery(this.inner.takeOffsets(offsets));
+ }
+
+ takeRowIds(rowIds: number[]): TakeQuery {
+   return new TakeQuery(this.inner.takeRowIds(rowIds));
+ }
+
  query(): Query {
    return new Query(this.inner);
  }
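A sketch of the new take queries against an open table `tbl`; both return a `TakeQuery` builder that accepts the usual `QueryBase` refinements:

```ts
// Fetch specific rows by physical offset...
const byOffset = await tbl.takeOffsets([0, 5, 42]).toArray();

// ...or by stable row id, optionally narrowing the columns.
const byRowId = await tbl
  .takeRowIds([101, 205])
  .select(["id"])
  .toArray();
```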
nodejs/npm/darwin-arm64/package.json
@@ -1,6 +1,6 @@
{
  "name": "@lancedb/lancedb-darwin-arm64",
- "version": "0.21.2",
+ "version": "0.22.3-beta.5",
  "os": ["darwin"],
  "cpu": ["arm64"],
  "main": "lancedb.darwin-arm64.node",

@@ -1,6 +1,6 @@
{
  "name": "@lancedb/lancedb-darwin-x64",
- "version": "0.21.2",
+ "version": "0.22.3-beta.5",
  "os": ["darwin"],
  "cpu": ["x64"],
  "main": "lancedb.darwin-x64.node",

@@ -1,6 +1,6 @@
{
  "name": "@lancedb/lancedb-linux-arm64-gnu",
- "version": "0.21.2",
+ "version": "0.22.3-beta.5",
  "os": ["linux"],
  "cpu": ["arm64"],
  "main": "lancedb.linux-arm64-gnu.node",

@@ -1,6 +1,6 @@
{
  "name": "@lancedb/lancedb-linux-arm64-musl",
- "version": "0.21.2",
+ "version": "0.22.3-beta.5",
  "os": ["linux"],
  "cpu": ["arm64"],
  "main": "lancedb.linux-arm64-musl.node",

@@ -1,6 +1,6 @@
{
  "name": "@lancedb/lancedb-linux-x64-gnu",
- "version": "0.21.2",
+ "version": "0.22.3-beta.5",
  "os": ["linux"],
  "cpu": ["x64"],
  "main": "lancedb.linux-x64-gnu.node",

@@ -1,6 +1,6 @@
{
  "name": "@lancedb/lancedb-linux-x64-musl",
- "version": "0.21.2",
+ "version": "0.22.3-beta.5",
  "os": ["linux"],
  "cpu": ["x64"],
  "main": "lancedb.linux-x64-musl.node",

@@ -1,6 +1,6 @@
{
  "name": "@lancedb/lancedb-win32-arm64-msvc",
- "version": "0.21.2",
+ "version": "0.22.3-beta.5",
  "os": [
    "win32"
  ],

@@ -1,6 +1,6 @@
{
  "name": "@lancedb/lancedb-win32-x64-msvc",
- "version": "0.21.2",
+ "version": "0.22.3-beta.5",
  "os": ["win32"],
  "cpu": ["x64"],
  "main": "lancedb.win32-x64-msvc.node",
Some files were not shown because too many files have changed in this diff.