Compare commits


2 Commits

Author SHA1 Message Date
David Myriel 9e278fc5a6 fix small details 2025-05-05 23:03:17 +02:00
David Myriel 09fed1f286 add quickstart doc 2025-05-05 22:02:11 +02:00
333 changed files with 19261 additions and 32580 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.22.3"
current_version = "0.19.1-beta.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.
@@ -50,6 +50,11 @@ pre_commit_hooks = [
optional_value = "final"
values = ["beta", "final"]
[[tool.bumpversion.files]]
filename = "node/package.json"
replace = "\"version\": \"{new_version}\","
search = "\"version\": \"{current_version}\","
[[tool.bumpversion.files]]
filename = "nodejs/package.json"
replace = "\"version\": \"{new_version}\","
@@ -61,8 +66,39 @@ glob = "nodejs/npm/*/package.json"
replace = "\"version\": \"{new_version}\","
search = "\"version\": \"{current_version}\","
# vectordb node binary packages
[[tool.bumpversion.files]]
glob = "node/package.json"
replace = "\"@lancedb/vectordb-darwin-arm64\": \"{new_version}\""
search = "\"@lancedb/vectordb-darwin-arm64\": \"{current_version}\""
[[tool.bumpversion.files]]
glob = "node/package.json"
replace = "\"@lancedb/vectordb-darwin-x64\": \"{new_version}\""
search = "\"@lancedb/vectordb-darwin-x64\": \"{current_version}\""
[[tool.bumpversion.files]]
glob = "node/package.json"
replace = "\"@lancedb/vectordb-linux-arm64-gnu\": \"{new_version}\""
search = "\"@lancedb/vectordb-linux-arm64-gnu\": \"{current_version}\""
[[tool.bumpversion.files]]
glob = "node/package.json"
replace = "\"@lancedb/vectordb-linux-x64-gnu\": \"{new_version}\""
search = "\"@lancedb/vectordb-linux-x64-gnu\": \"{current_version}\""
[[tool.bumpversion.files]]
glob = "node/package.json"
replace = "\"@lancedb/vectordb-win32-x64-msvc\": \"{new_version}\""
search = "\"@lancedb/vectordb-win32-x64-msvc\": \"{current_version}\""
# Cargo files
# ------------
[[tool.bumpversion.files]]
filename = "rust/ffi/node/Cargo.toml"
replace = "\nversion = \"{new_version}\""
search = "\nversion = \"{current_version}\""
[[tool.bumpversion.files]]
filename = "rust/lancedb/Cargo.toml"
replace = "\nversion = \"{new_version}\""

View File

@@ -1,45 +0,0 @@
name: Create Failure Issue
description: Creates a GitHub issue if any jobs in the workflow failed
inputs:
job-results:
description: 'JSON string of job results from needs context'
required: true
workflow-name:
description: 'Name of the workflow'
required: true
runs:
using: composite
steps:
- name: Check for failures and create issue
shell: bash
env:
JOB_RESULTS: ${{ inputs.job-results }}
WORKFLOW_NAME: ${{ inputs.workflow-name }}
RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
GH_TOKEN: ${{ github.token }}
run: |
# Check if any job failed
if echo "$JOB_RESULTS" | jq -e 'to_entries | any(.value.result == "failure")' > /dev/null; then
echo "Detected job failures, creating issue..."
# Extract failed job names
FAILED_JOBS=$(echo "$JOB_RESULTS" | jq -r 'to_entries | map(select(.value.result == "failure")) | map(.key) | join(", ")')
# Create issue with workflow name, failed jobs, and run URL
gh issue create \
--title "$WORKFLOW_NAME Failed ($FAILED_JOBS)" \
--body "The workflow **$WORKFLOW_NAME** failed during execution.
**Failed jobs:** $FAILED_JOBS
**Run URL:** $RUN_URL
Please investigate the failed jobs and address any issues." \
--label "ci"
echo "Issue created successfully"
else
echo "No job failures detected, skipping issue creation"
fi
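For reference, the jq check above — `to_entries | any(.value.result == "failure")` — and the failed-job extraction can be mirrored in a few lines. A minimal Rust sketch with `serde_json`, assuming the `needs` context is a JSON object mapping job names to `{ "result": ... }`:

```rust
// Sketch of the failure check performed by the composite action.
// The shape of the `needs` JSON is an assumption based on the workflow.
use serde_json::{json, Value};

fn failed_jobs(needs: &Value) -> Vec<String> {
    needs
        .as_object()
        .into_iter()
        .flatten()
        .filter(|(_, job)| job["result"] == "failure")
        .map(|(name, _)| name.clone())
        .collect()
}

fn main() {
    let needs = json!({
        "build": { "result": "success" },
        "test":  { "result": "failure" }
    });
    let failed = failed_jobs(&needs);
    if failed.is_empty() {
        println!("No job failures detected, skipping issue creation");
    } else {
        println!("Detected job failures: {}", failed.join(", "));
    }
}
```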

View File

@@ -5,8 +5,8 @@ on:
tags-ignore:
# We don't publish pre-releases for Rust. Crates.io is just a source
# distribution, so we don't need to publish pre-releases.
- "v*-beta*"
- "*-v*" # for example, python-vX.Y.Z
- 'v*-beta*'
- '*-v*' # for example, python-vX.Y.Z
env:
# This env var is used by Swatinem/rust-cache@v2 for the cache
@@ -19,8 +19,6 @@ env:
jobs:
build:
runs-on: ubuntu-22.04
permissions:
id-token: write
timeout-minutes: 30
# Only runs on tags that match the make-release action
if: startsWith(github.ref, 'refs/tags/v')
@@ -33,22 +31,6 @@ jobs:
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- uses: rust-lang/crates-io-auth-action@v1
id: auth
- name: Publish the package
run: |
cargo publish -p lancedb --all-features --token ${{ steps.auth.outputs.token }}
report-failure:
name: Report Workflow Failure
runs-on: ubuntu-latest
needs: [build]
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
permissions:
contents: read
issues: write
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/create-failure-issue
with:
job-results: ${{ toJSON(needs) }}
workflow-name: ${{ github.workflow }}
cargo publish -p lancedb --all-features --token ${{ secrets.CARGO_REGISTRY_TOKEN }}

View File

@@ -1,107 +0,0 @@
name: Codex Update Lance Dependency
on:
workflow_call:
inputs:
tag:
description: "Tag name from Lance"
required: true
type: string
workflow_dispatch:
inputs:
tag:
description: "Tag name from Lance"
required: true
type: string
permissions:
contents: write
pull-requests: write
actions: read
jobs:
update:
runs-on: ubuntu-latest
steps:
- name: Show inputs
run: |
echo "tag = ${{ inputs.tag }}"
- name: Checkout Repo LanceDB
uses: actions/checkout@v4
with:
fetch-depth: 0
persist-credentials: true
- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: 20
- name: Install Codex CLI
run: npm install -g @openai/codex
- name: Install Rust toolchain
uses: dtolnay/rust-toolchain@stable
with:
toolchain: stable
components: clippy, rustfmt
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y protobuf-compiler libssl-dev
- name: Install cargo-info
run: cargo install cargo-info
- name: Install Python dependencies
run: python3 -m pip install --upgrade pip packaging
- name: Configure git user
run: |
git config user.name "lancedb automation"
git config user.email "robot@lancedb.com"
- name: Configure Codex authentication
env:
CODEX_TOKEN_B64: ${{ secrets.CODEX_TOKEN }}
run: |
if [ -z "${CODEX_TOKEN_B64}" ]; then
echo "Repository secret CODEX_TOKEN is not defined; skipping Codex execution."
exit 1
fi
mkdir -p ~/.codex
echo "${CODEX_TOKEN_B64}" | base64 --decode > ~/.codex/auth.json
- name: Run Codex to update Lance dependency
env:
TAG: ${{ inputs.tag }}
GITHUB_TOKEN: ${{ secrets.ROBOT_TOKEN }}
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
run: |
set -euo pipefail
VERSION="${TAG#refs/tags/}"
VERSION="${VERSION#v}"
BRANCH_NAME="codex/update-lance-${VERSION//[^a-zA-Z0-9]/-}"
cat <<EOF >/tmp/codex-prompt.txt
You are running inside the lancedb repository on a GitHub Actions runner. Update the Lance dependency to version ${VERSION} and prepare a pull request for maintainers to review.
Follow these steps exactly:
1. Use script "ci/set_lance_version.py" to update Lance dependencies. The script already refreshes Cargo metadata, so allow it to finish even if it takes time.
2. Run "cargo clippy --workspace --tests --all-features -- -D warnings". If diagnostics appear, fix them yourself and rerun clippy until it exits cleanly. Do not skip any warnings.
3. After clippy succeeds, run "cargo fmt --all" to format the workspace.
4. Ensure the repository is clean except for intentional changes. Inspect "git status --short" and "git diff" to confirm the dependency update and any required fixes.
5. Create and switch to a new branch named "${BRANCH_NAME}" (replace any duplicated hyphens if necessary).
6. Stage all relevant files with "git add -A". Commit using the message "chore: update lance dependency to v${VERSION}".
7. Push the branch to origin. If the branch already exists, force-push your changes.
8. env "GH_TOKEN" is available, use "gh" tools for github related operations like creating pull request.
9. Create a pull request targeting "main" with title "chore: update lance dependency to v${VERSION}". In the body, summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}).
10. After creating the PR, display the PR URL, "git status --short", and a concise summary of the commands run and their results.
Constraints:
- Use bash commands; avoid modifying GitHub workflow files other than through the scripted task above.
- Do not merge the PR.
- If any command fails, diagnose and fix the issue instead of aborting.
EOF
codex --config shell_environment_policy.ignore_default_excludes=true exec --dangerously-bypass-approvals-and-sandbox "$(cat /tmp/codex-prompt.txt)"
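The branch name in the prompt is derived with bash parameter expansion (`${TAG#refs/tags/}`, a leading `v` strip, and `${VERSION//[^a-zA-Z0-9]/-}`). A small Rust sketch of the equivalent derivation, for illustration:

```rust
// Mirrors the bash expansions used above: strip "refs/tags/" and a
// leading "v", then map every non-alphanumeric character to "-".
fn branch_for_tag(tag: &str) -> String {
    let version = tag.strip_prefix("refs/tags/").unwrap_or(tag);
    let version = version.strip_prefix('v').unwrap_or(version);
    let sanitized: String = version
        .chars()
        .map(|c| if c.is_ascii_alphanumeric() { c } else { '-' })
        .collect();
    format!("codex/update-lance-{sanitized}")
}

fn main() {
    assert_eq!(
        branch_for_tag("refs/tags/v0.27.0-beta.2"),
        "codex/update-lance-0-27-0-beta-2"
    );
    println!("ok");
}
```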

View File

@@ -56,12 +56,22 @@ jobs:
with:
node-version: 20
cache: 'npm'
cache-dependency-path: docs/package-lock.json
cache-dependency-path: node/package-lock.json
- name: Install node dependencies
working-directory: nodejs
working-directory: node
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- name: Build node
working-directory: node
run: |
npm ci
npm run build
npm run tsc
- name: Create markdown files
working-directory: node
run: |
npx typedoc --plugin typedoc-plugin-markdown --out ../docs/src/javascript src/index.ts
- name: Build docs
working-directory: docs
run: |

View File

@@ -24,8 +24,7 @@ env:
jobs:
test-python:
name: Test doc python code
runs-on: warp-ubuntu-2204-x64-8x
timeout-minutes: 60
runs-on: ubuntu-24.04
steps:
- name: Checkout
uses: actions/checkout@v4
@@ -59,3 +58,51 @@ jobs:
run: |
cd docs/test/python
for d in *; do cd "$d"; echo "$d".py; python "$d".py; cd ..; done
test-node:
name: Test doc nodejs code
runs-on: ubuntu-24.04
timeout-minutes: 60
strategy:
fail-fast: false
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
lfs: true
- name: Print CPU capabilities
run: cat /proc/cpuinfo
- name: Set up Node
uses: actions/setup-node@v4
with:
node-version: 20
- name: Install protobuf
run: |
sudo apt update
sudo apt install -y protobuf-compiler
- name: Install dependencies needed for ubuntu
run: |
sudo apt install -y libssl-dev
rustup update && rustup default stable
- name: Rust cache
uses: swatinem/rust-cache@v2
- name: Install node dependencies
run: |
sudo swapoff -a
sudo fallocate -l 8G /swapfile
sudo chmod 600 /swapfile
sudo mkswap /swapfile
sudo swapon /swapfile
sudo swapon --show
cd node
npm ci
npm run build-release
cd ../docs
npm install
- name: Test
env:
LANCEDB_URI: ${{ secrets.LANCEDB_URI }}
LANCEDB_DEV_API_KEY: ${{ secrets.LANCEDB_DEV_API_KEY }}
run: |
cd docs
npm t

View File

@@ -43,6 +43,7 @@ jobs:
- uses: Swatinem/rust-cache@v2
- uses: actions-rust-lang/setup-rust-toolchain@v1
with:
toolchain: "1.81.0"
cache-workspaces: "./java/core/lancedb-jni"
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
@@ -111,17 +112,3 @@ jobs:
env:
SONATYPE_USER: ${{ secrets.SONATYPE_USER }}
SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }}
report-failure:
name: Report Workflow Failure
runs-on: ubuntu-latest
needs: [linux-arm64, linux-x86, macos-arm64]
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
permissions:
contents: read
issues: write
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/create-failure-issue
with:
job-results: ${{ toJSON(needs) }}
workflow-name: ${{ github.workflow }}

View File

@@ -35,9 +35,6 @@ jobs:
- uses: Swatinem/rust-cache@v2
with:
workspaces: java/core/lancedb-jni
- uses: actions-rust-lang/setup-rust-toolchain@v1
with:
components: rustfmt
- name: Run cargo fmt
run: cargo fmt --check
working-directory: ./java/core/lancedb-jni
@@ -71,9 +68,6 @@ jobs:
- uses: Swatinem/rust-cache@v2
with:
workspaces: java/core/lancedb-jni
- uses: actions-rust-lang/setup-rust-toolchain@v1
with:
components: rustfmt
- name: Run cargo fmt
run: cargo fmt --check
working-directory: ./java/core/lancedb-jni
@@ -116,3 +110,4 @@ jobs:
-Djdk.reflect.useDirectMethodHandle=false \
-Dio.netty.tryReflectionSetAccessible=true"
JAVA_HOME=$JAVA_17 mvn clean test

View File

@@ -84,7 +84,6 @@ jobs:
run: |
pip install bump-my-version PyGithub packaging
bash ci/bump_version.sh ${{ inputs.type }} ${{ inputs.bump-minor }} v $COMMIT_BEFORE_BUMP
bash ci/update_lockfiles.sh --amend
- name: Push new version tag
if: ${{ !inputs.dry_run }}
uses: ad-m/github-push-action@master
@@ -93,3 +92,11 @@ jobs:
github_token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
branch: ${{ github.ref }}
tags: true
- uses: ./.github/workflows/update_package_lock
if: ${{ !inputs.dry_run && inputs.other }}
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
- uses: ./.github/workflows/update_package_lock_nodejs
if: ${{ !inputs.dry_run && inputs.other }}
with:
github_token: ${{ secrets.GITHUB_TOKEN }}

.github/workflows/node.yml (vendored, new file, 147 lines)
View File

@@ -0,0 +1,147 @@
name: Node
on:
push:
branches:
- main
pull_request:
paths:
- node/**
- rust/ffi/node/**
- .github/workflows/node.yml
- docker-compose.yml
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
env:
# Disable full debug symbol generation to speed up CI build and keep memory down
# "1" means line tables only, which is useful for panic tracebacks.
#
# Use native CPU to accelerate tests if possible, especially for f16
# target-cpu=haswell fixes failing ci build
RUSTFLAGS: "-C debuginfo=1 -C target-cpu=haswell -C target-feature=+f16c,+avx2,+fma"
RUST_BACKTRACE: "1"
jobs:
linux:
name: Linux (Node ${{ matrix.node-version }})
timeout-minutes: 30
strategy:
matrix:
node-version: [ "18", "20" ]
runs-on: "ubuntu-22.04"
defaults:
run:
shell: bash
working-directory: node
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
lfs: true
- uses: actions/setup-node@v3
with:
node-version: ${{ matrix.node-version }}
cache: 'npm'
cache-dependency-path: node/package-lock.json
- uses: Swatinem/rust-cache@v2
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- name: Build
run: |
npm ci
npm run build
npm run pack-build
npm install --no-save ./dist/lancedb-vectordb-*.tgz
# Remove index.node to test with dependency installed
rm index.node
- name: Test
run: npm run test
macos:
timeout-minutes: 30
runs-on: "macos-13"
defaults:
run:
shell: bash
working-directory: node
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
lfs: true
- uses: actions/setup-node@v3
with:
node-version: 20
cache: 'npm'
cache-dependency-path: node/package-lock.json
- uses: Swatinem/rust-cache@v2
- name: Install dependencies
run: brew install protobuf
- name: Build
run: |
npm ci
npm run build
npm run pack-build
npm install --no-save ./dist/lancedb-vectordb-*.tgz
# Remove index.node to test with dependency installed
rm index.node
- name: Test
run: |
npm run test
aws-integtest:
timeout-minutes: 45
runs-on: "ubuntu-22.04"
defaults:
run:
shell: bash
working-directory: node
env:
AWS_ACCESS_KEY_ID: ACCESSKEY
AWS_SECRET_ACCESS_KEY: SECRETKEY
AWS_DEFAULT_REGION: us-west-2
# this one is for s3
AWS_ENDPOINT: http://localhost:4566
# this one is for dynamodb
DYNAMODB_ENDPOINT: http://localhost:4566
ALLOW_HTTP: true
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
lfs: true
- uses: actions/setup-node@v3
with:
node-version: 20
cache: 'npm'
cache-dependency-path: node/package-lock.json
- name: Start LocalStack
run: docker compose -f ../docker-compose.yml up -d --wait
- name: create s3
run: aws s3 mb s3://lancedb-integtest --endpoint $AWS_ENDPOINT
- name: create ddb
run: |
aws dynamodb create-table \
--table-name lancedb-integtest \
--attribute-definitions '[{"AttributeName": "base_uri", "AttributeType": "S"}, {"AttributeName": "version", "AttributeType": "N"}]' \
--key-schema '[{"AttributeName": "base_uri", "KeyType": "HASH"}, {"AttributeName": "version", "KeyType": "RANGE"}]' \
--provisioned-throughput '{"ReadCapacityUnits": 10, "WriteCapacityUnits": 10}' \
--endpoint-url $DYNAMODB_ENDPOINT
- uses: Swatinem/rust-cache@v2
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- name: Build
run: |
npm ci
npm run build
npm run pack-build
npm install --no-save ./dist/lancedb-vectordb-*.tgz
# Remove index.node to test with dependency installed
rm index.node
- name: Test
run: npm run integration-test

View File

@@ -6,7 +6,6 @@ on:
- main
pull_request:
paths:
- Cargo.toml
- nodejs/**
- .github/workflows/nodejs.yml
- docker-compose.yml
@@ -48,9 +47,6 @@ jobs:
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- uses: actions-rust-lang/setup-rust-toolchain@v1
with:
components: rustfmt, clippy
- name: Lint
run: |
cargo fmt --all -- --check
@@ -80,7 +76,7 @@ jobs:
with:
node-version: ${{ matrix.node-version }}
cache: 'npm'
cache-dependency-path: nodejs/package-lock.json
cache-dependency-path: node/package-lock.json
- uses: Swatinem/rust-cache@v2
- name: Install dependencies
run: |
@@ -117,7 +113,7 @@ jobs:
set -e
npm ci
npm run docs
if ! git diff --exit-code -- ../ ':(exclude)Cargo.lock'; then
if ! git diff --exit-code; then
echo "Docs need to be updated"
echo "Run 'npm run docs', fix any warnings, and commit the changes."
exit 1
@@ -138,7 +134,7 @@ jobs:
with:
node-version: 20
cache: 'npm'
cache-dependency-path: nodejs/package-lock.json
cache-dependency-path: node/package-lock.json
- uses: Swatinem/rust-cache@v2
- name: Install dependencies
run: |

View File

@@ -365,17 +365,202 @@ jobs:
ARGS="$ARGS --tag preview"
fi
npm publish $ARGS
report-failure:
name: Report Workflow Failure
# ----------------------------------------------------------------------------
# vectordb release (legacy)
# ----------------------------------------------------------------------------
# TODO: delete this when we drop vectordb
node:
name: vectordb Typescript
runs-on: ubuntu-latest
needs: [build-lancedb, test-lancedb, publish]
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
permissions:
contents: read
issues: write
defaults:
run:
shell: bash
working-directory: node
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/create-failure-issue
- name: Checkout
uses: actions/checkout@v4
- uses: actions/setup-node@v3
with:
job-results: ${{ toJSON(needs) }}
workflow-name: ${{ github.workflow }}
node-version: 20
cache: "npm"
cache-dependency-path: node/package-lock.json
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
- name: Build
run: |
npm ci
npm run tsc
npm pack
- name: Upload Linux Artifacts
uses: actions/upload-artifact@v4
with:
name: node-package
path: |
node/vectordb-*.tgz
node-macos:
name: vectordb ${{ matrix.config.arch }}
strategy:
matrix:
config:
- arch: x86_64-apple-darwin
runner: macos-13
- arch: aarch64-apple-darwin
# xlarge is implicitly arm64.
runner: macos-14
runs-on: ${{ matrix.config.runner }}
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Install system dependencies
run: brew install protobuf
- name: Install npm dependencies
run: |
cd node
npm ci
- name: Build MacOS native node modules
run: bash ci/build_macos_artifacts.sh ${{ matrix.config.arch }}
- name: Upload Darwin Artifacts
uses: actions/upload-artifact@v4
with:
name: node-native-darwin-${{ matrix.config.arch }}
path: |
node/dist/lancedb-vectordb-darwin*.tgz
node-linux-gnu:
name: vectordb (${{ matrix.config.arch }}-unknown-linux-gnu)
runs-on: ${{ matrix.config.runner }}
strategy:
fail-fast: false
matrix:
config:
- arch: x86_64
runner: ubuntu-latest
- arch: aarch64
# For successful fat LTO builds, we need a large runner to avoid OOM errors.
runner: warp-ubuntu-latest-arm64-4x
steps:
- name: Checkout
uses: actions/checkout@v4
# To avoid OOM errors on ARM, we create a swap file.
- name: Configure aarch64 build
if: ${{ matrix.config.arch == 'aarch64' }}
run: |
free -h
sudo fallocate -l 16G /swapfile
sudo chmod 600 /swapfile
sudo mkswap /swapfile
sudo swapon /swapfile
echo "/swapfile swap swap defaults 0 0" >> sudo /etc/fstab
# print info
swapon --show
free -h
- name: Build Linux Artifacts
run: |
bash ci/build_linux_artifacts.sh ${{ matrix.config.arch }} ${{ matrix.config.arch }}-unknown-linux-gnu
- name: Upload Linux Artifacts
uses: actions/upload-artifact@v4
with:
name: node-native-linux-${{ matrix.config.arch }}-gnu
path: |
node/dist/lancedb-vectordb-linux*.tgz
node-windows:
name: vectordb ${{ matrix.target }}
runs-on: windows-2022
strategy:
fail-fast: false
matrix:
target: [x86_64-pc-windows-msvc]
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Install Protoc v21.12
working-directory: C:\
run: |
New-Item -Path 'C:\protoc' -ItemType Directory
Set-Location C:\protoc
Invoke-WebRequest https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip -OutFile C:\protoc\protoc.zip
7z x protoc.zip
Add-Content $env:GITHUB_PATH "C:\protoc\bin"
shell: powershell
- name: Install npm dependencies
run: |
cd node
npm ci
- name: Build Windows native node modules
run: .\ci\build_windows_artifacts.ps1 ${{ matrix.target }}
- name: Upload Windows Artifacts
uses: actions/upload-artifact@v4
with:
name: node-native-windows
path: |
node/dist/lancedb-vectordb-win32*.tgz
release:
name: vectordb NPM Publish
needs: [node, node-macos, node-linux-gnu, node-windows]
runs-on: ubuntu-latest
# Only runs on tags that match the make-release action
if: startsWith(github.ref, 'refs/tags/v')
steps:
- uses: actions/download-artifact@v4
with:
pattern: node-*
- name: Display structure of downloaded files
run: ls -R
- uses: actions/setup-node@v3
with:
node-version: 20
registry-url: "https://registry.npmjs.org"
- name: Publish to NPM
env:
NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
run: |
# Tag beta as "preview" instead of default "latest". See lancedb
# npm publish step for more info.
if [[ $GITHUB_REF =~ refs/tags/v(.*)-beta.* ]]; then
PUBLISH_ARGS="--tag preview"
fi
mv */*.tgz .
for filename in *.tgz; do
npm publish $PUBLISH_ARGS $filename
done
- name: Deprecate
env:
NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
# We need to deprecate the old package to avoid confusion.
# Each time we publish a new version, it gets undeprecated.
run: npm deprecate vectordb "Use @lancedb/lancedb instead."
- name: Notify Slack Action
uses: ravsamhq/notify-slack-action@2.3.0
if: ${{ always() }}
with:
status: ${{ job.status }}
notify_when: "failure"
notification_title: "{workflow} is failing"
env:
SLACK_WEBHOOK_URL: ${{ secrets.ACTION_MONITORING_SLACK }}
update-package-lock:
if: startsWith(github.ref, 'refs/tags/v')
needs: [release]
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Checkout
uses: actions/checkout@v4
with:
ref: main
token: ${{ secrets.LANCEDB_RELEASE_TOKEN }}
fetch-depth: 0
lfs: true
- uses: ./.github/workflows/update_package_lock
with:
github_token: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -56,7 +56,7 @@ jobs:
pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
fury_token: ${{ secrets.FURY_TOKEN }}
mac:
timeout-minutes: 90
timeout-minutes: 60
runs-on: ${{ matrix.config.runner }}
strategy:
matrix:
@@ -64,7 +64,7 @@ jobs:
- target: x86_64-apple-darwin
runner: macos-13
- target: aarch64-apple-darwin
runner: warp-macos-14-arm64-6x
runner: macos-14
env:
MACOSX_DEPLOYMENT_TARGET: 10.15
steps:
@@ -173,17 +173,3 @@ jobs:
generate_release_notes: false
name: Python LanceDB v${{ steps.extract_version.outputs.version }}
body: ${{ steps.python_release_notes.outputs.changelog }}
report-failure:
name: Report Workflow Failure
runs-on: ubuntu-latest
needs: [linux, mac, windows]
permissions:
contents: read
issues: write
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/create-failure-issue
with:
job-results: ${{ toJSON(needs) }}
workflow-name: ${{ github.workflow }}

View File

@@ -6,7 +6,6 @@ on:
- main
pull_request:
paths:
- Cargo.toml
- python/**
- .github/workflows/python.yml

View File

@@ -24,8 +24,8 @@ runs:
- name: pytest (with integration)
shell: bash
if: ${{ inputs.integration == 'true' }}
run: pytest -m "not slow" -vv --durations=30 python/python/tests
run: pytest -m "not slow" -x -v --durations=30 python/python/tests
- name: pytest (no integration tests)
shell: bash
if: ${{ inputs.integration != 'true' }}
run: pytest -m "not slow and not s3_test" -vv --durations=30 python/python/tests
run: pytest -m "not slow and not s3_test" -x -v --durations=30 python/python/tests

View File

@@ -40,9 +40,6 @@ jobs:
with:
fetch-depth: 0
lfs: true
- uses: actions-rust-lang/setup-rust-toolchain@v1
with:
components: rustfmt, clippy
- uses: Swatinem/rust-cache@v2
with:
workspaces: rust
@@ -96,7 +93,6 @@ jobs:
# Need up-to-date compilers for kernels
CC: clang-18
CXX: clang++-18
GH_TOKEN: ${{ secrets.SOPHON_READ_TOKEN }}
steps:
- uses: actions/checkout@v4
with:
@@ -118,17 +114,15 @@ jobs:
sudo chmod 600 /swapfile
sudo mkswap /swapfile
sudo swapon /swapfile
- name: Start S3 integration test environment
working-directory: .
run: docker compose up --detach --wait
- name: Build
run: cargo build --all-features --tests --locked --examples
- name: Run feature tests
run: make -C ./lancedb feature-tests
- name: Run tests
run: cargo test --all-features --locked
- name: Run examples
run: cargo run --example simple --locked
- name: Run remote tests
# Running this requires access to secrets, so skip if this is
# a PR from a fork.
if: github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork
run: make -C ./lancedb remote-tests
macos:
timeout-minutes: 30
@@ -166,8 +160,8 @@ jobs:
strategy:
matrix:
target:
- x86_64-pc-windows-msvc
- aarch64-pc-windows-msvc
- x86_64-pc-windows-msvc
- aarch64-pc-windows-msvc
defaults:
run:
working-directory: rust/lancedb

View File

@@ -0,0 +1,26 @@
name: Trigger vectordb-recipes workflow
on:
push:
branches: [ main ]
pull_request:
paths:
- .github/workflows/trigger-vectordb-recipes.yml
workflow_dispatch:
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Trigger vectordb-recipes workflow
uses: actions/github-script@v6
with:
github-token: ${{ secrets.VECTORDB_RECIPES_ACTION_TOKEN }}
script: |
const result = await github.rest.actions.createWorkflowDispatch({
owner: 'lancedb',
repo: 'vectordb-recipes',
workflow_id: 'examples-test.yml',
ref: 'main'
});
console.log(result);

View File

@@ -0,0 +1,33 @@
name: update_package_lock
description: "Update node's package.lock"
inputs:
github_token:
required: true
description: "github token for the repo"
runs:
using: "composite"
steps:
- uses: actions/setup-node@v3
with:
node-version: 20
- name: Set git configs
shell: bash
run: |
git config user.name 'Lance Release'
git config user.email 'lance-dev@lancedb.com'
- name: Update package-lock.json file
working-directory: ./node
run: |
npm install
git add package-lock.json
git commit -m "Updating package-lock.json"
shell: bash
- name: Push changes
if: ${{ inputs.dry_run == 'false' }}
uses: ad-m/github-push-action@master
with:
github_token: ${{ inputs.github_token }}
branch: main
tags: true

View File

@@ -0,0 +1,33 @@
name: update_package_lock_nodejs
description: "Update nodejs's package.lock"
inputs:
github_token:
required: true
description: "github token for the repo"
runs:
using: "composite"
steps:
- uses: actions/setup-node@v3
with:
node-version: 20
- name: Set git configs
shell: bash
run: |
git config user.name 'Lance Release'
git config user.email 'lance-dev@lancedb.com'
- name: Update package-lock.json file
working-directory: ./nodejs
run: |
npm install
git add package-lock.json
git commit -m "Updating package-lock.json"
shell: bash
- name: Push changes
if: ${{ inputs.dry_run == 'false' }}
uses: ad-m/github-push-action@master
with:
github_token: ${{ inputs.github_token }}
branch: main
tags: true

.gitignore (vendored, 3 changed lines)
View File

@@ -31,6 +31,9 @@ python/dist
*.node
**/node_modules
**/.DS_Store
node/dist
node/examples/**/package-lock.json
node/examples/**/dist
nodejs/lancedb/native*
dist

AGENTS.md (101 changed lines)
View File

@@ -1,101 +0,0 @@
LanceDB is a database designed for retrieval, including vector, full-text, and hybrid search.
It is a wrapper around Lance. There are two backends: local (in-process like SQLite) and
remote (against LanceDB Cloud).
The core of LanceDB is written in Rust. There are bindings in Python, Typescript, and Java.
Project layout:
* `rust/lancedb`: The LanceDB core Rust implementation.
* `python`: The Python bindings, using PyO3.
* `nodejs`: The Typescript bindings, using napi-rs
* `java`: The Java bindings
Common commands:
* Check for compiler errors: `cargo check --quiet --features remote --tests --examples`
* Run tests: `cargo test --quiet --features remote --tests`
* Run specific test: `cargo test --quiet --features remote -p <package_name> --test <test_name>`
* Lint: `cargo clippy --quiet --features remote --tests --examples`
* Format: `cargo fmt --all`
Before committing changes, run formatting.
## Coding tips
* When writing Rust doctests for things that require a connection or table reference,
write them as a function instead of a fully executable test. This allows type checking
to run but avoids needing a full test environment. For example:
```rust
/// ```
/// use lance_index::scalar::FullTextSearchQuery;
/// use lancedb::query::{QueryBase, ExecutableQuery};
///
/// # use lancedb::Table;
/// # async fn query(table: &Table) -> Result<(), Box<dyn std::error::Error>> {
/// let results = table.query()
/// .full_text_search(FullTextSearchQuery::new("hello world".into()))
/// .execute()
/// .await?;
/// # Ok(())
/// # }
/// ```
```
## Example plan: adding a new method on Table
Adding a new method involves first adding it to the Rust core, then exposing it
in the Python and TypeScript bindings. There are both local and remote tables.
Remote tables are implemented via a HTTP API and require the `remote` cargo
feature flag to be enabled. Python has both sync and async methods.
Rust core changes:
1. Add method on `Table` struct in `rust/lancedb/src/table.rs` (calls `BaseTable` trait).
2. Add method to `BaseTable` trait in `rust/lancedb/src/table.rs`.
3. Implement new trait method on `NativeTable` in `rust/lancedb/src/table.rs`.
* Test with unit test in `rust/lancedb/src/table.rs`.
4. Implement new trait method on `RemoteTable` in `rust/lancedb/src/remote/table.rs`.
* Test with unit test in `rust/lancedb/src/remote/table.rs` against mocked endpoint.
Python bindings changes:
1. Add PyO3 method binding in `python/src/table.rs`. Run `make develop` to compile bindings.
2. Add types for PyO3 method in `python/python/lancedb/_lancedb.pyi`.
3. Add method to `AsyncTable` class in `python/python/lancedb/table.py`.
4. Add abstract method to `Table` abstract base class in `python/python/lancedb/table.py`.
5. Add concrete sync method to `LanceTable` class in `python/python/lancedb/table.py`.
* Should use `LOOP.run()` to call the corresponding `AsyncTable` method.
6. Add concrete sync method to `RemoteTable` class in `python/python/lancedb/remote/table.py`.
7. Add unit test in `python/tests/test_table.py`.
TypeScript bindings changes:
1. Add napi-rs method binding on `Table` in `nodejs/src/table.rs`.
2. Run `npm run build` to generate TypeScript definitions.
3. Add typescript method on abstract class `Table` in `nodejs/src/table.ts`.
4. Add concrete method on `LocalTable` class in `nodejs/src/native_table.ts`.
* Note: despite the name, this class is also used for remote tables.
5. Add test in `nodejs/__test__/table.test.ts`.
6. Run `npm run docs` to generate TypeScript documentation.
## Review Guidelines
Please consider the following when reviewing code contributions.
### Rust API design
* Design public APIs so they can be evolved easily in the future without breaking
changes. Often this means using builder patterns or options structs instead of
long argument lists.
* For public APIs, prefer inputs that use `Into<T>` or `AsRef<T>` traits to allow
more flexible inputs. For example, use `name: Into<String>` instead of `name: String`,
so we don't have to write `func("my_string".to_string())`.
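A minimal sketch of this guideline; the builder type is hypothetical, not an actual LanceDB API:

```rust
// Hypothetical builder illustrating `impl Into<String>` parameters:
// callers can pass &str or String without an explicit .to_string().
struct CreateTableBuilder {
    name: String,
}

impl CreateTableBuilder {
    fn new(name: impl Into<String>) -> Self {
        Self { name: name.into() }
    }
}

fn main() {
    let from_str = CreateTableBuilder::new("my_table");
    let from_string = CreateTableBuilder::new(String::from("my_table"));
    assert_eq!(from_str.name, from_string.name);
}
```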
### Testing
* Ensure all new public APIs have documentation and examples.
* Ensure that all bugfixes and features have corresponding tests. **We do not merge
code without tests.**
### Documentation
* New features must include updates to the rust documentation comments. Link to
relevant structs and methods to increase the value of documentation.

View File

@@ -1 +0,0 @@
AGENTS.md

Cargo.lock (generated, 3862 changed lines)

File diff suppressed because it is too large.

View File

@@ -1,5 +1,11 @@
[workspace]
members = ["rust/lancedb", "nodejs", "python", "java/core/lancedb-jni"]
members = [
"rust/ffi/node",
"rust/lancedb",
"nodejs",
"python",
"java/core/lancedb-jni",
]
# Python package needs to be built by maturin.
exclude = ["python"]
resolver = "2"
@@ -15,51 +21,55 @@ categories = ["database-implementations"]
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=0.39.0", default-features = false }
lance-core = "=0.39.0"
lance-datagen = "=0.39.0"
lance-file = "=0.39.0"
lance-io = { "version" = "=0.39.0", default-features = false }
lance-index = "=0.39.0"
lance-linalg = "=0.39.0"
lance-namespace = "=0.39.0"
lance-namespace-impls = { "version" = "=0.39.0", "features" = ["dir-aws", "dir-gcp", "dir-azure", "dir-oss", "rest"] }
lance-table = "=0.39.0"
lance-testing = "=0.39.0"
lance-datafusion = "=0.39.0"
lance-encoding = "=0.39.0"
lance-arrow = "=0.39.0"
ahash = "0.8"
lance = { "version" = "=0.27.0", "features" = ["dynamodb"], tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
lance-io = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
lance-index = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
lance-linalg = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
lance-table = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
lance-testing = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
lance-datafusion = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
lance-encoding = { version = "=0.27.0", tag = "v0.27.0-beta.2", git="https://github.com/lancedb/lance.git" }
# Note that this one does not include pyarrow
arrow = { version = "56.2", optional = false }
arrow-array = "56.2"
arrow-data = "56.2"
arrow-ipc = "56.2"
arrow-ord = "56.2"
arrow-schema = "56.2"
arrow-select = "56.2"
arrow-cast = "56.2"
arrow = { version = "54.1", optional = false }
arrow-array = "54.1"
arrow-data = "54.1"
arrow-ipc = "54.1"
arrow-ord = "54.1"
arrow-schema = "54.1"
arrow-arith = "54.1"
arrow-cast = "54.1"
async-trait = "0"
datafusion = { version = "50.1", default-features = false }
datafusion-catalog = "50.1"
datafusion-common = { version = "50.1", default-features = false }
datafusion-execution = "50.1"
datafusion-expr = "50.1"
datafusion-physical-plan = "50.1"
datafusion = { version = "46.0", default-features = false }
datafusion-catalog = "46.0"
datafusion-common = { version = "46.0", default-features = false }
datafusion-execution = "46.0"
datafusion-expr = "46.0"
datafusion-physical-plan = "46.0"
env_logger = "0.11"
half = { "version" = "2.6.0", default-features = false, features = [
half = { "version" = "=2.4.1", default-features = false, features = [
"num-traits",
] }
futures = "0"
log = "0.4"
moka = { version = "0.12", features = ["future"] }
object_store = "0.12.0"
object_store = "0.11.0"
pin-project = "1.0.7"
rand = "0.9"
snafu = "0.8"
url = "2"
num-traits = "0.2"
rand = "0.8"
regex = "1.10"
lazy_static = "1"
semver = "1.0.25"
chrono = "0.4"
# Temporary pins to work around downstream issues
# https://github.com/apache/arrow-rs/commit/2fddf85afcd20110ce783ed5b4cdeb82293da30b
chrono = "=0.4.39"
# https://github.com/RustCrypto/formats/issues/1684
base64ct = "=1.6.0"
# Workaround for: https://github.com/eira-fransham/crunchy/issues/13
crunchy = "=0.2.2"
# Workaround for: https://github.com/Lokathor/bytemuck/issues/306
bytemuck_derive = ">=1.8.1, <1.9.0"

README.md (167 changed lines)
View File

@@ -1,97 +1,94 @@
<a href="https://cloud.lancedb.com" target="_blank">
<img src="https://github.com/user-attachments/assets/92dad0a2-2a37-4ce1-b783-0d1b4f30a00c" alt="LanceDB Cloud Public Beta" width="100%" style="max-width: 100%;">
</a>
<div align="center">
<p align="center">
[![LanceDB](docs/src/assets/hero-header.png)](https://lancedb.com)
[![Website](https://img.shields.io/badge/-Website-100000?style=for-the-badge&labelColor=645cfb&color=645cfb)](https://lancedb.com/)
[![Blog](https://img.shields.io/badge/Blog-100000?style=for-the-badge&labelColor=645cfb&color=645cfb)](https://blog.lancedb.com/)
[![Discord](https://img.shields.io/badge/-Discord-100000?style=for-the-badge&logo=discord&logoColor=white&labelColor=645cfb&color=645cfb)](https://discord.gg/zMM32dvNtd)
[![Twitter](https://img.shields.io/badge/-Twitter-100000?style=for-the-badge&logo=x&logoColor=white&labelColor=645cfb&color=645cfb)](https://twitter.com/lancedb)
[![LinkedIn](https://img.shields.io/badge/-LinkedIn-100000?style=for-the-badge&logo=linkedin&logoColor=white&labelColor=645cfb&color=645cfb)](https://www.linkedin.com/company/lancedb/)
<picture>
<source media="(prefers-color-scheme: dark)" srcset="https://github.com/user-attachments/assets/ac270358-333e-4bea-a132-acefaa94040e">
<source media="(prefers-color-scheme: light)" srcset="https://github.com/user-attachments/assets/b864d814-0d29-4784-8fd9-807297c758c0">
<img alt="LanceDB Logo" src="https://github.com/user-attachments/assets/b864d814-0d29-4784-8fd9-807297c758c0" width=300>
</picture>
**Search More, Manage Less**
<img src="docs/src/assets/lancedb.png" alt="LanceDB" width="50%">
<a href='https://github.com/lancedb/vectordb-recipes/tree/main' target="_blank"><img alt='LanceDB' src='https://img.shields.io/badge/VectorDB_Recipes-100000?style=for-the-badge&logo=LanceDB&logoColor=white&labelColor=645cfb&color=645cfb'/></a>
<a href='https://lancedb.github.io/lancedb/' target="_blank"><img alt='lancdb' src='https://img.shields.io/badge/DOCS-100000?style=for-the-badge&logo=lancdb&logoColor=white&labelColor=645cfb&color=645cfb'/></a>
[![Blog](https://img.shields.io/badge/Blog-12100E?style=for-the-badge&logoColor=white)](https://blog.lancedb.com/)
[![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/zMM32dvNtd)
[![Twitter](https://img.shields.io/badge/Twitter-%231DA1F2.svg?style=for-the-badge&logo=Twitter&logoColor=white)](https://twitter.com/lancedb)
[![Gurubase](https://img.shields.io/badge/Gurubase-Ask%20LanceDB%20Guru-006BFF?style=for-the-badge)](https://gurubase.io/g/lancedb)
# **The Multimodal AI Lakehouse**
</p>
[**How to Install** ](#how-to-install) ✦ [**Detailed Documentation**](https://lancedb.github.io/lancedb/) ✦ [**Tutorials and Recipes**](https://github.com/lancedb/vectordb-recipes/tree/main) ✦ [**Contributors**](#contributors)
**The ultimate multimodal data platform for AI/ML applications.**
LanceDB is designed for fast, scalable, and production-ready vector search. It is built on top of the Lance columnar format. You can store, index, and search over petabytes of multimodal data and vectors with ease.
LanceDB is a central location where developers can build, train and analyze their AI workloads.
</div>
<br>
## **Demo: Multimodal Search by Keyword, Vector or with SQL**
<img max-width="750px" alt="LanceDB Multimodal Search" src="https://github.com/lancedb/lancedb/assets/917119/09c5afc5-7816-4687-bae4-f2ca194426ec">
## **Star LanceDB to get updates!**
<details>
<summary>⭐ Click here ⭐ to see how fast we're growing!</summary>
<picture>
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=lancedb/lancedb&theme=dark&type=Date">
<img width="100%" src="https://api.star-history.com/svg?repos=lancedb/lancedb&theme=dark&type=Date">
</picture>
</details>
## **Key Features**:
- **Fast Vector Search**: Search billions of vectors in milliseconds with state-of-the-art indexing.
- **Comprehensive Search**: Support for vector similarity search, full-text search and SQL.
- **Multimodal Support**: Store, query and filter vectors, metadata and multimodal data (text, images, videos, point clouds, and more).
- **Advanced Features**: Zero-copy, automatic versioning, manage versions of your data without needing extra infrastructure. GPU support in building vector index.
### **Products**:
- **Open Source & Local**: 100% open source, runs locally or in your cloud. No vendor lock-in.
- **Cloud and Enterprise**: Production-scale vector search with no servers to manage. Complete data sovereignty and security.
### **Ecosystem**:
- **Columnar Storage**: Built on the Lance columnar format for efficient storage and analytics.
- **Seamless Integration**: Python, Node.js, Rust, and REST APIs for easy integration. Native Python and Javascript/Typescript support.
- **Rich Ecosystem**: Integrations with [**LangChain** 🦜️🔗](https://python.langchain.com/docs/integrations/vectorstores/lancedb/), [**LlamaIndex** 🦙](https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html), Apache-Arrow, Pandas, Polars, DuckDB and more on the way.
## **How to Install**:
Follow the [Quickstart](https://lancedb.github.io/lancedb/basic/) doc to set up LanceDB locally.
**API & SDK:** We also support Python, Typescript and Rust SDKs
| Interface | Documentation |
|-----------|---------------|
| Python SDK | https://lancedb.github.io/lancedb/python/python/ |
| Typescript SDK | https://lancedb.github.io/lancedb/js/globals/ |
| Rust SDK | https://docs.rs/lancedb/latest/lancedb/index.html |
| REST API | https://docs.lancedb.com/api-reference/introduction |
## **Join Us and Contribute**
We welcome contributions from everyone! Whether you're a developer, researcher, or just someone who wants to help out.
If you have any suggestions or feature requests, please feel free to open an issue on GitHub or discuss it on our [**Discord**](https://discord.gg/G5DcmnZWKB) server.
[**Check out the GitHub Issues**](https://github.com/lancedb/lancedb/issues) if you would like to work on the features that are planned for the future.
## **Contributors**
<a href="https://github.com/lancedb/lancedb/graphs/contributors">
<img src="https://contrib.rocks/image?repo=lancedb/lancedb" />
</a>
## **Stay in Touch With Us**
<div align="center">
</br>
[![Website](https://img.shields.io/badge/-Website-100000?style=for-the-badge&labelColor=645cfb&color=645cfb)](https://lancedb.com/)
[![Blog](https://img.shields.io/badge/Blog-100000?style=for-the-badge&labelColor=645cfb&color=645cfb)](https://blog.lancedb.com/)
[![Discord](https://img.shields.io/badge/-Discord-100000?style=for-the-badge&logo=discord&logoColor=white&labelColor=645cfb&color=645cfb)](https://discord.gg/zMM32dvNtd)
[![Twitter](https://img.shields.io/badge/-Twitter-100000?style=for-the-badge&logo=x&logoColor=white&labelColor=645cfb&color=645cfb)](https://twitter.com/lancedb)
[![LinkedIn](https://img.shields.io/badge/-LinkedIn-100000?style=for-the-badge&logo=linkedin&logoColor=white&labelColor=645cfb&color=645cfb)](https://www.linkedin.com/company/lancedb/)
</p>
</div>
<hr />
LanceDB is an open-source database for vector-search built with persistent storage, which greatly simplifies retrieval, filtering and management of embeddings.
The key features of LanceDB include:
* Production-scale vector search with no servers to manage.
* Store, query and filter vectors, metadata and multi-modal data (text, images, videos, point clouds, and more).
* Support for vector similarity search, full-text search and SQL.
* Native Python and Javascript/Typescript support.
* Zero-copy, automatic versioning, manage versions of your data without needing extra infrastructure.
* GPU support in building vector index(*).
* Ecosystem integrations with [LangChain 🦜️🔗](https://python.langchain.com/docs/integrations/vectorstores/lancedb/), [LlamaIndex 🦙](https://gpt-index.readthedocs.io/en/latest/examples/vector_stores/LanceDBIndexDemo.html), Apache-Arrow, Pandas, Polars, DuckDB and more on the way.
LanceDB's core is written in Rust 🦀 and is built using <a href="https://github.com/lancedb/lance">Lance</a>, an open-source columnar format designed for performant ML workloads.
## Quick Start
**Javascript**
```shell
npm install @lancedb/lancedb
```
```javascript
import * as lancedb from "@lancedb/lancedb";
const db = await lancedb.connect("data/sample-lancedb");
const table = await db.createTable("vectors", [
{ id: 1, vector: [0.1, 0.2], item: "foo", price: 10 },
{ id: 2, vector: [1.1, 1.2], item: "bar", price: 50 },
], {mode: 'overwrite'});
const query = table.vectorSearch([0.1, 0.3]).limit(2);
const results = await query.toArray();
// You can also search for rows by specific criteria without involving a vector search.
const rowsByCriteria = await table.query().where("price >= 10").toArray();
```
**Python**
```shell
pip install lancedb
```
```python
import lancedb
uri = "data/sample-lancedb"
db = lancedb.connect(uri)
table = db.create_table("my_table",
data=[{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0}])
result = table.search([100, 100]).limit(2).to_pandas()
```
## Blogs, Tutorials & Videos
* 📈 <a href="https://blog.lancedb.com/benchmarking-random-access-in-lance/">2000x better performance with Lance over Parquet</a>
* 🤖 <a href="https://github.com/lancedb/vectordb-recipes/tree/main/examples/Youtube-Search-QA-Bot">Build a question and answer bot with LanceDB</a>

ci/build_linux_artifacts.sh (new executable file, 22 lines)
View File

@@ -0,0 +1,22 @@
#!/bin/bash
set -e
ARCH=${1:-x86_64}
TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}
# We pass down the current user so that when we later mount the local files
# into the container, the files are accessible by the current user.
pushd ci/manylinux_node
docker build \
-t lancedb-node-manylinux \
--build-arg="ARCH=$ARCH" \
--build-arg="DOCKER_USER=$(id -u)" \
--progress=plain \
.
popd
# We allow unlimited memory swap to avoid the OOM killer
docker run \
-v $(pwd):/io -w /io \
--memory-swap=-1 \
lancedb-node-manylinux \
bash ci/manylinux_node/build_vectordb.sh $ARCH $TARGET_TRIPLE

View File

@@ -0,0 +1,34 @@
# Builds the macOS artifacts (node binaries).
# Usage: ./ci/build_macos_artifacts.sh [target]
# Targets supported: x86_64-apple-darwin aarch64-apple-darwin
set -e
prebuild_rust() {
# Building here for the sake of easier debugging.
pushd rust/ffi/node
echo "Building rust library for $1"
export RUST_BACKTRACE=1
cargo build --release --target $1
popd
}
build_node_binaries() {
pushd node
echo "Building node library for $1"
npm run build-release -- --target $1
npm run pack-build -- --target $1
popd
}
if [ -n "$1" ]; then
targets=$1
else
targets="x86_64-apple-darwin aarch64-apple-darwin"
fi
echo "Building artifacts for targets: $targets"
for target in $targets
do
prebuild_rust $target
build_node_binaries $target
done

View File

@@ -0,0 +1,42 @@
# Builds the Windows artifacts (node binaries).
# Usage: .\ci\build_windows_artifacts.ps1 [target]
# Targets supported:
# - x86_64-pc-windows-msvc
# - i686-pc-windows-msvc
# - aarch64-pc-windows-msvc
function Prebuild-Rust {
param (
[string]$target
)
# Building here for the sake of easier debugging.
Push-Location -Path "rust/ffi/node"
Write-Host "Building rust library for $target"
$env:RUST_BACKTRACE=1
cargo build --release --target $target
Pop-Location
}
function Build-NodeBinaries {
param (
[string]$target
)
Push-Location -Path "node"
Write-Host "Building node library for $target"
npm run build-release -- --target $target
npm run pack-build -- --target $target
Pop-Location
}
$targets = $args[0]
if (-not $targets) {
$targets = "x86_64-pc-windows-msvc", "aarch64-pc-windows-msvc"
}
Write-Host "Building artifacts for targets: $targets"
foreach ($target in $targets) {
Prebuild-Rust $target
Build-NodeBinaries $target
}

View File

@@ -0,0 +1,42 @@
# Builds the Windows artifacts (nodejs binaries).
# Usage: .\ci\build_windows_artifacts_nodejs.ps1 [target]
# Targets supported:
# - x86_64-pc-windows-msvc
# - i686-pc-windows-msvc
# - aarch64-pc-windows-msvc
function Prebuild-Rust {
param (
[string]$target
)
# Building here for the sake of easier debugging.
Push-Location -Path "rust/lancedb"
Write-Host "Building rust library for $target"
$env:RUST_BACKTRACE=1
cargo build --release --target $target
Pop-Location
}
function Build-NodeBinaries {
param (
[string]$target
)
Push-Location -Path "nodejs"
Write-Host "Building nodejs library for $target"
$env:RUST_TARGET=$target
npm run build-release
Pop-Location
}
$targets = $args[0]
if (-not $targets) {
$targets = "x86_64-pc-windows-msvc", "aarch64-pc-windows-msvc"
}
Write-Host "Building artifacts for targets: $targets"
foreach ($target in $targets) {
Prebuild-Rust $target
Build-NodeBinaries $target
}

View File

@@ -1,4 +0,0 @@
#!/usr/bin/env bash
export RUST_LOG=info
exec ./lancedb server --port 0 --sql-port 0 --data-dir "${1}"

View File

@@ -0,0 +1,27 @@
# Manylinux Dockerfile with Rust, Node, and Lance dependencies installed.
# This container allows building the node modules native libraries in an
# environment with a very old glibc, so that we are compatible with a wide
# range of linux distributions.
ARG ARCH=x86_64
FROM quay.io/pypa/manylinux_2_28_${ARCH}
ARG ARCH=x86_64
ARG DOCKER_USER=default_user
# Protobuf is also installed as root.
COPY install_protobuf.sh install_protobuf.sh
RUN ./install_protobuf.sh ${ARCH}
ENV DOCKER_USER=${DOCKER_USER}
# Create a group and user, but only if it doesn't exist
RUN echo ${ARCH} && id -u ${DOCKER_USER} >/dev/null 2>&1 || adduser --user-group --create-home --uid ${DOCKER_USER} build_user
# We switch to the user to install Rust and Node, since those like to be
# installed at the user level.
USER ${DOCKER_USER}
COPY prepare_manylinux_node.sh prepare_manylinux_node.sh
RUN cp /prepare_manylinux_node.sh $HOME/ && \
cd $HOME && \
./prepare_manylinux_node.sh ${ARCH}

View File

@@ -0,0 +1,13 @@
#!/bin/bash
# Builds the node module for manylinux. Invoked by ci/build_linux_artifacts.sh.
set -e
ARCH=${1:-x86_64}
TARGET_TRIPLE=${2:-x86_64-unknown-linux-gnu}
# Alpine doesn't have .bashrc
FILE=$HOME/.bashrc && test -f $FILE && source $FILE
cd node
npm ci
npm run build-release
npm run pack-build -- -t $TARGET_TRIPLE

View File

@@ -0,0 +1,15 @@
#!/bin/bash
# Installs protobuf compiler. Should be run as root.
set -e
if [[ $1 == x86_64* ]]; then
ARCH=x86_64
else
# gnu target
ARCH=aarch_64
fi
PB_REL=https://github.com/protocolbuffers/protobuf/releases
PB_VERSION=23.1
curl -LO $PB_REL/download/v$PB_VERSION/protoc-$PB_VERSION-linux-$ARCH.zip
unzip protoc-$PB_VERSION-linux-$ARCH.zip -d /usr/local

View File

@@ -0,0 +1,21 @@
#!/bin/bash
set -e
install_node() {
echo "Installing node..."
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.34.0/install.sh | bash
source "$HOME"/.bashrc
nvm install --no-progress 18
}
install_rust() {
echo "Installing rust..."
curl https://sh.rustup.rs -sSf | bash -s -- -y
export PATH="$PATH:/root/.cargo/bin"
}
install_node
install_rust

View File

@@ -1,18 +0,0 @@
#!/usr/bin/env bash
#
# A script for running the given command together with a docker compose environment.
#
# Bring down the docker setup once the command is done running.
tear_down() {
docker compose -p fixture down
}
trap tear_down EXIT
set +xe
# Clean up any existing docker setup and bring up a new one.
docker compose -p fixture up --detach --wait || exit 1
"${@}"

View File

@@ -1,68 +0,0 @@
#!/usr/bin/env bash
#
# A script for running the given command together with the lancedb cli.
#
die() {
echo "$1"
exit 1
}
check_command_exists() {
command="${1}"
which ${command} &> /dev/null || \
die "Unable to locate command: ${command}. Did you install it?"
}
if [[ ! -e ./lancedb ]]; then
if [[ -v SOPHON_READ_TOKEN ]]; then
INPUT="lancedb-linux-x64"
gh release \
--repo lancedb/lancedb \
download ci-support-binaries \
--pattern "${INPUT}" \
|| die "failed to fetch cli."
check_command_exists openssl
openssl enc -aes-256-cbc \
-d -pbkdf2 \
-pass "env:SOPHON_READ_TOKEN" \
-in "${INPUT}" \
-out ./lancedb-linux-x64.tar.gz \
|| die "openssl failed"
TARGET="${INPUT}.tar.gz"
else
ARCH="x64"
if [[ $OSTYPE == 'darwin'* ]]; then
UNAME=$(uname -m)
if [[ $UNAME == 'arm64' ]]; then
ARCH='arm64'
fi
OSTYPE="macos"
elif [[ $OSTYPE == 'linux'* ]]; then
UNAME=$(uname -m)
if [[ $UNAME == 'aarch64' ]]; then
ARCH='arm64'
fi
OSTYPE="linux"
else
die "unknown OSTYPE: $OSTYPE"
fi
check_command_exists gh
TARGET="lancedb-${OSTYPE}-${ARCH}.tar.gz"
gh release \
--repo lancedb/sophon \
download lancedb-cli-v0.0.3 \
--pattern "${TARGET}" \
|| die "failed to fetch cli."
fi
check_command_exists tar
tar xvf "${TARGET}" || die "tar failed."
[[ -e ./lancedb ]] || die "failed to extract lancedb."
fi
SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
export CREATE_LANCEDB_TEST_CONNECTION_SCRIPT="${SCRIPT_DIR}/create_lancedb_test_connection.sh"
"${@}"

View File

@@ -1,268 +0,0 @@
import argparse
import re
import sys
import json
def run_command(command: str) -> str:
"""
Run a shell command and return stdout as a string.
If exit code is not 0, raise an exception with the stderr output.
"""
import subprocess
result = subprocess.run(command, shell=True, capture_output=True, text=True)
if result.returncode != 0:
raise Exception(f"Command failed with error: {result.stderr.strip()}")
return result.stdout.strip()
def get_latest_stable_version() -> str:
version_line = run_command("cargo info lance | grep '^version:'")
# Example output: "version: 0.35.0 (latest 0.37.0)"
match = re.search(r'\(latest ([0-9.]+)\)', version_line)
if match:
return match.group(1)
# Fallback: use the first version after 'version:'
return version_line.split("version:")[1].split()[0].strip()
def get_latest_preview_version() -> str:
lance_tags = run_command(
"git ls-remote --tags https://github.com/lancedb/lance.git | grep 'refs/tags/v[0-9beta.-]\\+$'"
).splitlines()
lance_tags = (
tag.split("refs/tags/")[1]
for tag in lance_tags
if "refs/tags/" in tag and "beta" in tag
)
from packaging.version import Version
latest = max(
(tag[1:] for tag in lance_tags if tag.startswith("v")), key=lambda t: Version(t)
)
return str(latest)
def extract_features(line: str) -> list:
"""
Extracts the features from a line in Cargo.toml.
Example: 'lance = { "version" = "=0.29.0", "features" = ["dynamodb"] }'
Returns: ['dynamodb']
"""
import re
match = re.search(r'"features"\s*=\s*\[\s*(.*?)\s*\]', line, re.DOTALL)
if match:
features_str = match.group(1)
return [f.strip().strip('"') for f in features_str.split(",") if f.strip()]
return []
def extract_default_features(line: str) -> bool:
"""
Checks if default-features = false is present in a line in Cargo.toml.
Example: 'lance = { "version" = "=0.29.0", default-features = false, "features" = ["dynamodb"] }'
Returns: True if default-features = false is present, False otherwise
"""
import re
match = re.search(r'default-features\s*=\s*false', line)
return match is not None
def dict_to_toml_line(package_name: str, config: dict) -> str:
"""
Converts a configuration dictionary to a TOML dependency line.
Dictionary insertion order is preserved (Python 3.7+), so the caller
controls the order of fields in the output.
Args:
package_name: The name of the package (e.g., "lance", "lance-io")
config: Dictionary with keys like "version", "path", "git", "tag", "features", "default-features"
The order of keys in this dict determines the order in the output.
Returns:
A properly formatted TOML line with a trailing newline
"""
# If only version is specified, use simple format
if len(config) == 1 and "version" in config:
return f'{package_name} = "{config["version"]}"\n'
# Otherwise, use inline table format
parts = []
for key, value in config.items():
if key == "default-features" and not value:
parts.append("default-features = false")
elif key == "features":
parts.append(f'"features" = {json.dumps(value)}')
elif isinstance(value, str):
parts.append(f'"{key}" = "{value}"')
else:
# This shouldn't happen with our current usage
parts.append(f'"{key}" = {json.dumps(value)}')
return f'{package_name} = {{ {", ".join(parts)} }}\n'
def update_cargo_toml(line_updater):
"""
Updates the Cargo.toml file by applying the line_updater function to each line.
The line_updater function should take a line as input and return the updated line.
"""
with open("Cargo.toml", "r") as f:
lines = f.readlines()
new_lines = []
lance_line = ""
is_parsing_lance_line = False
for line in lines:
if line.startswith("lance"):
# Check if this is a single-line or multi-line entry
# Single-line entries either:
# 1. End with } (complete inline table)
# 2. End with " (simple version string)
# Multi-line entries start with { but don't end with }
if line.strip().endswith("}") or line.strip().endswith('"'):
# Single-line entry - process immediately
new_lines.append(line_updater(line))
elif "{" in line and not line.strip().endswith("}"):
# Multi-line entry - start accumulating
lance_line = line
is_parsing_lance_line = True
else:
# Single-line entry without quotes or braces (shouldn't happen but handle it)
new_lines.append(line_updater(line))
elif is_parsing_lance_line:
lance_line += line
if line.strip().endswith("}"):
new_lines.append(line_updater(lance_line))
lance_line = ""
is_parsing_lance_line = False
else:
# Keep the line unchanged
new_lines.append(line)
with open("Cargo.toml", "w") as f:
f.writelines(new_lines)
def set_stable_version(version: str):
"""
Sets lines to
lance = { "version" = "=0.29.0", default-features = false, "features" = ["dynamodb"] }
lance-io = { "version" = "=0.29.0", default-features = false }
...
"""
def line_updater(line: str) -> str:
package_name = line.split("=", maxsplit=1)[0].strip()
# Build config in desired order: version, default-features, features
config = {"version": f"={version}"}
if extract_default_features(line):
config["default-features"] = False
features = extract_features(line)
if features:
config["features"] = features
return dict_to_toml_line(package_name, config)
update_cargo_toml(line_updater)
def set_preview_version(version: str):
"""
Sets lines to
lance = { "version" = "=0.29.0", default-features = false, "features" = ["dynamodb"], "tag" = "v0.29.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
lance-io = { "version" = "=0.29.0", default-features = false, "tag" = "v0.29.0-beta.2", "git" = "https://github.com/lancedb/lance.git" }
...
"""
def line_updater(line: str) -> str:
package_name = line.split("=", maxsplit=1)[0].strip()
# Build config in desired order: version, default-features, features, tag, git
config = {"version": f"={version}"}
if extract_default_features(line):
config["default-features"] = False
features = extract_features(line)
if features:
config["features"] = features
config["tag"] = f"v{version}"
config["git"] = "https://github.com/lancedb/lance.git"
return dict_to_toml_line(package_name, config)
update_cargo_toml(line_updater)
def set_local_version():
"""
Sets lines to
lance = { "path" = "../lance/rust/lance", default-features = false, "features" = ["dynamodb"] }
lance-io = { "path" = "../lance/rust/lance-io", default-features = false }
...
"""
def line_updater(line: str) -> str:
package_name = line.split("=", maxsplit=1)[0].strip()
# Build config in desired order: path, default-features, features
config = {"path": f"../lance/rust/{package_name}"}
if extract_default_features(line):
config["default-features"] = False
features = extract_features(line)
if features:
config["features"] = features
return dict_to_toml_line(package_name, config)
update_cargo_toml(line_updater)
parser = argparse.ArgumentParser(description="Set the version of the Lance package.")
parser.add_argument(
"version",
type=str,
help="The version to set for the Lance package. Use 'stable' for the latest stable version, 'preview' for latest preview version, or a specific version number (e.g., '0.1.0'). You can also specify 'local' to use a local path.",
)
args = parser.parse_args()
if args.version == "stable":
latest_stable_version = get_latest_stable_version()
print(
f"Found latest stable version: \033[1mv{latest_stable_version}\033[0m",
file=sys.stderr,
)
set_stable_version(latest_stable_version)
elif args.version == "preview":
latest_preview_version = get_latest_preview_version()
print(
f"Found latest preview version: \033[1mv{latest_preview_version}\033[0m",
file=sys.stderr,
)
set_preview_version(latest_preview_version)
elif args.version == "local":
set_local_version()
else:
# Parse the version number.
version = args.version
# Ignore initial v if present.
if version.startswith("v"):
version = version[1:]
if "beta" in version:
set_preview_version(version)
else:
set_stable_version(version)
print("Updating lockfiles...", file=sys.stderr, end="")
run_command("cargo metadata > /dev/null")
print(" done.", file=sys.stderr)

View File

@@ -1,27 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
AMEND=false
for arg in "$@"; do
if [[ "$arg" == "--amend" ]]; then
AMEND=true
fi
done
# This updates the lockfile without building
cargo metadata --quiet > /dev/null
pushd nodejs || exit 1
npm install --package-lock-only --silent
popd
if git diff --quiet --exit-code; then
echo "No lockfile changes to commit; skipping amend."
elif $AMEND; then
git add Cargo.lock nodejs/package-lock.json
git commit --amend --no-edit
else
git add Cargo.lock nodejs/package-lock.json
git commit -m "Update lockfiles"
fi

View File

@@ -70,23 +70,6 @@ plugins:
- mkdocs-jupyter
- render_swagger:
allow_arbitrary_locations: true
- redirects:
redirect_maps:
# Redirect the home page and other top-level markdown files. This enables maximum SEO benefit
# other sub-pages are handled by the injected JS in overrides/partials/header.html
'index.md': 'https://lancedb.com/docs/'
'guides/tables.md': 'https://lancedb.com/docs/tables/'
'ann_indexes.md': 'https://lancedb.com/docs/indexing/'
'basic.md': 'https://lancedb.com/docs/quickstart/'
'faq.md': 'https://lancedb.com/docs/faq/'
'embeddings/understanding_embeddings.md': 'https://lancedb.com/docs/embedding/'
'integrations.md': 'https://lancedb.com/docs/integrations/'
'examples.md': 'https://lancedb.com/docs/tutorials/'
'concepts/vector_search.md': 'https://lancedb.com/docs/search/vector-search/'
'troubleshooting.md': 'https://lancedb.com/docs/troubleshooting/'
'guides/storage.md': 'https://lancedb.com/docs/storage/integrations'
markdown_extensions:
- admonition
@@ -122,7 +105,8 @@ markdown_extensions:
nav:
- Home:
- LanceDB: index.md
- 🏃🏼‍♂️ Quick start: basic.md
- 👉 Quickstart: quickstart.md
- 🏃🏼‍♂️ Basic Usage: basic.md
- 📚 Concepts:
- Vector search: concepts/vector_search.md
- Indexing:
@@ -210,7 +194,6 @@ nav:
- Pandas and PyArrow: python/pandas_and_pyarrow.md
- Polars: python/polars_arrow.md
- DuckDB: python/duckdb.md
- Datafusion: python/datafusion.md
- LangChain:
- LangChain 🔗: integrations/langchain.md
- LangChain demo: notebooks/langchain_demo.ipynb
@@ -223,7 +206,6 @@ nav:
- PromptTools: integrations/prompttools.md
- dlt: integrations/dlt.md
- phidata: integrations/phidata.md
- Genkit: integrations/genkit.md
- 🎯 Examples:
- Overview: examples/index.md
- 🐍 Python:
@@ -256,7 +238,9 @@ nav:
- 👾 JavaScript (lancedb): js/globals.md
- 🦀 Rust: https://docs.rs/lancedb/latest/lancedb/
- Quick start: basic.md
- Getting Started:
- Quickstart: quickstart.md
- Basic Usage: basic.md
- Concepts:
- Vector search: concepts/vector_search.md
- Indexing:
@@ -266,7 +250,6 @@ nav:
- Data management: concepts/data_management.md
- Guides:
- Working with tables: guides/tables.md
- Working with SQL: guides/sql_querying.md
- Building an ANN index: ann_indexes.md
- Vector Search: search.md
- Full-text search (native): fts.md
@@ -343,7 +326,6 @@ nav:
- Pandas and PyArrow: python/pandas_and_pyarrow.md
- Polars: python/polars_arrow.md
- DuckDB: python/duckdb.md
- Datafusion: python/datafusion.md
- LangChain 🦜️🔗↗: integrations/langchain.md
- LangChain.js 🦜️🔗↗: https://js.langchain.com/docs/integrations/vectorstores/lancedb
- LlamaIndex 🦙↗: integrations/llamaIndex.md
@@ -352,7 +334,6 @@ nav:
- PromptTools: integrations/prompttools.md
- dlt: integrations/dlt.md
- phidata: integrations/phidata.md
- Genkit: integrations/genkit.md
- Examples:
- examples/index.md
- 🐍 Python:

View File

@@ -19,13 +19,7 @@
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
-->
<div id="deprecation-banner" style="background-color: #f8d7da; color: #721c24; padding: 1em; text-align: center;">
<p style="margin: 0; font-size: 1.1em;">
<strong>This documentation site is deprecated.</strong>
Please visit our new documentation site at <a href="https://lancedb.com/docs" style="color: #721c24; text-decoration: underline;">
lancedb.com/docs</a> for the latest information.
</p>
</div>
{% set class = "md-header" %}
{% if "navigation.tabs.sticky" in features %}
{% set class = class ~ " md-header--shadow md-header--lifted" %}
@@ -156,9 +150,9 @@
<div style="margin-left: 10px; margin-right: 5px;">
<a href="https://discord.com/invite/zMM32dvNtd" target="_blank" rel="noopener noreferrer">
<svg fill="#FFFFFF" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 50 50" width="25px" height="25px"><path d="M 41.625 10.769531 C 37.644531 7.566406 31.347656 7.023438 31.078125 7.003906 C 30.660156 6.96875 30.261719 7.203125 30.089844 7.589844 C 30.074219 7.613281 29.9375 7.929688 29.785156 8.421875 C 32.417969 8.867188 35.652344 9.761719 38.578125 11.578125 C 39.046875 11.867188 39.191406 12.484375 38.902344 12.953125 C 38.710938 13.261719 38.386719 13.429688 38.050781 13.429688 C 37.871094 13.429688 37.6875 13.378906 37.523438 13.277344 C 32.492188 10.15625 26.210938 10 25 10 C 23.789063 10 17.503906 10.15625 12.476563 13.277344 C 12.007813 13.570313 11.390625 13.425781 11.101563 12.957031 C 10.808594 12.484375 10.953125 11.871094 11.421875 11.578125 C 14.347656 9.765625 17.582031 8.867188 20.214844 8.425781 C 20.0625 7.929688 19.925781 7.617188 19.914063 7.589844 C 19.738281 7.203125 19.34375 6.960938 18.921875 7.003906 C 18.652344 7.023438 12.355469 7.566406 8.320313 10.8125 C 6.214844 12.761719 2 24.152344 2 34 C 2 34.175781 2.046875 34.34375 2.132813 34.496094 C 5.039063 39.605469 12.972656 40.941406 14.78125 41 C 14.789063 41 14.800781 41 14.8125 41 C 15.132813 41 15.433594 40.847656 15.621094 40.589844 L 17.449219 38.074219 C 12.515625 36.800781 9.996094 34.636719 9.851563 34.507813 C 9.4375 34.144531 9.398438 33.511719 9.765625 33.097656 C 10.128906 32.683594 10.761719 32.644531 11.175781 33.007813 C 11.234375 33.0625 15.875 37 25 37 C 34.140625 37 38.78125 33.046875 38.828125 33.007813 C 39.242188 32.648438 39.871094 32.683594 40.238281 33.101563 C 40.601563 33.515625 40.5625 34.144531 40.148438 34.507813 C 40.003906 34.636719 37.484375 36.800781 32.550781 38.074219 L 34.378906 40.589844 C 34.566406 40.847656 34.867188 41 35.1875 41 C 35.199219 41 35.210938 41 35.21875 41 C 37.027344 40.941406 44.960938 39.605469 47.867188 34.496094 C 47.953125 34.34375 48 34.175781 48 34 C 48 24.152344 43.785156 12.761719 41.625 10.769531 Z M 18.5 30 C 16.566406 30 15 28.210938 15 26 C 15 23.789063 16.566406 22 18.5 22 C 20.433594 22 22 23.789063 22 26 C 22 28.210938 20.433594 30 18.5 30 Z M 31.5 30 C 29.566406 30 28 28.210938 28 26 C 28 23.789063 29.566406 22 31.5 22 C 33.433594 22 35 23.789063 35 26 C 35 28.210938 33.433594 30 31.5 30 Z"/></svg>
</a>
</div>
<svg fill="#FFFFFF" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 50 50" width="25px" height="25px"><path d="M 41.625 10.769531 C 37.644531 7.566406 31.347656 7.023438 31.078125 7.003906 C 30.660156 6.96875 30.261719 7.203125 30.089844 7.589844 C 30.074219 7.613281 29.9375 7.929688 29.785156 8.421875 C 32.417969 8.867188 35.652344 9.761719 38.578125 11.578125 C 39.046875 11.867188 39.191406 12.484375 38.902344 12.953125 C 38.710938 13.261719 38.386719 13.429688 38.050781 13.429688 C 37.871094 13.429688 37.6875 13.378906 37.523438 13.277344 C 32.492188 10.15625 26.210938 10 25 10 C 23.789063 10 17.503906 10.15625 12.476563 13.277344 C 12.007813 13.570313 11.390625 13.425781 11.101563 12.957031 C 10.808594 12.484375 10.953125 11.871094 11.421875 11.578125 C 14.347656 9.765625 17.582031 8.867188 20.214844 8.425781 C 20.0625 7.929688 19.925781 7.617188 19.914063 7.589844 C 19.738281 7.203125 19.34375 6.960938 18.921875 7.003906 C 18.652344 7.023438 12.355469 7.566406 8.320313 10.8125 C 6.214844 12.761719 2 24.152344 2 34 C 2 34.175781 2.046875 34.34375 2.132813 34.496094 C 5.039063 39.605469 12.972656 40.941406 14.78125 41 C 14.789063 41 14.800781 41 14.8125 41 C 15.132813 41 15.433594 40.847656 15.621094 40.589844 L 17.449219 38.074219 C 12.515625 36.800781 9.996094 34.636719 9.851563 34.507813 C 9.4375 34.144531 9.398438 33.511719 9.765625 33.097656 C 10.128906 32.683594 10.761719 32.644531 11.175781 33.007813 C 11.234375 33.0625 15.875 37 25 37 C 34.140625 37 38.78125 33.046875 38.828125 33.007813 C 39.242188 32.648438 39.871094 32.683594 40.238281 33.101563 C 40.601563 33.515625 40.5625 34.144531 40.148438 34.507813 C 40.003906 34.636719 37.484375 36.800781 32.550781 38.074219 L 34.378906 40.589844 C 34.566406 40.847656 34.867188 41 35.1875 41 C 35.199219 41 35.210938 41 35.21875 41 C 37.027344 40.941406 44.960938 39.605469 47.867188 34.496094 C 47.953125 34.34375 48 34.175781 48 34 C 48 24.152344 43.785156 12.761719 41.625 10.769531 Z M 18.5 30 C 16.566406 30 15 28.210938 15 26 C 15 23.789063 16.566406 22 18.5 22 C 20.433594 22 22 23.789063 22 26 C 22 28.210938 20.433594 30 18.5 30 Z M 31.5 30 C 29.566406 30 28 28.210938 28 26 C 28 23.789063 29.566406 22 31.5 22 C 33.433594 22 35 23.789063 35 26 C 35 28.210938 33.433594 30 31.5 30 Z"/></svg>
</a>
</div>
<div style="margin-left: 5px; margin-right: 5px;">
<a href="https://twitter.com/lancedb" target="_blank" rel="noopener noreferrer">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0,0,256,256" width="25px" height="25px" fill-rule="nonzero"><g fill-opacity="0" fill="#ffffff" fill-rule="nonzero" stroke="none" stroke-width="1" stroke-linecap="butt" stroke-linejoin="miter" stroke-miterlimit="10" stroke-dasharray="" stroke-dashoffset="0" font-family="none" font-weight="none" font-size="none" text-anchor="none" style="mix-blend-mode: normal"><path d="M0,256v-256h256v256z" id="bgRectangle"></path></g><g fill="#ffffff" fill-rule="nonzero" stroke="none" stroke-width="1" stroke-linecap="butt" stroke-linejoin="miter" stroke-miterlimit="10" stroke-dasharray="" stroke-dashoffset="0" font-family="none" font-weight="none" font-size="none" text-anchor="none" style="mix-blend-mode: normal"><g transform="scale(4,4)"><path d="M57,17.114c-1.32,1.973 -2.991,3.707 -4.916,5.097c0.018,0.423 0.028,0.847 0.028,1.274c0,13.013 -9.902,28.018 -28.016,28.018c-5.562,0 -12.81,-1.948 -15.095,-4.423c0.772,0.092 1.556,0.138 2.35,0.138c4.615,0 8.861,-1.575 12.23,-4.216c-4.309,-0.079 -7.946,-2.928 -9.199,-6.84c1.96,0.308 4.447,-0.17 4.447,-0.17c0,0 -7.7,-1.322 -7.899,-9.779c2.226,1.291 4.46,1.231 4.46,1.231c0,0 -4.441,-2.734 -4.379,-8.195c0.037,-3.221 1.331,-4.953 1.331,-4.953c8.414,10.361 20.298,10.29 20.298,10.29c0,0 -0.255,-1.471 -0.255,-2.243c0,-5.437 4.408,-9.847 9.847,-9.847c2.832,0 5.391,1.196 7.187,3.111c2.245,-0.443 4.353,-1.263 6.255,-2.391c-0.859,3.44 -4.329,5.448 -4.329,5.448c0,0 2.969,-0.329 5.655,-1.55z"></path></g></g></svg>
@@ -179,77 +173,4 @@
{% include "partials/tabs.html" %}
{% endif %}
{% endif %}
</header>
<script>
(function() {
function checkPathAndRedirect() {
var banner = document.getElementById('deprecation-banner');
if (document.querySelector('meta[http-equiv="refresh"]')) {
return; // The redirects plugin is already handling this page.
}
var currentPath = window.location.pathname;
var cleanPath = currentPath.endsWith('/') && currentPath.length > 1
? currentPath.slice(0, -1)
: currentPath;
// These are the ONLY paths that should remain on the old site
var apiPaths = [
'/lancedb/python',
'/lancedb/javascript',
'/lancedb/js',
'/lancedb/api_reference'
];
var isApiPage = apiPaths.some(function(apiPath) {
return cleanPath.startsWith(apiPath);
});
if (isApiPage) {
if (banner) {
banner.style.display = 'none';
}
} else {
if (banner) {
banner.style.display = 'block';
}
// Add a noindex meta tag to prevent indexing of the old docs for SEO
var noindexMeta = document.createElement('meta');
noindexMeta.setAttribute('name', 'robots');
noindexMeta.setAttribute('content', 'noindex, follow');
document.head.appendChild(noindexMeta);
// Add a canonical link pointing to the new docs to credit the new site for SEO
var canonicalLink = document.createElement('link');
canonicalLink.setAttribute('rel', 'canonical');
canonicalLink.setAttribute('href', 'https://lancedb.com/docs');
document.head.appendChild(canonicalLink);
window.location.replace('https://lancedb.com/docs');
}
}
// Run the check only if doc is ready. This makes sure we catch the initial load
// and redirect.
if (document.readyState === 'loading') {
document.addEventListener('DOMContentLoaded', checkPathAndRedirect);
} else {
checkPathAndRedirect();
}
// Use an interval to handle subsequent navigation clicks.
var lastPath = window.location.pathname;
setInterval(function() {
if (window.location.pathname !== lastPath) {
lastPath = window.location.pathname;
checkPathAndRedirect();
}
}, 2000); // keeping it at 2 seconds so it's easy for the user to see what's happening
})();
</script>
</header>

View File

@@ -1,5 +0,0 @@
{% extends "base.html" %}
{% block announce %}
📚 Starting June 1st, 2025, please use <a href="https://lancedb.github.io/documentation" target="_blank" rel="noopener noreferrer">lancedb.github.io/documentation</a> for the latest docs.
{% endblock %}

docs/package-lock.json (generated)
View File

@@ -19,7 +19,7 @@
},
"../node": {
"name": "vectordb",
"version": "0.21.2-beta.0",
"version": "0.12.0",
"cpu": [
"x64",
"arm64"
@@ -65,11 +65,11 @@
"uuid": "^9.0.0"
},
"optionalDependencies": {
"@lancedb/vectordb-darwin-arm64": "0.21.2-beta.0",
"@lancedb/vectordb-darwin-x64": "0.21.2-beta.0",
"@lancedb/vectordb-linux-arm64-gnu": "0.21.2-beta.0",
"@lancedb/vectordb-linux-x64-gnu": "0.21.2-beta.0",
"@lancedb/vectordb-win32-x64-msvc": "0.21.2-beta.0"
"@lancedb/vectordb-darwin-arm64": "0.12.0",
"@lancedb/vectordb-darwin-x64": "0.12.0",
"@lancedb/vectordb-linux-arm64-gnu": "0.12.0",
"@lancedb/vectordb-linux-x64-gnu": "0.12.0",
"@lancedb/vectordb-win32-x64-msvc": "0.12.0"
},
"peerDependencies": {
"@apache-arrow/ts": "^14.0.2",

View File

@@ -5,4 +5,3 @@ mkdocstrings[python]==0.25.2
griffe
mkdocs-render-swagger-plugin
pydantic
mkdocs-redirects

View File

@@ -291,7 +291,7 @@ Product quantization can lead to approximately `16 * sizeof(float32) / 1 = 64` t
`num_partitions` is used to decide how many partitions the first level `IVF` index uses.
A higher number of partitions can lead to more efficient I/O during queries and better accuracy, but training takes much more time.
On the `SIFT-1M` dataset, our benchmark shows that keeping each partition at 4K-8K rows leads to a good latency/recall tradeoff.
On the `SIFT-1M` dataset, our benchmark shows that keeping each partition at 1K-4K rows leads to a good latency/recall tradeoff.
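For illustration, a minimal sketch with the TypeScript client (assuming a 1536-dimension `vector` column; tune the numbers for your data):
```ts
import * as lancedb from "@lancedb/lancedb";

const db = await lancedb.connect("data/sample-lancedb");
const table = await db.openTable("my_vectors");
// IVF_PQ: 256 partitions, 96 PQ sub-vectors (96 divides 1536)
await table.createIndex("vector", {
  config: lancedb.Index.ivfPq({ numPartitions: 256, numSubVectors: 96 }),
});
```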
`num_sub_vectors` specifies how many Product Quantization (PQ) short codes to generate on each vector. The number should be a factor of the vector dimension. Because
PQ is a lossy compression of the original vector, a higher `num_sub_vectors` usually results in

Binary file not shown (image, 1.7 MiB)

Binary file not shown (image, 40 KiB)

View File

@@ -1,4 +1,4 @@
# Quick start
# Basic Usage
!!! info "LanceDB can be run in a number of ways:"

View File

@@ -13,7 +13,7 @@ The following concepts are important to keep in mind:
- Data is versioned, with each insert operation creating a new version of the dataset and an update to the manifest that tracks versions via metadata
!!! note
1. First, each version contains metadata and just the new/updated data in your transaction. So if you have 100 versions, they aren't 100 duplicates of the same data. However, they do have 100x the metadata overhead of a single version, which can result in slower queries.
1. First, each version contains metadata and just the new/updated data in your transaction. So if you have 100 versions, they aren't 100 duplicates of the same data. However, they do have 100x the metadata overhead of a single version, which can result in slower queries.
2. Second, these versions exist to keep LanceDB scalable and consistent. We do not immediately blow away old versions when creating new ones because other clients might be in the middle of querying the old version. It's important to retain older versions for as long as they might be queried.
## What are fragments?
@@ -37,10 +37,6 @@ Depending on the use case and dataset, optimal compaction will have different re
- It's always better to use *batch* inserts rather than adding one row at a time (to avoid fragments that are too small). If single-row inserts are unavoidable, run compaction on a regular basis to merge them into larger fragments.
- Keep the number of fragments under 100, which is suitable for most use cases (for *really* large datasets of >500M rows, more fragments might be needed)
!!! note
LanceDB Cloud/Enterprise supports [auto-compaction](https://docs.lancedb.com/enterprise/architecture/architecture#write-path) which automatically optimizes fragments in the background as data changes.
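In LanceDB OSS, you trigger compaction yourself. A minimal sketch with the TypeScript client (the `compaction` stats field name is assumed from the client's optimize stats):
```ts
import * as lancedb from "@lancedb/lancedb";

const db = await lancedb.connect("data/sample-lancedb");
const table = await db.openTable("my_table");
// Merge small fragments (and prune old versions) in one call
const stats = await table.optimize();
console.log(stats.compaction);
```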
## Deletion
Although Lance allows you to delete rows from a dataset, it does not actually delete the data immediately. It simply marks the row as deleted in the `DataFile` that represents a fragment. For a given version of the dataset, each fragment can have up to one deletion file (if no rows were ever deleted from that fragment, it will not have a deletion file). This is important to keep in mind because it means that the data is still there, and can be recovered if needed, as long as that version still exists based on your backup policy.
@@ -54,9 +50,13 @@ Reindexing is the process of updating the index to account for new data, keeping
Both LanceDB OSS and Cloud support reindexing, but the process (at least for now) is different for each, depending on the type of index.
In LanceDB OSS, re-indexing happens synchronously when you call either `create_index` or `optimize` on a table. In LanceDB Cloud, re-indexing happens asynchronously as you add and update data in your table.
When a reindex job is triggered in the background, the entire data is reindexed, but in the interim as new queries come in, LanceDB will combine results from the existing index with exhaustive kNN search on the new data. This is done to ensure that you're still searching on all your data, but it does come at a performance cost. The more data you add without reindexing, the more noticeable the latency impact (due to exhaustive search) becomes.
By default, queries will search new data even if it has yet to be indexed. This is done using brute-force methods, such as kNN for vector search, whose results are combined with the fast index search results. This ensures that you're always searching over all your data, but it comes at a performance cost: without reindexing, adding more data to a table will make queries slower and more expensive. This behavior can be disabled by setting the [fast_search](https://lancedb.github.io/lancedb/python/python/#lancedb.query.AsyncQuery.fast_search) parameter, which instructs the query to ignore un-indexed data.
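For example, with the TypeScript client you can opt out of scanning un-indexed rows (a sketch; `table` is an open table and `queryVector` a placeholder):
```ts
const results = await table
  .query()
  .nearestTo(queryVector) // placeholder query vector
  .fastSearch() // only search indexed data, for lower latency
  .limit(10)
  .toArray();
```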
### Vector reindex
* LanceDB Cloud/Enterprise supports [automatic incremental reindexing](https://docs.lancedb.com/core#vector-index) for vector, scalar, and FTS indices, where a background process will trigger a new index build for you automatically when new data is added or modified in a dataset
* LanceDB Cloud supports incremental reindexing, where a background process will trigger a new index build for you automatically when new data is added to a dataset
* LanceDB OSS requires you to manually trigger a reindex operation -- we are working on adding incremental reindexing to LanceDB OSS as well
### FTS reindex
FTS reindexing is supported in both LanceDB OSS and Cloud, but the index must be rebuilt manually once a significant amount of new data has been added. We [updated](https://github.com/lancedb/lancedb/pull/762) Tantivy's default heap size from 128MB to 1GB in LanceDB, making reindexing up to 10x faster than with the default settings.

View File

@@ -1,97 +0,0 @@
# VoyageAI Embeddings : Multimodal
VoyageAI embeddings can also be used to embed both text and image data. Only some of the models support image data; you can check the list
under [https://docs.voyageai.com/docs/multimodal-embeddings](https://docs.voyageai.com/docs/multimodal-embeddings)
Supported parameters (to be passed in `create` method) are:
| Parameter | Type | Default Value | Description |
|---|---|-------------------------|-------------------------------------------|
| `name` | `str` | `"voyage-multimodal-3"` | The model ID of the VoyageAI model to use |
Usage Example:
```python
import base64
import os
from io import BytesIO
import requests
import lancedb
from lancedb.pydantic import LanceModel, Vector
from lancedb.embeddings import get_registry
import pandas as pd
os.environ['VOYAGE_API_KEY'] = 'YOUR_VOYAGE_API_KEY'
db = lancedb.connect(".lancedb")
func = get_registry().get("voyageai").create(name="voyage-multimodal-3")
def image_to_base64(image_bytes: bytes):
buffered = BytesIO(image_bytes)
img_str = base64.b64encode(buffered.getvalue())
return img_str.decode("utf-8")
class Images(LanceModel):
label: str
image_uri: str = func.SourceField() # image uri as the source
image_bytes: str = func.SourceField() # image bytes base64 encoded as the source
vector: Vector(func.ndims()) = func.VectorField() # vector column
vec_from_bytes: Vector(func.ndims()) = func.VectorField() # Another vector column
if "images" in db.table_names():
db.drop_table("images")
table = db.create_table("images", schema=Images)
labels = ["cat", "cat", "dog", "dog", "horse", "horse"]
uris = [
"http://farm1.staticflickr.com/53/167798175_7c7845bbbd_z.jpg",
"http://farm1.staticflickr.com/134/332220238_da527d8140_z.jpg",
"http://farm9.staticflickr.com/8387/8602747737_2e5c2a45d4_z.jpg",
"http://farm5.staticflickr.com/4092/5017326486_1f46057f5f_z.jpg",
"http://farm9.staticflickr.com/8216/8434969557_d37882c42d_z.jpg",
"http://farm6.staticflickr.com/5142/5835678453_4f3a4edb45_z.jpg",
]
# get each uri as bytes
images_bytes = [image_to_base64(requests.get(uri).content) for uri in uris]
table.add(
pd.DataFrame({"label": labels, "image_uri": uris, "image_bytes": images_bytes})
)
```
Now we can search using text from both the default vector column and the custom vector column
```python
# text search
actual = table.search("man's best friend").limit(1).to_pydantic(Images)[0]  # default vector column
print(actual.label) # prints "dog"
frombytes = (
table.search("man's best friend", vector_column_name="vec_from_bytes")
.limit(1)
.to_pydantic(Images)[0]
)
print(frombytes.label)
```
Because we're using a multi-modal embedding function, we can also search using images
```python
# image search
from PIL import Image  # required for Image.open below
query_image_uri = "http://farm1.staticflickr.com/200/467715466_ed4a31801f_z.jpg"
image_bytes = requests.get(query_image_uri).content
query_image = Image.open(BytesIO(image_bytes))
actual = table.search(query_image).limit(1).to_pydantic(Images)[0]  # default vector column
print(actual.label == "dog")
# image search using a custom vector column
other = (
table.search(query_image, vector_column_name="vec_from_bytes")
.limit(1)
.to_pydantic(Images)[0]
)
print(other.label)
```

View File

@@ -1,60 +0,0 @@
# SQL Querying
You can use DuckDB and Apache DataFusion to query your LanceDB tables using SQL.
This guide will show how to query Lance tables using both.
We will re-use the dataset [created previously](./tables.md):
```python
import lancedb
db = lancedb.connect("data/sample-lancedb")
data = [
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [5.9, 26.5], "item": "bar", "price": 20.0}
]
table = db.create_table("pd_table", data=data)
```
## Querying a LanceDB Table with DuckDB
The `to_lance` method converts the LanceDB table to a `LanceDataset`, which is accessible to DuckDB through the Arrow compatibility layer.
To query the resulting Lance dataset in DuckDB, all you need to do is reference the dataset by the same name in your SQL query.
```python
import duckdb
arrow_table = table.to_lance()
duckdb.query("SELECT * FROM arrow_table")
```
| vector | item | price |
| ----------- | ---- | ----- |
| [3.1, 4.1] | foo | 10.0 |
| [5.9, 26.5] | bar | 20.0 |
## Querying a LanceDB Table with Apache DataFusion
Add the required imports before doing any querying.
=== "Python"
```python
--8<-- "python/python/tests/docs/test_guide_tables.py:import-lancedb"
--8<-- "python/python/tests/docs/test_guide_tables.py:import-session-context"
--8<-- "python/python/tests/docs/test_guide_tables.py:import-ffi-dataset"
```
Register the previously created table with the DataFusion session context.
=== "Python"
```python
--8<-- "python/python/tests/docs/test_guide_tables.py:lance_sql_basic"
```
| vector | item | price |
| ----------- | ---- | ----- |
| [3.1, 4.1] | foo | 10.0 |
| [5.9, 26.5] | bar | 20.0 |

View File

@@ -397,6 +397,117 @@ For **read-only access**, LanceDB will need a policy such as:
}
```
#### DynamoDB Commit Store for concurrent writes
By default, S3 does not support concurrent writes. Having two or more processes
writing to the same table at the same time can lead to data corruption. This is
because S3, unlike other object stores, does not have any atomic put or copy
operation.
To enable concurrent writes, you can configure LanceDB to use a DynamoDB table
as a commit store. This table will be used to coordinate writes between
different processes. To enable this feature, you must modify your connection
URI to use the `s3+ddb` scheme and add a query parameter `ddbTableName` with the
name of the table to use.
=== "Python"
=== "Sync API"
```python
import lancedb
db = lancedb.connect(
"s3+ddb://bucket/path?ddbTableName=my-dynamodb-table",
)
```
=== "Async API"
```python
import lancedb
async_db = await lancedb.connect_async(
"s3+ddb://bucket/path?ddbTableName=my-dynamodb-table",
)
```
=== "JavaScript"
```javascript
const lancedb = require("lancedb");
const db = await lancedb.connect(
"s3+ddb://bucket/path?ddbTableName=my-dynamodb-table",
);
```
The DynamoDB table must be created with the following schema:
- Hash key: `base_uri` (string)
- Range key: `version` (number)
You can create this programmatically with:
=== "Python"
<!-- skip-test -->
```python
import boto3
dynamodb = boto3.client("dynamodb")
table = dynamodb.create_table(
TableName=table_name,
KeySchema=[
{"AttributeName": "base_uri", "KeyType": "HASH"},
{"AttributeName": "version", "KeyType": "RANGE"},
],
AttributeDefinitions=[
{"AttributeName": "base_uri", "AttributeType": "S"},
{"AttributeName": "version", "AttributeType": "N"},
],
ProvisionedThroughput={"ReadCapacityUnits": 1, "WriteCapacityUnits": 1},
)
```
=== "JavaScript"
<!-- skip-test -->
```javascript
import {
CreateTableCommand,
DynamoDBClient,
} from "@aws-sdk/client-dynamodb";
const dynamodb = new DynamoDBClient({
region: CONFIG.awsRegion,
credentials: {
accessKeyId: CONFIG.awsAccessKeyId,
secretAccessKey: CONFIG.awsSecretAccessKey,
},
endpoint: CONFIG.awsEndpoint,
});
const command = new CreateTableCommand({
TableName: table_name,
AttributeDefinitions: [
{
AttributeName: "base_uri",
AttributeType: "S",
},
{
AttributeName: "version",
AttributeType: "N",
},
],
KeySchema: [
{ AttributeName: "base_uri", KeyType: "HASH" },
{ AttributeName: "version", KeyType: "RANGE" },
],
ProvisionedThroughput: {
ReadCapacityUnits: 1,
WriteCapacityUnits: 1,
},
});
await dynamodb.send(command);
```
#### S3-compatible stores

View File

@@ -765,7 +765,7 @@ This can be used to update zero to all rows depending on how many rows match the
];
const tbl = await db.createTable("my_table", data)
await tbl.update({
await tbl.update({
values: { vector: [10, 10] },
where: "x = 2"
});
@@ -787,9 +787,9 @@ This can be used to update zero to all rows depending on how many rows match the
];
const tbl = await db.createTable("my_table", data)
await tbl.update({
where: "x = 2",
values: { vector: [10, 10] }
await tbl.update({
where: "x = 2",
values: { vector: [10, 10] }
});
```

View File

@@ -1,183 +0,0 @@
### genkitx-lancedb
This is a LanceDB plugin for the Genkit framework. It allows you to use LanceDB for ingesting and retrieving data with Genkit.
![integration-banner-genkit](https://github.com/user-attachments/assets/a6cc28af-98e9-4425-b87c-7ab139bd7893)
### Installation
```bash
pnpm install genkitx-lancedb
```
### Usage
Adding LanceDB plugin to your genkit instance.
```ts
import { lancedbIndexerRef, lancedb, lancedbRetrieverRef, WriteMode } from 'genkitx-lancedb';
import { textEmbedding004, vertexAI } from '@genkit-ai/vertexai';
import { gemini } from '@genkit-ai/vertexai';
import { z, genkit } from 'genkit';
import { Document } from 'genkit/retriever';
import { chunk } from 'llm-chunk';
import { readFile } from 'fs/promises';
import path from 'path';
import pdf from 'pdf-parse/lib/pdf-parse';
const ai = genkit({
plugins: [
// vertexAI provides the textEmbedding004 embedder
vertexAI(),
// the local vector store requires an embedder to translate from text to vector
lancedb([
{
dbUri: '.db', // optional lancedb uri, default to .db
tableName: 'table', // optional table name, default to table
embedder: textEmbedding004,
},
]),
],
});
```
You can run this app with the following command:
```bash
genkit start -- tsx --watch src/index.ts
```
This will add LanceDB as a retriever and indexer to the Genkit instance. You can see it in the GUI view:
<img width="1710" alt="Screenshot 2025-05-11 at 7 21 05PM" src="https://github.com/user-attachments/assets/e752f7f4-785b-4797-a11e-72ab06a531b7" />
**Testing retrieval on a sample table**
Let's see the raw retrieval results
<img width="1710" alt="Screenshot 2025-05-11 at 7 21 05PM" src="https://github.com/user-attachments/assets/b8d356ed-8421-4790-8fc0-d6af563b9657" />
On running this query, you'll see 5 results fetched from the LanceDB table, where each result looks something like this:
<img width="1417" alt="Screenshot 2025-05-11 at 7 21 18PM" src="https://github.com/user-attachments/assets/77429525-36e2-4da6-a694-e58c1cf9eb83" />
## Creating a custom RAG flow
Now that we've seen how you can use LanceDB in a Genkit pipeline, let's refine the flow and create a RAG. A RAG flow consists of an indexer and a retriever, with the retriever's outputs postprocessed and fed into an LLM for the final response.
### Creating custom indexer flows
You can also create custom indexer flows, utilizing more options and features provided by LanceDB.
```ts
export const menuPdfIndexer = lancedbIndexerRef({
// Using all defaults, for dbUri, tableName, and embedder, etc
});
const chunkingConfig = {
minLength: 1000,
maxLength: 2000,
splitter: 'sentence',
overlap: 100,
delimiters: '',
} as any;
async function extractTextFromPdf(filePath: string) {
const pdfFile = path.resolve(filePath);
const dataBuffer = await readFile(pdfFile);
const data = await pdf(dataBuffer);
return data.text;
}
export const indexMenu = ai.defineFlow(
{
name: 'indexMenu',
inputSchema: z.string().describe('PDF file path'),
outputSchema: z.void(),
},
async (filePath: string) => {
filePath = path.resolve(filePath);
// Read the pdf.
const pdfTxt = await ai.run('extract-text', () =>
extractTextFromPdf(filePath)
);
// Divide the pdf text into segments.
const chunks = await ai.run('chunk-it', async () =>
chunk(pdfTxt, chunkingConfig)
);
// Convert chunks of text into documents to store in the index.
const documents = chunks.map((text) => {
return Document.fromText(text, { filePath });
});
// Add documents to the index.
await ai.index({
indexer: menuPdfIndexer,
documents,
options: {
writeMode: WriteMode.Overwrite,
} as any
});
}
);
```
<img width="1316" alt="Screenshot 2025-05-11 at 8 35 56PM" src="https://github.com/user-attachments/assets/e2a20ce4-d1d0-4fa2-9a84-f2cc26e3a29f" />
In your console, you can see the logs
<img width="511" alt="Screenshot 2025-05-11 at 7 19 14PM" src="https://github.com/user-attachments/assets/243f26c5-ed38-40b6-b661-002f40f0423a" />
### Creating custom retriever flows
You can also create custom retriever flows, utilizing more options and features provided by LanceDB.
```ts
export const menuRetriever = lancedbRetrieverRef({
tableName: "table", // Use the same table name as the indexer.
displayName: "Menu", // Use a custom display name.
export const menuQAFlow = ai.defineFlow(
{ name: "Menu", inputSchema: z.string(), outputSchema: z.string() },
async (input: string) => {
// retrieve relevant documents
const docs = await ai.retrieve({
retriever: menuRetriever,
query: input,
options: {
k: 3,
},
});
const extractedContent = docs.map(doc => {
if (doc.content && Array.isArray(doc.content) && doc.content.length > 0) {
if (doc.content[0].media && doc.content[0].media.url) {
return doc.content[0].media.url;
}
}
return "No content found";
});
console.log("Extracted content:", extractedContent);
const { text } = await ai.generate({
model: gemini('gemini-2.0-flash'),
prompt: `
You are acting as a helpful AI assistant that can answer
questions about the food available on the menu at Genkit Grub Pub.
Use only the context provided to answer the question.
If you don't know, do not make up an answer.
Do not add or change items on the menu.
Context:
${extractedContent.join('\n\n')}
Question: ${input}`,
docs,
});
return text;
}
);
```
Now, using our retrieval flow, we can ask questions about the ingested PDF
<img width="1306" alt="Screenshot 2025-05-11 at 7 18 45PM" src="https://github.com/user-attachments/assets/86c66b13-7c12-4d5f-9d81-ae36bfb1c346" />

View File

@@ -1,53 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / BooleanQuery
# Class: BooleanQuery
Represents a full-text query interface.
This interface defines the structure and behavior for full-text queries,
including methods to retrieve the query type and convert the query to a dictionary format.
## Implements
- [`FullTextQuery`](../interfaces/FullTextQuery.md)
## Constructors
### new BooleanQuery()
```ts
new BooleanQuery(queries): BooleanQuery
```
Creates an instance of BooleanQuery.
#### Parameters
* **queries**: [[`Occur`](../enumerations/Occur.md), [`FullTextQuery`](../interfaces/FullTextQuery.md)][]
An array of (Occur, FullTextQuery) pairs to combine.
Occur specifies whether the query must match or should match.
#### Returns
[`BooleanQuery`](BooleanQuery.md)
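#### Example
A minimal sketch combining two match queries (assumes a full-text index on a `text` column and an open `table`):
```ts
import { BooleanQuery, MatchQuery, Occur } from "@lancedb/lancedb";

// "puppy" must match; "cute" only boosts matching rows
const query = new BooleanQuery([
  [Occur.Must, new MatchQuery("puppy", "text")],
  [Occur.Should, new MatchQuery("cute", "text")],
]);
const hits = await table.query().fullTextSearch(query).limit(10).toArray();
```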
## Methods
### queryType()
```ts
queryType(): FullTextQueryType
```
The type of the full-text query.
#### Returns
[`FullTextQueryType`](../enumerations/FullTextQueryType.md)
#### Implementation of
[`FullTextQuery`](../interfaces/FullTextQuery.md).[`queryType`](../interfaces/FullTextQuery.md#querytype)

View File

@@ -25,51 +25,6 @@ the underlying connection has been closed.
## Methods
### cloneTable()
```ts
abstract cloneTable(
targetTableName,
sourceUri,
options?): Promise<Table>
```
Clone a table from a source table.
A shallow clone creates a new table that shares the underlying data files
with the source table but has its own independent manifest. This allows
both the source and cloned tables to evolve independently while initially
sharing the same data, deletion, and index files.
#### Parameters
* **targetTableName**: `string`
The name of the target table to create.
* **sourceUri**: `string`
The URI of the source table to clone from.
* **options?**
Clone options.
* **options.isShallow?**: `boolean`
Whether to perform a shallow clone (defaults to true).
* **options.sourceTag?**: `string`
The tag of the source table to clone.
* **options.sourceVersion?**: `number`
The version of the source table to clone.
* **options.targetNamespace?**: `string`[]
The namespace for the target table (defaults to root namespace).
#### Returns
`Promise`&lt;[`Table`](Table.md)&gt;
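#### Example
A sketch based on the signature above (table name, URI, and version are illustrative):
```ts
// Shallow-clone a specific version of a source table
const clone = await db.cloneTable("events_backup", "s3://bucket/events.lance", {
  isShallow: true,
  sourceVersion: 42,
});
```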
***
### close()
```ts
@@ -90,8 +45,6 @@ Any attempt to use the connection after it is closed will result in an error.
### createEmptyTable()
#### createEmptyTable(name, schema, options)
```ts
abstract createEmptyTable(
name,
@@ -101,7 +54,7 @@ abstract createEmptyTable(
Creates a new empty Table
##### Parameters
#### Parameters
* **name**: `string`
The name of the table.
@@ -110,39 +63,8 @@ Creates a new empty Table
The schema of the table
* **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
Additional options (backwards compatibility)
##### Returns
`Promise`&lt;[`Table`](Table.md)&gt;
#### createEmptyTable(name, schema, namespace, options)
```ts
abstract createEmptyTable(
name,
schema,
namespace?,
options?): Promise<Table>
```
Creates a new empty Table
##### Parameters
* **name**: `string`
The name of the table.
* **schema**: [`SchemaLike`](../type-aliases/SchemaLike.md)
The schema of the table
* **namespace?**: `string`[]
The namespace to create the table in (defaults to root namespace)
* **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
Additional options
##### Returns
#### Returns
`Promise`&lt;[`Table`](Table.md)&gt;
@@ -150,10 +72,10 @@ Creates a new empty Table
### createTable()
#### createTable(options, namespace)
#### createTable(options)
```ts
abstract createTable(options, namespace?): Promise<Table>
abstract createTable(options): Promise<Table>
```
Creates a new Table and initialize it with new data.
@@ -163,9 +85,6 @@ Creates a new Table and initialize it with new data.
* **options**: `object` & `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
The options object.
* **namespace?**: `string`[]
The namespace to create the table in (defaults to root namespace)
##### Returns
`Promise`&lt;[`Table`](Table.md)&gt;
@@ -191,38 +110,6 @@ Creates a new Table and initialize it with new data.
to be inserted into the table
* **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
Additional options (backwards compatibility)
##### Returns
`Promise`&lt;[`Table`](Table.md)&gt;
#### createTable(name, data, namespace, options)
```ts
abstract createTable(
name,
data,
namespace?,
options?): Promise<Table>
```
Creates a new Table and initialize it with new data.
##### Parameters
* **name**: `string`
The name of the table.
* **data**: [`TableLike`](../type-aliases/TableLike.md) \| `Record`&lt;`string`, `unknown`&gt;[]
Non-empty Array of Records
to be inserted into the table
* **namespace?**: `string`[]
The namespace to create the table in (defaults to root namespace)
* **options?**: `Partial`&lt;[`CreateTableOptions`](../interfaces/CreateTableOptions.md)&gt;
Additional options
##### Returns
@@ -247,16 +134,11 @@ Return a brief description of the connection
### dropAllTables()
```ts
abstract dropAllTables(namespace?): Promise<void>
abstract dropAllTables(): Promise<void>
```
Drop all tables in the database.
#### Parameters
* **namespace?**: `string`[]
The namespace to drop tables from (defaults to root namespace).
#### Returns
`Promise`&lt;`void`&gt;
@@ -266,7 +148,7 @@ Drop all tables in the database.
### dropTable()
```ts
abstract dropTable(name, namespace?): Promise<void>
abstract dropTable(name): Promise<void>
```
Drop an existing table.
@@ -276,9 +158,6 @@ Drop an existing table.
* **name**: `string`
The name of the table to drop.
* **namespace?**: `string`[]
The namespace of the table (defaults to root namespace).
#### Returns
`Promise`&lt;`void`&gt;
@@ -302,10 +181,7 @@ Return true if the connection has not been closed
### openTable()
```ts
abstract openTable(
name,
namespace?,
options?): Promise<Table>
abstract openTable(name, options?): Promise<Table>
```
Open a table in the database.
@@ -315,11 +191,7 @@ Open a table in the database.
* **name**: `string`
The name of the table
* **namespace?**: `string`[]
The namespace of the table (defaults to root namespace)
* **options?**: `Partial`&lt;[`OpenTableOptions`](../interfaces/OpenTableOptions.md)&gt;
Additional options
#### Returns
@@ -329,8 +201,6 @@ Open a table in the database.
### tableNames()
#### tableNames(options)
```ts
abstract tableNames(options?): Promise<string[]>
```
@@ -339,35 +209,12 @@ List all the table names in this database.
Tables will be returned in lexicographical order.
##### Parameters
* **options?**: `Partial`&lt;[`TableNamesOptions`](../interfaces/TableNamesOptions.md)&gt;
options to control the
paging / start point (backwards compatibility)
##### Returns
`Promise`&lt;`string`[]&gt;
#### tableNames(namespace, options)
```ts
abstract tableNames(namespace?, options?): Promise<string[]>
```
List all the table names in this database.
Tables will be returned in lexicographical order.
##### Parameters
* **namespace?**: `string`[]
The namespace to list tables from (defaults to root namespace)
#### Parameters
* **options?**: `Partial`&lt;[`TableNamesOptions`](../interfaces/TableNamesOptions.md)&gt;
options to control the
paging / start point
##### Returns
#### Returns
`Promise`&lt;`string`[]&gt;

View File

@@ -1,85 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / HeaderProvider
# Class: `abstract` HeaderProvider
Abstract base class for providing custom headers for each request.
Users can implement this interface to provide dynamic headers for various purposes
such as authentication (OAuth tokens, API keys), request tracking (correlation IDs),
custom metadata, or any other header-based requirements. The provider is called
before each request to ensure fresh header values are always used.
## Examples
Simple JWT token provider:
```typescript
class JWTProvider extends HeaderProvider {
constructor(private token: string) {
super();
}
getHeaders(): Record<string, string> {
return { authorization: `Bearer ${this.token}` };
}
}
```
Provider with request tracking:
```typescript
class RequestTrackingProvider extends HeaderProvider {
constructor(private sessionId: string) {
super();
}
getHeaders(): Record<string, string> {
return {
"X-Session-Id": this.sessionId,
"X-Request-Id": `req-${Date.now()}`
};
}
}
```
## Extended by
- [`StaticHeaderProvider`](StaticHeaderProvider.md)
- [`OAuthHeaderProvider`](OAuthHeaderProvider.md)
## Constructors
### new HeaderProvider()
```ts
new HeaderProvider(): HeaderProvider
```
#### Returns
[`HeaderProvider`](HeaderProvider.md)
## Methods
### getHeaders()
```ts
abstract getHeaders(): Record<string, string>
```
Get the latest headers to be added to requests.
This method is called before each request to the remote LanceDB server.
Implementations should return headers that will be merged with existing headers.
#### Returns
`Record`&lt;`string`, `string`&gt;
Dictionary of header names to values to add to the request.
#### Throws
If unable to fetch headers, the exception will be propagated and the request will fail.

View File

@@ -194,37 +194,6 @@ currently is also a memory intensive operation.
***
### ivfRq()
```ts
static ivfRq(options?): Index
```
Create an IvfRq index
IVF-RQ (RabitQ Quantization) compresses vectors using RabitQ quantization
and organizes them into IVF partitions.
The compression scheme is called RabitQ quantization. Each dimension is quantized into a small number of bits.
The parameters `num_bits` and `num_partitions` control this process, providing a tradeoff
between index size (and thus search speed) and index accuracy.
The partitioning process is called IVF and the `num_partitions` parameter controls how
many groups to create.
Note that training an IVF RQ index on a large dataset is a slow operation and
currently is also a memory intensive operation.
#### Parameters
* **options?**: `Partial`&lt;[`IvfRqOptions`](../interfaces/IvfRqOptions.md)&gt;
#### Returns
[`Index`](Index.md)
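#### Example
A sketch based on the description above (option names are assumed from `IvfRqOptions`):
```ts
import { Index } from "@lancedb/lancedb";

// Trade accuracy for index size via numBits; numPartitions controls IVF grouping
await table.createIndex("vector", {
  config: Index.ivfRq({ numPartitions: 256, numBits: 1 }),
});
```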
***
### labelList()
```ts

View File

@@ -40,8 +40,6 @@ Creates an instance of MatchQuery.
- `boost`: The boost factor for the query (default is 1.0).
- `fuzziness`: The fuzziness level for the query (default is 0).
- `maxExpansions`: The maximum number of terms to consider for fuzzy matching (default is 50).
- `operator`: The logical operator to use for combining terms in the query (default is "OR").
- `prefixLength`: The number of beginning characters being unchanged for fuzzy matching.
* **options.boost?**: `number`
@@ -49,10 +47,6 @@ Creates an instance of MatchQuery.
* **options.maxExpansions?**: `number`
* **options.operator?**: [`Operator`](../enumerations/Operator.md)
* **options.prefixLength?**: `number`
#### Returns
[`MatchQuery`](MatchQuery.md)
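#### Example
For illustration, a fuzzy match requiring all terms (a sketch; `Operator.And` is an assumed enum member):
```ts
import { MatchQuery, Operator } from "@lancedb/lancedb";

// Allow one edit per term; require every term to match
const query = new MatchQuery("grey kitten", "text", {
  fuzziness: 1,
  operator: Operator.And,
});
const hits = await table.query().fullTextSearch(query).limit(5).toArray();
```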

View File

@@ -33,7 +33,7 @@ Construct a MergeInsertBuilder. __Internal use only.__
### execute()
```ts
execute(data, execOptions?): Promise<MergeResult>
execute(data): Promise<MergeStats>
```
Executes the merge insert operation
@@ -42,37 +42,11 @@ Executes the merge insert operation
* **data**: [`Data`](../type-aliases/Data.md)
* **execOptions?**: `Partial`&lt;[`WriteExecutionOptions`](../interfaces/WriteExecutionOptions.md)&gt;
#### Returns
`Promise`&lt;[`MergeResult`](../interfaces/MergeResult.md)&gt;
`Promise`&lt;[`MergeStats`](../interfaces/MergeStats.md)&gt;
the merge result
***
### useIndex()
```ts
useIndex(useIndex): MergeInsertBuilder
```
Controls whether to use indexes for the merge operation.
When set to `true` (the default), the operation will use an index if available
on the join key for improved performance. When set to `false`, it forces a full
table scan even if an index exists. This can be useful for benchmarking or when
the query optimizer chooses a suboptimal path.
#### Parameters
* **useIndex**: `boolean`
Whether to use indices for the merge operation. Defaults to `true`.
#### Returns
[`MergeInsertBuilder`](MergeInsertBuilder.md)
Statistics about the merge operation: counts of inserted, updated, and deleted rows
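#### Example
A sketch of `useIndex` in a merge-insert chain (key column and data are illustrative):
```ts
const result = await table
  .mergeInsert("id")
  .whenMatchedUpdateAll()
  .whenNotMatchedInsertAll()
  .useIndex(false) // force a full table scan, e.g. for benchmarking
  .execute(newRows);
```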
***

View File

@@ -38,12 +38,9 @@ Creates an instance of MultiMatchQuery.
* **options?**
Optional parameters for the multi-match query.
- `boosts`: An array of boost factors for each column (default is 1.0 for all).
- `operator`: The logical operator to use for combining terms in the query (default is "OR").
* **options.boosts?**: `number`[]
* **options.operator?**: [`Operator`](../enumerations/Operator.md)
#### Returns
[`MultiMatchQuery`](MultiMatchQuery.md)

View File

@@ -1,29 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / NativeJsHeaderProvider
# Class: NativeJsHeaderProvider
JavaScript HeaderProvider implementation that wraps a JavaScript callback.
This is the only native header provider - all header provider implementations
should provide a JavaScript function that returns headers.
## Constructors
### new NativeJsHeaderProvider()
```ts
new NativeJsHeaderProvider(getHeadersCallback): NativeJsHeaderProvider
```
Create a new JsHeaderProvider from a JavaScript callback
#### Parameters
* **getHeadersCallback**
#### Returns
[`NativeJsHeaderProvider`](NativeJsHeaderProvider.md)
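#### Example
A minimal sketch (the token source is illustrative):
```ts
// The callback is invoked before each request, so headers stay fresh
const provider = new NativeJsHeaderProvider(() => ({
  authorization: `Bearer ${process.env.LANCEDB_TOKEN ?? ""}`,
}));
```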

View File

@@ -1,108 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / OAuthHeaderProvider
# Class: OAuthHeaderProvider
Example implementation: OAuth token provider with automatic refresh.
This is an example implementation showing how to manage OAuth tokens
with automatic refresh when they expire.
## Example
```typescript
async function fetchToken(): Promise<TokenResponse> {
const response = await fetch("https://oauth.example.com/token", {
method: "POST",
body: JSON.stringify({
grant_type: "client_credentials",
client_id: "your-client-id",
client_secret: "your-client-secret"
}),
headers: { "Content-Type": "application/json" }
});
const data = await response.json();
return {
accessToken: data.access_token,
expiresIn: data.expires_in
};
}
const provider = new OAuthHeaderProvider(fetchToken);
const headers = provider.getHeaders();
// Returns: {"authorization": "Bearer <your-token>"}
```
## Extends
- [`HeaderProvider`](HeaderProvider.md)
## Constructors
### new OAuthHeaderProvider()
```ts
new OAuthHeaderProvider(tokenFetcher, refreshBufferSeconds): OAuthHeaderProvider
```
Initialize the OAuth provider.
#### Parameters
* **tokenFetcher**
Function to fetch new tokens. Should return object with 'accessToken' and optionally 'expiresIn'.
* **refreshBufferSeconds**: `number` = `300`
Seconds before expiry to refresh token. Default 300 (5 minutes).
#### Returns
[`OAuthHeaderProvider`](OAuthHeaderProvider.md)
#### Overrides
[`HeaderProvider`](HeaderProvider.md).[`constructor`](HeaderProvider.md#constructors)
## Methods
### getHeaders()
```ts
getHeaders(): Record<string, string>
```
Get OAuth headers, refreshing token if needed.
Note: This is synchronous for now as the Rust implementation expects sync.
In a real implementation, this would need to handle async properly.
#### Returns
`Record`&lt;`string`, `string`&gt;
Headers with Bearer token authorization.
#### Throws
If unable to fetch or refresh token.
#### Overrides
[`HeaderProvider`](HeaderProvider.md).[`getHeaders`](HeaderProvider.md#getheaders)
***
### refreshToken()
```ts
refreshToken(): Promise<void>
```
Manually refresh the token.
Call this before using getHeaders() to ensure token is available.
#### Returns
`Promise`&lt;`void`&gt;

View File

@@ -1,250 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / PermutationBuilder
# Class: PermutationBuilder
A PermutationBuilder for creating data permutations with splits, shuffling, and filtering.
This class provides a TypeScript wrapper around the native Rust PermutationBuilder,
offering methods to configure data splits, shuffling, and filtering before executing
the permutation to create a new table.
## Methods
### execute()
```ts
execute(): Promise<Table>
```
Execute the permutation and create the destination table.
#### Returns
`Promise`&lt;[`Table`](Table.md)&gt;
A Promise that resolves to the new Table instance
#### Example
```ts
const permutationTable = await builder.execute();
console.log(`Created table: ${permutationTable.name}`);
```
***
### filter()
```ts
filter(filter): PermutationBuilder
```
Configure filtering for the permutation.
#### Parameters
* **filter**: `string`
SQL filter expression
#### Returns
[`PermutationBuilder`](PermutationBuilder.md)
A new PermutationBuilder instance
#### Example
```ts
builder.filter("age > 18 AND status = 'active'");
```
***
### persist()
```ts
persist(connection, tableName): PermutationBuilder
```
Configure the permutation to be persisted.
#### Parameters
* **connection**: [`Connection`](Connection.md)
The connection to persist the permutation to
* **tableName**: `string`
The name of the table to create
#### Returns
[`PermutationBuilder`](PermutationBuilder.md)
A new PermutationBuilder instance
#### Example
```ts
builder.persist(connection, "permutation_table");
```
***
### shuffle()
```ts
shuffle(options): PermutationBuilder
```
Configure shuffling for the permutation.
#### Parameters
* **options**: [`ShuffleOptions`](../interfaces/ShuffleOptions.md)
Configuration for shuffling
#### Returns
[`PermutationBuilder`](PermutationBuilder.md)
A new PermutationBuilder instance
#### Example
```ts
// Basic shuffle
builder.shuffle({ seed: 42 });
// Shuffle with clump size
builder.shuffle({ seed: 42, clumpSize: 10 });
```
***
### splitCalculated()
```ts
splitCalculated(options): PermutationBuilder
```
Configure calculated splits for the permutation.
#### Parameters
* **options**: [`SplitCalculatedOptions`](../interfaces/SplitCalculatedOptions.md)
Configuration for calculated splitting
#### Returns
[`PermutationBuilder`](PermutationBuilder.md)
A new PermutationBuilder instance
#### Example
```ts
builder.splitCalculated("user_id % 3");
```
***
### splitHash()
```ts
splitHash(options): PermutationBuilder
```
Configure hash-based splits for the permutation.
#### Parameters
* **options**: [`SplitHashOptions`](../interfaces/SplitHashOptions.md)
Configuration for hash-based splitting
#### Returns
[`PermutationBuilder`](PermutationBuilder.md)
A new PermutationBuilder instance
#### Example
```ts
builder.splitHash({
columns: ["user_id"],
splitWeights: [70, 30],
discardWeight: 0
});
```
***
### splitRandom()
```ts
splitRandom(options): PermutationBuilder
```
Configure random splits for the permutation.
#### Parameters
* **options**: [`SplitRandomOptions`](../interfaces/SplitRandomOptions.md)
Configuration for random splitting
#### Returns
[`PermutationBuilder`](PermutationBuilder.md)
A new PermutationBuilder instance
#### Example
```ts
// Split by ratios
builder.splitRandom({ ratios: [0.7, 0.3], seed: 42 });
// Split by counts
builder.splitRandom({ counts: [1000, 500], seed: 42 });
// Split with fixed size
builder.splitRandom({ fixed: 100, seed: 42 });
```
***
### splitSequential()
```ts
splitSequential(options): PermutationBuilder
```
Configure sequential splits for the permutation.
#### Parameters
* **options**: [`SplitSequentialOptions`](../interfaces/SplitSequentialOptions.md)
Configuration for sequential splitting
#### Returns
[`PermutationBuilder`](PermutationBuilder.md)
A new PermutationBuilder instance
#### Example
```ts
// Split by ratios
builder.splitSequential({ ratios: [0.8, 0.2] });
// Split by counts
builder.splitSequential({ counts: [800, 200] });
// Split with fixed size
builder.splitSequential({ fixed: 1000 });
```

View File

@@ -19,10 +19,7 @@ including methods to retrieve the query type and convert the query to a dictiona
### new PhraseQuery()
```ts
new PhraseQuery(
query,
column,
options?): PhraseQuery
new PhraseQuery(query, column): PhraseQuery
```
Creates an instance of `PhraseQuery`.
@@ -35,12 +32,6 @@ Creates an instance of `PhraseQuery`.
* **column**: `string`
The name of the column to search within.
* **options?**
Optional parameters for the phrase query.
- `slop`: The maximum number of intervening unmatched positions allowed between words in the phrase (default is 0).
* **options.slop?**: `number`
#### Returns
[`PhraseQuery`](PhraseQuery.md)
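A hedged sketch of constructing a phrase query; `table` is a placeholder, and the `slop` option applies only on the side of this diff that still accepts an options argument:
```ts
// Match the exact phrase in the "text" column; slop = 1 (assumed
// available) tolerates one unmatched position between the words.
const query = new PhraseQuery("vector database", "text", { slop: 1 });
const hits = await table.query().fullTextSearch(query).limit(10).toArray();
```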

View File

@@ -14,7 +14,7 @@ A builder for LanceDB queries.
## Extends
- `StandardQueryBase`&lt;`NativeQuery`&gt;
- [`QueryBase`](QueryBase.md)&lt;`NativeQuery`&gt;
## Properties
@@ -26,7 +26,7 @@ protected inner: Query | Promise<Query>;
#### Inherited from
`StandardQueryBase.inner`
[`QueryBase`](QueryBase.md).[`inner`](QueryBase.md#inner)
## Methods
@@ -73,14 +73,14 @@ AnalyzeExec verbose=true, metrics=[]
#### Inherited from
`StandardQueryBase.analyzePlan`
[`QueryBase`](QueryBase.md).[`analyzePlan`](QueryBase.md#analyzeplan)
***
### execute()
```ts
protected execute(options?): AsyncGenerator<RecordBatch<any>, void, unknown>
protected execute(options?): RecordBatchIterator
```
Execute the query and return the results as an
@@ -91,7 +91,7 @@ Execute the query and return the results as an
#### Returns
`AsyncGenerator`&lt;`RecordBatch`&lt;`any`&gt;, `void`, `unknown`&gt;
[`RecordBatchIterator`](RecordBatchIterator.md)
#### See
@@ -107,7 +107,7 @@ single query)
#### Inherited from
`StandardQueryBase.execute`
[`QueryBase`](QueryBase.md).[`execute`](QueryBase.md#execute)
***
@@ -143,7 +143,7 @@ const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();
#### Inherited from
`StandardQueryBase.explainPlan`
[`QueryBase`](QueryBase.md).[`explainPlan`](QueryBase.md#explainplan)
***
@@ -164,7 +164,7 @@ Use [Table#optimize](Table.md#optimize) to index all un-indexed data.
#### Inherited from
`StandardQueryBase.fastSearch`
[`QueryBase`](QueryBase.md).[`fastSearch`](QueryBase.md#fastsearch)
***
@@ -194,7 +194,7 @@ Use `where` instead
#### Inherited from
`StandardQueryBase.filter`
[`QueryBase`](QueryBase.md).[`filter`](QueryBase.md#filter)
***
@@ -216,7 +216,7 @@ fullTextSearch(query, options?): this
#### Inherited from
`StandardQueryBase.fullTextSearch`
[`QueryBase`](QueryBase.md).[`fullTextSearch`](QueryBase.md#fulltextsearch)
***
@@ -241,7 +241,7 @@ called then every valid row from the table will be returned.
#### Inherited from
`StandardQueryBase.limit`
[`QueryBase`](QueryBase.md).[`limit`](QueryBase.md#limit)
***
@@ -325,10 +325,6 @@ nearestToText(query, columns?): Query
offset(offset): this
```
Set the number of rows to skip before returning results.
This is useful for pagination.
#### Parameters
* **offset**: `number`
@@ -339,30 +335,7 @@ This is useful for pagination.
#### Inherited from
`StandardQueryBase.offset`
***
### outputSchema()
```ts
outputSchema(): Promise<Schema<any>>
```
Returns the schema of the output that will be returned by this query.
This can be used to inspect the types and names of the columns that will be
returned by the query before executing it.
#### Returns
`Promise`&lt;`Schema`&lt;`any`&gt;&gt;
An Arrow Schema describing the output columns.
#### Inherited from
`StandardQueryBase.outputSchema`
[`QueryBase`](QueryBase.md).[`offset`](QueryBase.md#offset)
***
@@ -415,7 +388,7 @@ object insertion order is easy to get wrong and `Map` is more foolproof.
#### Inherited from
`StandardQueryBase.select`
[`QueryBase`](QueryBase.md).[`select`](QueryBase.md#select)
***
@@ -437,7 +410,7 @@ Collect the results as an array of objects.
#### Inherited from
`StandardQueryBase.toArray`
[`QueryBase`](QueryBase.md).[`toArray`](QueryBase.md#toarray)
***
@@ -463,7 +436,7 @@ ArrowTable.
#### Inherited from
`StandardQueryBase.toArrow`
[`QueryBase`](QueryBase.md).[`toArrow`](QueryBase.md#toarrow)
***
@@ -498,7 +471,7 @@ on the filter column(s).
#### Inherited from
`StandardQueryBase.where`
[`QueryBase`](QueryBase.md).[`where`](QueryBase.md#where)
***
@@ -520,4 +493,4 @@ order to perform hybrid search.
#### Inherited from
`StandardQueryBase.withRowId`
[`QueryBase`](QueryBase.md).[`withRowId`](QueryBase.md#withrowid)

View File

@@ -15,11 +15,12 @@ Common methods supported by all query types
## Extended by
- [`TakeQuery`](TakeQuery.md)
- [`Query`](Query.md)
- [`VectorQuery`](VectorQuery.md)
## Type Parameters
**NativeQueryType** *extends* `NativeQuery` \| `NativeVectorQuery` \| `NativeTakeQuery`
**NativeQueryType** *extends* `NativeQuery` \| `NativeVectorQuery`
## Implements
@@ -81,7 +82,7 @@ AnalyzeExec verbose=true, metrics=[]
### execute()
```ts
protected execute(options?): AsyncGenerator<RecordBatch<any>, void, unknown>
protected execute(options?): RecordBatchIterator
```
Execute the query and return the results as an
@@ -92,7 +93,7 @@ Execute the query and return the results as an
#### Returns
`AsyncGenerator`&lt;`RecordBatch`&lt;`any`&gt;, `void`, `unknown`&gt;
[`RecordBatchIterator`](RecordBatchIterator.md)
#### See
@@ -140,22 +141,101 @@ const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();
***
### outputSchema()
### fastSearch()
```ts
outputSchema(): Promise<Schema<any>>
fastSearch(): this
```
Returns the schema of the output that will be returned by this query.
Skip searching un-indexed data. This can make search faster, but will miss
any data that is not yet indexed.
This can be used to inspect the types and names of the columns that will be
returned by the query before executing it.
Use [Table#optimize](Table.md#optimize) to index all un-indexed data.
#### Returns
`Promise`&lt;`Schema`&lt;`any`&gt;&gt;
`this`
An Arrow Schema describing the output columns.
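A hedged sketch of `fastSearch`; `table` and the query vector are placeholders:
```ts
// Only consult indexed rows: rows added since the last optimize()
// are skipped in exchange for lower latency.
const results = await table
  .query()
  .nearestTo([0.5, 0.2])
  .fastSearch()
  .limit(10)
  .toArray();
```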
***
### ~~filter()~~
```ts
filter(predicate): this
```
A filter statement to be applied to this query.
#### Parameters
* **predicate**: `string`
#### Returns
`this`
#### See
where
#### Deprecated
Use `where` instead
***
### fullTextSearch()
```ts
fullTextSearch(query, options?): this
```
#### Parameters
* **query**: `string` \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
* **options?**: `Partial`&lt;[`FullTextSearchOptions`](../interfaces/FullTextSearchOptions.md)&gt;
#### Returns
`this`
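A hedged sketch; the `columns` option is assumed to restrict the search to specific indexed columns:
```ts
// Assumes an FTS index exists on the "text" column.
const hits = await table
  .query()
  .fullTextSearch("vector database", { columns: ["text"] })
  .limit(5)
  .toArray();
```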
***
### limit()
```ts
limit(limit): this
```
Set the maximum number of results to return.
By default, a plain search has no limit. If this method is not
called then every valid row from the table will be returned.
#### Parameters
* **limit**: `number`
#### Returns
`this`
***
### offset()
```ts
offset(offset): this
```
#### Parameters
* **offset**: `number`
#### Returns
`this`
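A hedged pagination sketch combining `limit` and `offset`; the table and filter are placeholders:
```ts
// Third page of 20 rows from a filtered scan.
const page3 = await table
  .query()
  .where("status = 'active'")
  .limit(20)
  .offset(40)
  .toArray();
```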
***
@@ -248,6 +328,37 @@ ArrowTable.
***
### where()
```ts
where(predicate): this
```
A filter statement to be applied to this query.
The filter should be supplied as an SQL query string. For example:
#### Parameters
* **predicate**: `string`
#### Returns
`this`
#### Example
```ts
x > 10
y > 0 AND y < 100
x > 5 OR y = 'test'
```
Filtering performance can often be improved by creating a scalar index
on the filter column(s).
***
### withRowId()
```ts

View File

@@ -0,0 +1,43 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / RecordBatchIterator
# Class: RecordBatchIterator
## Implements
- `AsyncIterator`&lt;`RecordBatch`&gt;
## Constructors
### new RecordBatchIterator()
```ts
new RecordBatchIterator(promise?): RecordBatchIterator
```
#### Parameters
* **promise?**: `Promise`&lt;`RecordBatchIterator`&gt;
#### Returns
[`RecordBatchIterator`](RecordBatchIterator.md)
## Methods
### next()
```ts
next(): Promise<IteratorResult<RecordBatch<any>, any>>
```
#### Returns
`Promise`&lt;`IteratorResult`&lt;`RecordBatch`&lt;`any`&gt;, `any`&gt;&gt;
#### Implementation of
`AsyncIterator.next`
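A hedged sketch of draining the iterator by hand, since only `next()` is documented here; `iterator` is a placeholder:
```ts
// Consume RecordBatch results until the iterator reports done.
let result = await iterator.next();
while (!result.done) {
  console.log(result.value.numRows); // Arrow RecordBatch row count
  result = await iterator.next();
}
```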

View File

@@ -1,88 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / Session
# Class: Session
A session for managing caches and object stores across LanceDB operations.
Sessions allow you to configure cache sizes for index and metadata caches,
which can significantly impact memory use and performance. They can
also be re-used across multiple connections to share the same cache state.
## Constructors
### new Session()
```ts
new Session(indexCacheSizeBytes?, metadataCacheSizeBytes?): Session
```
Create a new session with custom cache sizes.
- `indexCacheSizeBytes`: The size of the index cache in bytes.
Index data is stored in memory in this cache to speed up queries.
Defaults to 6GB if not specified.
- `metadataCacheSizeBytes`: The size of the metadata cache in bytes.
The metadata cache stores file metadata and schema information in memory.
This cache improves scan and write performance.
Defaults to 1GB if not specified.
#### Parameters
* **indexCacheSizeBytes?**: `null` \| `bigint`
* **metadataCacheSizeBytes?**: `null` \| `bigint`
#### Returns
[`Session`](Session.md)
## Methods
### approxNumItems()
```ts
approxNumItems(): number
```
Get the approximate number of items cached in the session.
#### Returns
`number`
***
### sizeBytes()
```ts
sizeBytes(): bigint
```
Get the current size of the session caches in bytes.
#### Returns
`bigint`
***
### default()
```ts
static default(): Session
```
Create a session with default cache sizes.
This is equivalent to creating a session with 6GB index cache
and 1GB metadata cache.
#### Returns
[`Session`](Session.md)
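A hedged sketch of both construction paths; cache sizes are `bigint` byte counts and the figures below are arbitrary:
```ts
// 8 GiB index cache, 2 GiB metadata cache.
const custom = new Session(8n * 1024n ** 3n, 2n * 1024n ** 3n);
// Defaults: 6 GiB index cache, 1 GiB metadata cache.
const standard = Session.default();
```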

View File

@@ -1,70 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / StaticHeaderProvider
# Class: StaticHeaderProvider
Example implementation: A simple header provider that returns static headers.
This is an example implementation showing how to create a HeaderProvider
for cases where headers don't change during the session.
## Example
```typescript
const provider = new StaticHeaderProvider({
authorization: "Bearer my-token",
"X-Custom-Header": "custom-value"
});
const headers = provider.getHeaders();
// Returns: {authorization: 'Bearer my-token', 'X-Custom-Header': 'custom-value'}
```
## Extends
- [`HeaderProvider`](HeaderProvider.md)
## Constructors
### new StaticHeaderProvider()
```ts
new StaticHeaderProvider(headers): StaticHeaderProvider
```
Initialize with static headers.
#### Parameters
* **headers**: `Record`&lt;`string`, `string`&gt;
Headers to return for every request.
#### Returns
[`StaticHeaderProvider`](StaticHeaderProvider.md)
#### Overrides
[`HeaderProvider`](HeaderProvider.md).[`constructor`](HeaderProvider.md#constructors)
## Methods
### getHeaders()
```ts
getHeaders(): Record<string, string>
```
Return the static headers.
#### Returns
`Record`&lt;`string`, `string`&gt;
Copy of the static headers.
#### Overrides
[`HeaderProvider`](HeaderProvider.md).[`getHeaders`](HeaderProvider.md#getheaders)

View File

@@ -40,7 +40,7 @@ Returns the name of the table
### add()
```ts
abstract add(data, options?): Promise<AddResult>
abstract add(data, options?): Promise<void>
```
Insert records into this Table.
@@ -54,17 +54,14 @@ Insert records into this Table.
#### Returns
`Promise`&lt;[`AddResult`](../interfaces/AddResult.md)&gt;
A promise that resolves to an object
containing the new version number of the table
`Promise`&lt;`void`&gt;
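A hedged sketch; `result.version` exists only on the side of this diff where `add` resolves to an [`AddResult`](../interfaces/AddResult.md):
```ts
// Insert two rows and (where supported) read the new table version.
const result = await table.add([
  { id: 1, vector: [0.1, 0.2] },
  { id: 2, vector: [0.3, 0.4] },
]);
```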
***
### addColumns()
```ts
abstract addColumns(newColumnTransforms): Promise<AddColumnsResult>
abstract addColumns(newColumnTransforms): Promise<void>
```
Add new columns with defined values.
@@ -79,17 +76,14 @@ Add new columns with defined values.
#### Returns
`Promise`&lt;[`AddColumnsResult`](../interfaces/AddColumnsResult.md)&gt;
A promise that resolves to an object
containing the new version number of the table after adding the columns.
`Promise`&lt;`void`&gt;
***
### alterColumns()
```ts
abstract alterColumns(columnAlterations): Promise<AlterColumnsResult>
abstract alterColumns(columnAlterations): Promise<void>
```
Alter the name or nullability of columns.
@@ -102,10 +96,7 @@ Alter the name or nullability of columns.
#### Returns
`Promise`&lt;[`AlterColumnsResult`](../interfaces/AlterColumnsResult.md)&gt;
A promise that resolves to an object
containing the new version number of the table after altering the columns.
`Promise`&lt;`void`&gt;
***
@@ -261,7 +252,7 @@ await table.createIndex("my_float_col");
### delete()
```ts
abstract delete(predicate): Promise<DeleteResult>
abstract delete(predicate): Promise<void>
```
Delete the rows that satisfy the predicate.
@@ -272,10 +263,7 @@ Delete the rows that satisfy the predicate.
#### Returns
`Promise`&lt;[`DeleteResult`](../interfaces/DeleteResult.md)&gt;
A promise that resolves to an object
containing the new version number of the table
`Promise`&lt;`void`&gt;
***
@@ -296,7 +284,7 @@ Return a brief description of the table
### dropColumns()
```ts
abstract dropColumns(columnNames): Promise<DropColumnsResult>
abstract dropColumns(columnNames): Promise<void>
```
Drop one or more columns from the dataset
@@ -315,10 +303,7 @@ then call ``cleanup_files`` to remove the old files.
#### Returns
`Promise`&lt;[`DropColumnsResult`](../interfaces/DropColumnsResult.md)&gt;
A promise that resolves to an object
containing the new version number of the table after dropping the columns.
`Promise`&lt;`void`&gt;
***
@@ -612,7 +597,7 @@ of the given query
#### Parameters
* **query**: `string` \| [`IntoVector`](../type-aliases/IntoVector.md) \| [`MultiVector`](../type-aliases/MultiVector.md) \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
* **query**: `string` \| [`IntoVector`](../type-aliases/IntoVector.md) \| [`FullTextQuery`](../interfaces/FullTextQuery.md)
the query, a vector or string
* **queryType?**: `string`
@@ -674,48 +659,6 @@ console.log(tags); // { "v1": { version: 1, manifestSize: ... } }
***
### takeOffsets()
```ts
abstract takeOffsets(offsets): TakeQuery
```
Create a query that returns a subset of the rows in the table.
#### Parameters
* **offsets**: `number`[]
The offsets of the rows to return.
#### Returns
[`TakeQuery`](TakeQuery.md)
A builder that can be used to parameterize the query.
***
### takeRowIds()
```ts
abstract takeRowIds(rowIds): TakeQuery
```
Create a query that returns a subset of the rows in the table.
#### Parameters
* **rowIds**: `number`[]
The row ids of the rows to return.
#### Returns
[`TakeQuery`](TakeQuery.md)
A builder that can be used to parameterize the query.
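A hedged sketch of both take methods; the offsets and row ids are placeholders:
```ts
// Fetch three rows by physical offset, projecting a single column.
const byOffset = await table.takeOffsets([0, 5, 42]).select(["id"]).toArray();
// Fetch rows by row id, e.g. ids captured earlier via withRowId().
const byRowId = await table.takeRowIds([7, 9]).toArray();
```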
***
### toArrow()
```ts
@@ -735,7 +678,7 @@ Return the table as an arrow table
#### update(opts)
```ts
abstract update(opts): Promise<UpdateResult>
abstract update(opts): Promise<void>
```
Update existing records in the Table
@@ -746,10 +689,7 @@ Update existing records in the Table
##### Returns
`Promise`&lt;[`UpdateResult`](../interfaces/UpdateResult.md)&gt;
A promise that resolves to an object containing
the number of rows updated and the new version number
`Promise`&lt;`void`&gt;
##### Example
@@ -760,7 +700,7 @@ table.update({where:"x = 2", values:{"vector": [10, 10]}})
#### update(opts)
```ts
abstract update(opts): Promise<UpdateResult>
abstract update(opts): Promise<void>
```
Update existing records in the Table
@@ -771,10 +711,7 @@ Update existing records in the Table
##### Returns
`Promise`&lt;[`UpdateResult`](../interfaces/UpdateResult.md)&gt;
A promise that resolves to an object containing
the number of rows updated and the new version number
`Promise`&lt;`void`&gt;
##### Example
@@ -785,7 +722,7 @@ table.update({where:"x = 2", valuesSql:{"x": "x + 1"}})
#### update(updates, options)
```ts
abstract update(updates, options?): Promise<UpdateResult>
abstract update(updates, options?): Promise<void>
```
Update existing records in the Table
@@ -808,6 +745,10 @@ repeatedly calling this method.
* **updates**: `Record`&lt;`string`, `string`&gt; \| `Map`&lt;`string`, `string`&gt;
the
columns to update
Keys in the map should specify the name of the column to update.
Values in the map provide the new value of the column. These can
be SQL literal strings (e.g. "7" or "'foo'") or they can be expressions
based on the row being updated (e.g. "my_col + 1")
* **options?**: `Partial`&lt;[`UpdateOptions`](../interfaces/UpdateOptions.md)&gt;
additional options to control
@@ -815,15 +756,7 @@ repeatedly calling this method.
##### Returns
`Promise`&lt;[`UpdateResult`](../interfaces/UpdateResult.md)&gt;
A promise that resolves to an object
containing the number of rows updated and the new version number
Keys in the map should specify the name of the column to update.
Values in the map provide the new value of the column. These can
be SQL literal strings (e.g. "7" or "'foo'") or they can be expressions
based on the row being updated (e.g. "my_col + 1")
`Promise`&lt;`void`&gt;
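A hedged sketch of the map overload, assuming [`UpdateOptions`](../interfaces/UpdateOptions.md) carries the `where` predicate:
```ts
// Raise prices 10% for one category; the value is an SQL expression
// evaluated against each matching row.
await table.update(new Map([["price", "price * 1.1"]]), {
  where: "category = 'books'",
});
```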
***
@@ -841,7 +774,7 @@ by `query`.
#### Parameters
* **vector**: [`IntoVector`](../type-aliases/IntoVector.md) \| [`MultiVector`](../type-aliases/MultiVector.md)
* **vector**: [`IntoVector`](../type-aliases/IntoVector.md)
#### Returns

View File

@@ -1,288 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / TakeQuery
# Class: TakeQuery
A query that returns a subset of the rows in the table.
## Extends
- [`QueryBase`](QueryBase.md)&lt;`NativeTakeQuery`&gt;
## Properties
### inner
```ts
protected inner: TakeQuery | Promise<TakeQuery>;
```
#### Inherited from
[`QueryBase`](QueryBase.md).[`inner`](QueryBase.md#inner)
## Methods
### analyzePlan()
```ts
analyzePlan(): Promise<string>
```
Executes the query and returns the physical query plan annotated with runtime metrics.
This is useful for debugging and performance analysis, as it shows how the query was executed
and includes metrics such as elapsed time, rows processed, and I/O statistics.
#### Returns
`Promise`&lt;`string`&gt;
A query execution plan with runtime metrics for each step.
#### Example
```ts
import * as lancedb from "@lancedb/lancedb"
const db = await lancedb.connect("./.lancedb");
const table = await db.createTable("my_table", [
{ vector: [1.1, 0.9], id: "1" },
]);
const plan = await table.query().nearestTo([0.5, 0.2]).analyzePlan();
```
Example output (with runtime metrics inlined):
```
AnalyzeExec verbose=true, metrics=[]
ProjectionExec: expr=[id@3 as id, vector@0 as vector, _distance@2 as _distance], metrics=[output_rows=1, elapsed_compute=3.292µs]
Take: columns="vector, _rowid, _distance, (id)", metrics=[output_rows=1, elapsed_compute=66.001µs, batches_processed=1, bytes_read=8, iops=1, requests=1]
CoalesceBatchesExec: target_batch_size=1024, metrics=[output_rows=1, elapsed_compute=3.333µs]
GlobalLimitExec: skip=0, fetch=10, metrics=[output_rows=1, elapsed_compute=167ns]
FilterExec: _distance@2 IS NOT NULL, metrics=[output_rows=1, elapsed_compute=8.542µs]
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], metrics=[output_rows=1, elapsed_compute=63.25µs, row_replacements=1]
KNNVectorDistance: metric=l2, metrics=[output_rows=1, elapsed_compute=114.333µs, output_batches=1]
LanceScan: uri=/path/to/data, projection=[vector], row_id=true, row_addr=false, ordered=false, metrics=[output_rows=1, elapsed_compute=103.626µs, bytes_read=549, iops=2, requests=2]
```
#### Inherited from
[`QueryBase`](QueryBase.md).[`analyzePlan`](QueryBase.md#analyzeplan)
***
### execute()
```ts
protected execute(options?): AsyncGenerator<RecordBatch<any>, void, unknown>
```
Execute the query and return the results as an `AsyncIterator` of `RecordBatch`.
#### Parameters
* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;
#### Returns
`AsyncGenerator`&lt;`RecordBatch`&lt;`any`&gt;, `void`, `unknown`&gt;
By default, LanceDB will use many threads to calculate results and, when
the result set is large, multiple batches will be processed at one time.
This readahead is limited however and backpressure will be applied if this
stream is consumed slowly (this constrains the maximum memory used by a
single query).
#### Inherited from
[`QueryBase`](QueryBase.md).[`execute`](QueryBase.md#execute)
***
### explainPlan()
```ts
explainPlan(verbose): Promise<string>
```
Generates an explanation of the query execution plan.
#### Parameters
* **verbose**: `boolean` = `false`
If true, provides a more detailed explanation. Defaults to false.
#### Returns
`Promise`&lt;`string`&gt;
A Promise that resolves to a string containing the query execution plan explanation.
#### Example
```ts
import * as lancedb from "@lancedb/lancedb"
const db = await lancedb.connect("./.lancedb");
const table = await db.createTable("my_table", [
{ vector: [1.1, 0.9], id: "1" },
]);
const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();
```
#### Inherited from
[`QueryBase`](QueryBase.md).[`explainPlan`](QueryBase.md#explainplan)
***
### outputSchema()
```ts
outputSchema(): Promise<Schema<any>>
```
Returns the schema of the output that will be returned by this query.
This can be used to inspect the types and names of the columns that will be
returned by the query before executing it.
#### Returns
`Promise`&lt;`Schema`&lt;`any`&gt;&gt;
An Arrow Schema describing the output columns.
#### Inherited from
[`QueryBase`](QueryBase.md).[`outputSchema`](QueryBase.md#outputschema)
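A hedged sketch; the column names are placeholders:
```ts
// Inspect the projected columns without executing the query.
const schema = await table.query().select(["id", "vector"]).outputSchema();
console.log(schema.fields.map((f) => f.name)); // ["id", "vector"]
```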
***
### select()
```ts
select(columns): this
```
Return only the specified columns.
By default a query will return all columns from the table. However, this can have
a very significant impact on latency. LanceDB stores data in a columnar fashion. This
means we can finely tune our I/O to select exactly the columns we need.
As a best practice you should always limit queries to the columns that you need. If you
pass in an array of column names then only those columns will be returned.
You can also use this method to create new "dynamic" columns based on your existing columns.
For example, you may not care about "a" or "b" but instead simply want "a + b". This is often
seen in the SELECT clause of an SQL query (e.g. `SELECT a+b FROM my_table`).
To create dynamic columns you can pass in a Map<string, string>. A column will be returned
for each entry in the map. The key provides the name of the column. The value is
an SQL string used to specify how the column is calculated.
For example, an SQL query might state `SELECT a + b AS combined, c`. The equivalent
input to this method would be:
#### Parameters
* **columns**: `string` \| `string`[] \| `Record`&lt;`string`, `string`&gt; \| `Map`&lt;`string`, `string`&gt;
#### Returns
`this`
#### Example
```ts
new Map([["combined", "a + b"], ["c", "c"]])
```
Columns will always be returned in the order given, even if that order is different than
the order used when adding the data.
Note that you can pass in a `Record<string, string>` (e.g. an object literal). This method
uses `Object.entries` which should preserve the insertion order of the object. However,
object insertion order is easy to get wrong and `Map` is more foolproof.
#### Inherited from
[`QueryBase`](QueryBase.md).[`select`](QueryBase.md#select)
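A hedged sketch of the dynamic-column form described above:
```ts
// One computed column plus one passthrough column.
const rows = await table
  .query()
  .select(new Map([["combined", "a + b"], ["c", "c"]]))
  .toArray();
```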
***
### toArray()
```ts
toArray(options?): Promise<any[]>
```
Collect the results as an array of objects.
#### Parameters
* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;
#### Returns
`Promise`&lt;`any`[]&gt;
#### Inherited from
[`QueryBase`](QueryBase.md).[`toArray`](QueryBase.md#toarray)
***
### toArrow()
```ts
toArrow(options?): Promise<Table<any>>
```
Collect the results as an Arrow
#### Parameters
* **options?**: `Partial`&lt;[`QueryExecutionOptions`](../interfaces/QueryExecutionOptions.md)&gt;
#### Returns
`Promise`&lt;`Table`&lt;`any`&gt;&gt;
#### See
ArrowTable.
#### Inherited from
[`QueryBase`](QueryBase.md).[`toArrow`](QueryBase.md#toarrow)
***
### withRowId()
```ts
withRowId(): this
```
Whether to return the row id in the results.
This column can be used to match results between different queries. For
example, to match results from a full text search and a vector search in
order to perform hybrid search.
#### Returns
`this`
#### Inherited from
[`QueryBase`](QueryBase.md).[`withRowId`](QueryBase.md#withrowid)
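A hedged hybrid-search sketch; the `_rowid` field name is assumed from the option's purpose:
```ts
// Join full-text and vector hits on the shared row id.
const textHits = await table.query().fullTextSearch("fast").withRowId().toArray();
const vecHits = await table.query().nearestTo([0.5, 0.2]).withRowId().toArray();
const textIds = new Set(textHits.map((r) => r._rowid));
const both = vecHits.filter((r) => textIds.has(r._rowid));
```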

View File

@@ -16,7 +16,7 @@ This builder can be reused to execute the query many times.
## Extends
- `StandardQueryBase`&lt;`NativeVectorQuery`&gt;
- [`QueryBase`](QueryBase.md)&lt;`NativeVectorQuery`&gt;
## Properties
@@ -28,7 +28,7 @@ protected inner: VectorQuery | Promise<VectorQuery>;
#### Inherited from
`StandardQueryBase.inner`
[`QueryBase`](QueryBase.md).[`inner`](QueryBase.md#inner)
## Methods
@@ -91,7 +91,7 @@ AnalyzeExec verbose=true, metrics=[]
#### Inherited from
`StandardQueryBase.analyzePlan`
[`QueryBase`](QueryBase.md).[`analyzePlan`](QueryBase.md#analyzeplan)
***
@@ -221,7 +221,7 @@ also increase the latency of your query. The default value is 1.5*limit.
### execute()
```ts
protected execute(options?): AsyncGenerator<RecordBatch<any>, void, unknown>
protected execute(options?): RecordBatchIterator
```
Execute the query and return the results as an
@@ -232,7 +232,7 @@ Execute the query and return the results as an
#### Returns
`AsyncGenerator`&lt;`RecordBatch`&lt;`any`&gt;, `void`, `unknown`&gt;
[`RecordBatchIterator`](RecordBatchIterator.md)
#### See
@@ -248,7 +248,7 @@ single query)
#### Inherited from
`StandardQueryBase.execute`
[`QueryBase`](QueryBase.md).[`execute`](QueryBase.md#execute)
***
@@ -284,7 +284,7 @@ const plan = await table.query().nearestTo([0.5, 0.2]).explainPlan();
#### Inherited from
`StandardQueryBase.explainPlan`
[`QueryBase`](QueryBase.md).[`explainPlan`](QueryBase.md#explainplan)
***
@@ -305,7 +305,7 @@ Use [Table#optimize](Table.md#optimize) to index all un-indexed data.
#### Inherited from
`StandardQueryBase.fastSearch`
[`QueryBase`](QueryBase.md).[`fastSearch`](QueryBase.md#fastsearch)
***
@@ -335,7 +335,7 @@ Use `where` instead
#### Inherited from
`StandardQueryBase.filter`
[`QueryBase`](QueryBase.md).[`filter`](QueryBase.md#filter)
***
@@ -357,7 +357,7 @@ fullTextSearch(query, options?): this
#### Inherited from
`StandardQueryBase.fullTextSearch`
[`QueryBase`](QueryBase.md).[`fullTextSearch`](QueryBase.md#fulltextsearch)
***
@@ -382,54 +382,7 @@ called then every valid row from the table will be returned.
#### Inherited from
`StandardQueryBase.limit`
***
### maximumNprobes()
```ts
maximumNprobes(maximumNprobes): VectorQuery
```
Set the maximum number of probes used.
This controls the maximum number of partitions that will be searched. If this
number is greater than minimumNprobes then the excess partitions will _only_ be
searched if we have not found enough results. This can be useful when there is
a narrow filter to allow these queries to spend more time searching and avoid
potential false negatives.
#### Parameters
* **maximumNprobes**: `number`
#### Returns
[`VectorQuery`](VectorQuery.md)
***
### minimumNprobes()
```ts
minimumNprobes(minimumNprobes): VectorQuery
```
Set the minimum number of probes used.
This controls the minimum number of partitions that will be searched. This
parameter will impact every query against a vector index, regardless of the
filter. See `nprobes` for more details. Higher values will increase recall
but will also increase latency.
#### Parameters
* **minimumNprobes**: `number`
#### Returns
[`VectorQuery`](VectorQuery.md)
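A hedged sketch of pairing the two bounds; the numbers are placeholders to tune against your data:
```ts
// Search at least 20 partitions, and up to 50 when a narrow filter
// would otherwise starve the result set.
const hits = await table
  .query()
  .nearestTo([0.5, 0.2])
  .minimumNprobes(20)
  .maximumNprobes(50)
  .where("category = 'rare'")
  .limit(10)
  .toArray();
```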
[`QueryBase`](QueryBase.md).[`limit`](QueryBase.md#limit)
***
@@ -460,10 +413,6 @@ For best results we recommend tuning this parameter with a benchmark against
your actual data to find the smallest possible value that will still give
you the desired recall.
For more fine grained control over behavior when you have a very narrow filter
you can use `minimumNprobes` and `maximumNprobes`. This method sets both
the minimum and maximum to the same value.
#### Parameters
* **nprobes**: `number`
@@ -480,10 +429,6 @@ the minimum and maximum to the same value.
offset(offset): this
```
Set the number of rows to skip before returning results.
This is useful for pagination.
#### Parameters
* **offset**: `number`
@@ -494,30 +439,7 @@ This is useful for pagination.
#### Inherited from
`StandardQueryBase.offset`
***
### outputSchema()
```ts
outputSchema(): Promise<Schema<any>>
```
Returns the schema of the output that will be returned by this query.
This can be used to inspect the types and names of the columns that will be
returned by the query before executing it.
#### Returns
`Promise`&lt;`Schema`&lt;`any`&gt;&gt;
An Arrow Schema describing the output columns.
#### Inherited from
`StandardQueryBase.outputSchema`
[`QueryBase`](QueryBase.md).[`offset`](QueryBase.md#offset)
***
@@ -664,7 +586,7 @@ object insertion order is easy to get wrong and `Map` is more foolproof.
#### Inherited from
`StandardQueryBase.select`
[`QueryBase`](QueryBase.md).[`select`](QueryBase.md#select)
***
@@ -686,7 +608,7 @@ Collect the results as an array of objects.
#### Inherited from
`StandardQueryBase.toArray`
[`QueryBase`](QueryBase.md).[`toArray`](QueryBase.md#toarray)
***
@@ -712,7 +634,7 @@ ArrowTable.
#### Inherited from
`StandardQueryBase.toArrow`
[`QueryBase`](QueryBase.md).[`toArrow`](QueryBase.md#toarrow)
***
@@ -747,7 +669,7 @@ on the filter column(s).
#### Inherited from
`StandardQueryBase.where`
[`QueryBase`](QueryBase.md).[`where`](QueryBase.md#where)
***
@@ -769,4 +691,4 @@ order to perform hybrid search.
#### Inherited from
`StandardQueryBase.withRowId`
[`QueryBase`](QueryBase.md).[`withRowId`](QueryBase.md#withrowid)

View File

@@ -15,14 +15,6 @@ Enum representing the types of full-text queries supported.
## Enumeration Members
### Boolean
```ts
Boolean: "boolean";
```
***
### Boost
```ts

View File

@@ -1,37 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / Occur
# Enumeration: Occur
Enum representing the occurrence of terms in full-text queries.
- `Must`: The term must be present in the document.
- `Should`: The term should contribute to the document score, but is not required.
- `MustNot`: The term must not be present in the document.
## Enumeration Members
### Must
```ts
Must: "MUST";
```
***
### MustNot
```ts
MustNot: "MUST_NOT";
```
***
### Should
```ts
Should: "SHOULD";
```
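A hedged sketch of combining clauses with `Occur`, assuming `BooleanQuery` accepts `[Occur, FullTextQuery]` pairs and `MatchQuery` takes a query string and column:
```ts
// "sale" is required, "laptop" boosts scoring, "refurbished" excludes.
const query = new BooleanQuery([
  [Occur.Must, new MatchQuery("sale", "title")],
  [Occur.Should, new MatchQuery("laptop", "title")],
  [Occur.MustNot, new MatchQuery("refurbished", "title")],
]);
const hits = await table.query().fullTextSearch(query).toArray();
```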

View File

@@ -1,28 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / Operator
# Enumeration: Operator
Enum representing the logical operators used in full-text queries.
- `And`: All terms must match.
- `Or`: At least one term must match.
## Enumeration Members
### And
```ts
And: "AND";
```
***
### Or
```ts
Or: "OR";
```

View File

@@ -1,19 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / RecordBatchIterator
# Function: RecordBatchIterator()
```ts
function RecordBatchIterator(promisedInner): AsyncGenerator<RecordBatch<any>, void, unknown>
```
## Parameters
* **promisedInner**: `Promise`&lt;`RecordBatchIterator`&gt;
## Returns
`AsyncGenerator`&lt;`RecordBatch`&lt;`any`&gt;, `void`, `unknown`&gt;

View File

@@ -6,14 +6,10 @@
# Function: connect()
## connect(uri, options, session, headerProvider)
## connect(uri, options)
```ts
function connect(
uri,
options?,
session?,
headerProvider?): Promise<Connection>
function connect(uri, options?): Promise<Connection>
```
Connect to a LanceDB instance at the given URI.
@@ -33,10 +29,6 @@ Accepted formats:
* **options?**: `Partial`&lt;[`ConnectionOptions`](../interfaces/ConnectionOptions.md)&gt;
The options to use when connecting to the database
* **session?**: [`Session`](../classes/Session.md)
* **headerProvider?**: [`HeaderProvider`](../classes/HeaderProvider.md) \| () => `Record`&lt;`string`, `string`&gt; \| () => `Promise`&lt;`Record`&lt;`string`, `string`&gt;&gt;
### Returns
`Promise`&lt;[`Connection`](../classes/Connection.md)&gt;
@@ -58,18 +50,6 @@ const conn = await connect(
});
```
Using with a header provider for per-request authentication:
```ts
const provider = new StaticHeaderProvider({
"X-API-Key": "my-key"
});
const conn = await connect(
"db://host:port",
options,
undefined,
provider
);
```
## connect(options)
```ts
@@ -97,7 +77,7 @@ Accepted formats:
[ConnectionOptions](../interfaces/ConnectionOptions.md) for more details on the URI format.
### Examples
### Example
```ts
const conn = await connect({
@@ -105,11 +85,3 @@ const conn = await connect({
storageOptions: {timeout: "60s"}
});
```
```ts
const session = Session.default();
const conn = await connect({
uri: "/path/to/database",
session: session
});
```

View File

@@ -13,7 +13,7 @@ function makeArrowTable(
metadata?): ArrowTable
```
An enhanced version of the apache-arrow makeTable function from Apache Arrow
An enhanced version of the makeTable function from Apache Arrow
that supports nested fields and embeddings columns.
(typically you do not need to call this function. It will be called automatically

View File

@@ -1,34 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / permutationBuilder
# Function: permutationBuilder()
```ts
function permutationBuilder(table): PermutationBuilder
```
Create a permutation builder for the given table.
## Parameters
* **table**: [`Table`](../classes/Table.md)
The source table to create a permutation from
## Returns
[`PermutationBuilder`](../classes/PermutationBuilder.md)
A PermutationBuilder instance
## Example
```ts
const builder = permutationBuilder(sourceTable)
.splitRandom({ ratios: [0.8, 0.2], seed: 42 })
.shuffle({ seed: 123 });
const trainingTable = await builder.execute();
```

View File

@@ -12,49 +12,35 @@
## Enumerations
- [FullTextQueryType](enumerations/FullTextQueryType.md)
- [Occur](enumerations/Occur.md)
- [Operator](enumerations/Operator.md)
## Classes
- [BooleanQuery](classes/BooleanQuery.md)
- [BoostQuery](classes/BoostQuery.md)
- [Connection](classes/Connection.md)
- [HeaderProvider](classes/HeaderProvider.md)
- [Index](classes/Index.md)
- [MakeArrowTableOptions](classes/MakeArrowTableOptions.md)
- [MatchQuery](classes/MatchQuery.md)
- [MergeInsertBuilder](classes/MergeInsertBuilder.md)
- [MultiMatchQuery](classes/MultiMatchQuery.md)
- [NativeJsHeaderProvider](classes/NativeJsHeaderProvider.md)
- [OAuthHeaderProvider](classes/OAuthHeaderProvider.md)
- [PermutationBuilder](classes/PermutationBuilder.md)
- [PhraseQuery](classes/PhraseQuery.md)
- [Query](classes/Query.md)
- [QueryBase](classes/QueryBase.md)
- [Session](classes/Session.md)
- [StaticHeaderProvider](classes/StaticHeaderProvider.md)
- [RecordBatchIterator](classes/RecordBatchIterator.md)
- [Table](classes/Table.md)
- [TagContents](classes/TagContents.md)
- [Tags](classes/Tags.md)
- [TakeQuery](classes/TakeQuery.md)
- [VectorColumnOptions](classes/VectorColumnOptions.md)
- [VectorQuery](classes/VectorQuery.md)
## Interfaces
- [AddColumnsResult](interfaces/AddColumnsResult.md)
- [AddColumnsSql](interfaces/AddColumnsSql.md)
- [AddDataOptions](interfaces/AddDataOptions.md)
- [AddResult](interfaces/AddResult.md)
- [AlterColumnsResult](interfaces/AlterColumnsResult.md)
- [ClientConfig](interfaces/ClientConfig.md)
- [ColumnAlteration](interfaces/ColumnAlteration.md)
- [CompactionStats](interfaces/CompactionStats.md)
- [ConnectionOptions](interfaces/ConnectionOptions.md)
- [CreateTableOptions](interfaces/CreateTableOptions.md)
- [DeleteResult](interfaces/DeleteResult.md)
- [DropColumnsResult](interfaces/DropColumnsResult.md)
- [ExecutableQuery](interfaces/ExecutableQuery.md)
- [FragmentStatistics](interfaces/FragmentStatistics.md)
- [FragmentSummaryStats](interfaces/FragmentSummaryStats.md)
@@ -68,28 +54,18 @@
- [IndexStatistics](interfaces/IndexStatistics.md)
- [IvfFlatOptions](interfaces/IvfFlatOptions.md)
- [IvfPqOptions](interfaces/IvfPqOptions.md)
- [IvfRqOptions](interfaces/IvfRqOptions.md)
- [MergeResult](interfaces/MergeResult.md)
- [MergeStats](interfaces/MergeStats.md)
- [OpenTableOptions](interfaces/OpenTableOptions.md)
- [OptimizeOptions](interfaces/OptimizeOptions.md)
- [OptimizeStats](interfaces/OptimizeStats.md)
- [QueryExecutionOptions](interfaces/QueryExecutionOptions.md)
- [RemovalStats](interfaces/RemovalStats.md)
- [RetryConfig](interfaces/RetryConfig.md)
- [ShuffleOptions](interfaces/ShuffleOptions.md)
- [SplitCalculatedOptions](interfaces/SplitCalculatedOptions.md)
- [SplitHashOptions](interfaces/SplitHashOptions.md)
- [SplitRandomOptions](interfaces/SplitRandomOptions.md)
- [SplitSequentialOptions](interfaces/SplitSequentialOptions.md)
- [TableNamesOptions](interfaces/TableNamesOptions.md)
- [TableStatistics](interfaces/TableStatistics.md)
- [TimeoutConfig](interfaces/TimeoutConfig.md)
- [TlsConfig](interfaces/TlsConfig.md)
- [TokenResponse](interfaces/TokenResponse.md)
- [UpdateOptions](interfaces/UpdateOptions.md)
- [UpdateResult](interfaces/UpdateResult.md)
- [Version](interfaces/Version.md)
- [WriteExecutionOptions](interfaces/WriteExecutionOptions.md)
## Type Aliases
@@ -98,15 +74,12 @@
- [FieldLike](type-aliases/FieldLike.md)
- [IntoSql](type-aliases/IntoSql.md)
- [IntoVector](type-aliases/IntoVector.md)
- [MultiVector](type-aliases/MultiVector.md)
- [RecordBatchLike](type-aliases/RecordBatchLike.md)
- [SchemaLike](type-aliases/SchemaLike.md)
- [TableLike](type-aliases/TableLike.md)
## Functions
- [RecordBatchIterator](functions/RecordBatchIterator.md)
- [connect](functions/connect.md)
- [makeArrowTable](functions/makeArrowTable.md)
- [packBits](functions/packBits.md)
- [permutationBuilder](functions/permutationBuilder.md)

View File

@@ -1,15 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / AddColumnsResult
# Interface: AddColumnsResult
## Properties
### version
```ts
version: number;
```

View File

@@ -1,15 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / AddResult
# Interface: AddResult
## Properties
### version
```ts
version: number;
```

View File

@@ -1,15 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / AlterColumnsResult
# Interface: AlterColumnsResult
## Properties
### version
```ts
version: number;
```

View File

@@ -16,14 +16,6 @@ optional extraHeaders: Record<string, string>;
***
### idDelimiter?
```ts
optional idDelimiter: string;
```
***
### retryConfig?
```ts
@@ -40,14 +32,6 @@ optional timeoutConfig: TimeoutConfig;
***
### tlsConfig?
```ts
optional tlsConfig: TlsConfig;
```
***
### userAgent?
```ts

View File

@@ -70,17 +70,6 @@ Defaults to 'us-east-1'.
***
### session?
```ts
optional session: Session;
```
(For LanceDB OSS only): the session to use for this connection. Holds
shared caches and other session-specific state.
***
### storageOptions?
```ts

View File

@@ -1,15 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / DeleteResult
# Interface: DeleteResult
## Properties
### version
```ts
version: number;
```

View File

@@ -1,15 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / DropColumnsResult
# Interface: DropColumnsResult
## Properties
### version
```ts
version: number;
```

View File

@@ -23,7 +23,7 @@ whether to remove punctuation
### baseTokenizer?
```ts
optional baseTokenizer: "raw" | "simple" | "whitespace" | "ngram";
optional baseTokenizer: "raw" | "simple" | "whitespace";
```
The tokenizer to use when building the index.
@@ -71,36 +71,6 @@ tokens longer than this length will be ignored
***
### ngramMaxLength?
```ts
optional ngramMaxLength: number;
```
the maximum length of ngrams to index
***
### ngramMinLength?
```ts
optional ngramMinLength: number;
```
the minimum length of ngrams to index
***
### prefixOnly?
```ts
optional prefixOnly: boolean;
```
whether to only index the prefix of the token for ngram tokenizer
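A hedged sketch of wiring these options into an index, assuming `Index.fts` accepts them on the side of this diff that supports the ngram tokenizer:
```ts
// Ngram tokenization for substring-style matching on "title".
await table.createIndex("title", {
  config: Index.fts({
    baseTokenizer: "ngram",
    ngramMinLength: 3,
    ngramMaxLength: 5,
    prefixOnly: false,
  }),
});
```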
***
### removeStopWords?
```ts

View File

@@ -26,18 +26,6 @@ will be used to determine the most useful kind of index to create.
***
### name?
```ts
optional name: string;
```
Optional custom name for the index.
If not provided, a default name will be generated based on the column name.
***
### replace?
```ts
@@ -54,27 +42,8 @@ The default is true
***
### train?
```ts
optional train: boolean;
```
Whether to train the index with existing data.
If true (default), the index will be trained with existing data in the table.
If false, the index will be created empty and populated as new data is added.
Note: This option is only supported for scalar indices. Vector indices always train.
***
### waitTimeoutSeconds?
```ts
optional waitTimeoutSeconds: number;
```
Timeout in seconds to wait for index creation to complete.
If not specified, the method will return immediately after starting the index creation.
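A hedged sketch combining the options above; `Index.bitmap` stands in for any scalar index type:
```ts
// A named, trained scalar index; wait up to 60 s for it to build.
await table.createIndex("category", {
  config: Index.bitmap(),
  name: "category_bitmap_idx",
  train: true,
  waitTimeoutSeconds: 60,
});
```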

View File

@@ -1,101 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / IvfRqOptions
# Interface: IvfRqOptions
## Properties
### distanceType?
```ts
optional distanceType: "l2" | "cosine" | "dot";
```
Distance type to use to build the index.
Default value is "l2".
This is used when training the index to calculate the IVF partitions
(vectors are grouped in partitions with similar vectors according to this
distance type) and during quantization.
The distance type used to train an index MUST match the distance type used
to search the index. Failure to do so will yield inaccurate results.
The following distance types are available:
"l2" - Euclidean distance.
"cosine" - Cosine distance.
"dot" - Dot product.
***
### maxIterations?
```ts
optional maxIterations: number;
```
Max iterations to train IVF kmeans.
When training an IVF index we use kmeans to calculate the partitions. This parameter
controls how many iterations of kmeans to run.
The default value is 50.
***
### numBits?
```ts
optional numBits: number;
```
Number of bits per dimension for residual quantization.
This value controls how much each residual component is compressed. The more
bits, the more accurate the index will be, but the slower the search. Typical
values are small integers; the default is 1 bit per dimension.
***
### numPartitions?
```ts
optional numPartitions: number;
```
The number of IVF partitions to create.
This value should generally scale with the number of rows in the dataset.
By default the number of partitions is the square root of the number of
rows.
If this value is too large then the first part of the search (picking the
right partition) will be slow. If this value is too small then the second
part of the search (searching within a partition) will be slow.
***
### sampleRate?
```ts
optional sampleRate: number;
```
The number of vectors, per partition, to sample when training IVF kmeans.
When an IVF index is trained, we need to calculate partitions. These are groups
of vectors that are similar to each other. To do this we use an algorithm called kmeans.
Running kmeans on a large dataset can be slow. To speed this up we run kmeans on a
random sample of the data. This parameter controls the size of the sample. The total
number of vectors used to train the index is `sample_rate * num_partitions`.
Increasing this value might improve the quality of the index but in most cases the
default should be sufficient.
The default value is 256.
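A hedged sketch of using these options; `Index.ivfRq` is a hypothetical factory named by analogy with the other index types:
```ts
// IVF_RQ index: 256 partitions, 1 bit per dimension, cosine distance.
await table.createIndex("vector", {
  config: Index.ivfRq({
    distanceType: "cosine",
    numPartitions: 256,
    numBits: 1,
  }),
});
```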

View File

@@ -1,39 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / MergeResult
# Interface: MergeResult
## Properties
### numDeletedRows
```ts
numDeletedRows: number;
```
***
### numInsertedRows
```ts
numInsertedRows: number;
```
***
### numUpdatedRows
```ts
numUpdatedRows: number;
```
***
### version
```ts
version: number;
```
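A hedged sketch of where a `MergeResult` comes from, assuming the merge-insert builder shown in the class list:
```ts
// Upsert rows keyed on "id" and read back the merge statistics.
const res = await table
  .mergeInsert("id")
  .whenMatchedUpdateAll()
  .whenNotMatchedInsertAll()
  .execute([{ id: 1, vector: [0.1, 0.2] }]);
console.log(res.numInsertedRows, res.numUpdatedRows, res.version);
```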

View File

@@ -0,0 +1,31 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / MergeStats
# Interface: MergeStats
## Properties
### numDeletedRows
```ts
numDeletedRows: bigint;
```
***
### numInsertedRows
```ts
numInsertedRows: bigint;
```
***
### numUpdatedRows
```ts
numUpdatedRows: bigint;
```

View File

@@ -8,7 +8,7 @@
## Properties
### ~~indexCacheSize?~~
### indexCacheSize?
```ts
optional indexCacheSize: number;
@@ -16,11 +16,6 @@ optional indexCacheSize: number;
Set the size of the index cache, specified as a number of entries
#### Deprecated
Use session-level cache configuration instead.
Create a Session with custom cache sizes and pass it to the connect() function.
The exact meaning of an "entry" will depend on the type of index:
- IVF: there is one entry for each IVF partition
- BTREE: there is one entry for the entire index

View File

@@ -24,10 +24,10 @@ The default is 7 days
// Delete all versions older than 1 day
const olderThan = new Date();
olderThan.setDate(olderThan.getDate() - 1);
tbl.optimize({cleanupOlderThan: olderThan});
tbl.cleanupOlderVersions(olderThan);
// Delete all versions except the current version
tbl.optimize({cleanupOlderThan: new Date()});
tbl.cleanupOlderVersions(new Date());
```
***

View File

@@ -1,23 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / ShuffleOptions
# Interface: ShuffleOptions
## Properties
### clumpSize?
```ts
optional clumpSize: number;
```
***
### seed?
```ts
optional seed: number;
```

View File

@@ -1,23 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / SplitCalculatedOptions
# Interface: SplitCalculatedOptions
## Properties
### calculation
```ts
calculation: string;
```
***
### splitNames?
```ts
optional splitNames: string[];
```

View File

@@ -1,39 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / SplitHashOptions
# Interface: SplitHashOptions
## Properties
### columns
```ts
columns: string[];
```
***
### discardWeight?
```ts
optional discardWeight: number;
```
***
### splitNames?
```ts
optional splitNames: string[];
```
***
### splitWeights
```ts
splitWeights: number[];
```

View File

@@ -1,47 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / SplitRandomOptions
# Interface: SplitRandomOptions
## Properties
### counts?
```ts
optional counts: number[];
```
***
### fixed?
```ts
optional fixed: number;
```
***
### ratios?
```ts
optional ratios: number[];
```
***
### seed?
```ts
optional seed: number;
```
***
### splitNames?
```ts
optional splitNames: string[];
```

View File

@@ -1,39 +0,0 @@
[**@lancedb/lancedb**](../README.md) • **Docs**
***
[@lancedb/lancedb](../globals.md) / SplitSequentialOptions
# Interface: SplitSequentialOptions
## Properties
### counts?
```ts
optional counts: number[];
```
***
### fixed?
```ts
optional fixed: number;
```
***
### ratios?
```ts
optional ratios: number[];
```
***
### splitNames?
```ts
optional splitNames: string[];
```

View File

@@ -44,17 +44,3 @@ optional readTimeout: number;
The timeout for reading data from the server in seconds. Default is 300
seconds (5 minutes). This can also be set via the environment variable
`LANCE_CLIENT_READ_TIMEOUT`, as an integer number of seconds.
***
### timeout?
```ts
optional timeout: number;
```
The overall timeout for the entire request in seconds. This includes
connection, send, and read time. If the entire request doesn't complete
within this time, it will fail. Default is None (no overall timeout).
This can also be set via the environment variable `LANCE_CLIENT_TIMEOUT`,
as an integer number of seconds.
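A hedged sketch of the removed option, assuming the timeout config is passed through `clientConfig` on a remote connection:
```ts
// Fail any request that takes longer than 120 s end to end.
const conn = await connect("db://my-db", {
  clientConfig: { timeoutConfig: { timeout: 120, readTimeout: 300 } },
});
```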

Some files were not shown because too many files have changed in this diff.