Compare commits

..

1 Commits

Author SHA1 Message Date
Lance Release
ec3e18ecf7 Bump version: 0.24.1-beta.0 → 0.24.1 2026-01-26 23:38:54 +00:00
164 changed files with 4829 additions and 66928 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.27.0-beta.2"
current_version = "0.24.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -3,7 +3,7 @@ name: build-linux-wheel
description: "Build a manylinux wheel for lance"
inputs:
python-minor-version:
description: "10, 11, 12, 13"
description: "8, 9, 10, 11, 12"
required: true
args:
description: "--release"

View File

@@ -3,7 +3,7 @@ name: build_wheel
description: "Build a lance wheel"
inputs:
python-minor-version:
description: "10, 11, 12, 13"
description: "8, 9, 10, 11"
required: true
args:
description: "--release"

View File

@@ -3,7 +3,7 @@ name: build_wheel
description: "Build a lance wheel"
inputs:
python-minor-version:
description: "10, 11, 12, 13, 14"
description: "8, 9, 10, 11"
required: true
args:
description: "--release"

View File

@@ -42,7 +42,7 @@ jobs:
name: Report Workflow Failure
runs-on: ubuntu-latest
needs: [build]
if: always() && failure() && startsWith(github.ref, 'refs/tags/v')
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
permissions:
contents: read
issues: write

View File

@@ -1,173 +0,0 @@
name: Codex Fix CI
on:
workflow_dispatch:
inputs:
workflow_run_url:
description: "Failing CI workflow run URL (e.g., https://github.com/lancedb/lancedb/actions/runs/12345678)"
required: true
type: string
branch:
description: "Branch to fix (e.g., main, release/v2.0, or feature-branch)"
required: true
type: string
guidelines:
description: "Additional guidelines for the fix (optional)"
required: false
type: string
permissions:
contents: write
pull-requests: write
actions: read
jobs:
fix-ci:
runs-on: warp-ubuntu-latest-x64-4x
timeout-minutes: 60
env:
CC: clang
CXX: clang++
steps:
- name: Show inputs
run: |
echo "workflow_run_url = ${{ inputs.workflow_run_url }}"
echo "branch = ${{ inputs.branch }}"
echo "guidelines = ${{ inputs.guidelines }}"
- name: Checkout Repo
uses: actions/checkout@v4
with:
ref: ${{ inputs.branch }}
fetch-depth: 0
persist-credentials: true
- name: Set up Node.js
uses: actions/setup-node@v4
with:
node-version: 20
- name: Install Codex CLI
run: npm install -g @openai/codex
- name: Install Rust toolchain
uses: dtolnay/rust-toolchain@stable
with:
toolchain: stable
components: clippy, rustfmt
- uses: Swatinem/rust-cache@v2
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y protobuf-compiler libssl-dev
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Install Python dependencies
run: |
pip install maturin ruff pytest pyarrow pandas polars
- name: Set up Java
uses: actions/setup-java@v4
with:
distribution: temurin
java-version: '11'
cache: maven
- name: Install Node.js dependencies for TypeScript bindings
run: |
cd nodejs
npm ci
- name: Configure git user
run: |
git config user.name "lancedb automation"
git config user.email "robot@lancedb.com"
- name: Run Codex to fix CI failure
env:
WORKFLOW_RUN_URL: ${{ inputs.workflow_run_url }}
BRANCH: ${{ inputs.branch }}
GUIDELINES: ${{ inputs.guidelines }}
GITHUB_TOKEN: ${{ secrets.ROBOT_TOKEN }}
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
OPENAI_API_KEY: ${{ secrets.CODEX_TOKEN }}
run: |
set -euo pipefail
cat <<EOF >/tmp/codex-prompt.txt
You are running inside the lancedb repository on a GitHub Actions runner. Your task is to fix a CI failure.
Input parameters:
- Failing workflow run URL: ${WORKFLOW_RUN_URL}
- Branch to fix: ${BRANCH}
- Additional guidelines: ${GUIDELINES:-"None provided"}
Follow these steps exactly:
1. Extract the run ID from the workflow URL. The URL format is https://github.com/lancedb/lancedb/actions/runs/<run_id>.
2. Use "gh run view <run_id> --json jobs,conclusion,name" to get information about the failed run.
3. Identify which jobs failed. For each failed job, use "gh run view <run_id> --job <job_id> --log-failed" to get the failure logs.
4. Analyze the failure logs to understand what went wrong. Common failures include:
- Compilation errors
- Test failures
- Clippy warnings treated as errors
- Formatting issues
- Dependency issues
5. Based on the analysis, fix the issues in the codebase:
- For compilation errors: Fix the code that doesn't compile
- For test failures: Fix the failing tests or the code they test
- For clippy warnings: Apply the suggested fixes
- For formatting issues: Run "cargo fmt --all"
- For other issues: Apply appropriate fixes
6. After making fixes, verify them locally:
- Run "cargo fmt --all" to ensure formatting is correct
- Run "cargo clippy --workspace --tests --all-features -- -D warnings" to check for issues
- Run ONLY the specific failing tests to confirm they pass now:
- For Rust test failures: Run the specific test with "cargo test -p <crate> <test_name>"
- For Python test failures: Build with "cd python && maturin develop" then run "pytest <specific_test_file>::<test_name>"
- For Java test failures: Run "cd java && mvn test -Dtest=<TestClass>#<testMethod>"
- For TypeScript test failures: Run "cd nodejs && npm run build && npm test -- --testNamePattern='<test_name>'"
- Do NOT run the full test suite - only run the tests that were failing
7. If the additional guidelines are provided, follow them as well.
8. Inspect "git status --short" and "git diff" to review your changes.
9. Create a fix branch: "git checkout -b codex/fix-ci-<run_id>".
10. Stage all changes with "git add -A" and commit with message "fix: resolve CI failures from run <run_id>".
11. Push the branch: "git push origin codex/fix-ci-<run_id>". If the remote branch exists, delete it first with "gh api -X DELETE repos/lancedb/lancedb/git/refs/heads/codex/fix-ci-<run_id>" then push. Do NOT use "git push --force" or "git push -f".
12. Create a pull request targeting "${BRANCH}":
- Title: "ci: <short summary describing the fix>" (e.g., "ci: fix clippy warnings in lancedb" or "ci: resolve test flakiness in vector search")
- First, write the PR body to /tmp/pr-body.md using a heredoc (cat <<'PREOF' > /tmp/pr-body.md). The body should include:
- Link to the failing workflow run
- Summary of what failed
- Description of the fixes applied
- Then run "gh pr create --base ${BRANCH} --body-file /tmp/pr-body.md".
13. Display the new PR URL, "git status --short", and a summary of what was fixed.
Constraints:
- Use bash commands for all operations.
- Do not merge the PR.
- Do not modify GitHub workflow files unless they are the cause of the failure.
- If any command fails, diagnose and attempt to fix the issue instead of aborting immediately.
- If you cannot fix the issue automatically, create the PR anyway with a clear explanation of what you tried and what remains to be fixed.
- env "GH_TOKEN" is available, use "gh" tools for GitHub-related operations.
EOF
printenv OPENAI_API_KEY | codex login --with-api-key
codex --config shell_environment_policy.ignore_default_excludes=true exec --dangerously-bypass-approvals-and-sandbox "$(cat /tmp/codex-prompt.txt)"

View File

@@ -86,17 +86,16 @@ jobs:
You are running inside the lancedb repository on a GitHub Actions runner. Update the Lance dependency to version ${VERSION} and prepare a pull request for maintainers to review.
Follow these steps exactly:
1. Use script "ci/set_lance_version.py" to update Lance Rust dependencies. The script already refreshes Cargo metadata, so allow it to finish even if it takes time.
2. Update the Java lance-core dependency version in "java/pom.xml": change the "<lance-core.version>...</lance-core.version>" property to "${VERSION}".
3. Run "cargo clippy --workspace --tests --all-features -- -D warnings". If diagnostics appear, fix them yourself and rerun clippy until it exits cleanly. Do not skip any warnings.
4. After clippy succeeds, run "cargo fmt --all" to format the workspace.
5. Ensure the repository is clean except for intentional changes. Inspect "git status --short" and "git diff" to confirm the dependency update and any required fixes.
6. Create and switch to a new branch named "${BRANCH_NAME}" (replace any duplicated hyphens if necessary).
7. Stage all relevant files with "git add -A". Commit using the message "${COMMIT_TYPE}: update lance dependency to v${VERSION}".
8. Push the branch to origin. If the remote branch already exists, delete it first with "gh api -X DELETE repos/lancedb/lancedb/git/refs/heads/${BRANCH_NAME}" then push with "git push origin ${BRANCH_NAME}". Do NOT use "git push --force" or "git push -f".
9. env "GH_TOKEN" is available, use "gh" tools for github related operations like creating pull request.
10. Create a pull request targeting "main" with title "${COMMIT_TYPE}: update lance dependency to v${VERSION}". First, write the PR body to /tmp/pr-body.md using a heredoc (cat <<'EOF' > /tmp/pr-body.md). The body should summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}). Then run "gh pr create --body-file /tmp/pr-body.md".
11. After creating the PR, display the PR URL, "git status --short", and a concise summary of the commands run and their results.
1. Use script "ci/set_lance_version.py" to update Lance dependencies. The script already refreshes Cargo metadata, so allow it to finish even if it takes time.
2. Run "cargo clippy --workspace --tests --all-features -- -D warnings". If diagnostics appear, fix them yourself and rerun clippy until it exits cleanly. Do not skip any warnings.
3. After clippy succeeds, run "cargo fmt --all" to format the workspace.
4. Ensure the repository is clean except for intentional changes. Inspect "git status --short" and "git diff" to confirm the dependency update and any required fixes.
5. Create and switch to a new branch named "${BRANCH_NAME}" (replace any duplicated hyphens if necessary).
6. Stage all relevant files with "git add -A". Commit using the message "${COMMIT_TYPE}: update lance dependency to v${VERSION}".
7. Push the branch to origin. If the branch already exists, force-push your changes.
8. env "GH_TOKEN" is available, use "gh" tools for github related operations like creating pull request.
9. Create a pull request targeting "main" with title "${COMMIT_TYPE}: update lance dependency to v${VERSION}". First, write the PR body to /tmp/pr-body.md using a heredoc (cat <<'EOF' > /tmp/pr-body.md). The body should summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}). Then run "gh pr create --body-file /tmp/pr-body.md".
10. After creating the PR, display the PR URL, "git status --short", and a concise summary of the commands run and their results.
Constraints:
- Use bash commands; avoid modifying GitHub workflow files other than through the scripted task above.

View File

@@ -41,7 +41,7 @@ jobs:
sudo apt install -y protobuf-compiler libssl-dev
rustup update && rustup default
- name: Set up Python
uses: actions/setup-python@v6
uses: actions/setup-python@v5
with:
python-version: "3.10"
cache: "pip"

View File

@@ -8,8 +8,6 @@ on:
paths:
- Cargo.toml
- nodejs/**
- rust/**
- docs/src/js/**
- .github/workflows/nodejs.yml
- docker-compose.yml
@@ -78,11 +76,8 @@ jobs:
fetch-depth: 0
lfs: true
- uses: actions/setup-node@v3
name: Setup Node.js 20 for build
with:
# @napi-rs/cli v3 requires Node >= 20.12 (via @inquirer/prompts@8).
# Build always on Node 20; tests run on the matrix version below.
node-version: 20
node-version: ${{ matrix.node-version }}
cache: 'npm'
cache-dependency-path: nodejs/package-lock.json
- uses: Swatinem/rust-cache@v2
@@ -90,16 +85,12 @@ jobs:
run: |
sudo apt update
sudo apt install -y protobuf-compiler libssl-dev
npm install -g @napi-rs/cli
- name: Build
run: |
npm ci --include=optional
npm run build:debug -- --profile ci
- uses: actions/setup-node@v3
name: Setup Node.js ${{ matrix.node-version }} for test
with:
node-version: ${{ matrix.node-version }}
- name: Compile TypeScript
run: npm run tsc
npm run tsc
- name: Setup localstack
working-directory: .
run: docker compose up --detach --wait
@@ -152,6 +143,7 @@ jobs:
- name: Install dependencies
run: |
brew install protobuf
npm install -g @napi-rs/cli
- name: Build
run: |
npm ci --include=optional

View File

@@ -128,13 +128,16 @@ jobs:
- target: x86_64-unknown-linux-musl
# This one seems to need some extra memory
host: ubuntu-2404-8x-x64
# https://github.com/napi-rs/napi-rs/blob/main/alpine.Dockerfile
docker: ghcr.io/napi-rs/napi-rs/nodejs-rust:lts-alpine
features: fp16kernels
pre_build: |-
set -e &&
sudo apt-get update &&
sudo apt-get install -y protobuf-compiler pkg-config &&
rustup target add x86_64-unknown-linux-musl &&
export EXTRA_ARGS="-x"
apk add protobuf-dev curl &&
ln -s /usr/lib/gcc/x86_64-alpine-linux-musl/14.2.0/crtbeginS.o /usr/lib/crtbeginS.o &&
ln -s /usr/lib/libgcc_s.so /usr/lib/libgcc.so &&
CC=gcc &&
CXX=g++
- target: aarch64-unknown-linux-gnu
host: ubuntu-2404-8x-x64
# https://github.com/napi-rs/napi-rs/blob/main/debian-aarch64.Dockerfile
@@ -150,13 +153,15 @@ jobs:
rustup target add aarch64-unknown-linux-gnu
- target: aarch64-unknown-linux-musl
host: ubuntu-2404-8x-x64
# https://github.com/napi-rs/napi-rs/blob/main/alpine.Dockerfile
docker: ghcr.io/napi-rs/napi-rs/nodejs-rust:lts-alpine
features: ","
pre_build: |-
set -e &&
sudo apt-get update &&
sudo apt-get install -y protobuf-compiler &&
apk add protobuf-dev &&
rustup target add aarch64-unknown-linux-musl &&
export EXTRA_ARGS="-x"
export CC_aarch64_unknown_linux_musl=aarch64-linux-musl-gcc &&
export CXX_aarch64_unknown_linux_musl=aarch64-linux-musl-g++
name: build - ${{ matrix.settings.target }}
runs-on: ${{ matrix.settings.host }}
defaults:
@@ -187,18 +192,12 @@ jobs:
.cargo-cache
target/
key: nodejs-${{ matrix.settings.target }}-cargo-${{ matrix.settings.host }}
- name: Setup toolchain
run: ${{ matrix.settings.setup }}
if: ${{ matrix.settings.setup }}
shell: bash
- name: Install dependencies
run: npm ci
- name: Install Zig
uses: mlugg/setup-zig@v2
if: ${{ contains(matrix.settings.target, 'musl') }}
with:
version: 0.14.1
- name: Install cargo-zigbuild
uses: taiki-e/install-action@v2
if: ${{ contains(matrix.settings.target, 'musl') }}
with:
tool: cargo-zigbuild
- name: Build in docker
uses: addnab/docker-run-action@v3
if: ${{ matrix.settings.docker }}
@@ -211,24 +210,24 @@ jobs:
run: |
set -e
${{ matrix.settings.pre_build }}
npx napi build --platform --release \
npx napi build --platform --release --no-const-enum \
--features ${{ matrix.settings.features }} \
--target ${{ matrix.settings.target }} \
--dts ../lancedb/native.d.ts \
--js ../lancedb/native.js \
--strip \
--output-dir dist/
dist/
- name: Build
run: |
${{ matrix.settings.pre_build }}
npx napi build --platform --release \
npx napi build --platform --release --no-const-enum \
--features ${{ matrix.settings.features }} \
--target ${{ matrix.settings.target }} \
--dts ../lancedb/native.d.ts \
--js ../lancedb/native.js \
--strip \
$EXTRA_ARGS \
--output-dir dist/
dist/
if: ${{ !matrix.settings.docker }}
shell: bash
- name: Upload artifact
@@ -319,7 +318,7 @@ jobs:
- name: Setup node
uses: actions/setup-node@v4
with:
node-version: 24
node-version: 20
cache: npm
cache-dependency-path: nodejs/package-lock.json
registry-url: "https://registry.npmjs.org"
@@ -349,9 +348,9 @@ jobs:
run: find npm
- name: Publish
env:
NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
DRY_RUN: ${{ !startsWith(github.ref, 'refs/tags/v') }}
run: |
npm config set provenance true
ARGS="--access public"
if [[ $DRY_RUN == "true" ]]; then
ARGS="$ARGS --dry-run"
@@ -364,7 +363,7 @@ jobs:
name: Report Workflow Failure
runs-on: ubuntu-latest
needs: [build-lancedb, test-lancedb, publish]
if: always() && failure() && startsWith(github.ref, 'refs/tags/v')
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
permissions:
contents: read
issues: write

View File

@@ -44,12 +44,12 @@ jobs:
fetch-depth: 0
lfs: true
- name: Set up Python
uses: actions/setup-python@v6
uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: 3.8
- uses: ./.github/workflows/build_linux_wheel
with:
python-minor-version: 10
python-minor-version: 8
args: "--release --strip ${{ matrix.config.extra_args }}"
arm-build: ${{ matrix.config.platform == 'aarch64' }}
manylinux: ${{ matrix.config.manylinux }}
@@ -74,12 +74,12 @@ jobs:
fetch-depth: 0
lfs: true
- name: Set up Python
uses: actions/setup-python@v6
uses: actions/setup-python@v4
with:
python-version: "3.13"
python-version: 3.12
- uses: ./.github/workflows/build_mac_wheel
with:
python-minor-version: 10
python-minor-version: 8
args: "--release --strip --target ${{ matrix.config.target }} --features fp16kernels"
- uses: ./.github/workflows/upload_wheel
if: startsWith(github.ref, 'refs/tags/python-v')
@@ -95,12 +95,12 @@ jobs:
fetch-depth: 0
lfs: true
- name: Set up Python
uses: actions/setup-python@v6
uses: actions/setup-python@v4
with:
python-version: "3.13"
python-version: 3.12
- uses: ./.github/workflows/build_windows_wheel
with:
python-minor-version: 10
python-minor-version: 8
args: "--release --strip"
vcpkg_token: ${{ secrets.VCPKG_GITHUB_PACKAGES }}
- uses: ./.github/workflows/upload_wheel
@@ -181,7 +181,7 @@ jobs:
permissions:
contents: read
issues: write
if: always() && failure() && startsWith(github.ref, 'refs/tags/python-v')
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/create-failure-issue

View File

@@ -8,7 +8,6 @@ on:
paths:
- Cargo.toml
- python/**
- rust/**
- .github/workflows/python.yml
concurrency:
@@ -37,9 +36,9 @@ jobs:
fetch-depth: 0
lfs: true
- name: Set up Python
uses: actions/setup-python@v6
uses: actions/setup-python@v5
with:
python-version: "3.13"
python-version: "3.12"
- name: Install ruff
run: |
pip install ruff==0.9.9
@@ -62,9 +61,9 @@ jobs:
fetch-depth: 0
lfs: true
- name: Set up Python
uses: actions/setup-python@v6
uses: actions/setup-python@v5
with:
python-version: "3.13"
python-version: "3.12"
- name: Install protobuf compiler
run: |
sudo apt update
@@ -91,9 +90,9 @@ jobs:
fetch-depth: 0
lfs: true
- name: Set up Python
uses: actions/setup-python@v6
uses: actions/setup-python@v5
with:
python-version: "3.13"
python-version: "3.12"
cache: "pip"
- name: Install protobuf
run: |
@@ -111,7 +110,7 @@ jobs:
timeout-minutes: 30
strategy:
matrix:
python-minor-version: ["10", "13"]
python-minor-version: ["9", "12"]
runs-on: "ubuntu-24.04"
defaults:
run:
@@ -127,7 +126,7 @@ jobs:
sudo apt update
sudo apt install -y protobuf-compiler
- name: Set up Python
uses: actions/setup-python@v6
uses: actions/setup-python@v5
with:
python-version: 3.${{ matrix.python-minor-version }}
- uses: ./.github/workflows/build_linux_wheel
@@ -157,9 +156,9 @@ jobs:
fetch-depth: 0
lfs: true
- name: Set up Python
uses: actions/setup-python@v6
uses: actions/setup-python@v5
with:
python-version: "3.13"
python-version: "3.12"
- uses: ./.github/workflows/build_mac_wheel
with:
args: --profile ci
@@ -186,9 +185,9 @@ jobs:
fetch-depth: 0
lfs: true
- name: Set up Python
uses: actions/setup-python@v6
uses: actions/setup-python@v5
with:
python-version: "3.13"
python-version: "3.12"
- uses: ./.github/workflows/build_windows_wheel
with:
args: --profile ci
@@ -213,9 +212,9 @@ jobs:
sudo apt update
sudo apt install -y protobuf-compiler
- name: Set up Python
uses: actions/setup-python@v6
uses: actions/setup-python@v5
with:
python-version: "3.10"
python-version: 3.9
- name: Install lancedb
run: |
pip install "pydantic<2"

View File

@@ -183,7 +183,7 @@ jobs:
runs-on: ubuntu-24.04
strategy:
matrix:
msrv: ["1.91.0"] # This should match up with rust-version in Cargo.toml
msrv: ["1.88.0"] # This should match up with rust-version in Cargo.toml
env:
# Need up-to-date compilers for kernels
CC: clang-18

1213
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -12,43 +12,42 @@ repository = "https://github.com/lancedb/lancedb"
description = "Serverless, low-latency vector database for AI applications"
keywords = ["lancedb", "lance", "database", "vector", "search"]
categories = ["database-implementations"]
rust-version = "1.91.0"
rust-version = "1.88.0"
[workspace.dependencies]
lance = { "version" = "=3.1.0-beta.2", default-features = false, "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=3.1.0-beta.2", default-features = false, "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=3.1.0-beta.2", default-features = false, "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=3.1.0-beta.2", "tag" = "v3.1.0-beta.2", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "57.2", optional = false }
arrow-array = "57.2"
arrow-data = "57.2"
arrow-ipc = "57.2"
arrow-ord = "57.2"
arrow-schema = "57.2"
arrow-select = "57.2"
arrow-cast = "57.2"
arrow = { version = "56.2", optional = false }
arrow-array = "56.2"
arrow-data = "56.2"
arrow-ipc = "56.2"
arrow-ord = "56.2"
arrow-schema = "56.2"
arrow-select = "56.2"
arrow-cast = "56.2"
async-trait = "0"
datafusion = { version = "51.0", default-features = false }
datafusion-catalog = "51.0"
datafusion-common = { version = "51.0", default-features = false }
datafusion-execution = "51.0"
datafusion-expr = "51.0"
datafusion-physical-plan = "51.0"
datafusion-physical-expr = "51.0"
datafusion = { version = "50.1", default-features = false }
datafusion-catalog = "50.1"
datafusion-common = { version = "50.1", default-features = false }
datafusion-execution = "50.1"
datafusion-expr = "50.1"
datafusion-physical-plan = "50.1"
env_logger = "0.11"
half = { "version" = "2.7.1", default-features = false, features = [
half = { "version" = "2.6.0", default-features = false, features = [
"num-traits",
] }
futures = "0"

View File

@@ -1,9 +0,0 @@
.PHONY: licenses
licenses:
cargo about generate about.hbs -o RUST_THIRD_PARTY_LICENSES.html -c about.toml
cd python && cargo about generate ../about.hbs -o RUST_THIRD_PARTY_LICENSES.html -c ../about.toml
cd python && uv sync --all-extras && uv tool run pip-licenses --python .venv/bin/python --format=markdown --with-urls --output-file=PYTHON_THIRD_PARTY_LICENSES.md
cd nodejs && cargo about generate ../about.hbs -o RUST_THIRD_PARTY_LICENSES.html -c ../about.toml
cd nodejs && npx license-checker --markdown --out NODEJS_THIRD_PARTY_LICENSES.md
cd java && ./mvnw license:aggregate-add-third-party -q

View File

@@ -66,7 +66,7 @@ Follow the [Quickstart](https://lancedb.com/docs/quickstart/) doc to set up Lanc
| Python SDK | https://lancedb.github.io/lancedb/python/python/ |
| Typescript SDK | https://lancedb.github.io/lancedb/js/globals/ |
| Rust SDK | https://docs.rs/lancedb/latest/lancedb/index.html |
| REST API | https://docs.lancedb.com/api-reference/rest |
| REST API | https://docs.lancedb.com/api-reference/introduction |
## **Join Us and Contribute**

File diff suppressed because it is too large Load Diff

View File

@@ -1,70 +0,0 @@
<html>
<head>
<style>
@media (prefers-color-scheme: dark) {
body {
background: #333;
color: white;
}
a {
color: skyblue;
}
}
.container {
font-family: sans-serif;
max-width: 800px;
margin: 0 auto;
}
.intro {
text-align: center;
}
.licenses-list {
list-style-type: none;
margin: 0;
padding: 0;
}
.license-used-by {
margin-top: -10px;
}
.license-text {
max-height: 200px;
overflow-y: scroll;
white-space: pre-wrap;
}
</style>
</head>
<body>
<main class="container">
<div class="intro">
<h1>Third Party Licenses</h1>
<p>This page lists the licenses of the projects used in cargo-about.</p>
</div>
<h2>Overview of licenses:</h2>
<ul class="licenses-overview">
{{#each overview}}
<li><a href="#{{id}}">{{name}}</a> ({{count}})</li>
{{/each}}
</ul>
<h2>All license text:</h2>
<ul class="licenses-list">
{{#each licenses}}
<li class="license">
<h3 id="{{id}}">{{name}}</h3>
<h4>Used by:</h4>
<ul class="license-used-by">
{{#each used_by}}
<li><a href="{{#if crate.repository}} {{crate.repository}} {{else}} https://crates.io/crates/{{crate.name}} {{/if}}">{{crate.name}} {{crate.version}}</a></li>
{{/each}}
</ul>
<pre class="license-text">{{text}}</pre>
</li>
{{/each}}
</ul>
</main>
</body>
</html>

View File

@@ -1,18 +0,0 @@
accepted = [
"0BSD",
"Apache-2.0",
"Apache-2.0 WITH LLVM-exception",
"BSD-2-Clause",
"BSD-3-Clause",
"BSL-1.0",
"bzip2-1.0.6",
"CC0-1.0",
"CDDL-1.0",
"CDLA-Permissive-2.0",
"ISC",
"MIT",
"MPL-2.0",
"OpenSSL",
"Unicode-3.0",
"Zlib",
]

View File

@@ -1,62 +0,0 @@
# VoyageAI Embeddings
Voyage AI provides cutting-edge embedding and rerankers.
Using voyageai API requires voyageai package, which can be installed using `pip install voyageai`. Voyage AI embeddings are used to generate embeddings for text data. The embeddings can be used for various tasks like semantic search, clustering, and classification.
You also need to set the `VOYAGE_API_KEY` environment variable to use the VoyageAI API.
Supported models are:
**Voyage-4 Series (Latest)**
- voyage-4 (1024 dims, general-purpose and multilingual retrieval, 320K batch tokens)
- voyage-4-lite (1024 dims, optimized for latency and cost, 1M batch tokens)
- voyage-4-large (1024 dims, best retrieval quality, 120K batch tokens)
**Voyage-3 Series**
- voyage-3
- voyage-3-lite
**Domain-Specific Models**
- voyage-finance-2
- voyage-multilingual-2
- voyage-law-2
- voyage-code-2
Supported parameters (to be passed in `create` method) are:
| Parameter | Type | Default Value | Description |
|---|---|--------|---------|
| `name` | `str` | `None` | The model ID of the model to use. Supported base models for Text Embeddings: voyage-4, voyage-4-lite, voyage-4-large, voyage-3, voyage-3-lite, voyage-finance-2, voyage-multilingual-2, voyage-law-2, voyage-code-2 |
| `input_type` | `str` | `None` | Type of the input text. Default to None. Other options: query, document. |
| `truncation` | `bool` | `True` | Whether to truncate the input texts to fit within the context length. |
Usage Example:
```python
import lancedb
from lancedb.pydantic import LanceModel, Vector
from lancedb.embeddings import EmbeddingFunctionRegistry
voyageai = EmbeddingFunctionRegistry
.get_instance()
.get("voyageai")
.create(name="voyage-3")
class TextModel(LanceModel):
text: str = voyageai.SourceField()
vector: Vector(voyageai.ndims()) = voyageai.VectorField()
data = [ { "text": "hello world" },
{ "text": "goodbye world" }]
db = lancedb.connect("~/.lancedb")
tbl = db.create_table("test", schema=TextModel, mode="overwrite")
tbl.add(data)
```

View File

@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
<dependency>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-core</artifactId>
<version>0.27.0-beta.2</version>
<version>0.24.1</version>
</dependency>
```

View File

@@ -367,27 +367,6 @@ Use [Table.listIndices](Table.md#listindices) to find the names of the indices.
***
### initialStorageOptions()
```ts
abstract initialStorageOptions(): Promise<undefined | null | Record<string, string>>
```
Get the initial storage options that were passed in when opening this table.
For dynamically refreshed options (e.g., credential vending), use
[Table.latestStorageOptions](Table.md#lateststorageoptions).
Warning: This is an internal API and the return value is subject to change.
#### Returns
`Promise`&lt;`undefined` \| `null` \| `Record`&lt;`string`, `string`&gt;&gt;
The storage options, or undefined if no storage options were configured.
***
### isOpen()
```ts
@@ -402,28 +381,6 @@ Return true if the table has not been closed
***
### latestStorageOptions()
```ts
abstract latestStorageOptions(): Promise<undefined | null | Record<string, string>>
```
Get the latest storage options, refreshing from provider if configured.
This method is useful for credential vending scenarios where storage options
may be refreshed dynamically. If no dynamic provider is configured, this
returns the initial static options.
Warning: This is an internal API and the return value is subject to change.
#### Returns
`Promise`&lt;`undefined` \| `null` \| `Record`&lt;`string`, `string`&gt;&gt;
The storage options, or undefined if no storage options were configured.
***
### listIndices()
```ts
@@ -748,11 +705,8 @@ Create a query that returns a subset of the rows in the table.
#### Parameters
* **rowIds**: readonly (`number` \| `bigint`)[]
* **rowIds**: `number`[]
The row ids of the rows to return.
Row ids returned by `withRowId()` are `bigint`, so `bigint[]` is supported.
For convenience / backwards compatibility, `number[]` is also accepted (for
small row ids that fit in a safe integer).
#### Returns

View File

@@ -1,71 +0,0 @@
List of third-party dependencies grouped by their license type.
Apache 2.0:
* error-prone annotations (com.google.errorprone:error_prone_annotations:2.28.0 - https://errorprone.info/error_prone_annotations)
Apache License 2.0:
* JsonNullable Jackson module (org.openapitools:jackson-databind-nullable:0.2.6 - https://github.com/OpenAPITools/jackson-databind-nullable)
Apache License V2.0:
* FlatBuffers Java API (com.google.flatbuffers:flatbuffers-java:23.5.26 - https://github.com/google/flatbuffers)
Apache License, Version 2.0:
* Apache Commons Codec (commons-codec:commons-codec:1.15 - https://commons.apache.org/proper/commons-codec/)
* Apache HttpClient (org.apache.httpcomponents.client5:httpclient5:5.2.1 - https://hc.apache.org/httpcomponents-client-5.0.x/5.2.1/httpclient5/)
* Apache HttpComponents Core HTTP/1.1 (org.apache.httpcomponents.core5:httpcore5:5.2 - https://hc.apache.org/httpcomponents-core-5.2.x/5.2/httpcore5/)
* Apache HttpComponents Core HTTP/2 (org.apache.httpcomponents.core5:httpcore5-h2:5.2 - https://hc.apache.org/httpcomponents-core-5.2.x/5.2/httpcore5-h2/)
* Arrow Format (org.apache.arrow:arrow-format:15.0.0 - https://arrow.apache.org/arrow-format/)
* Arrow Java C Data Interface (org.apache.arrow:arrow-c-data:15.0.0 - https://arrow.apache.org/arrow-c-data/)
* Arrow Java Dataset (org.apache.arrow:arrow-dataset:15.0.0 - https://arrow.apache.org/arrow-dataset/)
* Arrow Memory - Core (org.apache.arrow:arrow-memory-core:15.0.0 - https://arrow.apache.org/arrow-memory/arrow-memory-core/)
* Arrow Memory - Netty (org.apache.arrow:arrow-memory-netty:15.0.0 - https://arrow.apache.org/arrow-memory/arrow-memory-netty/)
* Arrow Vectors (org.apache.arrow:arrow-vector:15.0.0 - https://arrow.apache.org/arrow-vector/)
* Guava: Google Core Libraries for Java (com.google.guava:guava:33.3.1-jre - https://github.com/google/guava)
* J2ObjC Annotations (com.google.j2objc:j2objc-annotations:3.0.0 - https://github.com/google/j2objc/)
* Netty/Buffer (io.netty:netty-buffer:4.1.104.Final - https://netty.io/netty-buffer/)
* Netty/Common (io.netty:netty-common:4.1.104.Final - https://netty.io/netty-common/)
Apache-2.0:
* Apache Commons Lang (org.apache.commons:commons-lang3:3.18.0 - https://commons.apache.org/proper/commons-lang/)
* lance-namespace-apache-client (org.lance:lance-namespace-apache-client:0.4.5 - https://github.com/openapitools/openapi-generator)
* lance-namespace-core (org.lance:lance-namespace-core:0.4.5 - https://lance.org/format/namespace/lance-namespace-core/)
EDL 1.0:
* Jakarta Activation API jar (jakarta.activation:jakarta.activation-api:1.2.2 - https://github.com/eclipse-ee4j/jaf/jakarta.activation-api)
Eclipse Distribution License - v 1.0:
* Eclipse Collections API (org.eclipse.collections:eclipse-collections-api:11.1.0 - https://github.com/eclipse/eclipse-collections/eclipse-collections-api)
* Eclipse Collections Main Library (org.eclipse.collections:eclipse-collections:11.1.0 - https://github.com/eclipse/eclipse-collections/eclipse-collections)
* Jakarta XML Binding API (jakarta.xml.bind:jakarta.xml.bind-api:2.3.3 - https://github.com/eclipse-ee4j/jaxb-api/jakarta.xml.bind-api)
Eclipse Public License - v 1.0:
* Eclipse Collections API (org.eclipse.collections:eclipse-collections-api:11.1.0 - https://github.com/eclipse/eclipse-collections/eclipse-collections-api)
* Eclipse Collections Main Library (org.eclipse.collections:eclipse-collections:11.1.0 - https://github.com/eclipse/eclipse-collections/eclipse-collections)
The Apache Software License, Version 2.0:
* FindBugs-jsr305 (com.google.code.findbugs:jsr305:3.0.2 - http://findbugs.sourceforge.net/)
* Guava InternalFutureFailureAccess and InternalFutures (com.google.guava:failureaccess:1.0.2 - https://github.com/google/guava/failureaccess)
* Guava ListenableFuture only (com.google.guava:listenablefuture:9999.0-empty-to-avoid-conflict-with-guava - https://github.com/google/guava/listenablefuture)
* Jackson datatype: JSR310 (com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.16.0 - https://github.com/FasterXML/jackson-modules-java8/jackson-datatype-jsr310)
* Jackson module: Old JAXB Annotations (javax.xml.bind) (com.fasterxml.jackson.module:jackson-module-jaxb-annotations:2.17.1 - https://github.com/FasterXML/jackson-modules-base)
* Jackson-annotations (com.fasterxml.jackson.core:jackson-annotations:2.16.0 - https://github.com/FasterXML/jackson)
* Jackson-core (com.fasterxml.jackson.core:jackson-core:2.16.0 - https://github.com/FasterXML/jackson-core)
* jackson-databind (com.fasterxml.jackson.core:jackson-databind:2.15.2 - https://github.com/FasterXML/jackson)
* Jackson-JAXRS: base (com.fasterxml.jackson.jaxrs:jackson-jaxrs-base:2.17.1 - https://github.com/FasterXML/jackson-jaxrs-providers/jackson-jaxrs-base)
* Jackson-JAXRS: JSON (com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider:2.17.1 - https://github.com/FasterXML/jackson-jaxrs-providers/jackson-jaxrs-json-provider)
* JAR JNI Loader (org.questdb:jar-jni:1.1.1 - https://github.com/questdb/rust-maven-plugin)
* Lance Core (org.lance:lance-core:2.0.0 - https://lance.org/)
The MIT License:
* Checker Qual (org.checkerframework:checker-qual:3.43.0 - https://checkerframework.org/)

View File

@@ -1,71 +0,0 @@
List of third-party dependencies grouped by their license type.
Apache 2.0:
* error-prone annotations (com.google.errorprone:error_prone_annotations:2.28.0 - https://errorprone.info/error_prone_annotations)
Apache License 2.0:
* JsonNullable Jackson module (org.openapitools:jackson-databind-nullable:0.2.6 - https://github.com/OpenAPITools/jackson-databind-nullable)
Apache License V2.0:
* FlatBuffers Java API (com.google.flatbuffers:flatbuffers-java:23.5.26 - https://github.com/google/flatbuffers)
Apache License, Version 2.0:
* Apache Commons Codec (commons-codec:commons-codec:1.15 - https://commons.apache.org/proper/commons-codec/)
* Apache HttpClient (org.apache.httpcomponents.client5:httpclient5:5.2.1 - https://hc.apache.org/httpcomponents-client-5.0.x/5.2.1/httpclient5/)
* Apache HttpComponents Core HTTP/1.1 (org.apache.httpcomponents.core5:httpcore5:5.2 - https://hc.apache.org/httpcomponents-core-5.2.x/5.2/httpcore5/)
* Apache HttpComponents Core HTTP/2 (org.apache.httpcomponents.core5:httpcore5-h2:5.2 - https://hc.apache.org/httpcomponents-core-5.2.x/5.2/httpcore5-h2/)
* Arrow Format (org.apache.arrow:arrow-format:15.0.0 - https://arrow.apache.org/arrow-format/)
* Arrow Java C Data Interface (org.apache.arrow:arrow-c-data:15.0.0 - https://arrow.apache.org/arrow-c-data/)
* Arrow Java Dataset (org.apache.arrow:arrow-dataset:15.0.0 - https://arrow.apache.org/arrow-dataset/)
* Arrow Memory - Core (org.apache.arrow:arrow-memory-core:15.0.0 - https://arrow.apache.org/arrow-memory/arrow-memory-core/)
* Arrow Memory - Netty (org.apache.arrow:arrow-memory-netty:15.0.0 - https://arrow.apache.org/arrow-memory/arrow-memory-netty/)
* Arrow Vectors (org.apache.arrow:arrow-vector:15.0.0 - https://arrow.apache.org/arrow-vector/)
* Guava: Google Core Libraries for Java (com.google.guava:guava:33.3.1-jre - https://github.com/google/guava)
* J2ObjC Annotations (com.google.j2objc:j2objc-annotations:3.0.0 - https://github.com/google/j2objc/)
* Netty/Buffer (io.netty:netty-buffer:4.1.104.Final - https://netty.io/netty-buffer/)
* Netty/Common (io.netty:netty-common:4.1.104.Final - https://netty.io/netty-common/)
Apache-2.0:
* Apache Commons Lang (org.apache.commons:commons-lang3:3.18.0 - https://commons.apache.org/proper/commons-lang/)
* lance-namespace-apache-client (org.lance:lance-namespace-apache-client:0.4.5 - https://github.com/openapitools/openapi-generator)
* lance-namespace-core (org.lance:lance-namespace-core:0.4.5 - https://lance.org/format/namespace/lance-namespace-core/)
EDL 1.0:
* Jakarta Activation API jar (jakarta.activation:jakarta.activation-api:1.2.2 - https://github.com/eclipse-ee4j/jaf/jakarta.activation-api)
Eclipse Distribution License - v 1.0:
* Eclipse Collections API (org.eclipse.collections:eclipse-collections-api:11.1.0 - https://github.com/eclipse/eclipse-collections/eclipse-collections-api)
* Eclipse Collections Main Library (org.eclipse.collections:eclipse-collections:11.1.0 - https://github.com/eclipse/eclipse-collections/eclipse-collections)
* Jakarta XML Binding API (jakarta.xml.bind:jakarta.xml.bind-api:2.3.3 - https://github.com/eclipse-ee4j/jaxb-api/jakarta.xml.bind-api)
Eclipse Public License - v 1.0:
* Eclipse Collections API (org.eclipse.collections:eclipse-collections-api:11.1.0 - https://github.com/eclipse/eclipse-collections/eclipse-collections-api)
* Eclipse Collections Main Library (org.eclipse.collections:eclipse-collections:11.1.0 - https://github.com/eclipse/eclipse-collections/eclipse-collections)
The Apache Software License, Version 2.0:
* FindBugs-jsr305 (com.google.code.findbugs:jsr305:3.0.2 - http://findbugs.sourceforge.net/)
* Guava InternalFutureFailureAccess and InternalFutures (com.google.guava:failureaccess:1.0.2 - https://github.com/google/guava/failureaccess)
* Guava ListenableFuture only (com.google.guava:listenablefuture:9999.0-empty-to-avoid-conflict-with-guava - https://github.com/google/guava/listenablefuture)
* Jackson datatype: JSR310 (com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.16.0 - https://github.com/FasterXML/jackson-modules-java8/jackson-datatype-jsr310)
* Jackson module: Old JAXB Annotations (javax.xml.bind) (com.fasterxml.jackson.module:jackson-module-jaxb-annotations:2.17.1 - https://github.com/FasterXML/jackson-modules-base)
* Jackson-annotations (com.fasterxml.jackson.core:jackson-annotations:2.16.0 - https://github.com/FasterXML/jackson)
* Jackson-core (com.fasterxml.jackson.core:jackson-core:2.16.0 - https://github.com/FasterXML/jackson-core)
* jackson-databind (com.fasterxml.jackson.core:jackson-databind:2.15.2 - https://github.com/FasterXML/jackson)
* Jackson-JAXRS: base (com.fasterxml.jackson.jaxrs:jackson-jaxrs-base:2.17.1 - https://github.com/FasterXML/jackson-jaxrs-providers/jackson-jaxrs-base)
* Jackson-JAXRS: JSON (com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider:2.17.1 - https://github.com/FasterXML/jackson-jaxrs-providers/jackson-jaxrs-json-provider)
* JAR JNI Loader (org.questdb:jar-jni:1.1.1 - https://github.com/questdb/rust-maven-plugin)
* Lance Core (org.lance:lance-core:2.0.0 - https://lance.org/)
The MIT License:
* Checker Qual (org.checkerframework:checker-qual:3.43.0 - https://checkerframework.org/)

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.27.0-beta.2</version>
<version>0.24.1-final.0</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.27.0-beta.2</version>
<version>0.24.1-final.0</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>
@@ -28,7 +28,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<arrow.version>15.0.0</arrow.version>
<lance-core.version>3.1.0-beta.2</lance-core.version>
<lance-core.version>1.0.0-rc.2</lance-core.version>
<spotless.skip>false</spotless.skip>
<spotless.version>2.30.0</spotless.version>
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
@@ -160,19 +160,6 @@
<groupId>com.diffplug.spotless</groupId>
<artifactId>spotless-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>license-maven-plugin</artifactId>
<version>2.4.0</version>
<configuration>
<outputDirectory>${project.basedir}</outputDirectory>
<thirdPartyFilename>JAVA_THIRD_PARTY_LICENSES.md</thirdPartyFilename>
<fileTemplate>/org/codehaus/mojo/license/third-party-file-groupByLicense.ftl</fileTemplate>
<includedScopes>compile,runtime</includedScopes>
<excludedScopes>test,provided</excludedScopes>
<sortArtifactByName>true</sortArtifactByName>
</configuration>
</plugin>
</plugins>
<pluginManagement>
<plugins>
@@ -305,12 +292,11 @@
<plugin>
<groupId>org.sonatype.central</groupId>
<artifactId>central-publishing-maven-plugin</artifactId>
<version>0.8.0</version>
<version>0.4.0</version>
<extensions>true</extensions>
<configuration>
<publishingServerId>ossrh</publishingServerId>
<tokenAuth>true</tokenAuth>
<autoPublish>true</autoPublish>
</configuration>
</plugin>
<plugin>

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.27.0-beta.2"
version = "0.24.1"
license.workspace = true
description.workspace = true
repository.workspace = true
@@ -19,11 +19,11 @@ arrow-schema.workspace = true
env_logger.workspace = true
futures.workspace = true
lancedb = { path = "../rust/lancedb", default-features = false }
napi = { version = "3.8.3", default-features = false, features = [
napi = { version = "2.16.8", default-features = false, features = [
"napi9",
"async"
] }
napi-derive = "3.5.2"
napi-derive = "2.16.4"
# Prevent dynamic linking of lzma, which comes from datafusion
lzma-sys = { version = "*", features = ["static"] }
log.workspace = true
@@ -33,7 +33,7 @@ aws-lc-sys = "=0.28.0"
aws-lc-rs = "=1.13.0"
[build-dependencies]
napi-build = "2.3.1"
napi-build = "2.1"
[features]
default = ["remote", "lancedb/aws", "lancedb/gcs", "lancedb/azure", "lancedb/dynamodb", "lancedb/oss", "lancedb/huggingface"]

View File

@@ -1,668 +0,0 @@
[@75lb/deep-merge@1.1.2](https://github.com/75lb/deep-merge) - MIT
[@aashutoshrathi/word-wrap@1.2.6](https://github.com/aashutoshrathi/word-wrap) - MIT
[@ampproject/remapping@2.2.1](https://github.com/ampproject/remapping) - Apache-2.0
[@aws-crypto/crc32@3.0.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
[@aws-crypto/crc32c@3.0.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
[@aws-crypto/ie11-detection@3.0.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
[@aws-crypto/sha1-browser@3.0.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
[@aws-crypto/sha256-browser@3.0.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
[@aws-crypto/sha256-browser@5.2.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
[@aws-crypto/sha256-js@3.0.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
[@aws-crypto/sha256-js@5.2.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
[@aws-crypto/supports-web-crypto@3.0.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
[@aws-crypto/supports-web-crypto@5.2.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
[@aws-crypto/util@3.0.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
[@aws-crypto/util@5.2.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
[@aws-sdk/client-dynamodb@3.602.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/client-kms@3.549.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/client-s3@3.550.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/client-sso-oidc@3.549.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/client-sso-oidc@3.600.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/client-sso@3.549.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/client-sso@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/client-sts@3.549.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/client-sts@3.600.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/core@3.549.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/core@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/credential-provider-env@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/credential-provider-env@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/credential-provider-http@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/credential-provider-http@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/credential-provider-ini@3.549.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/credential-provider-ini@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/credential-provider-node@3.549.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/credential-provider-node@3.600.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/credential-provider-process@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/credential-provider-process@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/credential-provider-sso@3.549.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/credential-provider-sso@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/credential-provider-web-identity@3.549.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/credential-provider-web-identity@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/endpoint-cache@3.572.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/middleware-bucket-endpoint@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/middleware-endpoint-discovery@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/middleware-expect-continue@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/middleware-flexible-checksums@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/middleware-host-header@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/middleware-host-header@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/middleware-location-constraint@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/middleware-logger@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/middleware-logger@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/middleware-recursion-detection@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/middleware-recursion-detection@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/middleware-sdk-s3@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/middleware-signing@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/middleware-ssec@3.537.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/middleware-user-agent@3.540.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/middleware-user-agent@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/region-config-resolver@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/region-config-resolver@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/signature-v4-multi-region@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/token-providers@3.549.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/token-providers@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/types@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/types@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/util-arn-parser@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/util-endpoints@3.540.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/util-endpoints@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/util-locate-window@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/util-user-agent-browser@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/util-user-agent-browser@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/util-user-agent-node@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/util-user-agent-node@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/util-utf8-browser@3.259.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@aws-sdk/xml-builder@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
[@babel/code-frame@7.26.2](https://github.com/babel/babel) - MIT
[@babel/compat-data@7.23.5](https://github.com/babel/babel) - MIT
[@babel/core@7.23.7](https://github.com/babel/babel) - MIT
[@babel/generator@7.23.6](https://github.com/babel/babel) - MIT
[@babel/helper-compilation-targets@7.23.6](https://github.com/babel/babel) - MIT
[@babel/helper-environment-visitor@7.22.20](https://github.com/babel/babel) - MIT
[@babel/helper-function-name@7.23.0](https://github.com/babel/babel) - MIT
[@babel/helper-hoist-variables@7.22.5](https://github.com/babel/babel) - MIT
[@babel/helper-module-imports@7.22.15](https://github.com/babel/babel) - MIT
[@babel/helper-module-transforms@7.23.3](https://github.com/babel/babel) - MIT
[@babel/helper-plugin-utils@7.22.5](https://github.com/babel/babel) - MIT
[@babel/helper-simple-access@7.22.5](https://github.com/babel/babel) - MIT
[@babel/helper-split-export-declaration@7.22.6](https://github.com/babel/babel) - MIT
[@babel/helper-string-parser@7.25.9](https://github.com/babel/babel) - MIT
[@babel/helper-validator-identifier@7.25.9](https://github.com/babel/babel) - MIT
[@babel/helper-validator-option@7.23.5](https://github.com/babel/babel) - MIT
[@babel/helpers@7.27.0](https://github.com/babel/babel) - MIT
[@babel/parser@7.27.0](https://github.com/babel/babel) - MIT
[@babel/plugin-syntax-async-generators@7.8.4](https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-async-generators) - MIT
[@babel/plugin-syntax-bigint@7.8.3](https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-bigint) - MIT
[@babel/plugin-syntax-class-properties@7.12.13](https://github.com/babel/babel) - MIT
[@babel/plugin-syntax-import-meta@7.10.4](https://github.com/babel/babel) - MIT
[@babel/plugin-syntax-json-strings@7.8.3](https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-json-strings) - MIT
[@babel/plugin-syntax-jsx@7.23.3](https://github.com/babel/babel) - MIT
[@babel/plugin-syntax-logical-assignment-operators@7.10.4](https://github.com/babel/babel) - MIT
[@babel/plugin-syntax-nullish-coalescing-operator@7.8.3](https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-nullish-coalescing-operator) - MIT
[@babel/plugin-syntax-numeric-separator@7.10.4](https://github.com/babel/babel) - MIT
[@babel/plugin-syntax-object-rest-spread@7.8.3](https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-object-rest-spread) - MIT
[@babel/plugin-syntax-optional-catch-binding@7.8.3](https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-optional-catch-binding) - MIT
[@babel/plugin-syntax-optional-chaining@7.8.3](https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-optional-chaining) - MIT
[@babel/plugin-syntax-top-level-await@7.14.5](https://github.com/babel/babel) - MIT
[@babel/plugin-syntax-typescript@7.23.3](https://github.com/babel/babel) - MIT
[@babel/template@7.27.0](https://github.com/babel/babel) - MIT
[@babel/traverse@7.23.7](https://github.com/babel/babel) - MIT
[@babel/types@7.27.0](https://github.com/babel/babel) - MIT
[@bcoe/v8-coverage@0.2.3](https://github.com/demurgos/v8-coverage) - MIT
[@biomejs/biome@1.8.3](https://github.com/biomejs/biome) - MIT OR Apache-2.0
[@biomejs/cli-darwin-arm64@1.8.3](https://github.com/biomejs/biome) - MIT OR Apache-2.0
[@eslint-community/eslint-utils@4.4.0](https://github.com/eslint-community/eslint-utils) - MIT
[@eslint-community/regexpp@4.10.0](https://github.com/eslint-community/regexpp) - MIT
[@eslint/eslintrc@2.1.4](https://github.com/eslint/eslintrc) - MIT
[@eslint/js@8.57.0](https://github.com/eslint/eslint) - MIT
[@huggingface/jinja@0.3.2](https://github.com/huggingface/huggingface.js) - MIT
[@huggingface/transformers@3.0.2](https://github.com/huggingface/transformers.js) - Apache-2.0
[@humanwhocodes/config-array@0.11.14](https://github.com/humanwhocodes/config-array) - Apache-2.0
[@humanwhocodes/module-importer@1.0.1](https://github.com/humanwhocodes/module-importer) - Apache-2.0
[@humanwhocodes/object-schema@2.0.2](https://github.com/humanwhocodes/object-schema) - BSD-3-Clause
[@img/sharp-darwin-arm64@0.33.5](https://github.com/lovell/sharp) - Apache-2.0
[@img/sharp-libvips-darwin-arm64@1.0.4](https://github.com/lovell/sharp-libvips) - LGPL-3.0-or-later
[@isaacs/cliui@8.0.2](https://github.com/yargs/cliui) - ISC
[@isaacs/fs-minipass@4.0.1](https://github.com/npm/fs-minipass) - ISC
[@istanbuljs/load-nyc-config@1.1.0](https://github.com/istanbuljs/load-nyc-config) - ISC
[@istanbuljs/schema@0.1.3](https://github.com/istanbuljs/schema) - MIT
[@jest/console@29.7.0](https://github.com/jestjs/jest) - MIT
[@jest/core@29.7.0](https://github.com/jestjs/jest) - MIT
[@jest/environment@29.7.0](https://github.com/jestjs/jest) - MIT
[@jest/expect-utils@29.7.0](https://github.com/jestjs/jest) - MIT
[@jest/expect@29.7.0](https://github.com/jestjs/jest) - MIT
[@jest/fake-timers@29.7.0](https://github.com/jestjs/jest) - MIT
[@jest/globals@29.7.0](https://github.com/jestjs/jest) - MIT
[@jest/reporters@29.7.0](https://github.com/jestjs/jest) - MIT
[@jest/schemas@29.6.3](https://github.com/jestjs/jest) - MIT
[@jest/source-map@29.6.3](https://github.com/jestjs/jest) - MIT
[@jest/test-result@29.7.0](https://github.com/jestjs/jest) - MIT
[@jest/test-sequencer@29.7.0](https://github.com/jestjs/jest) - MIT
[@jest/transform@29.7.0](https://github.com/jestjs/jest) - MIT
[@jest/types@29.6.3](https://github.com/jestjs/jest) - MIT
[@jridgewell/gen-mapping@0.3.3](https://github.com/jridgewell/gen-mapping) - MIT
[@jridgewell/resolve-uri@3.1.1](https://github.com/jridgewell/resolve-uri) - MIT
[@jridgewell/set-array@1.1.2](https://github.com/jridgewell/set-array) - MIT
[@jridgewell/sourcemap-codec@1.4.15](https://github.com/jridgewell/sourcemap-codec) - MIT
[@jridgewell/trace-mapping@0.3.22](https://github.com/jridgewell/trace-mapping) - MIT
[@lancedb/lancedb@0.26.2](https://github.com/lancedb/lancedb) - Apache-2.0
[@napi-rs/cli@2.18.3](https://github.com/napi-rs/napi-rs) - MIT
[@nodelib/fs.scandir@2.1.5](https://github.com/nodelib/nodelib/tree/master/packages/fs/fs.scandir) - MIT
[@nodelib/fs.stat@2.0.5](https://github.com/nodelib/nodelib/tree/master/packages/fs/fs.stat) - MIT
[@nodelib/fs.walk@1.2.8](https://github.com/nodelib/nodelib/tree/master/packages/fs/fs.walk) - MIT
[@pkgjs/parseargs@0.11.0](https://github.com/pkgjs/parseargs) - MIT
[@protobufjs/aspromise@1.1.2](https://github.com/dcodeIO/protobuf.js) - BSD-3-Clause
[@protobufjs/base64@1.1.2](https://github.com/dcodeIO/protobuf.js) - BSD-3-Clause
[@protobufjs/codegen@2.0.4](https://github.com/dcodeIO/protobuf.js) - BSD-3-Clause
[@protobufjs/eventemitter@1.1.0](https://github.com/dcodeIO/protobuf.js) - BSD-3-Clause
[@protobufjs/fetch@1.1.0](https://github.com/dcodeIO/protobuf.js) - BSD-3-Clause
[@protobufjs/float@1.0.2](https://github.com/dcodeIO/protobuf.js) - BSD-3-Clause
[@protobufjs/inquire@1.1.0](https://github.com/dcodeIO/protobuf.js) - BSD-3-Clause
[@protobufjs/path@1.1.2](https://github.com/dcodeIO/protobuf.js) - BSD-3-Clause
[@protobufjs/pool@1.1.0](https://github.com/dcodeIO/protobuf.js) - BSD-3-Clause
[@protobufjs/utf8@1.1.0](https://github.com/dcodeIO/protobuf.js) - BSD-3-Clause
[@shikijs/core@1.10.3](https://github.com/shikijs/shiki) - MIT
[@sinclair/typebox@0.27.8](https://github.com/sinclairzx81/typebox) - MIT
[@sinonjs/commons@3.0.1](https://github.com/sinonjs/commons) - BSD-3-Clause
[@sinonjs/fake-timers@10.3.0](https://github.com/sinonjs/fake-timers) - BSD-3-Clause
[@smithy/abort-controller@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/abort-controller@3.1.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/chunked-blob-reader-native@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/chunked-blob-reader@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/config-resolver@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/config-resolver@3.0.3](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/core@1.4.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/core@2.2.3](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/credential-provider-imds@2.3.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/credential-provider-imds@3.1.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/eventstream-codec@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/eventstream-serde-browser@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/eventstream-serde-config-resolver@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/eventstream-serde-node@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/eventstream-serde-universal@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/fetch-http-handler@2.5.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/fetch-http-handler@3.1.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/hash-blob-browser@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/hash-node@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/hash-node@3.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/hash-stream-node@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/invalid-dependency@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/invalid-dependency@3.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/is-array-buffer@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/is-array-buffer@3.0.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/md5-js@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/middleware-content-length@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/middleware-content-length@3.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/middleware-endpoint@2.5.1](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/middleware-endpoint@3.0.3](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/middleware-retry@2.3.1](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/middleware-retry@3.0.6](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/middleware-serde@2.3.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/middleware-serde@3.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/middleware-stack@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/middleware-stack@3.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/node-config-provider@2.3.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/node-config-provider@3.1.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/node-http-handler@2.5.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/node-http-handler@3.1.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/property-provider@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/property-provider@3.1.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/protocol-http@3.3.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/protocol-http@4.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/querystring-builder@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/querystring-builder@3.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/querystring-parser@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/querystring-parser@3.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/service-error-classification@2.1.5](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/service-error-classification@3.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/shared-ini-file-loader@2.4.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/shared-ini-file-loader@3.1.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/signature-v4@2.2.1](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/signature-v4@3.1.1](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/smithy-client@2.5.1](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/smithy-client@3.1.4](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/types@2.12.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/types@3.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/url-parser@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/url-parser@3.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-base64@2.3.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-base64@3.0.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-body-length-browser@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-body-length-browser@3.0.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-body-length-node@2.3.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-body-length-node@3.0.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-buffer-from@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-buffer-from@3.0.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-config-provider@2.3.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-config-provider@3.0.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-defaults-mode-browser@2.2.1](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-defaults-mode-browser@3.0.6](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-defaults-mode-node@2.3.1](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-defaults-mode-node@3.0.6](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-endpoints@1.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-endpoints@2.0.3](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-hex-encoding@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-hex-encoding@3.0.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-middleware@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-middleware@3.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-retry@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-retry@3.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-stream@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-stream@3.0.4](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-uri-escape@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-uri-escape@3.0.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-utf8@2.3.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-utf8@3.0.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-waiter@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@smithy/util-waiter@3.1.1](https://github.com/awslabs/smithy-typescript) - Apache-2.0
[@swc/helpers@0.5.12](https://github.com/swc-project/swc) - Apache-2.0
[@types/axios@0.14.0](https://github.com/mzabriskie/axios) - MIT
[@types/babel__core@7.20.5](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/babel__generator@7.6.8](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/babel__template@7.4.4](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/babel__traverse@7.20.5](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/command-line-args@5.2.3](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/command-line-usage@5.0.2](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/command-line-usage@5.0.4](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/graceful-fs@4.1.9](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/hast@3.0.4](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/istanbul-lib-coverage@2.0.6](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/istanbul-lib-report@3.0.3](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/istanbul-reports@3.0.4](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/jest@29.5.12](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/json-schema@7.0.15](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/node-fetch@2.6.11](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/node@18.19.26](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/node@20.16.10](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/node@20.17.9](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/node@22.7.4](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/semver@7.5.6](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/stack-utils@2.0.3](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/tmp@0.2.6](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/unist@3.0.2](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/yargs-parser@21.0.3](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@types/yargs@17.0.32](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
[@typescript-eslint/eslint-plugin@7.1.0](https://github.com/typescript-eslint/typescript-eslint) - MIT
[@typescript-eslint/parser@7.1.0](https://github.com/typescript-eslint/typescript-eslint) - BSD-2-Clause
[@typescript-eslint/scope-manager@7.1.0](https://github.com/typescript-eslint/typescript-eslint) - MIT
[@typescript-eslint/type-utils@7.1.0](https://github.com/typescript-eslint/typescript-eslint) - MIT
[@typescript-eslint/types@7.1.0](https://github.com/typescript-eslint/typescript-eslint) - MIT
[@typescript-eslint/typescript-estree@7.1.0](https://github.com/typescript-eslint/typescript-eslint) - BSD-2-Clause
[@typescript-eslint/utils@7.1.0](https://github.com/typescript-eslint/typescript-eslint) - MIT
[@typescript-eslint/visitor-keys@7.1.0](https://github.com/typescript-eslint/typescript-eslint) - MIT
[@ungap/structured-clone@1.2.0](https://github.com/ungap/structured-clone) - ISC
[abort-controller@3.0.0](https://github.com/mysticatea/abort-controller) - MIT
[acorn-jsx@5.3.2](https://github.com/acornjs/acorn-jsx) - MIT
[acorn@8.11.3](https://github.com/acornjs/acorn) - MIT
[agentkeepalive@4.5.0](https://github.com/node-modules/agentkeepalive) - MIT
[ajv@6.12.6](https://github.com/ajv-validator/ajv) - MIT
[ansi-escapes@4.3.2](https://github.com/sindresorhus/ansi-escapes) - MIT
[ansi-regex@5.0.1](https://github.com/chalk/ansi-regex) - MIT
[ansi-regex@6.1.0](https://github.com/chalk/ansi-regex) - MIT
[ansi-styles@4.3.0](https://github.com/chalk/ansi-styles) - MIT
[ansi-styles@5.2.0](https://github.com/chalk/ansi-styles) - MIT
[ansi-styles@6.2.1](https://github.com/chalk/ansi-styles) - MIT
[anymatch@3.1.3](https://github.com/micromatch/anymatch) - ISC
[apache-arrow@15.0.0](https://github.com/apache/arrow) - Apache-2.0
[apache-arrow@16.0.0](https://github.com/apache/arrow) - Apache-2.0
[apache-arrow@17.0.0](https://github.com/apache/arrow) - Apache-2.0
[apache-arrow@18.0.0](https://github.com/apache/arrow) - Apache-2.0
[argparse@1.0.10](https://github.com/nodeca/argparse) - MIT
[argparse@2.0.1](https://github.com/nodeca/argparse) - Python-2.0
[array-back@3.1.0](https://github.com/75lb/array-back) - MIT
[array-back@6.2.2](https://github.com/75lb/array-back) - MIT
[array-union@2.1.0](https://github.com/sindresorhus/array-union) - MIT
[asynckit@0.4.0](https://github.com/alexindigo/asynckit) - MIT
[axios@1.8.4](https://github.com/axios/axios) - MIT
[babel-jest@29.7.0](https://github.com/jestjs/jest) - MIT
[babel-plugin-istanbul@6.1.1](https://github.com/istanbuljs/babel-plugin-istanbul) - BSD-3-Clause
[babel-plugin-jest-hoist@29.6.3](https://github.com/jestjs/jest) - MIT
[babel-preset-current-node-syntax@1.0.1](https://github.com/nicolo-ribaudo/babel-preset-current-node-syntax) - MIT
[babel-preset-jest@29.6.3](https://github.com/jestjs/jest) - MIT
[balanced-match@1.0.2](https://github.com/juliangruber/balanced-match) - MIT
[base-64@0.1.0](https://github.com/mathiasbynens/base64) - MIT
[bowser@2.11.0](https://github.com/lancedikson/bowser) - MIT
[brace-expansion@1.1.11](https://github.com/juliangruber/brace-expansion) - MIT
[brace-expansion@2.0.1](https://github.com/juliangruber/brace-expansion) - MIT
[braces@3.0.3](https://github.com/micromatch/braces) - MIT
[browserslist@4.22.2](https://github.com/browserslist/browserslist) - MIT
[bs-logger@0.2.6](https://github.com/huafu/bs-logger) - MIT
[bser@2.1.1](https://github.com/facebook/watchman) - Apache-2.0
[buffer-from@1.1.2](https://github.com/LinusU/buffer-from) - MIT
[callsites@3.1.0](https://github.com/sindresorhus/callsites) - MIT
[camelcase@5.3.1](https://github.com/sindresorhus/camelcase) - MIT
[camelcase@6.3.0](https://github.com/sindresorhus/camelcase) - MIT
[caniuse-lite@1.0.30001579](https://github.com/browserslist/caniuse-lite) - CC-BY-4.0
[chalk-template@0.4.0](https://github.com/chalk/chalk-template) - MIT
[chalk@4.1.2](https://github.com/chalk/chalk) - MIT
[char-regex@1.0.2](https://github.com/Richienb/char-regex) - MIT
[charenc@0.0.2](https://github.com/pvorb/node-charenc) - BSD-3-Clause
[chownr@3.0.0](https://github.com/isaacs/chownr) - BlueOak-1.0.0
[ci-info@3.9.0](https://github.com/watson/ci-info) - MIT
[cjs-module-lexer@1.2.3](https://github.com/nodejs/cjs-module-lexer) - MIT
[cliui@8.0.1](https://github.com/yargs/cliui) - ISC
[co@4.6.0](https://github.com/tj/co) - MIT
[collect-v8-coverage@1.0.2](https://github.com/SimenB/collect-v8-coverage) - MIT
[color-convert@2.0.1](https://github.com/Qix-/color-convert) - MIT
[color-name@1.1.4](https://github.com/colorjs/color-name) - MIT
[color-string@1.9.1](https://github.com/Qix-/color-string) - MIT
[color@4.2.3](https://github.com/Qix-/color) - MIT
[combined-stream@1.0.8](https://github.com/felixge/node-combined-stream) - MIT
[command-line-args@5.2.1](https://github.com/75lb/command-line-args) - MIT
[command-line-usage@7.0.1](https://github.com/75lb/command-line-usage) - MIT
[concat-map@0.0.1](https://github.com/substack/node-concat-map) - MIT
[convert-source-map@2.0.0](https://github.com/thlorenz/convert-source-map) - MIT
[create-jest@29.7.0](https://github.com/jestjs/jest) - MIT
[cross-spawn@7.0.6](https://github.com/moxystudio/node-cross-spawn) - MIT
[crypt@0.0.2](https://github.com/pvorb/node-crypt) - BSD-3-Clause
[debug@4.3.4](https://github.com/debug-js/debug) - MIT
[dedent@1.5.1](https://github.com/dmnd/dedent) - MIT
[deep-is@0.1.4](https://github.com/thlorenz/deep-is) - MIT
[deepmerge@4.3.1](https://github.com/TehShrike/deepmerge) - MIT
[delayed-stream@1.0.0](https://github.com/felixge/node-delayed-stream) - MIT
[detect-libc@2.0.3](https://github.com/lovell/detect-libc) - Apache-2.0
[detect-newline@3.1.0](https://github.com/sindresorhus/detect-newline) - MIT
[diff-sequences@29.6.3](https://github.com/jestjs/jest) - MIT
[digest-fetch@1.3.0](https://github.com/devfans/digest-fetch) - ISC
[dir-glob@3.0.1](https://github.com/kevva/dir-glob) - MIT
[doctrine@3.0.0](https://github.com/eslint/doctrine) - Apache-2.0
[eastasianwidth@0.2.0](https://github.com/komagata/eastasianwidth) - MIT
[electron-to-chromium@1.4.642](https://github.com/kilian/electron-to-chromium) - ISC
[emittery@0.13.1](https://github.com/sindresorhus/emittery) - MIT
[emoji-regex@8.0.0](https://github.com/mathiasbynens/emoji-regex) - MIT
[emoji-regex@9.2.2](https://github.com/mathiasbynens/emoji-regex) - MIT
[entities@4.5.0](https://github.com/fb55/entities) - BSD-2-Clause
[error-ex@1.3.2](https://github.com/qix-/node-error-ex) - MIT
[escalade@3.1.1](https://github.com/lukeed/escalade) - MIT
[escape-string-regexp@2.0.0](https://github.com/sindresorhus/escape-string-regexp) - MIT
[escape-string-regexp@4.0.0](https://github.com/sindresorhus/escape-string-regexp) - MIT
[eslint-scope@7.2.2](https://github.com/eslint/eslint-scope) - BSD-2-Clause
[eslint-visitor-keys@3.4.3](https://github.com/eslint/eslint-visitor-keys) - Apache-2.0
[eslint@8.57.0](https://github.com/eslint/eslint) - MIT
[espree@9.6.1](https://github.com/eslint/espree) - BSD-2-Clause
[esprima@4.0.1](https://github.com/jquery/esprima) - BSD-2-Clause
[esquery@1.5.0](https://github.com/estools/esquery) - BSD-3-Clause
[esrecurse@4.3.0](https://github.com/estools/esrecurse) - BSD-2-Clause
[estraverse@5.3.0](https://github.com/estools/estraverse) - BSD-2-Clause
[esutils@2.0.3](https://github.com/estools/esutils) - BSD-2-Clause
[event-target-shim@5.0.1](https://github.com/mysticatea/event-target-shim) - MIT
[execa@5.1.1](https://github.com/sindresorhus/execa) - MIT
[exit@0.1.2](https://github.com/cowboy/node-exit) - MIT
[expect@29.7.0](https://github.com/jestjs/jest) - MIT
[fast-deep-equal@3.1.3](https://github.com/epoberezkin/fast-deep-equal) - MIT
[fast-glob@3.3.2](https://github.com/mrmlnc/fast-glob) - MIT
[fast-json-stable-stringify@2.1.0](https://github.com/epoberezkin/fast-json-stable-stringify) - MIT
[fast-levenshtein@2.0.6](https://github.com/hiddentao/fast-levenshtein) - MIT
[fast-xml-parser@4.2.5](https://github.com/NaturalIntelligence/fast-xml-parser) - MIT
[fastq@1.16.0](https://github.com/mcollina/fastq) - ISC
[fb-watchman@2.0.2](https://github.com/facebook/watchman) - Apache-2.0
[file-entry-cache@6.0.1](https://github.com/royriojas/file-entry-cache) - MIT
[fill-range@7.1.1](https://github.com/jonschlinkert/fill-range) - MIT
[find-replace@3.0.0](https://github.com/75lb/find-replace) - MIT
[find-up@4.1.0](https://github.com/sindresorhus/find-up) - MIT
[find-up@5.0.0](https://github.com/sindresorhus/find-up) - MIT
[flat-cache@3.2.0](https://github.com/jaredwray/flat-cache) - MIT
[flatbuffers@1.12.0](https://github.com/google/flatbuffers) - Apache*
[flatbuffers@23.5.26](https://github.com/google/flatbuffers) - Apache*
[flatbuffers@24.3.25](https://github.com/google/flatbuffers) - Apache-2.0
[flatted@3.2.9](https://github.com/WebReflection/flatted) - ISC
[follow-redirects@1.15.6](https://github.com/follow-redirects/follow-redirects) - MIT
[foreground-child@3.3.0](https://github.com/tapjs/foreground-child) - ISC
[form-data-encoder@1.7.2](https://github.com/octet-stream/form-data-encoder) - MIT
[form-data@4.0.0](https://github.com/form-data/form-data) - MIT
[formdata-node@4.4.1](https://github.com/octet-stream/form-data) - MIT
[fs.realpath@1.0.0](https://github.com/isaacs/fs.realpath) - ISC
[fsevents@2.3.3](https://github.com/fsevents/fsevents) - MIT
[function-bind@1.1.2](https://github.com/Raynos/function-bind) - MIT
[gensync@1.0.0-beta.2](https://github.com/loganfsmyth/gensync) - MIT
[get-caller-file@2.0.5](https://github.com/stefanpenner/get-caller-file) - ISC
[get-package-type@0.1.0](https://github.com/cfware/get-package-type) - MIT
[get-stream@6.0.1](https://github.com/sindresorhus/get-stream) - MIT
[glob-parent@5.1.2](https://github.com/gulpjs/glob-parent) - ISC
[glob-parent@6.0.2](https://github.com/gulpjs/glob-parent) - ISC
[glob@10.4.5](https://github.com/isaacs/node-glob) - ISC
[glob@7.2.3](https://github.com/isaacs/node-glob) - ISC
[globals@11.12.0](https://github.com/sindresorhus/globals) - MIT
[globals@13.24.0](https://github.com/sindresorhus/globals) - MIT
[globby@11.1.0](https://github.com/sindresorhus/globby) - MIT
[graceful-fs@4.2.11](https://github.com/isaacs/node-graceful-fs) - ISC
[graphemer@1.4.0](https://github.com/flmnt/graphemer) - MIT
[guid-typescript@1.0.9](https://github.com/NicolasDeveloper/guid-typescript) - ISC
[has-flag@4.0.0](https://github.com/sindresorhus/has-flag) - MIT
[hasown@2.0.0](https://github.com/inspect-js/hasOwn) - MIT
[html-escaper@2.0.2](https://github.com/WebReflection/html-escaper) - MIT
[human-signals@2.1.0](https://github.com/ehmicky/human-signals) - Apache-2.0
[humanize-ms@1.2.1](https://github.com/node-modules/humanize-ms) - MIT
[ignore@5.3.0](https://github.com/kaelzhang/node-ignore) - MIT
[import-fresh@3.3.0](https://github.com/sindresorhus/import-fresh) - MIT
[import-local@3.1.0](https://github.com/sindresorhus/import-local) - MIT
[imurmurhash@0.1.4](https://github.com/jensyt/imurmurhash-js) - MIT
[inflight@1.0.6](https://github.com/npm/inflight) - ISC
[inherits@2.0.4](https://github.com/isaacs/inherits) - ISC
[interpret@1.4.0](https://github.com/gulpjs/interpret) - MIT
[is-arrayish@0.2.1](https://github.com/qix-/node-is-arrayish) - MIT
[is-arrayish@0.3.2](https://github.com/qix-/node-is-arrayish) - MIT
[is-buffer@1.1.6](https://github.com/feross/is-buffer) - MIT
[is-core-module@2.13.1](https://github.com/inspect-js/is-core-module) - MIT
[is-extglob@2.1.1](https://github.com/jonschlinkert/is-extglob) - MIT
[is-fullwidth-code-point@3.0.0](https://github.com/sindresorhus/is-fullwidth-code-point) - MIT
[is-generator-fn@2.1.0](https://github.com/sindresorhus/is-generator-fn) - MIT
[is-glob@4.0.3](https://github.com/micromatch/is-glob) - MIT
[is-number@7.0.0](https://github.com/jonschlinkert/is-number) - MIT
[is-path-inside@3.0.3](https://github.com/sindresorhus/is-path-inside) - MIT
[is-stream@2.0.1](https://github.com/sindresorhus/is-stream) - MIT
[isexe@2.0.0](https://github.com/isaacs/isexe) - ISC
[istanbul-lib-coverage@3.2.2](https://github.com/istanbuljs/istanbuljs) - BSD-3-Clause
[istanbul-lib-instrument@5.2.1](https://github.com/istanbuljs/istanbuljs) - BSD-3-Clause
[istanbul-lib-instrument@6.0.1](https://github.com/istanbuljs/istanbuljs) - BSD-3-Clause
[istanbul-lib-report@3.0.1](https://github.com/istanbuljs/istanbuljs) - BSD-3-Clause
[istanbul-lib-source-maps@4.0.1](https://github.com/istanbuljs/istanbuljs) - BSD-3-Clause
[istanbul-reports@3.1.6](https://github.com/istanbuljs/istanbuljs) - BSD-3-Clause
[jackspeak@3.4.3](https://github.com/isaacs/jackspeak) - BlueOak-1.0.0
[jest-changed-files@29.7.0](https://github.com/jestjs/jest) - MIT
[jest-circus@29.7.0](https://github.com/jestjs/jest) - MIT
[jest-cli@29.7.0](https://github.com/jestjs/jest) - MIT
[jest-config@29.7.0](https://github.com/jestjs/jest) - MIT
[jest-diff@29.7.0](https://github.com/jestjs/jest) - MIT
[jest-docblock@29.7.0](https://github.com/jestjs/jest) - MIT
[jest-each@29.7.0](https://github.com/jestjs/jest) - MIT
[jest-environment-node@29.7.0](https://github.com/jestjs/jest) - MIT
[jest-get-type@29.6.3](https://github.com/jestjs/jest) - MIT
[jest-haste-map@29.7.0](https://github.com/jestjs/jest) - MIT
[jest-leak-detector@29.7.0](https://github.com/jestjs/jest) - MIT
[jest-matcher-utils@29.7.0](https://github.com/jestjs/jest) - MIT
[jest-message-util@29.7.0](https://github.com/jestjs/jest) - MIT
[jest-mock@29.7.0](https://github.com/jestjs/jest) - MIT
[jest-pnp-resolver@1.2.3](https://github.com/arcanis/jest-pnp-resolver) - MIT
[jest-regex-util@29.6.3](https://github.com/jestjs/jest) - MIT
[jest-resolve-dependencies@29.7.0](https://github.com/jestjs/jest) - MIT
[jest-resolve@29.7.0](https://github.com/jestjs/jest) - MIT
[jest-runner@29.7.0](https://github.com/jestjs/jest) - MIT
[jest-runtime@29.7.0](https://github.com/jestjs/jest) - MIT
[jest-snapshot@29.7.0](https://github.com/jestjs/jest) - MIT
[jest-util@29.7.0](https://github.com/jestjs/jest) - MIT
[jest-validate@29.7.0](https://github.com/jestjs/jest) - MIT
[jest-watcher@29.7.0](https://github.com/jestjs/jest) - MIT
[jest-worker@29.7.0](https://github.com/jestjs/jest) - MIT
[jest@29.7.0](https://github.com/jestjs/jest) - MIT
[js-tokens@4.0.0](https://github.com/lydell/js-tokens) - MIT
[js-yaml@3.14.1](https://github.com/nodeca/js-yaml) - MIT
[js-yaml@4.1.0](https://github.com/nodeca/js-yaml) - MIT
[jsesc@2.5.2](https://github.com/mathiasbynens/jsesc) - MIT
[json-bignum@0.0.3](https://github.com/datalanche/json-bignum) - MIT
[json-buffer@3.0.1](https://github.com/dominictarr/json-buffer) - MIT
[json-parse-even-better-errors@2.3.1](https://github.com/npm/json-parse-even-better-errors) - MIT
[json-schema-traverse@0.4.1](https://github.com/epoberezkin/json-schema-traverse) - MIT
[json-stable-stringify-without-jsonify@1.0.1](https://github.com/samn/json-stable-stringify) - MIT
[json5@2.2.3](https://github.com/json5/json5) - MIT
[keyv@4.5.4](https://github.com/jaredwray/keyv) - MIT
[kleur@3.0.3](https://github.com/lukeed/kleur) - MIT
[leven@3.1.0](https://github.com/sindresorhus/leven) - MIT
[levn@0.4.1](https://github.com/gkz/levn) - MIT
[lines-and-columns@1.2.4](https://github.com/eventualbuddha/lines-and-columns) - MIT
[linkify-it@5.0.0](https://github.com/markdown-it/linkify-it) - MIT
[locate-path@5.0.0](https://github.com/sindresorhus/locate-path) - MIT
[locate-path@6.0.0](https://github.com/sindresorhus/locate-path) - MIT
[lodash.camelcase@4.3.0](https://github.com/lodash/lodash) - MIT
[lodash.memoize@4.1.2](https://github.com/lodash/lodash) - MIT
[lodash.merge@4.6.2](https://github.com/lodash/lodash) - MIT
[lodash@4.17.21](https://github.com/lodash/lodash) - MIT
[long@5.2.3](https://github.com/dcodeIO/long.js) - Apache-2.0
[lru-cache@10.4.3](https://github.com/isaacs/node-lru-cache) - ISC
[lru-cache@5.1.1](https://github.com/isaacs/node-lru-cache) - ISC
[lunr@2.3.9](https://github.com/olivernn/lunr.js) - MIT
[make-dir@4.0.0](https://github.com/sindresorhus/make-dir) - MIT
[make-error@1.3.6](https://github.com/JsCommunity/make-error) - ISC
[makeerror@1.0.12](https://github.com/daaku/nodejs-makeerror) - BSD-3-Clause
[markdown-it@14.1.0](https://github.com/markdown-it/markdown-it) - MIT
[md5@2.3.0](https://github.com/pvorb/node-md5) - BSD-3-Clause
[mdurl@2.0.0](https://github.com/markdown-it/mdurl) - MIT
[merge-stream@2.0.0](https://github.com/grncdr/merge-stream) - MIT
[merge2@1.4.1](https://github.com/teambition/merge2) - MIT
[micromatch@4.0.8](https://github.com/micromatch/micromatch) - MIT
[mime-db@1.52.0](https://github.com/jshttp/mime-db) - MIT
[mime-types@2.1.35](https://github.com/jshttp/mime-types) - MIT
[mimic-fn@2.1.0](https://github.com/sindresorhus/mimic-fn) - MIT
[minimatch@3.1.2](https://github.com/isaacs/minimatch) - ISC
[minimatch@9.0.3](https://github.com/isaacs/minimatch) - ISC
[minimatch@9.0.5](https://github.com/isaacs/minimatch) - ISC
[minimist@1.2.8](https://github.com/minimistjs/minimist) - MIT
[minipass@7.1.2](https://github.com/isaacs/minipass) - ISC
[minizlib@3.0.1](https://github.com/isaacs/minizlib) - MIT
[mkdirp@3.0.1](https://github.com/isaacs/node-mkdirp) - MIT
[mnemonist@0.38.3](https://github.com/yomguithereal/mnemonist) - MIT
[ms@2.1.2](https://github.com/zeit/ms) - MIT
[ms@2.1.3](https://github.com/vercel/ms) - MIT
[natural-compare@1.4.0](https://github.com/litejs/natural-compare-lite) - MIT
[node-domexception@1.0.0](https://github.com/jimmywarting/node-domexception) - MIT
[node-fetch@2.7.0](https://github.com/bitinn/node-fetch) - MIT
[node-int64@0.4.0](https://github.com/broofa/node-int64) - MIT
[node-releases@2.0.14](https://github.com/chicoxyzzy/node-releases) - MIT
[normalize-path@3.0.0](https://github.com/jonschlinkert/normalize-path) - MIT
[npm-run-path@4.0.1](https://github.com/sindresorhus/npm-run-path) - MIT
[obliterator@1.6.1](https://github.com/yomguithereal/obliterator) - MIT
[once@1.4.0](https://github.com/isaacs/once) - ISC
[onetime@5.1.2](https://github.com/sindresorhus/onetime) - MIT
[onnxruntime-common@1.19.2](https://github.com/Microsoft/onnxruntime) - MIT
[onnxruntime-common@1.20.0-dev.20241016-2b8fc5529b](https://github.com/Microsoft/onnxruntime) - MIT
[onnxruntime-node@1.19.2](https://github.com/Microsoft/onnxruntime) - MIT
[onnxruntime-web@1.21.0-dev.20241024-d9ca84ef96](https://github.com/Microsoft/onnxruntime) - MIT
[openai@4.29.2](https://github.com/openai/openai-node) - Apache-2.0
[optionator@0.9.3](https://github.com/gkz/optionator) - MIT
[p-limit@2.3.0](https://github.com/sindresorhus/p-limit) - MIT
[p-limit@3.1.0](https://github.com/sindresorhus/p-limit) - MIT
[p-locate@4.1.0](https://github.com/sindresorhus/p-locate) - MIT
[p-locate@5.0.0](https://github.com/sindresorhus/p-locate) - MIT
[p-try@2.2.0](https://github.com/sindresorhus/p-try) - MIT
[package-json-from-dist@1.0.1](https://github.com/isaacs/package-json-from-dist) - BlueOak-1.0.0
[parent-module@1.0.1](https://github.com/sindresorhus/parent-module) - MIT
[parse-json@5.2.0](https://github.com/sindresorhus/parse-json) - MIT
[path-exists@4.0.0](https://github.com/sindresorhus/path-exists) - MIT
[path-is-absolute@1.0.1](https://github.com/sindresorhus/path-is-absolute) - MIT
[path-key@3.1.1](https://github.com/sindresorhus/path-key) - MIT
[path-parse@1.0.7](https://github.com/jbgutierrez/path-parse) - MIT
[path-scurry@1.11.1](https://github.com/isaacs/path-scurry) - BlueOak-1.0.0
[path-type@4.0.0](https://github.com/sindresorhus/path-type) - MIT
[picocolors@1.0.0](https://github.com/alexeyraspopov/picocolors) - ISC
[picomatch@2.3.1](https://github.com/micromatch/picomatch) - MIT
[pirates@4.0.6](https://github.com/danez/pirates) - MIT
[pkg-dir@4.2.0](https://github.com/sindresorhus/pkg-dir) - MIT
[platform@1.3.6](https://github.com/bestiejs/platform.js) - MIT
[prelude-ls@1.2.1](https://github.com/gkz/prelude-ls) - MIT
[pretty-format@29.7.0](https://github.com/jestjs/jest) - MIT
[prompts@2.4.2](https://github.com/terkelg/prompts) - MIT
[protobufjs@7.4.0](https://github.com/protobufjs/protobuf.js) - BSD-3-Clause
[proxy-from-env@1.1.0](https://github.com/Rob--W/proxy-from-env) - MIT
[punycode.js@2.3.1](https://github.com/mathiasbynens/punycode.js) - MIT
[punycode@2.3.1](https://github.com/mathiasbynens/punycode.js) - MIT
[pure-rand@6.0.4](https://github.com/dubzzz/pure-rand) - MIT
[queue-microtask@1.2.3](https://github.com/feross/queue-microtask) - MIT
[react-is@18.2.0](https://github.com/facebook/react) - MIT
[rechoir@0.6.2](https://github.com/tkellen/node-rechoir) - MIT
[reflect-metadata@0.2.2](https://github.com/rbuckton/reflect-metadata) - Apache-2.0
[require-directory@2.1.1](https://github.com/troygoode/node-require-directory) - MIT
[resolve-cwd@3.0.0](https://github.com/sindresorhus/resolve-cwd) - MIT
[resolve-from@4.0.0](https://github.com/sindresorhus/resolve-from) - MIT
[resolve-from@5.0.0](https://github.com/sindresorhus/resolve-from) - MIT
[resolve.exports@2.0.2](https://github.com/lukeed/resolve.exports) - MIT
[resolve@1.22.8](https://github.com/browserify/resolve) - MIT
[reusify@1.0.4](https://github.com/mcollina/reusify) - MIT
[rimraf@3.0.2](https://github.com/isaacs/rimraf) - ISC
[rimraf@5.0.10](https://github.com/isaacs/rimraf) - ISC
[run-parallel@1.2.0](https://github.com/feross/run-parallel) - MIT
[semver@6.3.1](https://github.com/npm/node-semver) - ISC
[semver@7.6.3](https://github.com/npm/node-semver) - ISC
[sharp@0.33.5](https://github.com/lovell/sharp) - Apache-2.0
[shebang-command@2.0.0](https://github.com/kevva/shebang-command) - MIT
[shebang-regex@3.0.0](https://github.com/sindresorhus/shebang-regex) - MIT
[shelljs@0.8.5](https://github.com/shelljs/shelljs) - BSD-3-Clause
[shiki@1.10.3](https://github.com/shikijs/shiki) - MIT
[shx@0.3.4](https://github.com/shelljs/shx) - MIT
[signal-exit@3.0.7](https://github.com/tapjs/signal-exit) - ISC
[signal-exit@4.1.0](https://github.com/tapjs/signal-exit) - ISC
[simple-swizzle@0.2.2](https://github.com/qix-/node-simple-swizzle) - MIT
[sisteransi@1.0.5](https://github.com/terkelg/sisteransi) - MIT
[slash@3.0.0](https://github.com/sindresorhus/slash) - MIT
[source-map-support@0.5.13](https://github.com/evanw/node-source-map-support) - MIT
[source-map@0.6.1](https://github.com/mozilla/source-map) - BSD-3-Clause
[sprintf-js@1.0.3](https://github.com/alexei/sprintf.js) - BSD-3-Clause
[stack-utils@2.0.6](https://github.com/tapjs/stack-utils) - MIT
[stream-read-all@3.0.1](https://github.com/75lb/stream-read-all) - MIT
[string-length@4.0.2](https://github.com/sindresorhus/string-length) - MIT
[string-width@4.2.3](https://github.com/sindresorhus/string-width) - MIT
[string-width@5.1.2](https://github.com/sindresorhus/string-width) - MIT
[strip-ansi@6.0.1](https://github.com/chalk/strip-ansi) - MIT
[strip-ansi@7.1.0](https://github.com/chalk/strip-ansi) - MIT
[strip-bom@4.0.0](https://github.com/sindresorhus/strip-bom) - MIT
[strip-final-newline@2.0.0](https://github.com/sindresorhus/strip-final-newline) - MIT
[strip-json-comments@3.1.1](https://github.com/sindresorhus/strip-json-comments) - MIT
[strnum@1.0.5](https://github.com/NaturalIntelligence/strnum) - MIT
[supports-color@7.2.0](https://github.com/chalk/supports-color) - MIT
[supports-color@8.1.1](https://github.com/chalk/supports-color) - MIT
[supports-preserve-symlinks-flag@1.0.0](https://github.com/inspect-js/node-supports-preserve-symlinks-flag) - MIT
[table-layout@3.0.2](https://github.com/75lb/table-layout) - MIT
[tar@7.4.3](https://github.com/isaacs/node-tar) - ISC
[test-exclude@6.0.0](https://github.com/istanbuljs/test-exclude) - ISC
[text-table@0.2.0](https://github.com/substack/text-table) - MIT
[tmp@0.2.3](https://github.com/raszi/node-tmp) - MIT
[tmpl@1.0.5](https://github.com/daaku/nodejs-tmpl) - BSD-3-Clause
[to-regex-range@5.0.1](https://github.com/micromatch/to-regex-range) - MIT
[tr46@0.0.3](https://github.com/Sebmaster/tr46.js) - MIT
[ts-api-utils@1.0.3](https://github.com/JoshuaKGoldberg/ts-api-utils) - MIT
[ts-jest@29.1.2](https://github.com/kulshekhar/ts-jest) - MIT
[tslib@1.14.1](https://github.com/Microsoft/tslib) - 0BSD
[tslib@2.6.2](https://github.com/Microsoft/tslib) - 0BSD
[type-check@0.4.0](https://github.com/gkz/type-check) - MIT
[type-detect@4.0.8](https://github.com/chaijs/type-detect) - MIT
[type-fest@0.20.2](https://github.com/sindresorhus/type-fest) - (MIT OR CC0-1.0)
[type-fest@0.21.3](https://github.com/sindresorhus/type-fest) - (MIT OR CC0-1.0)
[typedoc-plugin-markdown@4.2.1](https://github.com/typedoc2md/typedoc-plugin-markdown) - MIT
[typedoc@0.26.4](https://github.com/TypeStrong/TypeDoc) - Apache-2.0
[typescript-eslint@7.1.0](https://github.com/typescript-eslint/typescript-eslint) - MIT
[typescript@5.5.4](https://github.com/Microsoft/TypeScript) - Apache-2.0
[typical@4.0.0](https://github.com/75lb/typical) - MIT
[typical@7.1.1](https://github.com/75lb/typical) - MIT
[uc.micro@2.1.0](https://github.com/markdown-it/uc.micro) - MIT
[undici-types@5.26.5](https://github.com/nodejs/undici) - MIT
[undici-types@6.19.8](https://github.com/nodejs/undici) - MIT
[update-browserslist-db@1.0.13](https://github.com/browserslist/update-db) - MIT
[uri-js@4.4.1](https://github.com/garycourt/uri-js) - BSD-2-Clause
[uuid@9.0.1](https://github.com/uuidjs/uuid) - MIT
[v8-to-istanbul@9.2.0](https://github.com/istanbuljs/v8-to-istanbul) - ISC
[walker@1.0.8](https://github.com/daaku/nodejs-walker) - Apache-2.0
[web-streams-polyfill@3.3.3](https://github.com/MattiasBuelens/web-streams-polyfill) - MIT
[web-streams-polyfill@4.0.0-beta.3](https://github.com/MattiasBuelens/web-streams-polyfill) - MIT
[webidl-conversions@3.0.1](https://github.com/jsdom/webidl-conversions) - BSD-2-Clause
[whatwg-url@5.0.0](https://github.com/jsdom/whatwg-url) - MIT
[which@2.0.2](https://github.com/isaacs/node-which) - ISC
[wordwrapjs@5.1.0](https://github.com/75lb/wordwrapjs) - MIT
[wrap-ansi@7.0.0](https://github.com/chalk/wrap-ansi) - MIT
[wrap-ansi@8.1.0](https://github.com/chalk/wrap-ansi) - MIT
[wrappy@1.0.2](https://github.com/npm/wrappy) - ISC
[write-file-atomic@4.0.2](https://github.com/npm/write-file-atomic) - ISC
[y18n@5.0.8](https://github.com/yargs/y18n) - ISC
[yallist@3.1.1](https://github.com/isaacs/yallist) - ISC
[yallist@5.0.0](https://github.com/isaacs/yallist) - BlueOak-1.0.0
[yaml@2.4.5](https://github.com/eemeli/yaml) - ISC
[yargs-parser@21.1.1](https://github.com/yargs/yargs-parser) - ISC
[yargs@17.7.2](https://github.com/yargs/yargs) - MIT
[yocto-queue@0.1.0](https://github.com/sindresorhus/yocto-queue) - MIT

File diff suppressed because it is too large Load Diff

View File

@@ -312,66 +312,6 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
expect(res.getChild("id")?.toJSON()).toEqual([2, 3]);
});
it("should support takeRowIds with bigint array", async () => {
await table.add([{ id: 1 }, { id: 2 }, { id: 3 }]);
// Get actual row IDs using withRowId()
const allRows = await table.query().withRowId().toArray();
const rowIds = allRows.map((row) => row._rowid) as bigint[];
// Verify row IDs are bigint
expect(typeof rowIds[0]).toBe("bigint");
// Use takeRowIds with bigint array (the main use case from issue #2722)
const res = await table.takeRowIds([rowIds[0], rowIds[2]]).toArray();
expect(res.map((r) => r.id)).toEqual([1, 3]);
});
it("should support takeRowIds with number array for backwards compatibility", async () => {
await table.add([{ id: 1 }, { id: 2 }, { id: 3 }]);
// Small row IDs can be passed as numbers
const res = await table.takeRowIds([0, 2]).toArray();
expect(res.map((r) => r.id)).toEqual([1, 3]);
});
it("should support takeRowIds with mixed bigint and number array", async () => {
await table.add([{ id: 1 }, { id: 2 }, { id: 3 }]);
// Mixed array of bigint and number
const res = await table.takeRowIds([0n, 1, 2n]).toArray();
expect(res.map((r) => r.id)).toEqual([1, 2, 3]);
});
it("should throw for non-integer number in takeRowIds", () => {
expect(() => table.takeRowIds([1.5])).toThrow(
"Row id must be an integer (or bigint)",
);
expect(() => table.takeRowIds([0, 1.1, 2])).toThrow(
"Row id must be an integer (or bigint)",
);
});
it("should throw for negative number in takeRowIds", () => {
expect(() => table.takeRowIds([-1])).toThrow("Row id cannot be negative");
expect(() => table.takeRowIds([0, -5, 2])).toThrow(
"Row id cannot be negative",
);
});
it("should throw for unsafe large number in takeRowIds", () => {
// Number.MAX_SAFE_INTEGER + 1 is not safe
const unsafeNumber = Number.MAX_SAFE_INTEGER + 1;
expect(() => table.takeRowIds([unsafeNumber])).toThrow(
"Row id is too large for number; use bigint instead",
);
});
it("should reject negative bigint in takeRowIds", async () => {
await table.add([{ id: 1 }]);
// Negative bigint should be rejected by the Rust layer
expect(() => {
table.takeRowIds([-1n]);
}).toThrow("Row id cannot be negative");
});
it("should return the table as an instance of an arrow table", async () => {
const arrowTbl = await table.toArrow();
expect(arrowTbl).toBeInstanceOf(ArrowTable);
@@ -1580,9 +1520,9 @@ describe("when optimizing a dataset", () => {
it("delete unverified", async () => {
const version = await table.version();
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${String(
18446744073709551615n - (BigInt(version) - 1n),
).padStart(20, "0")}.manifest`;
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${
version - 1
}.manifest`;
fs.rmSync(versionFile);
let stats = await table.optimize({ deleteUnverified: false });

View File

@@ -273,9 +273,7 @@ export async function connect(
let nativeProvider: NativeJsHeaderProvider | undefined;
if (finalHeaderProvider) {
if (typeof finalHeaderProvider === "function") {
nativeProvider = new NativeJsHeaderProvider(async () =>
finalHeaderProvider(),
);
nativeProvider = new NativeJsHeaderProvider(finalHeaderProvider);
} else if (
finalHeaderProvider &&
typeof finalHeaderProvider.getHeaders === "function"

View File

@@ -684,17 +684,19 @@ export class VectorQuery extends StandardQueryBase<NativeVectorQuery> {
rerank(reranker: Reranker): VectorQuery {
super.doCall((inner) =>
inner.rerank(async (args) => {
const vecResults = await fromBufferToRecordBatch(args.vecResults);
const ftsResults = await fromBufferToRecordBatch(args.ftsResults);
const result = await reranker.rerankHybrid(
args.query,
vecResults as RecordBatch,
ftsResults as RecordBatch,
);
inner.rerank({
rerankHybrid: async (_, args) => {
const vecResults = await fromBufferToRecordBatch(args.vecResults);
const ftsResults = await fromBufferToRecordBatch(args.ftsResults);
const result = await reranker.rerankHybrid(
args.query,
vecResults as RecordBatch,
ftsResults as RecordBatch,
);
const buffer = fromRecordBatchToBuffer(result);
return buffer;
const buffer = fromRecordBatchToBuffer(result);
return buffer;
},
}),
);

View File

@@ -347,13 +347,9 @@ export abstract class Table {
/**
* Create a query that returns a subset of the rows in the table.
* @param rowIds The row ids of the rows to return.
*
* Row ids returned by `withRowId()` are `bigint`, so `bigint[]` is supported.
* For convenience / backwards compatibility, `number[]` is also accepted (for
* small row ids that fit in a safe integer).
* @returns A builder that can be used to parameterize the query.
*/
abstract takeRowIds(rowIds: readonly (bigint | number)[]): TakeQuery;
abstract takeRowIds(rowIds: number[]): TakeQuery;
/**
* Create a search query to find the nearest neighbors
@@ -542,35 +538,6 @@ export abstract class Table {
*
*/
abstract stats(): Promise<TableStatistics>;
/**
* Get the initial storage options that were passed in when opening this table.
*
* For dynamically refreshed options (e.g., credential vending), use
* {@link Table.latestStorageOptions}.
*
* Warning: This is an internal API and the return value is subject to change.
*
* @returns The storage options, or undefined if no storage options were configured.
*/
abstract initialStorageOptions(): Promise<
Record<string, string> | null | undefined
>;
/**
* Get the latest storage options, refreshing from provider if configured.
*
* This method is useful for credential vending scenarios where storage options
* may be refreshed dynamically. If no dynamic provider is configured, this
* returns the initial static options.
*
* Warning: This is an internal API and the return value is subject to change.
*
* @returns The storage options, or undefined if no storage options were configured.
*/
abstract latestStorageOptions(): Promise<
Record<string, string> | null | undefined
>;
}
export class LocalTable extends Table {
@@ -719,24 +686,8 @@ export class LocalTable extends Table {
return new TakeQuery(this.inner.takeOffsets(offsets));
}
takeRowIds(rowIds: readonly (bigint | number)[]): TakeQuery {
const ids = rowIds.map((id) => {
if (typeof id === "bigint") {
return id;
}
if (!Number.isInteger(id)) {
throw new Error("Row id must be an integer (or bigint)");
}
if (id < 0) {
throw new Error("Row id cannot be negative");
}
if (!Number.isSafeInteger(id)) {
throw new Error("Row id is too large for number; use bigint instead");
}
return BigInt(id);
});
return new TakeQuery(this.inner.takeRowIds(ids));
takeRowIds(rowIds: number[]): TakeQuery {
return new TakeQuery(this.inner.takeRowIds(rowIds));
}
query(): Query {
@@ -907,18 +858,6 @@ export class LocalTable extends Table {
return await this.inner.stats();
}
async initialStorageOptions(): Promise<
Record<string, string> | null | undefined
> {
return await this.inner.initialStorageOptions();
}
async latestStorageOptions(): Promise<
Record<string, string> | null | undefined
> {
return await this.inner.latestStorageOptions();
}
mergeInsert(on: string | string[]): MergeInsertBuilder {
on = Array.isArray(on) ? on : [on];
return new MergeInsertBuilder(this.inner.mergeInsert(on), this.schema());

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.27.0-beta.2",
"version": "0.24.1",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",
@@ -8,9 +8,5 @@
"license": "Apache-2.0",
"engines": {
"node": ">= 18"
},
"repository": {
"type": "git",
"url": "https://github.com/lancedb/lancedb"
}
}

View File

@@ -0,0 +1,3 @@
# `@lancedb/lancedb-darwin-x64`
This is the **x86_64-apple-darwin** binary for `@lancedb/lancedb`

View File

@@ -0,0 +1,12 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.24.1",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",
"files": ["lancedb.darwin-x64.node"],
"license": "Apache-2.0",
"engines": {
"node": ">= 18"
}
}

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.27.0-beta.2",
"version": "0.24.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",
@@ -9,9 +9,5 @@
"engines": {
"node": ">= 18"
},
"libc": ["glibc"],
"repository": {
"type": "git",
"url": "https://github.com/lancedb/lancedb"
}
"libc": ["glibc"]
}

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.27.0-beta.2",
"version": "0.24.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",
@@ -9,9 +9,5 @@
"engines": {
"node": ">= 18"
},
"libc": ["musl"],
"repository": {
"type": "git",
"url": "https://github.com/lancedb/lancedb"
}
"libc": ["musl"]
}

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.27.0-beta.2",
"version": "0.24.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",
@@ -9,9 +9,5 @@
"engines": {
"node": ">= 18"
},
"libc": ["glibc"],
"repository": {
"type": "git",
"url": "https://github.com/lancedb/lancedb"
}
"libc": ["glibc"]
}

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.27.0-beta.2",
"version": "0.24.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",
@@ -9,9 +9,5 @@
"engines": {
"node": ">= 18"
},
"libc": ["musl"],
"repository": {
"type": "git",
"url": "https://github.com/lancedb/lancedb"
}
"libc": ["musl"]
}

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.27.0-beta.2",
"version": "0.24.1",
"os": [
"win32"
],
@@ -14,9 +14,5 @@
"license": "Apache-2.0",
"engines": {
"node": ">= 18"
},
"repository": {
"type": "git",
"url": "https://github.com/lancedb/lancedb"
}
}

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.27.0-beta.2",
"version": "0.24.1",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",
@@ -8,9 +8,5 @@
"license": "Apache-2.0",
"engines": {
"node": ">= 18"
},
"repository": {
"type": "git",
"url": "https://github.com/lancedb/lancedb"
}
}

1779
nodejs/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.27.0-beta.2",
"version": "0.24.1",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",
@@ -21,29 +21,29 @@
},
"types": "dist/index.d.ts",
"napi": {
"binaryName": "lancedb",
"targets": [
"aarch64-apple-darwin",
"x86_64-unknown-linux-gnu",
"aarch64-unknown-linux-gnu",
"x86_64-unknown-linux-musl",
"aarch64-unknown-linux-musl",
"x86_64-pc-windows-msvc",
"aarch64-pc-windows-msvc"
]
"name": "lancedb",
"triples": {
"defaults": false,
"additional": [
"x86_64-apple-darwin",
"aarch64-apple-darwin",
"x86_64-unknown-linux-gnu",
"aarch64-unknown-linux-gnu",
"x86_64-unknown-linux-musl",
"aarch64-unknown-linux-musl",
"x86_64-pc-windows-msvc",
"aarch64-pc-windows-msvc"
]
}
},
"license": "Apache-2.0",
"repository": {
"type": "git",
"url": "https://github.com/lancedb/lancedb"
},
"devDependencies": {
"@aws-sdk/client-dynamodb": "^3.33.0",
"@aws-sdk/client-kms": "^3.33.0",
"@aws-sdk/client-s3": "^3.33.0",
"@biomejs/biome": "^1.7.3",
"@jest/globals": "^29.7.0",
"@napi-rs/cli": "^3.5.1",
"@napi-rs/cli": "^2.18.3",
"@types/axios": "^0.14.0",
"@types/jest": "^29.1.2",
"@types/node": "^22.7.4",
@@ -72,9 +72,9 @@
"os": ["darwin", "linux", "win32"],
"scripts": {
"artifacts": "napi artifacts",
"build:debug": "napi build --platform --dts ../lancedb/native.d.ts --js ../lancedb/native.js --output-dir lancedb",
"build:debug": "napi build --platform --no-const-enum --dts ../lancedb/native.d.ts --js ../lancedb/native.js lancedb",
"postbuild:debug": "shx mkdir -p dist && shx cp lancedb/*.node dist/",
"build:release": "napi build --platform --release --dts ../lancedb/native.d.ts --js ../lancedb/native.js --output-dir dist",
"build:release": "napi build --platform --no-const-enum --release --dts ../lancedb/native.d.ts --js ../lancedb/native.js dist/",
"postbuild:release": "shx mkdir -p dist && shx cp lancedb/*.node dist/",
"build": "npm run build:debug && npm run tsc",
"build-release": "npm run build:release && npm run tsc",
@@ -88,7 +88,7 @@
"prepublishOnly": "napi prepublish -t npm",
"test": "jest --verbose",
"integration": "S3_TEST=1 npm run test",
"universal": "napi universalize",
"universal": "napi universal",
"version": "napi version"
},
"dependencies": {

View File

@@ -13,7 +13,6 @@ use crate::header::JsHeaderProvider;
use crate::table::Table;
use crate::ConnectionOptions;
use lancedb::connection::{ConnectBuilder, Connection as LanceDBConnection};
use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema};
#[napi]

View File

@@ -1,19 +1,20 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use napi::{bindgen_prelude::*, threadsafe_function::ThreadsafeFunction};
use napi::{
bindgen_prelude::*,
threadsafe_function::{ErrorStrategy, ThreadsafeFunction},
};
use napi_derive::napi;
use std::collections::HashMap;
use std::sync::Arc;
type GetHeadersFn = ThreadsafeFunction<(), Promise<HashMap<String, String>>, (), Status, false>;
/// JavaScript HeaderProvider implementation that wraps a JavaScript callback.
/// This is the only native header provider - all header provider implementations
/// should provide a JavaScript function that returns headers.
#[napi]
pub struct JsHeaderProvider {
get_headers_fn: Arc<GetHeadersFn>,
get_headers_fn: Arc<ThreadsafeFunction<(), ErrorStrategy::CalleeHandled>>,
}
impl Clone for JsHeaderProvider {
@@ -28,12 +29,9 @@ impl Clone for JsHeaderProvider {
impl JsHeaderProvider {
/// Create a new JsHeaderProvider from a JavaScript callback
#[napi(constructor)]
pub fn new(
get_headers_callback: Function<(), Promise<HashMap<String, String>>>,
) -> Result<Self> {
pub fn new(get_headers_callback: JsFunction) -> Result<Self> {
let get_headers_fn = get_headers_callback
.build_threadsafe_function()
.build()
.create_threadsafe_function(0, |ctx| Ok(vec![ctx.value]))
.map_err(|e| {
Error::new(
Status::GenericFailure,
@@ -53,7 +51,7 @@ impl lancedb::remote::HeaderProvider for JsHeaderProvider {
async fn get_headers(&self) -> lancedb::error::Result<HashMap<String, String>> {
// Call the JavaScript function asynchronously
let promise: Promise<HashMap<String, String>> =
self.get_headers_fn.call_async(()).await.map_err(|e| {
self.get_headers_fn.call_async(Ok(())).await.map_err(|e| {
lancedb::error::Error::Runtime {
message: format!("Failed to call JavaScript get_headers: {}", e),
}

View File

@@ -60,7 +60,7 @@ pub struct OpenTableOptions {
pub storage_options: Option<HashMap<String, String>>,
}
#[napi_derive::module_init]
#[napi::module_init]
fn init() {
let env = Env::new()
.filter_or("LANCEDB_LOG", "warn")

View File

@@ -20,8 +20,8 @@ use napi_derive::napi;
use crate::error::convert_error;
use crate::error::NapiErrorExt;
use crate::iterator::RecordBatchIterator;
use crate::rerankers::RerankHybridCallbackArgs;
use crate::rerankers::Reranker;
use crate::rerankers::RerankerCallbacks;
use crate::util::{parse_distance_type, schema_to_buffer};
#[napi]
@@ -42,7 +42,7 @@ impl Query {
}
#[napi]
pub fn full_text_search(&mut self, query: Object) -> napi::Result<()> {
pub fn full_text_search(&mut self, query: napi::JsObject) -> napi::Result<()> {
let query = parse_fts_query(query)?;
self.inner = self.inner.clone().full_text_search(query);
Ok(())
@@ -235,7 +235,7 @@ impl VectorQuery {
}
#[napi]
pub fn full_text_search(&mut self, query: Object) -> napi::Result<()> {
pub fn full_text_search(&mut self, query: napi::JsObject) -> napi::Result<()> {
let query = parse_fts_query(query)?;
self.inner = self.inner.clone().full_text_search(query);
Ok(())
@@ -272,13 +272,11 @@ impl VectorQuery {
}
#[napi]
pub fn rerank(
&mut self,
rerank_hybrid: Function<RerankHybridCallbackArgs, Promise<Buffer>>,
) -> napi::Result<()> {
let reranker = Reranker::new(rerank_hybrid)?;
self.inner = self.inner.clone().rerank(Arc::new(reranker));
Ok(())
pub fn rerank(&mut self, callbacks: RerankerCallbacks) {
self.inner = self
.inner
.clone()
.rerank(Arc::new(Reranker::new(callbacks)));
}
#[napi(catch_unwind)]
@@ -525,12 +523,12 @@ impl JsFullTextQuery {
}
}
fn parse_fts_query(query: Object) -> napi::Result<FullTextSearchQuery> {
if let Ok(Some(query)) = query.get::<&JsFullTextQuery>("query") {
fn parse_fts_query(query: napi::JsObject) -> napi::Result<FullTextSearchQuery> {
if let Ok(Some(query)) = query.get::<_, &JsFullTextQuery>("query") {
Ok(FullTextSearchQuery::new_query(query.inner.clone()))
} else if let Ok(Some(query_text)) = query.get::<String>("query") {
} else if let Ok(Some(query_text)) = query.get::<_, String>("query") {
let mut query_text = query_text;
let columns = query.get::<Option<Vec<String>>>("columns")?.flatten();
let columns = query.get::<_, Option<Vec<String>>>("columns")?.flatten();
let is_phrase =
query_text.len() >= 2 && query_text.starts_with('"') && query_text.ends_with('"');

View File

@@ -3,7 +3,10 @@
use arrow_array::RecordBatch;
use async_trait::async_trait;
use napi::{bindgen_prelude::*, threadsafe_function::ThreadsafeFunction};
use napi::{
bindgen_prelude::*,
threadsafe_function::{ErrorStrategy, ThreadsafeFunction},
};
use napi_derive::napi;
use lancedb::ipc::batches_to_ipc_file;
@@ -12,28 +15,27 @@ use lancedb::{error::Error, ipc::ipc_file_to_batches};
use crate::error::NapiErrorExt;
type RerankHybridFn = ThreadsafeFunction<
RerankHybridCallbackArgs,
Promise<Buffer>,
RerankHybridCallbackArgs,
Status,
false,
>;
/// Reranker implementation that "wraps" a NodeJS Reranker implementation.
/// This contains references to the callbacks that can be used to invoke the
/// reranking methods on the NodeJS implementation and handles serializing the
/// record batches to Arrow IPC buffers.
#[napi]
pub struct Reranker {
rerank_hybrid: RerankHybridFn,
/// callback to the Javascript which will call the rerankHybrid method of
/// some Reranker implementation
rerank_hybrid: ThreadsafeFunction<RerankHybridCallbackArgs, ErrorStrategy::CalleeHandled>,
}
#[napi]
impl Reranker {
pub fn new(
rerank_hybrid: Function<RerankHybridCallbackArgs, Promise<Buffer>>,
) -> napi::Result<Self> {
let rerank_hybrid = rerank_hybrid.build_threadsafe_function().build()?;
Ok(Self { rerank_hybrid })
#[napi]
pub fn new(callbacks: RerankerCallbacks) -> Self {
let rerank_hybrid = callbacks
.rerank_hybrid
.create_threadsafe_function(0, move |ctx| Ok(vec![ctx.value]))
.unwrap();
Self { rerank_hybrid }
}
}
@@ -47,16 +49,16 @@ impl lancedb::rerankers::Reranker for Reranker {
) -> lancedb::error::Result<RecordBatch> {
let callback_args = RerankHybridCallbackArgs {
query: query.to_string(),
vec_results: Buffer::from(batches_to_ipc_file(&[vector_results])?.as_ref()),
fts_results: Buffer::from(batches_to_ipc_file(&[fts_results])?.as_ref()),
vec_results: batches_to_ipc_file(&[vector_results])?,
fts_results: batches_to_ipc_file(&[fts_results])?,
};
let promised_buffer: Promise<Buffer> = self
.rerank_hybrid
.call_async(callback_args)
.call_async(Ok(callback_args))
.await
.map_err(|e| Error::Runtime {
message: format!("napi error status={}, reason={}", e.status, e.reason),
})?;
message: format!("napi error status={}, reason={}", e.status, e.reason),
})?;
let buffer = promised_buffer.await.map_err(|e| Error::Runtime {
message: format!("napi error status={}, reason={}", e.status, e.reason),
})?;
@@ -75,11 +77,16 @@ impl std::fmt::Debug for Reranker {
}
}
#[napi(object)]
pub struct RerankerCallbacks {
pub rerank_hybrid: JsFunction,
}
#[napi(object)]
pub struct RerankHybridCallbackArgs {
pub query: String,
pub vec_results: Buffer,
pub fts_results: Buffer,
pub vec_results: Vec<u8>,
pub fts_results: Vec<u8>,
}
fn buffer_to_record_batch(buffer: Buffer) -> Result<RecordBatch> {

View File

@@ -96,6 +96,7 @@ impl napi::bindgen_prelude::FromNapiValue for Session {
) -> napi::Result<Self> {
let object: napi::bindgen_prelude::ClassInstance<Self> =
napi::bindgen_prelude::ClassInstance::from_napi_value(env, napi_val)?;
Ok((*object).clone())
let copy = object.clone();
Ok(copy)
}
}

View File

@@ -71,17 +71,6 @@ impl Table {
pub async fn add(&self, buf: Buffer, mode: String) -> napi::Result<AddResult> {
let batches = ipc_file_to_batches(buf.to_vec())
.map_err(|e| napi::Error::from_reason(format!("Failed to read IPC file: {}", e)))?;
let batches = batches
.into_iter()
.map(|batch| {
batch.map_err(|e| {
napi::Error::from_reason(format!(
"Failed to read record batch from IPC file: {}",
e
))
})
})
.collect::<Result<Vec<_>>>()?;
let mut op = self.inner_ref()?.add(batches);
op = if mode == "append" {
@@ -177,19 +166,6 @@ impl Table {
Ok(stats.into())
}
#[napi(catch_unwind)]
pub async fn initial_storage_options(&self) -> napi::Result<Option<HashMap<String, String>>> {
Ok(self.inner_ref()?.initial_storage_options().await)
}
#[napi(catch_unwind)]
pub async fn latest_storage_options(&self) -> napi::Result<Option<HashMap<String, String>>> {
self.inner_ref()?
.latest_storage_options()
.await
.default_error()
}
#[napi(catch_unwind)]
pub async fn update(
&self,
@@ -232,24 +208,18 @@ impl Table {
}
#[napi(catch_unwind)]
pub fn take_row_ids(&self, row_ids: Vec<BigInt>) -> napi::Result<TakeQuery> {
pub fn take_row_ids(&self, row_ids: Vec<i64>) -> napi::Result<TakeQuery> {
Ok(TakeQuery::new(
self.inner_ref()?.take_row_ids(
row_ids
.into_iter()
.map(|id| {
let (negative, value, lossless) = id.get_u64();
if negative {
Err(napi::Error::from_reason(
"Row id cannot be negative".to_string(),
.map(|o| {
u64::try_from(o).map_err(|e| {
napi::Error::from_reason(format!(
"Failed to convert row id to u64: {}",
e
))
} else if !lossless {
Err(napi::Error::from_reason(
"Row id is too large to fit in u64".to_string(),
))
} else {
Ok(value)
}
})
})
.collect::<Result<Vec<_>>>()?,
),

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.30.0-beta.2"
current_version = "0.27.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -16,7 +16,7 @@ The Python package is a wrapper around the Rust library, `lancedb`. We use
To set up your development environment, you will need to install the following:
1. Python 3.10 or later
1. Python 3.9 or later
2. Cargo (Rust's package manager). Use [rustup](https://rustup.rs/) to install.
3. [protoc](https://grpc.io/docs/protoc-installation/) (Protocol Buffers compiler)

View File

@@ -1,28 +1,28 @@
[package]
name = "lancedb-python"
version = "0.30.0-beta.2"
version = "0.27.1"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true
repository.workspace = true
keywords.workspace = true
categories.workspace = true
rust-version = "1.91.0"
rust-version = "1.88.0"
[lib]
name = "_lancedb"
crate-type = ["cdylib"]
[dependencies]
arrow = { version = "57.2", features = ["pyarrow"] }
arrow = { version = "56.2", features = ["pyarrow"] }
async-trait = "0.1"
lancedb = { path = "../rust/lancedb", default-features = false }
lance-core.workspace = true
lance-namespace.workspace = true
lance-io.workspace = true
env_logger.workspace = true
pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] }
pyo3-async-runtimes = { version = "0.26", features = [
pyo3 = { version = "0.25", features = ["extension-module", "abi3-py39"] }
pyo3-async-runtimes = { version = "0.25", features = [
"attributes",
"tokio-runtime",
] }
@@ -32,7 +32,7 @@ snafu.workspace = true
tokio = { version = "1.40", features = ["sync"] }
[build-dependencies]
pyo3-build-config = { version = "0.26", features = [
pyo3-build-config = { version = "0.25", features = [
"extension-module",
"abi3-py39",
] }

View File

@@ -1,206 +0,0 @@
| Name | Version | License | URL |
|--------------------------------|-----------------|--------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------|
| InstructorEmbedding | 1.0.1 | Apache License 2.0 | https://github.com/HKUNLP/instructor-embedding |
| Jinja2 | 3.1.6 | BSD License | https://github.com/pallets/jinja/ |
| Markdown | 3.10.2 | BSD-3-Clause | https://Python-Markdown.github.io/ |
| MarkupSafe | 3.0.3 | BSD-3-Clause | https://github.com/pallets/markupsafe/ |
| PyJWT | 2.11.0 | MIT | https://github.com/jpadilla/pyjwt |
| PyYAML | 6.0.3 | MIT License | https://pyyaml.org/ |
| Pygments | 2.19.2 | BSD License | https://pygments.org |
| accelerate | 1.12.0 | Apache Software License | https://github.com/huggingface/accelerate |
| adlfs | 2026.2.0 | BSD License | UNKNOWN |
| aiohappyeyeballs | 2.6.1 | Python Software Foundation License | https://github.com/aio-libs/aiohappyeyeballs |
| aiohttp | 3.13.3 | Apache-2.0 AND MIT | https://github.com/aio-libs/aiohttp |
| aiosignal | 1.4.0 | Apache Software License | https://github.com/aio-libs/aiosignal |
| annotated-types | 0.7.0 | MIT License | https://github.com/annotated-types/annotated-types |
| anyio | 4.12.1 | MIT | https://anyio.readthedocs.io/en/stable/versionhistory.html |
| appnope | 0.1.4 | BSD License | http://github.com/minrk/appnope |
| asttokens | 3.0.1 | Apache 2.0 | https://github.com/gristlabs/asttokens |
| attrs | 25.4.0 | MIT | https://www.attrs.org/en/stable/changelog.html |
| awscli | 1.44.35 | Apache Software License | http://aws.amazon.com/cli/ |
| azure-core | 1.38.0 | MIT License | https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/core/azure-core |
| azure-datalake-store | 0.0.53 | MIT License | https://github.com/Azure/azure-data-lake-store-python |
| azure-identity | 1.25.1 | MIT | https://github.com/Azure/azure-sdk-for-python |
| azure-storage-blob | 12.28.0 | MIT License | https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-blob |
| babel | 2.18.0 | BSD License | https://babel.pocoo.org/ |
| backrefs | 6.1 | MIT | https://github.com/facelessuser/backrefs |
| beautifulsoup4 | 4.14.3 | MIT License | https://www.crummy.com/software/BeautifulSoup/bs4/ |
| bleach | 6.3.0 | Apache Software License | https://github.com/mozilla/bleach |
| boto3 | 1.42.45 | Apache-2.0 | https://github.com/boto/boto3 |
| botocore | 1.42.45 | Apache-2.0 | https://github.com/boto/botocore |
| cachetools | 7.0.0 | MIT | https://github.com/tkem/cachetools/ |
| certifi | 2026.1.4 | Mozilla Public License 2.0 (MPL 2.0) | https://github.com/certifi/python-certifi |
| cffi | 2.0.0 | MIT | https://cffi.readthedocs.io/en/latest/whatsnew.html |
| cfgv | 3.5.0 | MIT | https://github.com/asottile/cfgv |
| charset-normalizer | 3.4.4 | MIT | https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md |
| click | 8.3.1 | BSD-3-Clause | https://github.com/pallets/click/ |
| cohere | 5.20.4 | MIT License | https://github.com/cohere-ai/cohere-python |
| colorama | 0.4.6 | BSD License | https://github.com/tartley/colorama |
| colpali_engine | 0.3.13 | MIT License | https://github.com/illuin-tech/colpali |
| comm | 0.2.3 | BSD License | https://github.com/ipython/comm |
| cryptography | 46.0.4 | Apache-2.0 OR BSD-3-Clause | https://github.com/pyca/cryptography |
| datafusion | 51.0.0 | Apache Software License | https://datafusion.apache.org/python |
| debugpy | 1.8.20 | MIT License | https://aka.ms/debugpy |
| decorator | 5.2.1 | BSD License | UNKNOWN |
| defusedxml | 0.7.1 | Python Software Foundation License | https://github.com/tiran/defusedxml |
| deprecation | 2.1.0 | Apache Software License | http://deprecation.readthedocs.io/ |
| distlib | 0.4.0 | Python Software Foundation License | https://github.com/pypa/distlib |
| distro | 1.9.0 | Apache Software License | https://github.com/python-distro/distro |
| docutils | 0.19 | BSD License; GNU General Public License (GPL); Public Domain; Python Software Foundation License | https://docutils.sourceforge.io/ |
| duckdb | 1.4.4 | MIT License | https://github.com/duckdb/duckdb-python |
| executing | 2.2.1 | MIT License | https://github.com/alexmojaki/executing |
| fastavro | 1.12.1 | MIT | https://github.com/fastavro/fastavro |
| fastjsonschema | 2.21.2 | BSD License | https://github.com/horejsek/python-fastjsonschema |
| filelock | 3.20.3 | Unlicense | https://github.com/tox-dev/py-filelock |
| frozenlist | 1.8.0 | Apache-2.0 | https://github.com/aio-libs/frozenlist |
| fsspec | 2026.2.0 | BSD-3-Clause | https://github.com/fsspec/filesystem_spec |
| ftfy | 6.3.1 | Apache-2.0 | https://ftfy.readthedocs.io/en/latest/ |
| ghp-import | 2.1.0 | Apache Software License | https://github.com/c-w/ghp-import |
| google-ai-generativelanguage | 0.6.15 | Apache Software License | https://github.com/googleapis/google-cloud-python/tree/main/packages/google-ai-generativelanguage |
| google-api-core | 2.25.2 | Apache Software License | https://github.com/googleapis/python-api-core |
| google-api-python-client | 2.189.0 | Apache Software License | https://github.com/googleapis/google-api-python-client/ |
| google-auth | 2.48.0 | Apache Software License | https://github.com/googleapis/google-auth-library-python |
| google-auth-httplib2 | 0.3.0 | Apache Software License | https://github.com/GoogleCloudPlatform/google-auth-library-python-httplib2 |
| google-generativeai | 0.8.6 | Apache Software License | https://github.com/google/generative-ai-python |
| googleapis-common-protos | 1.72.0 | Apache Software License | https://github.com/googleapis/google-cloud-python/tree/main/packages/googleapis-common-protos |
| griffe | 2.0.0 | ISC | https://mkdocstrings.github.io/griffe |
| griffecli | 2.0.0 | ISC | UNKNOWN |
| griffelib | 2.0.0 | ISC | UNKNOWN |
| grpcio | 1.78.0 | Apache-2.0 | https://grpc.io |
| grpcio-status | 1.71.2 | Apache Software License | https://grpc.io |
| h11 | 0.16.0 | MIT License | https://github.com/python-hyper/h11 |
| hf-xet | 1.2.0 | Apache-2.0 | https://github.com/huggingface/xet-core |
| httpcore | 1.0.9 | BSD-3-Clause | https://www.encode.io/httpcore/ |
| httplib2 | 0.31.2 | MIT License | https://github.com/httplib2/httplib2 |
| httpx | 0.28.1 | BSD License | https://github.com/encode/httpx |
| huggingface_hub | 0.36.2 | Apache Software License | https://github.com/huggingface/huggingface_hub |
| ibm-cos-sdk | 2.14.3 | Apache Software License | https://github.com/ibm/ibm-cos-sdk-python |
| ibm-cos-sdk-core | 2.14.3 | Apache Software License | https://github.com/ibm/ibm-cos-sdk-python-core |
| ibm-cos-sdk-s3transfer | 2.14.3 | Apache Software License | https://github.com/IBM/ibm-cos-sdk-python-s3transfer |
| ibm_watsonx_ai | 1.5.1 | BSD License | https://ibm.github.io/watsonx-ai-python-sdk/changelog.html |
| identify | 2.6.16 | MIT | https://github.com/pre-commit/identify |
| idna | 3.11 | BSD-3-Clause | https://github.com/kjd/idna |
| iniconfig | 2.3.0 | MIT | https://github.com/pytest-dev/iniconfig |
| ipykernel | 6.31.0 | BSD-3-Clause | https://ipython.org |
| ipython | 9.10.0 | BSD-3-Clause | https://ipython.org |
| ipython_pygments_lexers | 1.1.1 | BSD License | https://github.com/ipython/ipython-pygments-lexers |
| isodate | 0.7.2 | BSD License | https://github.com/gweis/isodate/ |
| jedi | 0.19.2 | MIT License | https://github.com/davidhalter/jedi |
| jiter | 0.13.0 | MIT License | https://github.com/pydantic/jiter/ |
| jmespath | 1.0.1 | MIT License | https://github.com/jmespath/jmespath.py |
| joblib | 1.5.3 | BSD-3-Clause | https://joblib.readthedocs.io |
| jsonschema | 4.26.0 | MIT | https://github.com/python-jsonschema/jsonschema |
| jsonschema-specifications | 2025.9.1 | MIT | https://github.com/python-jsonschema/jsonschema-specifications |
| jupyter_client | 8.8.0 | BSD License | https://jupyter.org |
| jupyter_core | 5.9.1 | BSD-3-Clause | https://jupyter.org |
| jupyterlab_pygments | 0.3.0 | BSD License | https://github.com/jupyterlab/jupyterlab_pygments |
| jupytext | 1.19.1 | MIT License | https://github.com/mwouts/jupytext |
| lance-namespace | 0.4.5 | Apache-2.0 | https://github.com/lance-format/lance-namespace |
| lance-namespace-urllib3-client | 0.4.5 | Apache-2.0 | https://github.com/lance-format/lance-namespace |
| lancedb | 0.29.2 | Apache Software License | https://github.com/lancedb/lancedb |
| lomond | 0.3.3 | BSD License | https://github.com/wildfoundry/dataplicity-lomond |
| markdown-it-py | 4.0.0 | MIT License | https://github.com/executablebooks/markdown-it-py |
| matplotlib-inline | 0.2.1 | UNKNOWN | https://github.com/ipython/matplotlib-inline |
| mdit-py-plugins | 0.5.0 | MIT License | https://github.com/executablebooks/mdit-py-plugins |
| mdurl | 0.1.2 | MIT License | https://github.com/executablebooks/mdurl |
| mergedeep | 1.3.4 | MIT License | https://github.com/clarketm/mergedeep |
| mistune | 3.2.0 | BSD License | https://github.com/lepture/mistune |
| mkdocs | 1.6.1 | BSD-2-Clause | https://github.com/mkdocs/mkdocs |
| mkdocs-autorefs | 1.4.3 | ISC | https://mkdocstrings.github.io/autorefs |
| mkdocs-get-deps | 0.2.0 | MIT | https://github.com/mkdocs/get-deps |
| mkdocs-jupyter | 0.25.1 | Apache-2.0 | https://github.com/danielfrg/mkdocs-jupyter |
| mkdocs-material | 9.7.1 | MIT | https://github.com/squidfunk/mkdocs-material |
| mkdocs-material-extensions | 1.3.1 | MIT | https://github.com/facelessuser/mkdocs-material-extensions |
| mkdocstrings | 1.0.3 | ISC | https://mkdocstrings.github.io |
| mkdocstrings-python | 2.0.2 | ISC | https://mkdocstrings.github.io/python |
| mpmath | 1.3.0 | BSD License | http://mpmath.org/ |
| msal | 1.34.0 | MIT License | https://github.com/AzureAD/microsoft-authentication-library-for-python |
| msal-extensions | 1.3.1 | MIT License | https://github.com/AzureAD/microsoft-authentication-extensions-for-python/releases |
| multidict | 6.7.1 | Apache License 2.0 | https://github.com/aio-libs/multidict |
| nbclient | 0.10.4 | BSD License | https://jupyter.org |
| nbconvert | 7.17.0 | BSD License | https://jupyter.org |
| nbformat | 5.10.4 | BSD License | https://jupyter.org |
| nest-asyncio | 1.6.0 | BSD License | https://github.com/erdewit/nest_asyncio |
| networkx | 3.6.1 | BSD-3-Clause | https://networkx.org/ |
| nodeenv | 1.10.0 | BSD License | https://github.com/ekalinin/nodeenv |
| numpy | 2.4.2 | BSD-3-Clause AND 0BSD AND MIT AND Zlib AND CC0-1.0 | https://numpy.org |
| ollama | 0.6.1 | MIT | https://ollama.com |
| open_clip_torch | 3.2.0 | MIT License | https://github.com/mlfoundations/open_clip |
| openai | 2.18.0 | Apache Software License | https://github.com/openai/openai-python |
| packaging | 26.0 | Apache-2.0 OR BSD-2-Clause | https://github.com/pypa/packaging |
| paginate | 0.5.7 | MIT License | https://github.com/Signum/paginate |
| pandas | 2.3.3 | BSD License | https://pandas.pydata.org |
| pandocfilters | 1.5.1 | BSD License | http://github.com/jgm/pandocfilters |
| parso | 0.8.6 | MIT License | https://github.com/davidhalter/parso |
| pathspec | 1.0.4 | Mozilla Public License 2.0 (MPL 2.0) | UNKNOWN |
| peft | 0.17.1 | Apache Software License | https://github.com/huggingface/peft |
| pexpect | 4.9.0 | ISC License (ISCL) | https://pexpect.readthedocs.io/ |
| pillow | 12.1.0 | MIT-CMU | https://python-pillow.github.io |
| platformdirs | 4.5.1 | MIT | https://github.com/tox-dev/platformdirs |
| pluggy | 1.6.0 | MIT License | UNKNOWN |
| polars | 1.3.0 | MIT License | https://www.pola.rs/ |
| pre_commit | 4.5.1 | MIT | https://github.com/pre-commit/pre-commit |
| prompt_toolkit | 3.0.52 | BSD License | https://github.com/prompt-toolkit/python-prompt-toolkit |
| propcache | 0.4.1 | Apache Software License | https://github.com/aio-libs/propcache |
| proto-plus | 1.27.1 | Apache Software License | https://github.com/googleapis/proto-plus-python |
| protobuf | 5.29.6 | 3-Clause BSD License | https://developers.google.com/protocol-buffers/ |
| psutil | 7.2.2 | BSD-3-Clause | https://github.com/giampaolo/psutil |
| ptyprocess | 0.7.0 | ISC License (ISCL) | https://github.com/pexpect/ptyprocess |
| pure_eval | 0.2.3 | MIT License | http://github.com/alexmojaki/pure_eval |
| pyarrow | 23.0.0 | Apache-2.0 | https://arrow.apache.org/ |
| pyarrow-stubs | 20.0.0.20251215 | BSD-2-Clause | https://github.com/zen-xu/pyarrow-stubs |
| pyasn1 | 0.6.2 | BSD-2-Clause | https://github.com/pyasn1/pyasn1 |
| pyasn1_modules | 0.4.2 | BSD License | https://github.com/pyasn1/pyasn1-modules |
| pycparser | 3.0 | BSD-3-Clause | https://github.com/eliben/pycparser |
| pydantic | 2.12.5 | MIT | https://github.com/pydantic/pydantic |
| pydantic_core | 2.41.5 | MIT | https://github.com/pydantic/pydantic-core |
| pylance | 2.0.0 | Apache Software License | UNKNOWN |
| pymdown-extensions | 10.20.1 | MIT | https://github.com/facelessuser/pymdown-extensions |
| pyparsing | 3.3.2 | MIT | https://github.com/pyparsing/pyparsing/ |
| pyright | 1.1.408 | MIT | https://github.com/RobertCraigie/pyright-python |
| pytest | 9.0.2 | MIT | https://docs.pytest.org/en/latest/ |
| pytest-asyncio | 1.3.0 | Apache-2.0 | https://github.com/pytest-dev/pytest-asyncio |
| pytest-mock | 3.15.1 | MIT License | https://github.com/pytest-dev/pytest-mock/ |
| python-dateutil | 2.9.0.post0 | Apache Software License; BSD License | https://github.com/dateutil/dateutil |
| pytz | 2025.2 | MIT License | http://pythonhosted.org/pytz |
| pyyaml_env_tag | 1.1 | MIT | https://github.com/waylan/pyyaml-env-tag |
| pyzmq | 27.1.0 | BSD License | https://pyzmq.readthedocs.org |
| referencing | 0.37.0 | MIT | https://github.com/python-jsonschema/referencing |
| regex | 2026.1.15 | Apache-2.0 AND CNRI-Python | https://github.com/mrabarnett/mrab-regex |
| requests | 2.32.5 | Apache Software License | https://requests.readthedocs.io |
| rpds-py | 0.30.0 | MIT | https://github.com/crate-py/rpds |
| rsa | 4.7.2 | Apache Software License | https://stuvel.eu/rsa |
| ruff | 0.15.0 | MIT License | https://docs.astral.sh/ruff |
| s3transfer | 0.16.0 | Apache Software License | https://github.com/boto/s3transfer |
| safetensors | 0.7.0 | Apache Software License | https://github.com/huggingface/safetensors |
| scikit-learn | 1.8.0 | BSD-3-Clause | https://scikit-learn.org |
| scipy | 1.17.0 | BSD License | https://scipy.org/ |
| sentence-transformers | 5.2.2 | Apache Software License | https://www.SBERT.net |
| sentencepiece | 0.2.1 | UNKNOWN | https://github.com/google/sentencepiece |
| six | 1.17.0 | MIT License | https://github.com/benjaminp/six |
| sniffio | 1.3.1 | Apache Software License; MIT License | https://github.com/python-trio/sniffio |
| soupsieve | 2.8.3 | MIT | https://github.com/facelessuser/soupsieve |
| stack-data | 0.6.3 | MIT License | http://github.com/alexmojaki/stack_data |
| sympy | 1.14.0 | BSD License | https://sympy.org |
| tabulate | 0.9.0 | MIT License | https://github.com/astanin/python-tabulate |
| tantivy | 0.25.1 | UNKNOWN | UNKNOWN |
| threadpoolctl | 3.6.0 | BSD License | https://github.com/joblib/threadpoolctl |
| timm | 1.0.24 | Apache Software License | https://github.com/huggingface/pytorch-image-models |
| tinycss2 | 1.4.0 | BSD License | https://www.courtbouillon.org/tinycss2 |
| tokenizers | 0.22.2 | Apache Software License | https://github.com/huggingface/tokenizers |
| torch | 2.8.0 | BSD License | https://pytorch.org/ |
| torchvision | 0.23.0 | BSD | https://github.com/pytorch/vision |
| tornado | 6.5.4 | Apache Software License | http://www.tornadoweb.org/ |
| tqdm | 4.67.3 | MPL-2.0 AND MIT | https://tqdm.github.io |
| traitlets | 5.14.3 | BSD License | https://github.com/ipython/traitlets |
| transformers | 4.57.6 | Apache Software License | https://github.com/huggingface/transformers |
| types-requests | 2.32.4.20260107 | Apache-2.0 | https://github.com/python/typeshed |
| typing-inspection | 0.4.2 | MIT | https://github.com/pydantic/typing-inspection |
| typing_extensions | 4.15.0 | PSF-2.0 | https://github.com/python/typing_extensions |
| tzdata | 2025.3 | Apache-2.0 | https://github.com/python/tzdata |
| uritemplate | 4.2.0 | BSD 3-Clause OR Apache-2.0 | https://uritemplate.readthedocs.org |
| urllib3 | 2.6.3 | MIT | https://github.com/urllib3/urllib3/blob/main/CHANGES.rst |
| virtualenv | 20.36.1 | MIT | https://github.com/pypa/virtualenv |
| watchdog | 6.0.0 | Apache Software License | https://github.com/gorakhargosh/watchdog |
| webencodings | 0.5.1 | BSD License | https://github.com/SimonSapin/python-webencodings |
| yarl | 1.22.0 | Apache Software License | https://github.com/aio-libs/yarl |

File diff suppressed because it is too large Load Diff

View File

@@ -16,7 +16,7 @@ description = "lancedb"
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
license = { file = "LICENSE" }
readme = "README.md"
requires-python = ">=3.10"
requires-python = ">=3.9"
keywords = [
"data-format",
"data-science",
@@ -33,10 +33,10 @@ classifiers = [
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Topic :: Scientific/Engineering",
]
@@ -61,7 +61,7 @@ tests = [
"pyarrow-stubs",
"pylance>=1.0.0b14",
"requests",
"datafusion<52",
"datafusion",
]
dev = [
"ruff",
@@ -137,4 +137,4 @@ include = [
"python/lancedb/_lancedb.pyi",
]
exclude = ["python/tests/"]
pythonVersion = "3.13"
pythonVersion = "3.12"

View File

@@ -180,8 +180,6 @@ class Table:
delete_unverified: Optional[bool] = None,
) -> OptimizeStats: ...
async def uri(self) -> str: ...
async def initial_storage_options(self) -> Optional[Dict[str, str]]: ...
async def latest_storage_options(self) -> Optional[Dict[str, str]]: ...
@property
def tags(self) -> Tags: ...
def query(self) -> Query: ...

View File

@@ -1,10 +1,8 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
from functools import singledispatch
from typing import List, Optional, Tuple, Union
from lancedb.pydantic import LanceModel, model_to_dict
import pyarrow as pa
from ._lancedb import RecordBatchStream
@@ -82,32 +80,3 @@ def peek_reader(
yield from reader
return batch, pa.RecordBatchReader.from_batches(batch.schema, all_batches())
@singledispatch
def to_arrow(data) -> pa.Table:
"""Convert a single data object to a pa.Table."""
raise NotImplementedError(f"to_arrow not implemented for type {type(data)}")
@to_arrow.register(pa.RecordBatch)
def _arrow_from_batch(data: pa.RecordBatch) -> pa.Table:
return pa.Table.from_batches([data])
@to_arrow.register(pa.Table)
def _arrow_from_table(data: pa.Table) -> pa.Table:
return data
@to_arrow.register(list)
def _arrow_from_list(data: list) -> pa.Table:
if not data:
raise ValueError("Cannot create table from empty list without a schema")
if isinstance(data[0], LanceModel):
schema = data[0].__class__.to_arrow_schema()
dicts = [model_to_dict(d) for d in data]
return pa.Table.from_pylist(dicts, schema=schema)
return pa.Table.from_pylist(data)

View File

@@ -22,12 +22,7 @@ class BackgroundEventLoop:
self.thread.start()
def run(self, future):
concurrent_future = asyncio.run_coroutine_threadsafe(future, self.loop)
try:
return concurrent_future.result()
except BaseException:
concurrent_future.cancel()
raise
return asyncio.run_coroutine_threadsafe(future, self.loop).result()
LOOP = BackgroundEventLoop()

View File

@@ -275,7 +275,7 @@ class ColPaliEmbeddings(EmbeddingFunction):
"""
Convert image inputs to PIL Images.
"""
PIL_Image = attempt_import_or_raise("PIL.Image", "pillow")
PIL = attempt_import_or_raise("PIL", "pillow")
requests = attempt_import_or_raise("requests", "requests")
images = self.sanitize_input(images)
pil_images = []
@@ -285,12 +285,12 @@ class ColPaliEmbeddings(EmbeddingFunction):
if image.startswith(("http://", "https://")):
response = requests.get(image, timeout=10)
response.raise_for_status()
pil_images.append(PIL_Image.open(io.BytesIO(response.content)))
pil_images.append(PIL.Image.open(io.BytesIO(response.content)))
else:
with PIL_Image.open(image) as im:
with PIL.Image.open(image) as im:
pil_images.append(im.copy())
elif isinstance(image, bytes):
pil_images.append(PIL_Image.open(io.BytesIO(image)))
pil_images.append(PIL.Image.open(io.BytesIO(image)))
else:
# Assume it's a PIL Image; will raise if invalid
pil_images.append(image)

View File

@@ -2,7 +2,6 @@
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
import warnings
from typing import List, Union
import numpy as np
@@ -16,8 +15,6 @@ from .utils import weak_lru
@register("gte-text")
class GteEmbeddings(TextEmbeddingFunction):
"""
Deprecated: GTE embeddings should be used through sentence-transformers.
An embedding function that uses GTE-LARGE MLX format(for Apple silicon devices only)
as well as the standard cpu/gpu version from: https://huggingface.co/thenlper/gte-large.
@@ -64,13 +61,6 @@ class GteEmbeddings(TextEmbeddingFunction):
def __init__(self, **kwargs):
super().__init__(**kwargs)
warnings.warn(
"GTE embeddings as a standalone embedding function are deprecated. "
"Use the 'sentence-transformers' embedding function with a GTE model "
"instead.",
DeprecationWarning,
stacklevel=3,
)
self._ndims = None
if kwargs:
self.mlx = kwargs.get("mlx", False)

View File

@@ -77,8 +77,8 @@ class JinaEmbeddings(EmbeddingFunction):
if isinstance(inputs, list):
inputs = inputs
else:
PIL_Image = attempt_import_or_raise("PIL.Image", "pillow")
if isinstance(inputs, PIL_Image.Image):
PIL = attempt_import_or_raise("PIL", "pillow")
if isinstance(inputs, PIL.Image.Image):
inputs = [inputs]
return inputs
@@ -89,13 +89,13 @@ class JinaEmbeddings(EmbeddingFunction):
elif isinstance(image, (str, Path)):
parsed = urlparse.urlparse(image)
# TODO handle drive letter on windows.
PIL_Image = attempt_import_or_raise("PIL.Image", "pillow")
PIL = attempt_import_or_raise("PIL", "pillow")
if parsed.scheme == "file":
pil_image = PIL_Image.open(parsed.path)
pil_image = PIL.Image.open(parsed.path)
elif parsed.scheme == "":
pil_image = PIL_Image.open(image if os.name == "nt" else parsed.path)
pil_image = PIL.Image.open(image if os.name == "nt" else parsed.path)
elif parsed.scheme.startswith("http"):
pil_image = PIL_Image.open(io.BytesIO(url_retrieve(image)))
pil_image = PIL.Image.open(io.BytesIO(url_retrieve(image)))
else:
raise NotImplementedError("Only local and http(s) urls are supported")
buffered = io.BytesIO()
@@ -103,9 +103,9 @@ class JinaEmbeddings(EmbeddingFunction):
image_bytes = buffered.getvalue()
image_dict = {"image": base64.b64encode(image_bytes).decode("utf-8")}
else:
PIL_Image = attempt_import_or_raise("PIL.Image", "pillow")
PIL = attempt_import_or_raise("PIL", "pillow")
if isinstance(image, PIL_Image.Image):
if isinstance(image, PIL.Image.Image):
buffered = io.BytesIO()
image.save(buffered, format="PNG")
image_bytes = buffered.getvalue()
@@ -136,9 +136,9 @@ class JinaEmbeddings(EmbeddingFunction):
elif isinstance(query, (Path, bytes)):
return [self.generate_image_embedding(query)]
else:
PIL_Image = attempt_import_or_raise("PIL.Image", "pillow")
PIL = attempt_import_or_raise("PIL", "pillow")
if isinstance(query, PIL_Image.Image):
if isinstance(query, PIL.Image.Image):
return [self.generate_image_embedding(query)]
else:
raise TypeError(

View File

@@ -71,8 +71,8 @@ class OpenClipEmbeddings(EmbeddingFunction):
if isinstance(query, str):
return [self.generate_text_embeddings(query)]
else:
PIL_Image = attempt_import_or_raise("PIL.Image", "pillow")
if isinstance(query, PIL_Image.Image):
PIL = attempt_import_or_raise("PIL", "pillow")
if isinstance(query, PIL.Image.Image):
return [self.generate_image_embedding(query)]
else:
raise TypeError("OpenClip supports str or PIL Image as query")
@@ -145,20 +145,20 @@ class OpenClipEmbeddings(EmbeddingFunction):
return self._encode_and_normalize_image(image)
def _to_pil(self, image: Union[str, bytes]):
PIL_Image = attempt_import_or_raise("PIL.Image", "pillow")
PIL = attempt_import_or_raise("PIL", "pillow")
if isinstance(image, bytes):
return PIL_Image.open(io.BytesIO(image))
if isinstance(image, PIL_Image.Image):
return PIL.Image.open(io.BytesIO(image))
if isinstance(image, PIL.Image.Image):
return image
elif isinstance(image, str):
parsed = urlparse.urlparse(image)
# TODO handle drive letter on windows.
if parsed.scheme == "file":
return PIL_Image.open(parsed.path)
return PIL.Image.open(parsed.path)
elif parsed.scheme == "":
return PIL_Image.open(image if os.name == "nt" else parsed.path)
return PIL.Image.open(image if os.name == "nt" else parsed.path)
elif parsed.scheme.startswith("http"):
return PIL_Image.open(io.BytesIO(url_retrieve(image)))
return PIL.Image.open(io.BytesIO(url_retrieve(image)))
else:
raise NotImplementedError("Only local and http(s) urls are supported")

View File

@@ -110,9 +110,6 @@ class OpenAIEmbeddings(TextEmbeddingFunction):
valid_embeddings = {
idx: v.embedding for v, idx in zip(rs.data, valid_indices)
}
except openai.AuthenticationError:
logging.error("Authentication failed: Invalid API key provided")
raise
except openai.BadRequestError:
logging.exception("Bad request: %s", texts)
return [None] * len(texts)

View File

@@ -6,7 +6,6 @@ import io
import os
from typing import TYPE_CHECKING, List, Union
import urllib.parse as urlparse
import warnings
import numpy as np
import pyarrow as pa
@@ -25,7 +24,6 @@ if TYPE_CHECKING:
@register("siglip")
class SigLipEmbeddings(EmbeddingFunction):
# Deprecated: prefer CLIP embeddings via `open-clip`.
model_name: str = "google/siglip-base-patch16-224"
device: str = "cpu"
batch_size: int = 64
@@ -38,12 +36,6 @@ class SigLipEmbeddings(EmbeddingFunction):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
warnings.warn(
"SigLip embeddings are deprecated. Use CLIP embeddings via the "
"'open-clip' embedding function instead.",
DeprecationWarning,
stacklevel=3,
)
transformers = attempt_import_or_raise("transformers")
self._torch = attempt_import_or_raise("torch")
@@ -64,8 +56,8 @@ class SigLipEmbeddings(EmbeddingFunction):
if isinstance(query, str):
return [self.generate_text_embeddings(query)]
else:
PIL_Image = attempt_import_or_raise("PIL.Image", "pillow")
if isinstance(query, PIL_Image.Image):
PIL = attempt_import_or_raise("PIL", "pillow")
if isinstance(query, PIL.Image.Image):
return [self.generate_image_embedding(query)]
else:
raise TypeError("SigLIP supports str or PIL Image as query")
@@ -135,21 +127,21 @@ class SigLipEmbeddings(EmbeddingFunction):
return image_features.cpu().detach().numpy().squeeze()
def _to_pil(self, image: Union[str, bytes, "PIL.Image.Image"]):
PIL_Image = attempt_import_or_raise("PIL.Image", "pillow")
if isinstance(image, PIL_Image.Image):
PIL = attempt_import_or_raise("PIL", "pillow")
if isinstance(image, PIL.Image.Image):
return image.convert("RGB") if image.mode != "RGB" else image
elif isinstance(image, bytes):
return PIL_Image.open(io.BytesIO(image)).convert("RGB")
return PIL.Image.open(io.BytesIO(image)).convert("RGB")
elif isinstance(image, str):
parsed = urlparse.urlparse(image)
if parsed.scheme == "file":
return PIL_Image.open(parsed.path).convert("RGB")
return PIL.Image.open(parsed.path).convert("RGB")
elif parsed.scheme == "":
path = image if os.name == "nt" else parsed.path
return PIL_Image.open(path).convert("RGB")
return PIL.Image.open(path).convert("RGB")
elif parsed.scheme.startswith("http"):
image_bytes = url_retrieve(image)
return PIL_Image.open(io.BytesIO(image_bytes)).convert("RGB")
return PIL.Image.open(io.BytesIO(image_bytes)).convert("RGB")
else:
raise NotImplementedError("Only local and http(s) urls are supported")
else:

View File

@@ -269,11 +269,6 @@ def retry_with_exponential_backoff(
# and say that it is assumed that if this portion errors out, it's due
# to rate limit but the user should check the error message to be sure.
except Exception as e: # noqa: PERF203
# Don't retry on authentication errors (e.g., OpenAI 401)
# These are permanent failures that won't be fixed by retrying
if _is_non_retryable_error(e):
raise
num_retries += 1
if num_retries > max_retries:
@@ -294,29 +289,6 @@ def retry_with_exponential_backoff(
return wrapper
def _is_non_retryable_error(error: Exception) -> bool:
"""Check if an error should not be retried.
Args:
error: The exception to check
Returns:
True if the error should not be retried, False otherwise
"""
# Check for OpenAI authentication errors
error_type = type(error).__name__
if error_type == "AuthenticationError":
return True
# Check for other common non-retryable HTTP status codes
# 401 Unauthorized, 403 Forbidden
if hasattr(error, "status_code"):
if error.status_code in (401, 403):
return True
return False
def url_retrieve(url: str):
"""
Parameters

View File

@@ -21,9 +21,6 @@ if TYPE_CHECKING:
# Token limits for different VoyageAI models
VOYAGE_TOTAL_TOKEN_LIMITS = {
"voyage-4": 320_000,
"voyage-4-lite": 1_000_000,
"voyage-4-large": 120_000,
"voyage-context-3": 32_000,
"voyage-3.5-lite": 1_000_000,
"voyage-3.5": 320_000,
@@ -64,7 +61,7 @@ def is_video_path(path: Path) -> bool:
def transform_input(input_data: Union[str, bytes, Path]):
PIL_Image = attempt_import_or_raise("PIL.Image", "pillow")
PIL = attempt_import_or_raise("PIL", "pillow")
if isinstance(input_data, str):
if is_valid_url(input_data):
if is_video_url(input_data):
@@ -73,7 +70,7 @@ def transform_input(input_data: Union[str, bytes, Path]):
content = {"type": "image_url", "image_url": input_data}
else:
content = {"type": "text", "text": input_data}
elif isinstance(input_data, PIL_Image.Image):
elif isinstance(input_data, PIL.Image.Image):
buffered = BytesIO()
input_data.save(buffered, format="JPEG")
img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
@@ -82,7 +79,7 @@ def transform_input(input_data: Union[str, bytes, Path]):
"image_base64": "data:image/jpeg;base64," + img_str,
}
elif isinstance(input_data, bytes):
img = PIL_Image.open(BytesIO(input_data))
img = PIL.Image.open(BytesIO(input_data))
buffered = BytesIO()
img.save(buffered, format="JPEG")
img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
@@ -101,7 +98,7 @@ def transform_input(input_data: Union[str, bytes, Path]):
"video_base64": video_str,
}
else:
img = PIL_Image.open(input_data)
img = PIL.Image.open(input_data)
buffered = BytesIO()
img.save(buffered, format="JPEG")
img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
@@ -119,8 +116,8 @@ def sanitize_multimodal_input(inputs: Union[TEXT, IMAGES]) -> List[Any]:
"""
Sanitize the input to the embedding function.
"""
PIL_Image = attempt_import_or_raise("PIL.Image", "pillow")
if isinstance(inputs, (str, bytes, Path, PIL_Image.Image)):
PIL = attempt_import_or_raise("PIL", "pillow")
if isinstance(inputs, (str, bytes, Path, PIL.Image.Image)):
inputs = [inputs]
elif isinstance(inputs, list):
pass # Already a list, use as-is
@@ -133,7 +130,7 @@ def sanitize_multimodal_input(inputs: Union[TEXT, IMAGES]) -> List[Any]:
f"Input type {type(inputs)} not allowed with multimodal model."
)
if not all(isinstance(x, (str, bytes, Path, PIL_Image.Image)) for x in inputs):
if not all(isinstance(x, (str, bytes, Path, PIL.Image.Image)) for x in inputs):
raise ValueError("Each input should be either str, bytes, Path or Image.")
return [transform_input(i) for i in inputs]
@@ -170,9 +167,6 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
name: str
The name of the model to use. List of acceptable models:
* voyage-4 (1024 dims, general-purpose and multilingual retrieval)
* voyage-4-lite (1024 dims, optimized for latency and cost)
* voyage-4-large (1024 dims, best retrieval quality)
* voyage-context-3
* voyage-3.5
* voyage-3.5-lite
@@ -221,9 +215,6 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
_FLEXIBLE_DIM_MODELS: ClassVar[list] = ["voyage-multimodal-3.5"]
_VALID_DIMENSIONS: ClassVar[list] = [256, 512, 1024, 2048]
text_embedding_models: list = [
"voyage-4",
"voyage-4-lite",
"voyage-4-large",
"voyage-3.5",
"voyage-3.5-lite",
"voyage-3",
@@ -261,9 +252,6 @@ class VoyageAIEmbeddingFunction(EmbeddingFunction):
elif self.name == "voyage-code-2":
return 1536
elif self.name in [
"voyage-4",
"voyage-4-lite",
"voyage-4-large",
"voyage-context-3",
"voyage-3.5",
"voyage-3.5-lite",

View File

@@ -44,7 +44,7 @@ from lance_namespace import (
ListNamespacesRequest,
CreateNamespaceRequest,
DropNamespaceRequest,
DeclareTableRequest,
CreateEmptyTableRequest,
)
from lancedb.table import AsyncTable, LanceTable, Table
from lancedb.util import validate_table_name
@@ -318,20 +318,20 @@ class LanceNamespaceDBConnection(DBConnection):
if location is None:
# Table doesn't exist or mode is "create", reserve a new location
declare_request = DeclareTableRequest(
create_empty_request = CreateEmptyTableRequest(
id=table_id,
location=None,
properties=self.storage_options if self.storage_options else None,
)
declare_response = self._ns.declare_table(declare_request)
create_empty_response = self._ns.create_empty_table(create_empty_request)
if not declare_response.location:
if not create_empty_response.location:
raise ValueError(
"Table location is missing from declare_table response"
"Table location is missing from create_empty_table response"
)
location = declare_response.location
namespace_storage_options = declare_response.storage_options
location = create_empty_response.location
namespace_storage_options = create_empty_response.storage_options
# Merge storage options: self.storage_options < user options < namespace options
merged_storage_options = dict(self.storage_options)
@@ -759,20 +759,20 @@ class AsyncLanceNamespaceDBConnection:
if location is None:
# Table doesn't exist or mode is "create", reserve a new location
declare_request = DeclareTableRequest(
create_empty_request = CreateEmptyTableRequest(
id=table_id,
location=None,
properties=self.storage_options if self.storage_options else None,
)
declare_response = self._ns.declare_table(declare_request)
create_empty_response = self._ns.create_empty_table(create_empty_request)
if not declare_response.location:
if not create_empty_response.location:
raise ValueError(
"Table location is missing from declare_table response"
"Table location is missing from create_empty_table response"
)
location = declare_response.location
namespace_storage_options = declare_response.storage_options
location = create_empty_response.location
namespace_storage_options = create_empty_response.storage_options
# Merge storage options: self.storage_options < user options < namespace options
merged_storage_options = dict(self.storage_options)

View File

@@ -9,7 +9,7 @@ import json
from ._lancedb import async_permutation_builder, PermutationReader
from .table import LanceTable
from .background_loop import LOOP
from .util import batch_to_tensor, batch_to_tensor_rows
from .util import batch_to_tensor
from typing import Any, Callable, Iterator, Literal, Optional, TYPE_CHECKING, Union
if TYPE_CHECKING:
@@ -333,11 +333,7 @@ class Transforms:
"""
@staticmethod
def arrow2python(batch: pa.RecordBatch) -> list[dict[str, Any]]:
return batch.to_pylist()
@staticmethod
def arrow2pythoncol(batch: pa.RecordBatch) -> dict[str, list[Any]]:
def arrow2python(batch: pa.RecordBatch) -> dict[str, list[Any]]:
return batch.to_pydict()
@staticmethod
@@ -691,17 +687,7 @@ class Permutation:
return
def with_format(
self,
format: Literal[
"numpy",
"python",
"python_col",
"pandas",
"arrow",
"torch",
"torch_col",
"polars",
],
self, format: Literal["numpy", "python", "pandas", "arrow", "torch", "polars"]
) -> "Permutation":
"""
Set the format for batches
@@ -710,18 +696,16 @@ class Permutation:
The format can be one of:
- "numpy" - the batch will be a dict of numpy arrays (one per column)
- "python" - the batch will be a list of dicts (one per row)
- "python_col" - the batch will be a dict of lists (one entry per column)
- "python" - the batch will be a dict of lists (one per column)
- "pandas" - the batch will be a pandas DataFrame
- "arrow" - the batch will be a pyarrow RecordBatch
- "torch" - the batch will be a list of tensors, one per row
- "torch_col" - the batch will be a 2D torch tensor (first dim indexes columns)
- "torch" - the batch will be a two dimensional torch tensor
- "polars" - the batch will be a polars DataFrame
Conversion may or may not involve a data copy. Lance uses Arrow internally
and so it is able to zero-copy to the arrow and polars formats.
and so it is able to zero-copy to the arrow and polars.
Conversion to torch_col will be zero-copy but will only support a subset of data
Conversion to torch will be zero-copy but will only support a subset of data
types (numeric types).
Conversion to numpy and/or pandas will typically be zero-copy for numeric
@@ -734,8 +718,6 @@ class Permutation:
assert format is not None, "format is required"
if format == "python":
return self.with_transform(Transforms.arrow2python)
if format == "python_col":
return self.with_transform(Transforms.arrow2pythoncol)
elif format == "numpy":
return self.with_transform(Transforms.arrow2numpy)
elif format == "pandas":
@@ -743,8 +725,6 @@ class Permutation:
elif format == "arrow":
return self.with_transform(Transforms.arrow2arrow)
elif format == "torch":
return self.with_transform(batch_to_tensor_rows)
elif format == "torch_col":
return self.with_transform(batch_to_tensor)
elif format == "polars":
return self.with_transform(Transforms.arrow2polars())
@@ -766,20 +746,15 @@ class Permutation:
def __getitem__(self, index: int) -> Any:
"""
Returns a single row from the permutation by offset
"""
return self.__getitems__([index])
Return a single row from the permutation
def __getitems__(self, indices: list[int]) -> Any:
"""
Returns rows from the permutation by offset
"""
The output will always be a python dictionary regardless of the format.
async def do_getitems():
return await self.reader.take_offsets(indices, selection=self.selection)
batch = LOOP.run(do_getitems())
return self.transform_fn(batch)
This method is mostly useful for debugging and exploration. For actual
processing use [iter](#iter) or a torch data loader to perform batched
processing.
"""
pass
@deprecated(details="Use with_skip instead")
def skip(self, skip: int) -> "Permutation":

View File

@@ -275,7 +275,7 @@ def _py_type_to_arrow_type(py_type: Type[Any], field: FieldInfo) -> pa.DataType:
return pa.timestamp("us", tz=tz)
elif getattr(py_type, "__origin__", None) in (list, tuple):
child = py_type.__args__[0]
return _pydantic_list_child_to_arrow(child, field)
return pa.list_(_py_type_to_arrow_type(child, field))
raise TypeError(
f"Converting Pydantic type to Arrow Type: unsupported type {py_type}."
)
@@ -298,18 +298,12 @@ else:
def _pydantic_type_to_arrow_type(tp: Any, field: FieldInfo) -> pa.DataType:
def _safe_issubclass(candidate: Any, base: type) -> bool:
try:
return issubclass(candidate, base)
except TypeError:
return False
if inspect.isclass(tp):
if _safe_issubclass(tp, pydantic.BaseModel):
if issubclass(tp, pydantic.BaseModel):
# Struct
fields = _pydantic_model_to_fields(tp)
return pa.struct(fields)
if _safe_issubclass(tp, FixedSizeListMixin):
if issubclass(tp, FixedSizeListMixin):
if getattr(tp, "is_multi_vector", lambda: False)():
return pa.list_(pa.list_(tp.value_arrow_type(), tp.dim()))
# For regular Vector
@@ -317,67 +311,45 @@ def _pydantic_type_to_arrow_type(tp: Any, field: FieldInfo) -> pa.DataType:
return _py_type_to_arrow_type(tp, field)
def _pydantic_list_child_to_arrow(child: Any, field: FieldInfo) -> pa.DataType:
unwrapped = _unwrap_optional_annotation(child)
if unwrapped is not None:
return pa.list_(
pa.field("item", _pydantic_type_to_arrow_type(unwrapped, field), True)
)
return pa.list_(_pydantic_type_to_arrow_type(child, field))
def _unwrap_optional_annotation(annotation: Any) -> Any | None:
if isinstance(annotation, (_GenericAlias, GenericAlias)):
origin = annotation.__origin__
args = annotation.__args__
if origin == Union:
non_none = [arg for arg in args if arg is not type(None)]
if len(non_none) == 1 and len(non_none) != len(args):
return non_none[0]
elif sys.version_info >= (3, 10) and isinstance(annotation, types.UnionType):
args = annotation.__args__
non_none = [arg for arg in args if arg is not type(None)]
if len(non_none) == 1 and len(non_none) != len(args):
return non_none[0]
return None
def _pydantic_to_arrow_type(field: FieldInfo) -> pa.DataType:
"""Convert a Pydantic FieldInfo to Arrow DataType"""
unwrapped = _unwrap_optional_annotation(field.annotation)
if unwrapped is not None:
return _pydantic_type_to_arrow_type(unwrapped, field)
if isinstance(field.annotation, (_GenericAlias, GenericAlias)):
origin = field.annotation.__origin__
args = field.annotation.__args__
if origin is list:
child = args[0]
return _pydantic_list_child_to_arrow(child, field)
return pa.list_(_py_type_to_arrow_type(child, field))
elif origin == Union:
if len(args) == 2 and args[1] is type(None):
return _pydantic_type_to_arrow_type(args[0], field)
elif sys.version_info >= (3, 10) and isinstance(field.annotation, types.UnionType):
args = field.annotation.__args__
if len(args) == 2:
for typ in args:
if typ is type(None):
continue
return _py_type_to_arrow_type(typ, field)
return _pydantic_type_to_arrow_type(field.annotation, field)
def is_nullable(field: FieldInfo) -> bool:
"""Check if a Pydantic FieldInfo is nullable."""
if _unwrap_optional_annotation(field.annotation) is not None:
return True
if isinstance(field.annotation, (_GenericAlias, GenericAlias)):
origin = field.annotation.__origin__
args = field.annotation.__args__
if origin == Union:
if any(typ is type(None) for typ in args):
if len(args) == 2 and args[1] is type(None):
return True
elif sys.version_info >= (3, 10) and isinstance(field.annotation, types.UnionType):
args = field.annotation.__args__
for typ in args:
if typ is type(None):
return True
elif inspect.isclass(field.annotation):
try:
if issubclass(field.annotation, FixedSizeListMixin):
return field.annotation.nullable()
except TypeError:
return False
elif inspect.isclass(field.annotation) and issubclass(
field.annotation, FixedSizeListMixin
):
return field.annotation.nullable()
return False

View File

@@ -961,27 +961,22 @@ class LanceQueryBuilder(ABC):
>>> query = [100, 100]
>>> plan = table.search(query).analyze_plan()
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
AnalyzeExec verbose=true, elapsed=..., metrics=...
TracedExec, elapsed=..., metrics=...
ProjectionExec: elapsed=..., expr=[...],
metrics=[output_rows=..., elapsed_compute=..., output_bytes=...]
GlobalLimitExec: elapsed=..., skip=0, fetch=10,
metrics=[output_rows=..., elapsed_compute=..., output_bytes=...]
FilterExec: elapsed=..., _distance@2 IS NOT NULL, metrics=[...]
SortExec: elapsed=..., TopK(fetch=10), expr=[...],
AnalyzeExec verbose=true, metrics=[], cumulative_cpu=...
TracedExec, metrics=[], cumulative_cpu=...
ProjectionExec: expr=[...], metrics=[...], cumulative_cpu=...
GlobalLimitExec: skip=0, fetch=10, metrics=[...], cumulative_cpu=...
FilterExec: _distance@2 IS NOT NULL,
metrics=[output_rows=..., elapsed_compute=...], cumulative_cpu=...
SortExec: TopK(fetch=10), expr=[...],
preserve_partitioning=[...],
metrics=[output_rows=..., elapsed_compute=...,
output_bytes=..., row_replacements=...]
KNNVectorDistance: elapsed=..., metric=l2,
metrics=[output_rows=..., elapsed_compute=...,
output_bytes=..., output_batches=...]
LanceRead: elapsed=..., uri=..., projection=[vector],
num_fragments=..., range_before=None, range_after=None,
row_id=true, row_addr=false,
full_filter=--, refine_filter=--,
metrics=[output_rows=..., elapsed_compute=..., output_bytes=...,
fragments_scanned=..., ranges_scanned=1, rows_scanned=1,
bytes_read=..., iops=..., requests=..., task_wait_time=...]
metrics=[output_rows=..., elapsed_compute=..., row_replacements=...],
cumulative_cpu=...
KNNVectorDistance: metric=l2,
metrics=[output_rows=..., elapsed_compute=..., output_batches=...],
cumulative_cpu=...
LanceRead: uri=..., projection=[vector], ...
metrics=[output_rows=..., elapsed_compute=...,
bytes_read=..., iops=..., requests=...], cumulative_cpu=...
Returns
-------
@@ -1433,19 +1428,6 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
self._bypass_vector_index = True
return self
def fast_search(self) -> LanceVectorQueryBuilder:
"""
Skip a flat search of unindexed data. This will improve
search performance but search results will not include unindexed data.
Returns
-------
LanceVectorQueryBuilder
The LanceVectorQueryBuilder object.
"""
self._fast_search = True
return self
class LanceFtsQueryBuilder(LanceQueryBuilder):
"""A builder for full text search for LanceDB."""
@@ -1782,26 +1764,6 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
vector_results = LanceHybridQueryBuilder._rank(vector_results, "_distance")
fts_results = LanceHybridQueryBuilder._rank(fts_results, "_score")
# If both result sets are empty (e.g. after hard filtering),
# return early to avoid errors in reranking or score restoration.
if vector_results.num_rows == 0 and fts_results.num_rows == 0:
# Build a minimal empty table with the _relevance_score column
combined_schema = pa.unify_schemas(
[vector_results.schema, fts_results.schema],
)
empty = pa.table(
{
col: pa.array([], type=combined_schema.field(col).type)
for col in combined_schema.names
}
)
empty = empty.append_column(
"_relevance_score", pa.array([], type=pa.float32())
)
if not with_row_ids and "_rowid" in empty.column_names:
empty = empty.drop(["_rowid"])
return empty
original_distances = None
original_scores = None
original_distance_row_ids = None
@@ -2138,17 +2100,19 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
""" # noqa: E501
self._create_query_builders()
reranker_label = str(self._reranker) if self._reranker else "No reranker"
vector_plan = self._table._explain_plan(
self._vector_query.to_query_object(), verbose=verbose
results = ["Vector Search Plan:"]
results.append(
self._table._explain_plan(
self._vector_query.to_query_object(), verbose=verbose
)
)
fts_plan = self._table._explain_plan(
self._fts_query.to_query_object(), verbose=verbose
results.append("FTS Search Plan:")
results.append(
self._table._explain_plan(
self._fts_query.to_query_object(), verbose=verbose
)
)
# Indent sub-plans under the reranker
indented_vector = "\n".join(" " + line for line in vector_plan.splitlines())
indented_fts = "\n".join(" " + line for line in fts_plan.splitlines())
return f"{reranker_label}\n {indented_vector}\n {indented_fts}"
return "\n".join(results)
def analyze_plan(self):
"""Execute the query and display with runtime metrics.
@@ -3182,20 +3146,23 @@ class AsyncHybridQuery(AsyncStandardQuery, AsyncVectorQueryBase):
... plan = await table.query().nearest_to([1.0, 2.0]).nearest_to_text("hello").explain_plan(True)
... print(plan)
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
RRFReranker(K=60)
ProjectionExec: expr=[vector@0 as vector, text@3 as text, _distance@2 as _distance]
Take: columns="vector, _rowid, _distance, (text)"
CoalesceBatchesExec: target_batch_size=1024
GlobalLimitExec: skip=0, fetch=10
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST, _rowid@1 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2
LanceRead: uri=..., projection=[vector], ...
ProjectionExec: expr=[vector@2 as vector, text@3 as text, _score@1 as _score]
Take: columns="_rowid, _score, (vector), (text)"
CoalesceBatchesExec: target_batch_size=1024
GlobalLimitExec: skip=0, fetch=10
MatchQuery: column=text, query=hello
Vector Search Plan:
ProjectionExec: expr=[vector@0 as vector, text@3 as text, _distance@2 as _distance]
Take: columns="vector, _rowid, _distance, (text)"
CoalesceBatchesExec: target_batch_size=1024
GlobalLimitExec: skip=0, fetch=10
FilterExec: _distance@2 IS NOT NULL
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST, _rowid@1 ASC NULLS LAST], preserve_partitioning=[false]
KNNVectorDistance: metric=l2
LanceRead: uri=..., projection=[vector], ...
<BLANKLINE>
FTS Search Plan:
ProjectionExec: expr=[vector@2 as vector, text@3 as text, _score@1 as _score]
Take: columns="_rowid, _score, (vector), (text)"
CoalesceBatchesExec: target_batch_size=1024
GlobalLimitExec: skip=0, fetch=10
MatchQuery: column=text, query=hello
<BLANKLINE>
Parameters
----------
@@ -3207,12 +3174,12 @@ class AsyncHybridQuery(AsyncStandardQuery, AsyncVectorQueryBase):
plan : str
""" # noqa: E501
vector_plan = await self._inner.to_vector_query().explain_plan(verbose)
fts_plan = await self._inner.to_fts_query().explain_plan(verbose)
# Indent sub-plans under the reranker
indented_vector = "\n".join(" " + line for line in vector_plan.splitlines())
indented_fts = "\n".join(" " + line for line in fts_plan.splitlines())
return f"{self._reranker}\n {indented_vector}\n {indented_fts}"
results = ["Vector Search Plan:"]
results.append(await self._inner.to_vector_query().explain_plan(verbose))
results.append("FTS Search Plan:")
results.append(await self._inner.to_fts_query().explain_plan(verbose))
return "\n".join(results)
async def analyze_plan(self):
"""

View File

@@ -42,18 +42,10 @@ class AnswerdotaiRerankers(Reranker):
rerankers = attempt_import_or_raise(
"rerankers"
) # import here for faster ops later
self.model_name = model_name
self.model_type = model_type
self.reranker = rerankers.Reranker(
model_name=model_name, model_type=model_type, **kwargs
)
def __str__(self):
return (
f"AnswerdotaiRerankers(model_type={self.model_type}, "
f"model_name={self.model_name})"
)
def _rerank(self, result_set: pa.Table, query: str):
result_set = self._handle_empty_results(result_set)
if len(result_set) == 0:

View File

@@ -40,9 +40,6 @@ class Reranker(ABC):
if ARROW_VERSION.major <= 13:
self._concat_tables_args = {"promote": True}
def __str__(self):
return self.__class__.__name__
def rerank_vector(
self,
query: str,

View File

@@ -44,9 +44,6 @@ class CohereReranker(Reranker):
self.top_n = top_n
self.api_key = api_key
def __str__(self):
return f"CohereReranker(model_name={self.model_name})"
@cached_property
def _client(self):
cohere = attempt_import_or_raise("cohere")

View File

@@ -50,9 +50,6 @@ class CrossEncoderReranker(Reranker):
if self.device is None:
self.device = "cuda" if torch.cuda.is_available() else "cpu"
def __str__(self):
return f"CrossEncoderReranker(model_name={self.model_name})"
@cached_property
def model(self):
sbert = attempt_import_or_raise("sentence_transformers")

View File

@@ -45,9 +45,6 @@ class JinaReranker(Reranker):
self.top_n = top_n
self.api_key = api_key
def __str__(self):
return f"JinaReranker(model_name={self.model_name})"
@cached_property
def _client(self):
import requests

View File

@@ -38,9 +38,6 @@ class LinearCombinationReranker(Reranker):
self.weight = weight
self.fill = fill
def __str__(self):
return f"LinearCombinationReranker(weight={self.weight}, fill={self.fill})"
def rerank_hybrid(
self,
query: str, # noqa: F821

View File

@@ -54,12 +54,6 @@ class MRRReranker(Reranker):
self.weight_vector = weight_vector
self.weight_fts = weight_fts
def __str__(self):
return (
f"MRRReranker(weight_vector={self.weight_vector}, "
f"weight_fts={self.weight_fts})"
)
def rerank_hybrid(
self,
query: str, # noqa: F821

View File

@@ -43,9 +43,6 @@ class OpenaiReranker(Reranker):
self.column = column
self.api_key = api_key
def __str__(self):
return f"OpenaiReranker(model_name={self.model_name})"
def _rerank(self, result_set: pa.Table, query: str):
result_set = self._handle_empty_results(result_set)
if len(result_set) == 0:

View File

@@ -36,9 +36,6 @@ class RRFReranker(Reranker):
super().__init__(return_score)
self.K = K
def __str__(self):
return f"RRFReranker(K={self.K})"
def rerank_hybrid(
self,
query: str, # noqa: F821

View File

@@ -52,9 +52,6 @@ class VoyageAIReranker(Reranker):
self.api_key = api_key
self.truncation = truncation
def __str__(self):
return f"VoyageAIReranker(model_name={self.model_name})"
@cached_property
def _client(self):
voyageai = attempt_import_or_raise("voyageai")

View File

@@ -1,214 +0,0 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
from dataclasses import dataclass
from functools import singledispatch
import sys
from typing import Callable, Iterator, Optional
from lancedb.arrow import to_arrow
import pyarrow as pa
import pyarrow.dataset as ds
from .pydantic import LanceModel
@dataclass
class Scannable:
schema: pa.Schema
num_rows: Optional[int]
# Factory function to create a new reader each time (supports re-scanning)
reader: Callable[[], pa.RecordBatchReader]
# Whether reader can be called more than once. For example, an iterator can
# only be consumed once, while a DataFrame can be converted to a new reader
# each time.
rescannable: bool = True
@singledispatch
def to_scannable(data) -> Scannable:
# Fallback: try iterable protocol
if hasattr(data, "__iter__"):
return _from_iterable(iter(data))
raise NotImplementedError(f"to_scannable not implemented for type {type(data)}")
@to_scannable.register(pa.RecordBatchReader)
def _from_reader(data: pa.RecordBatchReader) -> Scannable:
# RecordBatchReader can only be consumed once - not rescannable
return Scannable(
schema=data.schema, num_rows=None, reader=lambda: data, rescannable=False
)
@to_scannable.register(pa.RecordBatch)
def _from_batch(data: pa.RecordBatch) -> Scannable:
return Scannable(
schema=data.schema,
num_rows=data.num_rows,
reader=lambda: pa.RecordBatchReader.from_batches(data.schema, [data]),
)
@to_scannable.register(pa.Table)
def _from_table(data: pa.Table) -> Scannable:
return Scannable(schema=data.schema, num_rows=data.num_rows, reader=data.to_reader)
@to_scannable.register(ds.Dataset)
def _from_dataset(data: ds.Dataset) -> Scannable:
return Scannable(
schema=data.schema,
num_rows=data.count_rows(),
reader=lambda: data.scanner().to_reader(),
)
@to_scannable.register(ds.Scanner)
def _from_scanner(data: ds.Scanner) -> Scannable:
# Scanner can only be consumed once - not rescannable
return Scannable(
schema=data.projected_schema,
num_rows=None,
reader=data.to_reader,
rescannable=False,
)
@to_scannable.register(list)
def _from_list(data: list) -> Scannable:
if not data:
raise ValueError("Cannot create table from empty list without a schema")
table = to_arrow(data)
return Scannable(
schema=table.schema, num_rows=table.num_rows, reader=table.to_reader
)
@to_scannable.register(dict)
def _from_dict(data: dict) -> Scannable:
raise ValueError("Cannot add a single dictionary to a table. Use a list.")
@to_scannable.register(LanceModel)
def _from_lance_model(data: LanceModel) -> Scannable:
raise ValueError("Cannot add a single LanceModel to a table. Use a list.")
def _from_iterable(data: Iterator) -> Scannable:
first_item = next(data, None)
if first_item is None:
raise ValueError("Cannot create table from empty iterator")
first = to_arrow(first_item)
schema = first.schema
def iter():
yield from first.to_batches()
for item in data:
batch = to_arrow(item)
if batch.schema != schema:
try:
batch = batch.cast(schema)
except pa.lib.ArrowInvalid:
raise ValueError(
f"Input iterator yielded a batch with schema that "
f"does not match the schema of other batches.\n"
f"Expected:\n{schema}\nGot:\n{batch.schema}"
)
yield from batch.to_batches()
reader = pa.RecordBatchReader.from_batches(schema, iter())
return to_scannable(reader)
_registered_modules: set[str] = set()
def _register_optional_converters():
"""Register converters for optional dependencies that are already imported."""
if "pandas" in sys.modules and "pandas" not in _registered_modules:
_registered_modules.add("pandas")
import pandas as pd
@to_arrow.register(pd.DataFrame)
def _arrow_from_pandas(data: pd.DataFrame) -> pa.Table:
table = pa.Table.from_pandas(data, preserve_index=False)
return table.replace_schema_metadata(None)
@to_scannable.register(pd.DataFrame)
def _from_pandas(data: pd.DataFrame) -> Scannable:
return to_scannable(_arrow_from_pandas(data))
if "polars" in sys.modules and "polars" not in _registered_modules:
_registered_modules.add("polars")
import polars as pl
@to_arrow.register(pl.DataFrame)
def _arrow_from_polars(data: pl.DataFrame) -> pa.Table:
return data.to_arrow()
@to_scannable.register(pl.DataFrame)
def _from_polars(data: pl.DataFrame) -> Scannable:
arrow = data.to_arrow()
return Scannable(
schema=arrow.schema, num_rows=len(data), reader=arrow.to_reader
)
@to_scannable.register(pl.LazyFrame)
def _from_polars_lazy(data: pl.LazyFrame) -> Scannable:
arrow = data.collect().to_arrow()
return Scannable(
schema=arrow.schema, num_rows=arrow.num_rows, reader=arrow.to_reader
)
if "datasets" in sys.modules and "datasets" not in _registered_modules:
_registered_modules.add("datasets")
from datasets import Dataset as HFDataset
from datasets import DatasetDict as HFDatasetDict
@to_scannable.register(HFDataset)
def _from_hf_dataset(data: HFDataset) -> Scannable:
table = data.data.table # Access underlying Arrow table
return Scannable(
schema=table.schema, num_rows=len(data), reader=table.to_reader
)
@to_scannable.register(HFDatasetDict)
def _from_hf_dataset_dict(data: HFDatasetDict) -> Scannable:
# HuggingFace DatasetDict: combine all splits with a 'split' column
schema = data[list(data.keys())[0]].features.arrow_schema
if "split" not in schema.names:
schema = schema.append(pa.field("split", pa.string()))
def gen():
for split_name, dataset in data.items():
for batch in dataset.data.to_batches():
split_arr = pa.array(
[split_name] * len(batch), type=pa.string()
)
yield pa.RecordBatch.from_arrays(
list(batch.columns) + [split_arr], schema=schema
)
total_rows = sum(len(dataset) for dataset in data.values())
return Scannable(
schema=schema,
num_rows=total_rows,
reader=lambda: pa.RecordBatchReader.from_batches(schema, gen()),
)
if "lance" in sys.modules and "lance" not in _registered_modules:
_registered_modules.add("lance")
import lance
@to_scannable.register(lance.LanceDataset)
def _from_lance(data: lance.LanceDataset) -> Scannable:
return Scannable(
schema=data.schema,
num_rows=data.count_rows(),
reader=lambda: data.scanner().to_reader(),
)
# Register on module load
_register_optional_converters()

View File

@@ -25,8 +25,6 @@ from typing import (
)
from urllib.parse import urlparse
from lancedb.scannable import _register_optional_converters, to_scannable
from . import __version__
from lancedb.arrow import peek_reader
from lancedb.background_loop import LOOP
@@ -906,9 +904,7 @@ class Table(ABC):
----------
field_names: str or list of str
The name(s) of the field to index.
If ``use_tantivy`` is False (default), only a single field name
(str) is supported. To index multiple fields, create a separate
FTS index for each field.
can be only str if use_tantivy=True for now.
replace: bool, default False
If True, replace the existing index if it exists. Note that this is
not yet an atomic operation; the index will be temporarily
@@ -2226,37 +2222,6 @@ class LanceTable(Table):
def uri(self) -> str:
return LOOP.run(self._table.uri())
def initial_storage_options(self) -> Optional[Dict[str, str]]:
"""Get the initial storage options that were passed in when opening this table.
For dynamically refreshed options (e.g., credential vending), use
:meth:`latest_storage_options`.
Warning: This is an internal API and the return value is subject to change.
Returns
-------
Optional[Dict[str, str]]
The storage options, or None if no storage options were configured.
"""
return LOOP.run(self._table.initial_storage_options())
def latest_storage_options(self) -> Optional[Dict[str, str]]:
"""Get the latest storage options, refreshing from provider if configured.
This method is useful for credential vending scenarios where storage options
may be refreshed dynamically. If no dynamic provider is configured, this
returns the initial static options.
Warning: This is an internal API and the return value is subject to change.
Returns
-------
Optional[Dict[str, str]]
The storage options, or None if no storage options were configured.
"""
return LOOP.run(self._table.latest_storage_options())
def create_scalar_index(
self,
column: str,
@@ -2302,11 +2267,7 @@ class LanceTable(Table):
):
if not use_tantivy:
if not isinstance(field_names, str):
raise ValueError(
"Native FTS indexes can only be created on a single field "
"at a time. To search over multiple text fields, create a "
"separate FTS index for each field."
)
raise ValueError("field_names must be a string when use_tantivy=False")
if tokenizer_name is None:
tokenizer_configs = {
@@ -3663,37 +3624,6 @@ class AsyncTable:
"""
return await self._inner.uri()
async def initial_storage_options(self) -> Optional[Dict[str, str]]:
"""Get the initial storage options that were passed in when opening this table.
For dynamically refreshed options (e.g., credential vending), use
:meth:`latest_storage_options`.
Warning: This is an internal API and the return value is subject to change.
Returns
-------
Optional[Dict[str, str]]
The storage options, or None if no storage options were configured.
"""
return await self._inner.initial_storage_options()
async def latest_storage_options(self) -> Optional[Dict[str, str]]:
"""Get the latest storage options, refreshing from provider if configured.
This method is useful for credential vending scenarios where storage options
may be refreshed dynamically. If no dynamic provider is configured, this
returns the initial static options.
Warning: This is an internal API and the return value is subject to change.
Returns
-------
Optional[Dict[str, str]]
The storage options, or None if no storage options were configured.
"""
return await self._inner.latest_storage_options()
async def add(
self,
data: DATA,
@@ -3729,31 +3659,18 @@ class AsyncTable:
on_bad_vectors = "error"
if fill_value is None:
fill_value = 0.0
data = _sanitize_data(
data,
schema,
metadata=schema.metadata,
on_bad_vectors=on_bad_vectors,
fill_value=fill_value,
allow_subschema=True,
)
if isinstance(data, pa.Table):
data = data.to_reader()
# _santitize_data is an old code path, but we will use it until the
# new code path is ready.
if on_bad_vectors != "error" or (
schema.metadata is not None and b"embedding_functions" in schema.metadata
):
data = _sanitize_data(
data,
schema,
metadata=schema.metadata,
on_bad_vectors=on_bad_vectors,
fill_value=fill_value,
allow_subschema=True,
)
_register_optional_converters()
data = to_scannable(data)
try:
return await self._inner.add(data, mode or "append")
except RuntimeError as e:
if "Cast error" in str(e):
raise ValueError(e)
elif "Vector column contains NaN" in str(e):
raise ValueError(e)
else:
raise
return await self._inner.add(data, mode or "append")
def merge_insert(self, on: Union[str, Iterable[str]]) -> LanceMergeInsertBuilder:
"""

View File

@@ -419,22 +419,3 @@ def batch_to_tensor(batch: pa.RecordBatch):
"""
torch = attempt_import_or_raise("torch", "torch")
return torch.stack([torch.from_dlpack(col) for col in batch.columns])
def batch_to_tensor_rows(batch: pa.RecordBatch):
"""
Convert a PyArrow RecordBatch to a list of PyTorch Tensor, one per row
Each column is converted to a tensor (using zero-copy via DLPack)
and the columns are then stacked into a single tensor. The 2D tensor
is then converted to a list of tensors, one per row
Fails if torch or numpy is not installed.
Fails if a column's data type is not supported by PyTorch.
"""
torch = attempt_import_or_raise("torch", "torch")
numpy = attempt_import_or_raise("numpy", "numpy")
columns = [col.to_numpy(zero_copy_only=False) for col in batch.columns]
stacked = torch.tensor(numpy.column_stack(columns))
rows = list(stacked.unbind(dim=0))
return rows

View File

@@ -515,34 +515,3 @@ def test_openai_propagates_api_key(monkeypatch):
query = "greetings"
actual = table.search(query).limit(1).to_pydantic(Words)[0]
assert len(actual.text) > 0
@patch("time.sleep")
def test_openai_no_retry_on_401(mock_sleep):
"""
Test that OpenAI embedding function does not retry on 401 authentication
errors.
"""
from lancedb.embeddings.utils import retry_with_exponential_backoff
# Create a mock that raises an AuthenticationError
class MockAuthenticationError(Exception):
"""Mock OpenAI AuthenticationError"""
pass
MockAuthenticationError.__name__ = "AuthenticationError"
mock_func = MagicMock(side_effect=MockAuthenticationError("Invalid API key"))
# Wrap the function with retry logic
wrapped_func = retry_with_exponential_backoff(mock_func, max_retries=3)
# Should raise without retrying
with pytest.raises(MockAuthenticationError):
wrapped_func()
# Verify that the function was only called once (no retries)
assert mock_func.call_count == 1
# Verify that sleep was never called (no retries)
assert mock_sleep.call_count == 0

View File

@@ -517,36 +517,19 @@ def test_ollama_embedding(tmp_path):
@pytest.mark.skipif(
os.environ.get("VOYAGE_API_KEY") is None, reason="VOYAGE_API_KEY not set"
)
@pytest.mark.parametrize(
"model_name,expected_dims",
[
("voyage-3", 1024),
("voyage-4", 1024),
("voyage-4-lite", 1024),
("voyage-4-large", 1024),
],
)
def test_voyageai_embedding_function(model_name, expected_dims, tmp_path):
"""Integration test for VoyageAI text embedding models with real API calls."""
voyageai = get_registry().get("voyageai").create(name=model_name, max_retries=0)
def test_voyageai_embedding_function():
voyageai = get_registry().get("voyageai").create(name="voyage-3", max_retries=0)
class TextModel(LanceModel):
text: str = voyageai.SourceField()
vector: Vector(voyageai.ndims()) = voyageai.VectorField()
df = pd.DataFrame({"text": ["hello world", "goodbye world"]})
db = lancedb.connect(tmp_path)
db = lancedb.connect("~/lancedb")
tbl = db.create_table("test", schema=TextModel, mode="overwrite")
tbl.add(df)
assert len(tbl.to_pandas()["vector"][0]) == voyageai.ndims()
assert voyageai.ndims() == expected_dims, (
f"{model_name} should have {expected_dims} dimensions"
)
# Test search functionality
result = tbl.search("hello").limit(1).to_pandas()
assert result["text"][0] == "hello world"
@pytest.mark.slow

View File

@@ -163,7 +163,9 @@ async def test_explain_plan(table: AsyncTable):
table.query().nearest_to_text("dog").nearest_to([0.1, 0.1]).explain_plan(True)
)
assert "Vector Search Plan" in plan
assert "KNNVectorDistance" in plan
assert "FTS Search Plan" in plan
assert "LanceRead" in plan

View File

@@ -438,15 +438,11 @@ def test_filter_with_splits(mem_db):
row_count = permutation_tbl.count_rows()
assert row_count == 67
# Verify the permutation table only contains row_id and split_id
assert set(permutation_tbl.schema.names) == {"row_id", "split_id"}
row_ids = permutation_tbl.search(None).to_arrow().to_pydict()["row_id"]
data = tbl.take_row_ids(row_ids).to_arrow().to_pydict()
data = permutation_tbl.search(None).to_arrow().to_pydict()
categories = data["category"]
# All categories should be A or B
assert all(cat in ("A", "B") for cat in categories)
assert all(cat in ["A", "B"] for cat in categories)
def test_filter_with_shuffle(mem_db):
@@ -664,20 +660,23 @@ def test_iter_basic(some_permutation: Permutation):
expected_batches = (950 + batch_size - 1) // batch_size # ceiling division
assert len(batches) == expected_batches
# Check that all batches are lists of dicts (default python format)
assert all(isinstance(batch, list) for batch in batches)
# Check that all batches are dicts (default python format)
assert all(isinstance(batch, dict) for batch in batches)
# Check that batches have the correct structure
for batch in batches:
assert "id" in batch[0]
assert "value" in batch[0]
assert "id" in batch
assert "value" in batch
assert isinstance(batch["id"], list)
assert isinstance(batch["value"], list)
# Check that all batches except the last have the correct size
for batch in batches[:-1]:
assert len(batch) == batch_size
assert len(batch["id"]) == batch_size
assert len(batch["value"]) == batch_size
# Last batch might be smaller
assert len(batches[-1]) <= batch_size
assert len(batches[-1]["id"]) <= batch_size
def test_iter_skip_last_batch(some_permutation: Permutation):
@@ -696,11 +695,11 @@ def test_iter_skip_last_batch(some_permutation: Permutation):
if 950 % batch_size != 0:
assert len(batches_without_skip) == num_full_batches + 1
# Last batch should be smaller
assert len(batches_without_skip[-1]) == 950 % batch_size
assert len(batches_without_skip[-1]["id"]) == 950 % batch_size
# All batches with skip_last_batch should be full size
for batch in batches_with_skip:
assert len(batch) == batch_size
assert len(batch["id"]) == batch_size
def test_iter_different_batch_sizes(some_permutation: Permutation):
@@ -717,12 +716,12 @@ def test_iter_different_batch_sizes(some_permutation: Permutation):
# Test with batch size equal to total rows
single_batch = list(some_permutation.iter(950, skip_last_batch=False))
assert len(single_batch) == 1
assert len(single_batch[0]) == 950
assert len(single_batch[0]["id"]) == 950
# Test with batch size larger than total rows
oversized_batch = list(some_permutation.iter(10000, skip_last_batch=False))
assert len(oversized_batch) == 1
assert len(oversized_batch[0]) == 950
assert len(oversized_batch[0]["id"]) == 950
def test_dunder_iter(some_permutation: Permutation):
@@ -735,13 +734,15 @@ def test_dunder_iter(some_permutation: Permutation):
# All batches should be full size
for batch in batches:
assert len(batch) == 100
assert len(batch["id"]) == 100
assert len(batch["value"]) == 100
some_permutation = some_permutation.with_batch_size(400)
batches = list(some_permutation)
assert len(batches) == 2 # floor(950 / 400) since skip_last_batch=True
for batch in batches:
assert len(batch) == 400
assert len(batch["id"]) == 400
assert len(batch["value"]) == 400
def test_iter_with_different_formats(some_permutation: Permutation):
@@ -756,7 +757,7 @@ def test_iter_with_different_formats(some_permutation: Permutation):
# Test with python format (default)
python_perm = some_permutation.with_format("python")
python_batches = list(python_perm.iter(batch_size, skip_last_batch=False))
assert all(isinstance(batch, list) for batch in python_batches)
assert all(isinstance(batch, dict) for batch in python_batches)
# Test with pandas format
pandas_perm = some_permutation.with_format("pandas")
@@ -775,8 +776,8 @@ def test_iter_with_column_selection(some_permutation: Permutation):
# Check that batches only contain the id column
for batch in batches:
assert "id" in batch[0]
assert "value" not in batch[0]
assert "id" in batch
assert "value" not in batch
def test_iter_with_column_rename(some_permutation: Permutation):
@@ -786,9 +787,9 @@ def test_iter_with_column_rename(some_permutation: Permutation):
# Check that batches have the renamed column
for batch in batches:
assert "id" in batch[0]
assert "data" in batch[0]
assert "value" not in batch[0]
assert "id" in batch
assert "data" in batch
assert "value" not in batch
def test_iter_with_limit_offset(some_permutation: Permutation):
@@ -807,14 +808,14 @@ def test_iter_with_limit_offset(some_permutation: Permutation):
assert len(limit_batches) == 5
no_skip = some_permutation.iter(101, skip_last_batch=False)
row_100 = next(no_skip)[100]["id"]
row_100 = next(no_skip)["id"][100]
# Test with both limit and offset
limited_perm = some_permutation.with_skip(100).with_take(300)
limited_batches = list(limited_perm.iter(100, skip_last_batch=False))
# Should have 3 batches (300 / 100)
assert len(limited_batches) == 3
assert limited_batches[0][0]["id"] == row_100
assert limited_batches[0]["id"][0] == row_100
def test_iter_empty_permutation(mem_db):
@@ -837,7 +838,7 @@ def test_iter_single_row(mem_db):
# With skip_last_batch=False, should get one batch
batches = list(perm.iter(10, skip_last_batch=False))
assert len(batches) == 1
assert len(batches[0]) == 1
assert len(batches[0]["id"]) == 1
# With skip_last_batch=True, should skip the single row (since it's < batch_size)
batches_skip = list(perm.iter(10, skip_last_batch=True))
@@ -855,7 +856,8 @@ def test_identity_permutation(mem_db):
batches = list(permutation.iter(10, skip_last_batch=False))
assert len(batches) == 1
assert len(batches[0]) == 10
assert len(batches[0]["id"]) == 10
assert len(batches[0]["value"]) == 10
permutation = permutation.remove_columns(["value"])
assert permutation.num_columns == 1
@@ -898,10 +900,10 @@ def test_transform_fn(mem_db):
py_result = list(permutation.with_format("python").iter(10, skip_last_batch=False))[
0
]
assert len(py_result) == 10
assert "id" in py_result[0]
assert "value" in py_result[0]
assert isinstance(py_result, list)
assert len(py_result) == 2
assert len(py_result["id"]) == 10
assert len(py_result["value"]) == 10
assert isinstance(py_result, dict)
try:
import torch
@@ -909,11 +911,9 @@ def test_transform_fn(mem_db):
torch_result = list(
permutation.with_format("torch").iter(10, skip_last_batch=False)
)[0]
assert isinstance(torch_result, list)
assert len(torch_result) == 10
assert isinstance(torch_result[0], torch.Tensor)
assert torch_result[0].shape == (2,)
assert torch_result[0].dtype == torch.int64
assert torch_result.shape == (2, 10)
assert torch_result.dtype == torch.int64
assert isinstance(torch_result, torch.Tensor)
except ImportError:
# Skip check if torch is not installed
pass
@@ -941,113 +941,3 @@ def test_custom_transform(mem_db):
batch = batches[0]
assert batch == pa.record_batch([range(10)], ["id"])
def test_getitems_basic(some_permutation: Permutation):
"""Test __getitems__ returns correct rows by offset."""
result = some_permutation.__getitems__([0, 1, 2])
assert isinstance(result, list)
assert "id" in result[0]
assert "value" in result[0]
assert len(result) == 3
def test_getitems_single_index(some_permutation: Permutation):
"""Test __getitems__ with a single index."""
result = some_permutation.__getitems__([0])
assert len(result) == 1
def test_getitems_preserves_order(some_permutation: Permutation):
"""Test __getitems__ returns rows in the requested order."""
# Get rows in forward order
forward = some_permutation.__getitems__([0, 1, 2, 3, 4])
# Get the same rows in reverse order
reverse = some_permutation.__getitems__([4, 3, 2, 1, 0])
assert [r["id"] for r in forward] == list(reversed([r["id"] for r in reverse]))
assert [r["value"] for r in forward] == list(
reversed([r["value"] for r in reverse])
)
def test_getitems_non_contiguous(some_permutation: Permutation):
"""Test __getitems__ with non-contiguous indices."""
result = some_permutation.__getitems__([0, 10, 50, 100, 500])
assert len(result) == 5
# Each id/value pair should match what we'd get individually
for i, offset in enumerate([0, 10, 50, 100, 500]):
single = some_permutation.__getitems__([offset])
assert result[i]["id"] == single[0]["id"]
assert result[i]["value"] == single[0]["value"]
def test_getitems_with_column_selection(some_permutation: Permutation):
"""Test __getitems__ respects column selection."""
id_only = some_permutation.select_columns(["id"])
result = id_only.__getitems__([0, 1, 2])
assert "id" in result[0]
assert "value" not in result[0]
assert len(result) == 3
def test_getitems_with_column_rename(some_permutation: Permutation):
"""Test __getitems__ respects column renames."""
renamed = some_permutation.rename_column("value", "data")
result = renamed.__getitems__([0, 1])
assert "data" in result[0]
assert "value" not in result[0]
assert len(result) == 2
def test_getitems_with_format(some_permutation: Permutation):
"""Test __getitems__ applies the transform function."""
arrow_perm = some_permutation.with_format("arrow")
result = arrow_perm.__getitems__([0, 1, 2])
assert isinstance(result, pa.RecordBatch)
assert result.num_rows == 3
def test_getitems_with_custom_transform(some_permutation: Permutation):
"""Test __getitems__ with a custom transform."""
def transform(batch: pa.RecordBatch) -> list:
return batch.column("id").to_pylist()
custom = some_permutation.with_transform(transform)
result = custom.__getitems__([0, 1, 2])
assert isinstance(result, list)
assert len(result) == 3
def test_getitems_identity_permutation(mem_db):
"""Test __getitems__ on an identity permutation."""
tbl = mem_db.create_table(
"test_table", pa.table({"id": range(10), "value": range(10)})
)
perm = Permutation.identity(tbl)
result = perm.__getitems__([0, 5, 9])
assert [r["id"] for r in result] == [0, 5, 9]
assert [r["value"] for r in result] == [0, 5, 9]
def test_getitems_with_limit_offset(some_permutation: Permutation):
"""Test __getitems__ on a permutation with skip/take applied."""
limited = some_permutation.with_skip(100).with_take(200)
# Should be able to access offsets within the limited range
result = limited.__getitems__([0, 1, 199])
assert len(result) == 3
# The first item of the limited permutation should match offset 100 of original
full_result = some_permutation.__getitems__([100])
limited_result = limited.__getitems__([0])
assert limited_result[0]["id"] == full_result[0]["id"]
def test_getitems_invalid_offset(some_permutation: Permutation):
"""Test __getitems__ with an out-of-range offset raises an error."""
with pytest.raises(Exception):
some_permutation.__getitems__([999999])

View File

@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
import json
import sys
from datetime import date, datetime
from typing import List, Optional, Tuple
@@ -19,6 +20,10 @@ from pydantic import BaseModel
from pydantic import Field
@pytest.mark.skipif(
sys.version_info < (3, 9),
reason="using native type alias requires python3.9 or higher",
)
def test_pydantic_to_arrow():
class StructModel(pydantic.BaseModel):
a: str
@@ -78,6 +83,10 @@ def test_pydantic_to_arrow():
assert schema == expect_schema
@pytest.mark.skipif(
sys.version_info < (3, 10),
reason="using | type syntax requires python3.10 or higher",
)
def test_optional_types_py310():
class TestModel(pydantic.BaseModel):
a: str | None
@@ -96,233 +105,10 @@ def test_optional_types_py310():
assert schema == expect_schema
def test_optional_structs():
class SplitInfo(pydantic.BaseModel):
start_frame: int
end_frame: int
class TestModel(pydantic.BaseModel):
id: str
split: SplitInfo | None = None
schema = pydantic_to_schema(TestModel)
expect_schema = pa.schema(
[
pa.field("id", pa.utf8(), False),
pa.field(
"split",
pa.struct(
[
pa.field("start_frame", pa.int64(), False),
pa.field("end_frame", pa.int64(), False),
]
),
True,
),
]
)
assert schema == expect_schema
def test_optional_struct_list_py310():
class SplitInfo(pydantic.BaseModel):
start_frame: int
end_frame: int
class TestModel(pydantic.BaseModel):
id: str
splits: list[SplitInfo] | None = None
schema = pydantic_to_schema(TestModel)
expect_schema = pa.schema(
[
pa.field("id", pa.utf8(), False),
pa.field(
"splits",
pa.list_(
pa.struct(
[
pa.field("start_frame", pa.int64(), False),
pa.field("end_frame", pa.int64(), False),
]
)
),
True,
),
]
)
assert schema == expect_schema
def test_nested_struct_list():
class SplitInfo(pydantic.BaseModel):
start_frame: int
end_frame: int
class TestModel(pydantic.BaseModel):
id: str
splits: list[SplitInfo]
schema = pydantic_to_schema(TestModel)
expect_schema = pa.schema(
[
pa.field("id", pa.utf8(), False),
pa.field(
"splits",
pa.list_(
pa.struct(
[
pa.field("start_frame", pa.int64(), False),
pa.field("end_frame", pa.int64(), False),
]
)
),
False,
),
]
)
assert schema == expect_schema
def test_nested_struct_list_optional():
class SplitInfo(pydantic.BaseModel):
start_frame: int
end_frame: int
class TestModel(pydantic.BaseModel):
id: str
splits: Optional[list[SplitInfo]] = None
schema = pydantic_to_schema(TestModel)
expect_schema = pa.schema(
[
pa.field("id", pa.utf8(), False),
pa.field(
"splits",
pa.list_(
pa.struct(
[
pa.field("start_frame", pa.int64(), False),
pa.field("end_frame", pa.int64(), False),
]
)
),
True,
),
]
)
assert schema == expect_schema
def test_nested_struct_list_optional_items():
class SplitInfo(pydantic.BaseModel):
start_frame: int
end_frame: int
class TestModel(pydantic.BaseModel):
id: str
splits: list[Optional[SplitInfo]]
schema = pydantic_to_schema(TestModel)
expect_schema = pa.schema(
[
pa.field("id", pa.utf8(), False),
pa.field(
"splits",
pa.list_(
pa.field(
"item",
pa.struct(
[
pa.field("start_frame", pa.int64(), False),
pa.field("end_frame", pa.int64(), False),
]
),
True,
)
),
False,
),
]
)
assert schema == expect_schema
def test_nested_struct_list_optional_container_and_items():
class SplitInfo(pydantic.BaseModel):
start_frame: int
end_frame: int
class TestModel(pydantic.BaseModel):
id: str
splits: Optional[list[Optional[SplitInfo]]] = None
schema = pydantic_to_schema(TestModel)
expect_schema = pa.schema(
[
pa.field("id", pa.utf8(), False),
pa.field(
"splits",
pa.list_(
pa.field(
"item",
pa.struct(
[
pa.field("start_frame", pa.int64(), False),
pa.field("end_frame", pa.int64(), False),
]
),
True,
)
),
True,
),
]
)
assert schema == expect_schema
def test_nested_struct_list_optional_items_pep604():
class SplitInfo(pydantic.BaseModel):
start_frame: int
end_frame: int
class TestModel(pydantic.BaseModel):
id: str
splits: list[SplitInfo | None]
schema = pydantic_to_schema(TestModel)
expect_schema = pa.schema(
[
pa.field("id", pa.utf8(), False),
pa.field(
"splits",
pa.list_(
pa.field(
"item",
pa.struct(
[
pa.field("start_frame", pa.int64(), False),
pa.field("end_frame", pa.int64(), False),
]
),
True,
)
),
False,
),
]
)
assert schema == expect_schema
@pytest.mark.skipif(
sys.version_info > (3, 8),
reason="using native type alias requires python3.9 or higher",
)
def test_pydantic_to_arrow_py38():
class StructModel(pydantic.BaseModel):
a: str

View File

@@ -1499,30 +1499,3 @@ def test_search_empty_table(mem_db):
# Search on empty table should return empty results, not crash
results = table.search([1.0, 2.0]).limit(5).to_list()
assert results == []
def test_fast_search(tmp_path):
db = lancedb.connect(tmp_path)
# Generate data matching the async test style
vectors = pa.FixedShapeTensorArray.from_numpy_ndarray(
np.random.rand(256, 32)
).storage
table = db.create_table("test", pa.table({"vector": vectors}))
# FIX: Pass arguments directly instead of using 'config=IvfPq(...)'
table.create_index(vector_column_name="vector", num_partitions=1, num_sub_vectors=1)
# Add data to ensure table has enough segments/rows
table.add(pa.table({"vector": vectors}))
q = [1.0] * 32
# 1. Normal Search -> Should include "LanceScan" (Brute Force / Scan)
plan = table.search(q).explain_plan(True)
assert "LanceScan" in plan
# 2. Fast Search -> Should NOT include "LanceScan" (Uses Index)
plan = table.search(q).fast_search().explain_plan(True)
assert "LanceScan" not in plan

View File

@@ -8,7 +8,7 @@ import http.server
import json
import threading
import time
from unittest.mock import MagicMock, patch
from unittest.mock import MagicMock
import uuid
from packaging.version import Version
@@ -601,6 +601,7 @@ def test_head():
def test_query_sync_minimal():
def handler(body):
assert body == {
"distance_type": "l2",
"k": 10,
"prefilter": True,
"refine_factor": None,
@@ -684,6 +685,7 @@ def test_query_sync_maximal():
def test_query_sync_nprobes():
def handler(body):
assert body == {
"distance_type": "l2",
"k": 10,
"prefilter": True,
"fast_search": True,
@@ -713,6 +715,7 @@ def test_query_sync_nprobes():
def test_query_sync_no_max_nprobes():
def handler(body):
assert body == {
"distance_type": "l2",
"k": 10,
"prefilter": True,
"fast_search": True,
@@ -835,6 +838,7 @@ def test_query_sync_hybrid():
else:
# Vector query
assert body == {
"distance_type": "l2",
"k": 42,
"prefilter": True,
"refine_factor": None,
@@ -1199,22 +1203,3 @@ async def test_header_provider_overrides_static_headers():
extra_headers={"X-API-Key": "static-key", "X-Extra": "extra-value"},
) as db:
await db.table_names()
@pytest.mark.parametrize("exception", [KeyboardInterrupt, SystemExit, GeneratorExit])
def test_background_loop_cancellation(exception):
"""Test that BackgroundEventLoop.run() cancels the future on interrupt."""
from lancedb.background_loop import BackgroundEventLoop
mock_future = MagicMock()
mock_future.result.side_effect = exception()
with (
patch.object(BackgroundEventLoop, "__init__", return_value=None),
patch("asyncio.run_coroutine_threadsafe", return_value=mock_future),
):
loop = BackgroundEventLoop()
loop.loop = MagicMock()
with pytest.raises(exception):
loop.run(None)
mock_future.cancel.assert_called_once()

View File

@@ -531,78 +531,6 @@ def test_empty_result_reranker():
)
def test_empty_hybrid_result_reranker():
"""Test that hybrid search with empty results after filtering doesn't crash.
Regression test for https://github.com/lancedb/lancedb/issues/2425
"""
from lancedb.query import LanceHybridQueryBuilder
# Simulate empty vector and FTS results with the expected schema
vector_schema = pa.schema(
[
("text", pa.string()),
("vector", pa.list_(pa.float32(), 4)),
("_rowid", pa.uint64()),
("_distance", pa.float32()),
]
)
fts_schema = pa.schema(
[
("text", pa.string()),
("vector", pa.list_(pa.float32(), 4)),
("_rowid", pa.uint64()),
("_score", pa.float32()),
]
)
empty_vector = pa.table(
{
"text": pa.array([], type=pa.string()),
"vector": pa.array([], type=pa.list_(pa.float32(), 4)),
"_rowid": pa.array([], type=pa.uint64()),
"_distance": pa.array([], type=pa.float32()),
},
schema=vector_schema,
)
empty_fts = pa.table(
{
"text": pa.array([], type=pa.string()),
"vector": pa.array([], type=pa.list_(pa.float32(), 4)),
"_rowid": pa.array([], type=pa.uint64()),
"_score": pa.array([], type=pa.float32()),
},
schema=fts_schema,
)
for reranker in [LinearCombinationReranker(), RRFReranker()]:
result = LanceHybridQueryBuilder._combine_hybrid_results(
fts_results=empty_fts,
vector_results=empty_vector,
norm="score",
fts_query="nonexistent query",
reranker=reranker,
limit=10,
with_row_ids=False,
)
assert len(result) == 0
assert "_relevance_score" in result.column_names
assert "_rowid" not in result.column_names
# Also test with with_row_ids=True
result = LanceHybridQueryBuilder._combine_hybrid_results(
fts_results=empty_fts,
vector_results=empty_vector,
norm="score",
fts_query="nonexistent query",
reranker=LinearCombinationReranker(),
limit=10,
with_row_ids=True,
)
assert len(result) == 0
assert "_relevance_score" in result.column_names
assert "_rowid" in result.column_names
@pytest.mark.parametrize("use_tantivy", [True, False])
def test_cross_encoder_reranker_return_all(tmp_path, use_tantivy):
pytest.importorskip("sentence_transformers")

View File

@@ -810,7 +810,7 @@ def test_create_index_name_and_train_parameters(
)
def test_create_with_nans(mem_db: DBConnection):
def test_add_with_nans(mem_db: DBConnection):
# by default we raise an error on bad input vectors
bad_data = [
{"vector": [np.nan], "item": "bar", "price": 20.0},
@@ -854,57 +854,6 @@ def test_create_with_nans(mem_db: DBConnection):
assert np.allclose(v, np.array([0.0, 0.0]))
def test_add_with_nans(mem_db: DBConnection):
schema = pa.schema(
[
pa.field("vector", pa.list_(pa.float32(), 2), nullable=True),
pa.field("item", pa.string(), nullable=True),
pa.field("price", pa.float64(), nullable=False),
],
)
table = mem_db.create_table("test", schema=schema)
# by default we raise an error on bad input vectors
bad_data = [
{"vector": [np.nan], "item": "bar", "price": 20.0},
{"vector": [5], "item": "bar", "price": 20.0},
{"vector": [np.nan, np.nan], "item": "bar", "price": 20.0},
{"vector": [np.nan, 5.0], "item": "bar", "price": 20.0},
]
for row in bad_data:
with pytest.raises(ValueError):
table.add(
data=[row],
)
table.add(
[
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [2.1, 4.1], "item": "foo", "price": 9.0},
{"vector": [np.nan], "item": "bar", "price": 20.0},
{"vector": [5], "item": "bar", "price": 20.0},
{"vector": [np.nan, np.nan], "item": "bar", "price": 20.0},
],
on_bad_vectors="drop",
)
assert len(table) == 2
table.delete("true")
# We can fill bad input with some value
table.add(
data=[
{"vector": [3.1, 4.1], "item": "foo", "price": 10.0},
{"vector": [np.nan], "item": "bar", "price": 20.0},
{"vector": [np.nan, np.nan], "item": "bar", "price": 20.0},
],
on_bad_vectors="fill",
fill_value=0.0,
)
assert len(table) == 3
arrow_tbl = table.search().where("item == 'bar'").to_arrow()
v = arrow_tbl["vector"].to_pylist()[0]
assert np.allclose(v, np.array([0.0, 0.0]))
def test_restore(mem_db: DBConnection):
table = mem_db.create_table(
"my_table",
@@ -1931,13 +1880,8 @@ async def test_optimize_delete_unverified(tmp_db_async: AsyncConnection, tmp_pat
],
)
version = await table.version()
assert version == 2
# By removing a manifest file, we make the data files we just inserted unverified
version_name = 18446744073709551615 - (version - 1)
path = tmp_path / "test.lance" / "_versions" / f"{version_name:020}.manifest"
path = tmp_path / "test.lance" / "_versions" / f"{version - 1}.manifest"
os.remove(path)
stats = await table.optimize(delete_unverified=False)
assert stats.prune.old_versions_removed == 0
stats = await table.optimize(

View File

@@ -4,7 +4,6 @@
import pyarrow as pa
import pytest
from lancedb.util import tbl_to_tensor
from lancedb.permutation import Permutation
torch = pytest.importorskip("torch")
@@ -17,26 +16,3 @@ def test_table_dataloader(mem_db):
for batch in dataloader:
assert batch.size(0) == 1
assert batch.size(1) == 10
def test_permutation_dataloader(mem_db):
table = mem_db.create_table("test_table", pa.table({"a": range(1000)}))
permutation = Permutation.identity(table)
dataloader = torch.utils.data.DataLoader(permutation, batch_size=10, shuffle=True)
for batch in dataloader:
assert batch["a"].size(0) == 10
permutation = permutation.with_format("torch")
dataloader = torch.utils.data.DataLoader(permutation, batch_size=10, shuffle=True)
for batch in dataloader:
assert batch.size(0) == 10
assert batch.size(1) == 1
permutation = permutation.with_format("torch_col")
dataloader = torch.utils.data.DataLoader(
permutation, collate_fn=lambda x: x, batch_size=10, shuffle=True
)
for batch in dataloader:
assert batch.size(0) == 1
assert batch.size(1) == 10

View File

@@ -292,14 +292,18 @@ class TestModel(lancedb.pydantic.LanceModel):
lambda: pa.table({"a": [1], "b": [2]}),
lambda: pa.table({"a": [1], "b": [2]}).to_reader(),
lambda: iter(pa.table({"a": [1], "b": [2]}).to_batches()),
lambda: lance.write_dataset(
pa.table({"a": [1], "b": [2]}),
"memory://test",
lambda: (
lance.write_dataset(
pa.table({"a": [1], "b": [2]}),
"memory://test",
)
),
lambda: (
lance.write_dataset(
pa.table({"a": [1], "b": [2]}),
"memory://test",
).scanner()
),
lambda: lance.write_dataset(
pa.table({"a": [1], "b": [2]}),
"memory://test",
).scanner(),
lambda: pd.DataFrame({"a": [1], "b": [2]}),
lambda: pl.DataFrame({"a": [1], "b": [2]}),
lambda: pl.LazyFrame({"a": [1], "b": [2]}),

View File

@@ -1,108 +0,0 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
"""Unit tests for VoyageAI embedding function.
These tests verify model registration and configuration without requiring API calls.
"""
import pytest
from unittest.mock import MagicMock, patch
from lancedb.embeddings import get_registry
@pytest.fixture(autouse=True)
def reset_voyageai_client():
"""Reset VoyageAI client before and after each test to avoid state pollution."""
from lancedb.embeddings.voyageai import VoyageAIEmbeddingFunction
VoyageAIEmbeddingFunction.client = None
yield
VoyageAIEmbeddingFunction.client = None
class TestVoyageAIModelRegistration:
"""Tests for VoyageAI model registration and configuration."""
@pytest.fixture
def mock_voyageai_client(self):
"""Mock VoyageAI client to avoid API calls."""
with patch.dict("os.environ", {"VOYAGE_API_KEY": "test-key"}):
with patch("lancedb.embeddings.voyageai.attempt_import_or_raise") as mock:
mock_client = MagicMock()
mock_voyageai = MagicMock()
mock_voyageai.Client.return_value = mock_client
mock.return_value = mock_voyageai
yield mock_client
def test_voyageai_registered(self):
"""Test that VoyageAI is registered in the embedding function registry."""
registry = get_registry()
assert registry.get("voyageai") is not None
@pytest.mark.parametrize(
"model_name,expected_dims",
[
# Voyage-4 series (all 1024 dims)
("voyage-4", 1024),
("voyage-4-lite", 1024),
("voyage-4-large", 1024),
# Voyage-3 series
("voyage-3", 1024),
("voyage-3-lite", 512),
# Domain-specific models
("voyage-finance-2", 1024),
("voyage-multilingual-2", 1024),
("voyage-law-2", 1024),
("voyage-code-2", 1536),
# Multimodal
("voyage-multimodal-3", 1024),
],
)
def test_model_dimensions(self, model_name, expected_dims, mock_voyageai_client):
"""Test that each model returns the correct dimensions."""
registry = get_registry()
func = registry.get("voyageai").create(name=model_name)
assert func.ndims() == expected_dims, (
f"Model {model_name} should have {expected_dims} dimensions"
)
def test_unsupported_model_raises_error(self, mock_voyageai_client):
"""Test that unsupported models raise ValueError."""
registry = get_registry()
func = registry.get("voyageai").create(name="unsupported-model")
with pytest.raises(ValueError, match="not supported"):
func.ndims()
@pytest.mark.parametrize(
"model_name",
[
"voyage-4",
"voyage-4-lite",
"voyage-4-large",
],
)
def test_voyage4_models_are_text_models(self, model_name, mock_voyageai_client):
"""Test that voyage-4 models are classified as text models (not multimodal)."""
registry = get_registry()
func = registry.get("voyageai").create(name=model_name)
assert not func._is_multimodal_model(model_name), (
f"{model_name} should be a text model, not multimodal"
)
def test_voyage4_models_in_text_embedding_list(self, mock_voyageai_client):
"""Test that voyage-4 models are in the text_embedding_models list."""
registry = get_registry()
func = registry.get("voyageai").create(name="voyage-4")
assert "voyage-4" in func.text_embedding_models
assert "voyage-4-lite" in func.text_embedding_models
assert "voyage-4-large" in func.text_embedding_models
def test_voyage4_models_not_in_multimodal_list(self, mock_voyageai_client):
"""Test that voyage-4 models are NOT in the multimodal_embedding_models list."""
registry = get_registry()
func = registry.get("voyageai").create(name="voyage-4")
assert "voyage-4" not in func.multimodal_embedding_models
assert "voyage-4-lite" not in func.multimodal_embedding_models
assert "voyage-4-large" not in func.multimodal_embedding_models

View File

@@ -10,7 +10,8 @@ use arrow::{
use futures::stream::StreamExt;
use lancedb::arrow::SendableRecordBatchStream;
use pyo3::{
exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, Py, PyAny, PyRef, PyResult, Python,
exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, PyAny, PyObject, PyRef, PyResult,
Python,
};
use pyo3_async_runtimes::tokio::future_into_py;
@@ -35,11 +36,8 @@ impl RecordBatchStream {
#[pymethods]
impl RecordBatchStream {
#[getter]
pub fn schema(&self, py: Python) -> PyResult<Py<PyAny>> {
(*self.schema)
.clone()
.into_pyarrow(py)
.map(|obj| obj.unbind())
pub fn schema(&self, py: Python) -> PyResult<PyObject> {
(*self.schema).clone().into_pyarrow(py)
}
pub fn __aiter__(self_: PyRef<'_, Self>) -> PyRef<'_, Self> {
@@ -55,12 +53,7 @@ impl RecordBatchStream {
.next()
.await
.ok_or_else(|| PyStopAsyncIteration::new_err(""))?;
Python::attach(|py| {
inner_next
.infer_error()?
.to_pyarrow(py)
.map(|obj| obj.unbind())
})
Python::with_gil(|py| inner_next.infer_error()?.to_pyarrow(py))
})
}
}

View File

@@ -12,7 +12,7 @@ use pyo3::{
exceptions::{PyRuntimeError, PyValueError},
pyclass, pyfunction, pymethods,
types::{PyDict, PyDictMethods},
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
Bound, FromPyObject, Py, PyAny, PyObject, PyRef, PyResult, Python,
};
use pyo3_async_runtimes::tokio::future_into_py;
@@ -114,15 +114,14 @@ impl Connection {
data: Bound<'_, PyAny>,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<Py<PyAny>>,
storage_options_provider: Option<PyObject>,
location: Option<String>,
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
let mode = Self::parse_create_mode_str(mode)?;
let batches: Box<dyn arrow::array::RecordBatchReader + Send> =
Box::new(ArrowArrayStreamReader::from_pyarrow_bound(&data)?);
let batches = ArrowArrayStreamReader::from_pyarrow_bound(&data)?;
let mut builder = inner.create_table(name, batches).mode(mode);
@@ -153,7 +152,7 @@ impl Connection {
schema: Bound<'_, PyAny>,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<Py<PyAny>>,
storage_options_provider: Option<PyObject>,
location: Option<String>,
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
@@ -188,7 +187,7 @@ impl Connection {
name: String,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<Py<PyAny>>,
storage_options_provider: Option<PyObject>,
index_cache_size: Option<u32>,
location: Option<String>,
) -> PyResult<Bound<'_, PyAny>> {
@@ -308,7 +307,7 @@ impl Connection {
..Default::default()
};
let response = inner.list_namespaces(request).await.infer_error()?;
Python::attach(|py| -> PyResult<Py<PyDict>> {
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("namespaces", response.namespaces)?;
dict.set_item("page_token", response.page_token)?;
@@ -346,7 +345,7 @@ impl Connection {
..Default::default()
};
let response = inner.create_namespace(request).await.infer_error()?;
Python::attach(|py| -> PyResult<Py<PyDict>> {
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?;
Ok(dict.unbind())
@@ -387,7 +386,7 @@ impl Connection {
..Default::default()
};
let response = inner.drop_namespace(request).await.infer_error()?;
Python::attach(|py| -> PyResult<Py<PyDict>> {
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?;
dict.set_item("transaction_id", response.transaction_id)?;
@@ -414,7 +413,7 @@ impl Connection {
..Default::default()
};
let response = inner.describe_namespace(request).await.infer_error()?;
Python::attach(|py| -> PyResult<Py<PyDict>> {
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("properties", response.properties)?;
Ok(dict.unbind())
@@ -444,7 +443,7 @@ impl Connection {
..Default::default()
};
let response = inner.list_tables(request).await.infer_error()?;
Python::attach(|py| -> PyResult<Py<PyDict>> {
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
let dict = PyDict::new(py);
dict.set_item("tables", response.tables)?;
dict.set_item("page_token", response.page_token)?;

Some files were not shown because too many files have changed in this diff Show More