mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-21 22:10:40 +00:00
Compare commits
1 Commits
dependabot
...
v0.22.4-be
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
35da57b44c |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.29.1-beta.0"
|
||||
current_version = "0.22.4-beta.1"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
@@ -72,9 +72,3 @@ search = "\nversion = \"{current_version}\""
|
||||
filename = "nodejs/Cargo.toml"
|
||||
replace = "\nversion = \"{new_version}\""
|
||||
search = "\nversion = \"{current_version}\""
|
||||
|
||||
# Java documentation
|
||||
[[tool.bumpversion.files]]
|
||||
filename = "docs/src/java/java.md"
|
||||
replace = "<version>{new_version}</version>"
|
||||
search = "<version>{current_version}</version>"
|
||||
|
||||
@@ -19,7 +19,7 @@ rustflags = [
|
||||
"-Wclippy::string_add_assign",
|
||||
"-Wclippy::string_add",
|
||||
"-Wclippy::string_lit_as_bytes",
|
||||
"-Wclippy::implicit_clone",
|
||||
"-Wclippy::string_to_string",
|
||||
"-Wclippy::use_self",
|
||||
"-Dclippy::cargo",
|
||||
"-Dclippy::dbg_macro",
|
||||
|
||||
2
.github/ISSUE_TEMPLATE/documentation.yml
vendored
2
.github/ISSUE_TEMPLATE/documentation.yml
vendored
@@ -18,6 +18,6 @@ body:
|
||||
label: Link
|
||||
description: >
|
||||
Provide a link to the existing documentation, if applicable.
|
||||
placeholder: ex. https://docs.lancedb.com/tables/...
|
||||
placeholder: ex. https://lancedb.github.io/lancedb/guides/tables/...
|
||||
validations:
|
||||
required: false
|
||||
|
||||
23
.github/dependabot.yml
vendored
23
.github/dependabot.yml
vendored
@@ -1,23 +0,0 @@
|
||||
version: 2
|
||||
|
||||
# Scope: the root Cargo workspace, which produces the Rust binaries we
|
||||
# ship to users (the Node.js and Python native extensions). The
|
||||
# `rust/lancedb` library crate shares the same lockfile; its consumers
|
||||
# pick their own dependency versions, but bumping transitive deps here
|
||||
# keeps the binaries we ship current.
|
||||
updates:
|
||||
- package-ecosystem: cargo
|
||||
directory: /
|
||||
schedule:
|
||||
interval: weekly
|
||||
open-pull-requests-limit: 10
|
||||
# Only update Cargo.lock, never widen/raise the version requirements in
|
||||
# Cargo.toml. The goal is keeping the lockfile (and the binaries we ship)
|
||||
# current on security fixes, not forcing our library's consumers onto
|
||||
# newer minimum versions.
|
||||
versioning-strategy: lockfile-only
|
||||
groups:
|
||||
rust-minor-patch:
|
||||
update-types:
|
||||
- minor
|
||||
- patch
|
||||
12
.github/workflows/build_linux_wheel/action.yml
vendored
12
.github/workflows/build_linux_wheel/action.yml
vendored
@@ -3,7 +3,7 @@ name: build-linux-wheel
|
||||
description: "Build a manylinux wheel for lance"
|
||||
inputs:
|
||||
python-minor-version:
|
||||
description: "10, 11, 12, 13"
|
||||
description: "8, 9, 10, 11, 12"
|
||||
required: true
|
||||
args:
|
||||
description: "--release"
|
||||
@@ -23,18 +23,15 @@ runs:
|
||||
steps:
|
||||
- name: CONFIRM ARM BUILD
|
||||
shell: bash
|
||||
env:
|
||||
ARM_BUILD: ${{ inputs.arm-build }}
|
||||
run: |
|
||||
echo "ARM BUILD: $ARM_BUILD"
|
||||
echo "ARM BUILD: ${{ inputs.arm-build }}"
|
||||
- name: Build x86_64 Manylinux wheel
|
||||
if: ${{ inputs.arm-build == 'false' }}
|
||||
uses: PyO3/maturin-action@v1
|
||||
with:
|
||||
maturin-version: "1.12.4"
|
||||
command: build
|
||||
working-directory: python
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
|
||||
target: x86_64-unknown-linux-gnu
|
||||
manylinux: ${{ inputs.manylinux }}
|
||||
args: ${{ inputs.args }}
|
||||
@@ -47,10 +44,9 @@ runs:
|
||||
if: ${{ inputs.arm-build == 'true' }}
|
||||
uses: PyO3/maturin-action@v1
|
||||
with:
|
||||
maturin-version: "1.12.4"
|
||||
command: build
|
||||
working-directory: python
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
|
||||
target: aarch64-unknown-linux-gnu
|
||||
manylinux: ${{ inputs.manylinux }}
|
||||
args: ${{ inputs.args }}
|
||||
|
||||
5
.github/workflows/build_mac_wheel/action.yml
vendored
5
.github/workflows/build_mac_wheel/action.yml
vendored
@@ -3,7 +3,7 @@ name: build_wheel
|
||||
description: "Build a lance wheel"
|
||||
inputs:
|
||||
python-minor-version:
|
||||
description: "10, 11, 12, 13"
|
||||
description: "8, 9, 10, 11"
|
||||
required: true
|
||||
args:
|
||||
description: "--release"
|
||||
@@ -20,8 +20,7 @@ runs:
|
||||
uses: PyO3/maturin-action@v1
|
||||
with:
|
||||
command: build
|
||||
maturin-version: "1.12.4"
|
||||
# TODO: pass through interpreter
|
||||
args: ${{ inputs.args }}
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
|
||||
working-directory: python
|
||||
|
||||
@@ -3,7 +3,7 @@ name: build_wheel
|
||||
description: "Build a lance wheel"
|
||||
inputs:
|
||||
python-minor-version:
|
||||
description: "10, 11, 12, 13, 14"
|
||||
description: "8, 9, 10, 11"
|
||||
required: true
|
||||
args:
|
||||
description: "--release"
|
||||
@@ -25,9 +25,8 @@ runs:
|
||||
uses: PyO3/maturin-action@v1
|
||||
with:
|
||||
command: build
|
||||
maturin-version: "1.12.4"
|
||||
args: ${{ inputs.args }}
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL='https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/'"
|
||||
docker-options: "-e PIP_EXTRA_INDEX_URL=https://pypi.fury.io/lancedb/"
|
||||
working-directory: python
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
|
||||
2
.github/workflows/cargo-publish.yml
vendored
2
.github/workflows/cargo-publish.yml
vendored
@@ -42,7 +42,7 @@ jobs:
|
||||
name: Report Workflow Failure
|
||||
runs-on: ubuntu-latest
|
||||
needs: [build]
|
||||
if: always() && failure() && startsWith(github.ref, 'refs/tags/v')
|
||||
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
|
||||
179
.github/workflows/codex-fix-ci.yml
vendored
179
.github/workflows/codex-fix-ci.yml
vendored
@@ -1,179 +0,0 @@
|
||||
name: Codex Fix CI
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
workflow_run_url:
|
||||
description: "Failing CI workflow run URL (e.g., https://github.com/lancedb/lancedb/actions/runs/12345678)"
|
||||
required: true
|
||||
type: string
|
||||
branch:
|
||||
description: "Branch to fix (e.g., main, release/v2.0, or feature-branch)"
|
||||
required: true
|
||||
type: string
|
||||
guidelines:
|
||||
description: "Additional guidelines for the fix (optional)"
|
||||
required: false
|
||||
type: string
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
actions: read
|
||||
|
||||
jobs:
|
||||
fix-ci:
|
||||
runs-on: warp-ubuntu-latest-x64-4x
|
||||
timeout-minutes: 60
|
||||
env:
|
||||
CC: clang
|
||||
CXX: clang++
|
||||
steps:
|
||||
- name: Show inputs
|
||||
run: |
|
||||
echo "workflow_run_url = ${{ inputs.workflow_run_url }}"
|
||||
echo "branch = ${{ inputs.branch }}"
|
||||
echo "guidelines = ${{ inputs.guidelines }}"
|
||||
|
||||
- name: Checkout Repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ inputs.branch }}
|
||||
fetch-depth: 0
|
||||
persist-credentials: true
|
||||
|
||||
- name: Set up Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
# pnpm 11 (used by the nodejs install step below) requires
|
||||
# Node >= 22.13; use 24 since 22 hits EOL in October.
|
||||
node-version: 24
|
||||
|
||||
- name: Install Codex CLI
|
||||
run: npm install -g @openai/codex
|
||||
|
||||
- name: Install Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@stable
|
||||
with:
|
||||
toolchain: stable
|
||||
components: clippy, rustfmt
|
||||
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y protobuf-compiler libssl-dev
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install Python dependencies
|
||||
run: |
|
||||
pip install maturin ruff pytest pyarrow pandas polars
|
||||
|
||||
- name: Set up Java
|
||||
uses: actions/setup-java@v4
|
||||
with:
|
||||
distribution: temurin
|
||||
java-version: '11'
|
||||
cache: maven
|
||||
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 11.1.1
|
||||
- name: Install Node.js dependencies for TypeScript bindings
|
||||
run: |
|
||||
cd nodejs
|
||||
pnpm install --frozen-lockfile
|
||||
|
||||
- name: Configure git user
|
||||
run: |
|
||||
git config user.name "lancedb automation"
|
||||
git config user.email "robot@lancedb.com"
|
||||
|
||||
- name: Run Codex to fix CI failure
|
||||
env:
|
||||
WORKFLOW_RUN_URL: ${{ inputs.workflow_run_url }}
|
||||
BRANCH: ${{ inputs.branch }}
|
||||
GUIDELINES: ${{ inputs.guidelines }}
|
||||
GITHUB_TOKEN: ${{ secrets.ROBOT_TOKEN }}
|
||||
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
|
||||
OPENAI_API_KEY: ${{ secrets.CODEX_TOKEN }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
cat <<EOF >/tmp/codex-prompt.txt
|
||||
You are running inside the lancedb repository on a GitHub Actions runner. Your task is to fix a CI failure.
|
||||
|
||||
Input parameters:
|
||||
- Failing workflow run URL: ${WORKFLOW_RUN_URL}
|
||||
- Branch to fix: ${BRANCH}
|
||||
- Additional guidelines: ${GUIDELINES:-"None provided"}
|
||||
|
||||
Follow these steps exactly:
|
||||
|
||||
1. Extract the run ID from the workflow URL. The URL format is https://github.com/lancedb/lancedb/actions/runs/<run_id>.
|
||||
|
||||
2. Use "gh run view <run_id> --json jobs,conclusion,name" to get information about the failed run.
|
||||
|
||||
3. Identify which jobs failed. For each failed job, use "gh run view <run_id> --job <job_id> --log-failed" to get the failure logs.
|
||||
|
||||
4. Analyze the failure logs to understand what went wrong. Common failures include:
|
||||
- Compilation errors
|
||||
- Test failures
|
||||
- Clippy warnings treated as errors
|
||||
- Formatting issues
|
||||
- Dependency issues
|
||||
|
||||
5. Based on the analysis, fix the issues in the codebase:
|
||||
- For compilation errors: Fix the code that doesn't compile
|
||||
- For test failures: Fix the failing tests or the code they test
|
||||
- For clippy warnings: Apply the suggested fixes
|
||||
- For formatting issues: Run "cargo fmt --all"
|
||||
- For other issues: Apply appropriate fixes
|
||||
|
||||
6. After making fixes, verify them locally:
|
||||
- Run "cargo fmt --all" to ensure formatting is correct
|
||||
- Run "cargo clippy --workspace --tests --all-features -- -D warnings" to check for issues
|
||||
- Run ONLY the specific failing tests to confirm they pass now:
|
||||
- For Rust test failures: Run the specific test with "cargo test -p <crate> <test_name>"
|
||||
- For Python test failures: Build with "cd python && maturin develop" then run "pytest <specific_test_file>::<test_name>"
|
||||
- For Java test failures: Run "cd java && mvn test -Dtest=<TestClass>#<testMethod>"
|
||||
- For TypeScript test failures: Run "cd nodejs && pnpm build && pnpm test -- --testNamePattern='<test_name>'"
|
||||
- Do NOT run the full test suite - only run the tests that were failing
|
||||
|
||||
7. If the additional guidelines are provided, follow them as well.
|
||||
|
||||
8. Inspect "git status --short" and "git diff" to review your changes.
|
||||
|
||||
9. Create a fix branch: "git checkout -b codex/fix-ci-<run_id>".
|
||||
|
||||
10. Stage all changes with "git add -A" and commit with message "fix: resolve CI failures from run <run_id>".
|
||||
|
||||
11. Push the branch: "git push origin codex/fix-ci-<run_id>". If the remote branch exists, delete it first with "gh api -X DELETE repos/lancedb/lancedb/git/refs/heads/codex/fix-ci-<run_id>" then push. Do NOT use "git push --force" or "git push -f".
|
||||
|
||||
12. Create a pull request targeting "${BRANCH}":
|
||||
- Title: "ci: <short summary describing the fix>" (e.g., "ci: fix clippy warnings in lancedb" or "ci: resolve test flakiness in vector search")
|
||||
- First, write the PR body to /tmp/pr-body.md using a heredoc (cat <<'PREOF' > /tmp/pr-body.md). The body should include:
|
||||
- Link to the failing workflow run
|
||||
- Summary of what failed
|
||||
- Description of the fixes applied
|
||||
- Then run "gh pr create --base ${BRANCH} --body-file /tmp/pr-body.md".
|
||||
|
||||
13. Display the new PR URL, "git status --short", and a summary of what was fixed.
|
||||
|
||||
Constraints:
|
||||
- Use bash commands for all operations.
|
||||
- Do not merge the PR.
|
||||
- Do not modify GitHub workflow files unless they are the cause of the failure.
|
||||
- If any command fails, diagnose and attempt to fix the issue instead of aborting immediately.
|
||||
- If you cannot fix the issue automatically, create the PR anyway with a clear explanation of what you tried and what remains to be fixed.
|
||||
- env "GH_TOKEN" is available, use "gh" tools for GitHub-related operations.
|
||||
EOF
|
||||
|
||||
printenv OPENAI_API_KEY | codex login --with-api-key
|
||||
codex --config shell_environment_policy.ignore_default_excludes=true exec --dangerously-bypass-approvals-and-sandbox "$(cat /tmp/codex-prompt.txt)"
|
||||
@@ -75,28 +75,20 @@ jobs:
|
||||
VERSION="${VERSION#v}"
|
||||
BRANCH_NAME="codex/update-lance-${VERSION//[^a-zA-Z0-9]/-}"
|
||||
|
||||
# Use "chore" for beta/rc versions, "feat" for stable releases
|
||||
if [[ "${VERSION}" == *beta* ]] || [[ "${VERSION}" == *rc* ]]; then
|
||||
COMMIT_TYPE="chore"
|
||||
else
|
||||
COMMIT_TYPE="feat"
|
||||
fi
|
||||
|
||||
cat <<EOF >/tmp/codex-prompt.txt
|
||||
You are running inside the lancedb repository on a GitHub Actions runner. Update the Lance dependency to version ${VERSION} and prepare a pull request for maintainers to review.
|
||||
|
||||
Follow these steps exactly:
|
||||
1. Use script "ci/set_lance_version.py" to update Lance Rust dependencies. The script already refreshes Cargo metadata, so allow it to finish even if it takes time.
|
||||
2. Update the Java lance-core dependency version in "java/pom.xml": change the "<lance-core.version>...</lance-core.version>" property to "${VERSION}".
|
||||
3. Run "cargo clippy --workspace --tests --all-features -- -D warnings". If diagnostics appear, fix them yourself and rerun clippy until it exits cleanly. Do not skip any warnings.
|
||||
4. After clippy succeeds, run "cargo fmt --all" to format the workspace.
|
||||
5. Ensure the repository is clean except for intentional changes. Inspect "git status --short" and "git diff" to confirm the dependency update and any required fixes.
|
||||
6. Create and switch to a new branch named "${BRANCH_NAME}" (replace any duplicated hyphens if necessary).
|
||||
7. Stage all relevant files with "git add -A". Commit using the message "${COMMIT_TYPE}: update lance dependency to v${VERSION}".
|
||||
8. Push the branch to origin. If the remote branch already exists, delete it first with "gh api -X DELETE repos/lancedb/lancedb/git/refs/heads/${BRANCH_NAME}" then push with "git push origin ${BRANCH_NAME}". Do NOT use "git push --force" or "git push -f".
|
||||
9. env "GH_TOKEN" is available, use "gh" tools for github related operations like creating pull request.
|
||||
10. Create a pull request targeting "main" with title "${COMMIT_TYPE}: update lance dependency to v${VERSION}". First, write the PR body to /tmp/pr-body.md using a heredoc (cat <<'EOF' > /tmp/pr-body.md). The body should summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}). Then run "gh pr create --body-file /tmp/pr-body.md".
|
||||
11. After creating the PR, display the PR URL, "git status --short", and a concise summary of the commands run and their results.
|
||||
1. Use script "ci/set_lance_version.py" to update Lance dependencies. The script already refreshes Cargo metadata, so allow it to finish even if it takes time.
|
||||
2. Run "cargo clippy --workspace --tests --all-features -- -D warnings". If diagnostics appear, fix them yourself and rerun clippy until it exits cleanly. Do not skip any warnings.
|
||||
3. After clippy succeeds, run "cargo fmt --all" to format the workspace.
|
||||
4. Ensure the repository is clean except for intentional changes. Inspect "git status --short" and "git diff" to confirm the dependency update and any required fixes.
|
||||
5. Create and switch to a new branch named "${BRANCH_NAME}" (replace any duplicated hyphens if necessary).
|
||||
6. Stage all relevant files with "git add -A". Commit using the message "chore: update lance dependency to v${VERSION}".
|
||||
7. Push the branch to origin. If the branch already exists, force-push your changes.
|
||||
8. env "GH_TOKEN" is available, use "gh" tools for github related operations like creating pull request.
|
||||
9. Create a pull request targeting "main" with title "chore: update lance dependency to v${VERSION}". In the body, summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}).
|
||||
10. After creating the PR, display the PR URL, "git status --short", and a concise summary of the commands run and their results.
|
||||
|
||||
Constraints:
|
||||
- Use bash commands; avoid modifying GitHub workflow files other than through the scripted task above.
|
||||
@@ -106,30 +98,3 @@ jobs:
|
||||
|
||||
printenv OPENAI_API_KEY | codex login --with-api-key
|
||||
codex --config shell_environment_policy.ignore_default_excludes=true exec --dangerously-bypass-approvals-and-sandbox "$(cat /tmp/codex-prompt.txt)"
|
||||
|
||||
- name: Trigger sophon dependency update
|
||||
env:
|
||||
TAG: ${{ inputs.tag }}
|
||||
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
VERSION="${TAG#refs/tags/}"
|
||||
VERSION="${VERSION#v}"
|
||||
LANCEDB_BRANCH="codex/update-lance-${VERSION//[^a-zA-Z0-9]/-}"
|
||||
|
||||
echo "Triggering sophon workflow with:"
|
||||
echo " lance_ref: ${TAG#refs/tags/}"
|
||||
echo " lancedb_ref: ${LANCEDB_BRANCH}"
|
||||
|
||||
gh workflow run codex-bump-lancedb-lance.yml \
|
||||
--repo lancedb/sophon \
|
||||
-f lance_ref="${TAG#refs/tags/}" \
|
||||
-f lancedb_ref="${LANCEDB_BRANCH}"
|
||||
|
||||
- name: Show latest sophon workflow run
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
echo "Latest sophon workflow run:"
|
||||
gh run list --repo lancedb/sophon --workflow codex-bump-lancedb-lance.yml --limit 1 --json databaseId,url,displayTitle
|
||||
|
||||
9
.github/workflows/dev.yml
vendored
9
.github/workflows/dev.yml
vendored
@@ -8,9 +8,6 @@ concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
labeler:
|
||||
permissions:
|
||||
@@ -18,7 +15,7 @@ jobs:
|
||||
name: Label PR
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: srvaroa/labeler@v1
|
||||
- uses: srvaroa/labeler@master
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
commitlint:
|
||||
@@ -27,7 +24,7 @@ jobs:
|
||||
name: Verify PR title / description conforms to semantic-release
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/setup-node@v4
|
||||
- uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: "18"
|
||||
# These rules are disabled because Github will always ensure there
|
||||
@@ -50,7 +47,7 @@ jobs:
|
||||
|
||||
${{ github.event.pull_request.body }}
|
||||
- if: failure()
|
||||
uses: actions/github-script@v7
|
||||
uses: actions/github-script@v6
|
||||
with:
|
||||
script: |
|
||||
const message = `**ACTION NEEDED**
|
||||
|
||||
12
.github/workflows/docs.yml
vendored
12
.github/workflows/docs.yml
vendored
@@ -24,7 +24,7 @@ env:
|
||||
# according to: https://matklad.github.io/2021/09/04/fast-rust-builds.html
|
||||
# CI builds are faster with incremental disabled.
|
||||
CARGO_INCREMENTAL: "0"
|
||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"
|
||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lancedb/"
|
||||
|
||||
jobs:
|
||||
# Single deploy job since we're just deploying
|
||||
@@ -41,7 +41,7 @@ jobs:
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
rustup update && rustup default
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
cache: "pip"
|
||||
@@ -50,10 +50,10 @@ jobs:
|
||||
- name: Build Python
|
||||
working-directory: python
|
||||
run: |
|
||||
python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .
|
||||
python -m pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -r ../docs/requirements.txt
|
||||
python -m pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .
|
||||
python -m pip install --extra-index-url https://pypi.fury.io/lancedb/ -r ../docs/requirements.txt
|
||||
- name: Set up node
|
||||
uses: actions/setup-node@v4
|
||||
uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: 20
|
||||
cache: 'npm'
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
run: |
|
||||
PYTHONPATH=. mkdocs build
|
||||
- name: Setup Pages
|
||||
uses: actions/configure-pages@v5
|
||||
uses: actions/configure-pages@v2
|
||||
- name: Upload artifact
|
||||
uses: actions/upload-pages-artifact@v3
|
||||
with:
|
||||
|
||||
113
.github/workflows/java-publish.yml
vendored
113
.github/workflows/java-publish.yml
vendored
@@ -1,38 +1,76 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
name: Build and publish Java packages
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "v*"
|
||||
release:
|
||||
types: [released]
|
||||
pull_request:
|
||||
paths:
|
||||
- .github/workflows/java-publish.yml
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
name: Build and Publish
|
||||
runs-on: ubuntu-24.04
|
||||
macos-arm64:
|
||||
name: Build on MacOS Arm64
|
||||
runs-on: macos-14
|
||||
timeout-minutes: 45
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ./java/core/lancedb-jni
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
brew install protobuf
|
||||
- name: Build release
|
||||
run: |
|
||||
cargo build --release
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: liblancedb_jni_darwin_aarch64.zip
|
||||
path: target/release/liblancedb_jni.dylib
|
||||
retention-days: 1
|
||||
if-no-files-found: error
|
||||
linux-arm64:
|
||||
name: Build on Linux Arm64
|
||||
runs-on: warp-ubuntu-2204-arm64-8x
|
||||
timeout-minutes: 45
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ./java/core/lancedb-jni
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- uses: actions-rust-lang/setup-rust-toolchain@v1
|
||||
with:
|
||||
cache-workspaces: "./java/core/lancedb-jni"
|
||||
# Disable full debug symbol generation to speed up CI build and keep memory down
|
||||
# "1" means line tables only, which is useful for panic tracebacks.
|
||||
rustflags: "-C debuginfo=1"
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt -y -qq update
|
||||
sudo apt install -y protobuf-compiler libssl-dev pkg-config
|
||||
- name: Build release
|
||||
run: |
|
||||
cargo build --release
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: liblancedb_jni_linux_aarch64.zip
|
||||
path: target/release/liblancedb_jni.so
|
||||
retention-days: 1
|
||||
if-no-files-found: error
|
||||
linux-x86:
|
||||
runs-on: warp-ubuntu-2204-x64-8x
|
||||
timeout-minutes: 30
|
||||
needs: [macos-arm64, linux-arm64]
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ./java
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- name: Set up Java 8
|
||||
uses: actions/setup-java@v4
|
||||
with:
|
||||
@@ -43,32 +81,41 @@ jobs:
|
||||
server-username: SONATYPE_USER
|
||||
server-password: SONATYPE_TOKEN
|
||||
gpg-private-key: ${{ secrets.GPG_PRIVATE_KEY }}
|
||||
gpg-passphrase: MAVEN_GPG_PASSPHRASE
|
||||
- name: Set git config
|
||||
gpg-passphrase: ${{ secrets.GPG_PASSPHRASE }}
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
git config --global user.email "dev+gha@lancedb.com"
|
||||
git config --global user.name "LanceDB Github Runner"
|
||||
sudo apt -y -qq update
|
||||
sudo apt install -y protobuf-compiler libssl-dev pkg-config
|
||||
- name: Download artifact
|
||||
uses: actions/download-artifact@v4
|
||||
- name: Copy native libs
|
||||
run: |
|
||||
mkdir -p ./core/target/classes/nativelib/darwin-aarch64 ./core/target/classes/nativelib/linux-aarch64
|
||||
cp ../liblancedb_jni_darwin_aarch64.zip/liblancedb_jni.dylib ./core/target/classes/nativelib/darwin-aarch64/liblancedb_jni.dylib
|
||||
cp ../liblancedb_jni_linux_aarch64.zip/liblancedb_jni.so ./core/target/classes/nativelib/linux-aarch64/liblancedb_jni.so
|
||||
- name: Dry run
|
||||
if: github.event_name == 'pull_request'
|
||||
run: |
|
||||
./mvnw --batch-mode -DskipTests package -pl lancedb-core -am
|
||||
- name: Publish
|
||||
if: startsWith(github.ref, 'refs/tags/v')
|
||||
mvn --batch-mode -DskipTests -Drust.release.build=true package
|
||||
- name: Set github
|
||||
run: |
|
||||
git config --global user.email "LanceDB Github Runner"
|
||||
git config --global user.name "dev+gha@lancedb.com"
|
||||
- name: Publish with Java 8
|
||||
if: github.event_name == 'release'
|
||||
run: |
|
||||
echo "use-agent" >> ~/.gnupg/gpg.conf
|
||||
echo "pinentry-mode loopback" >> ~/.gnupg/gpg.conf
|
||||
export GPG_TTY=$(tty)
|
||||
./mvnw --batch-mode -DskipTests -DpushChanges=false deploy -pl lancedb-core -am -P deploy-to-ossrh
|
||||
mvn --batch-mode -DskipTests -Drust.release.build=true -DpushChanges=false -Dgpg.passphrase=${{ secrets.GPG_PASSPHRASE }} deploy -P deploy-to-ossrh
|
||||
env:
|
||||
SONATYPE_USER: ${{ secrets.SONATYPE_USER }}
|
||||
SONATYPE_TOKEN: ${{ secrets.SONATYPE_TOKEN }}
|
||||
MAVEN_GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
|
||||
|
||||
report-failure:
|
||||
name: Report Workflow Failure
|
||||
runs-on: ubuntu-latest
|
||||
needs: [publish]
|
||||
if: always() && failure() && startsWith(github.ref, 'refs/tags/v')
|
||||
needs: [linux-arm64, linux-x86, macos-arm64]
|
||||
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
|
||||
121
.github/workflows/java.yml
vendored
121
.github/workflows/java.yml
vendored
@@ -1,49 +1,118 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
name: Build Java LanceDB Core
|
||||
|
||||
name: Build and Run Java JNI Tests
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- java/**
|
||||
- .github/workflows/java.yml
|
||||
pull_request:
|
||||
paths:
|
||||
- java/**
|
||||
- rust/**
|
||||
- .github/workflows/java.yml
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
# This env var is used by Swatinem/rust-cache@v2 for the cache
|
||||
# key, so we set it to make sure it is always consistent.
|
||||
CARGO_TERM_COLOR: always
|
||||
# Disable full debug symbol generation to speed up CI build and keep memory down
|
||||
# "1" means line tables only, which is useful for panic tracebacks.
|
||||
RUSTFLAGS: "-C debuginfo=1"
|
||||
RUST_BACKTRACE: "1"
|
||||
# according to: https://matklad.github.io/2021/09/04/fast-rust-builds.html
|
||||
# CI builds are faster with incremental disabled.
|
||||
CARGO_INCREMENTAL: "0"
|
||||
CARGO_BUILD_JOBS: "1"
|
||||
jobs:
|
||||
build-java:
|
||||
runs-on: ubuntu-24.04
|
||||
name: Build
|
||||
linux-build-java-11:
|
||||
runs-on: ubuntu-22.04
|
||||
name: ubuntu-22.04 + Java 11
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ./java
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up Java 17
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: java/core/lancedb-jni
|
||||
- uses: actions-rust-lang/setup-rust-toolchain@v1
|
||||
with:
|
||||
components: rustfmt
|
||||
- name: Run cargo fmt
|
||||
run: cargo fmt --check
|
||||
working-directory: ./java/core/lancedb-jni
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
- name: Install Java 11
|
||||
uses: actions/setup-java@v4
|
||||
with:
|
||||
distribution: temurin
|
||||
java-version: 11
|
||||
cache: "maven"
|
||||
- name: Java Style Check
|
||||
run: mvn checkstyle:check
|
||||
# Disable because of issues in lancedb rust core code
|
||||
# - name: Rust Clippy
|
||||
# working-directory: java/core/lancedb-jni
|
||||
# run: cargo clippy --all-targets -- -D warnings
|
||||
- name: Running tests with Java 11
|
||||
run: mvn clean test
|
||||
linux-build-java-17:
|
||||
runs-on: ubuntu-22.04
|
||||
name: ubuntu-22.04 + Java 17
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ./java
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
with:
|
||||
workspaces: java/core/lancedb-jni
|
||||
- uses: actions-rust-lang/setup-rust-toolchain@v1
|
||||
with:
|
||||
components: rustfmt
|
||||
- name: Run cargo fmt
|
||||
run: cargo fmt --check
|
||||
working-directory: ./java/core/lancedb-jni
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
- name: Install Java 17
|
||||
uses: actions/setup-java@v4
|
||||
with:
|
||||
distribution: temurin
|
||||
java-version: 17
|
||||
cache: "maven"
|
||||
- run: echo "JAVA_17=$JAVA_HOME" >> $GITHUB_ENV
|
||||
- name: Java Style Check
|
||||
run: ./mvnw checkstyle:check
|
||||
- name: Build and install
|
||||
run: ./mvnw clean install
|
||||
run: mvn checkstyle:check
|
||||
# Disable because of issues in lancedb rust core code
|
||||
# - name: Rust Clippy
|
||||
# working-directory: java/core/lancedb-jni
|
||||
# run: cargo clippy --all-targets -- -D warnings
|
||||
- name: Running tests with Java 17
|
||||
run: |
|
||||
export JAVA_TOOL_OPTIONS="$JAVA_TOOL_OPTIONS \
|
||||
-XX:+IgnoreUnrecognizedVMOptions \
|
||||
--add-opens=java.base/java.lang=ALL-UNNAMED \
|
||||
--add-opens=java.base/java.lang.invoke=ALL-UNNAMED \
|
||||
--add-opens=java.base/java.lang.reflect=ALL-UNNAMED \
|
||||
--add-opens=java.base/java.io=ALL-UNNAMED \
|
||||
--add-opens=java.base/java.net=ALL-UNNAMED \
|
||||
--add-opens=java.base/java.nio=ALL-UNNAMED \
|
||||
--add-opens=java.base/java.util=ALL-UNNAMED \
|
||||
--add-opens=java.base/java.util.concurrent=ALL-UNNAMED \
|
||||
--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED \
|
||||
--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED \
|
||||
--add-opens=java.base/sun.nio.ch=ALL-UNNAMED \
|
||||
--add-opens=java.base/sun.nio.cs=ALL-UNNAMED \
|
||||
--add-opens=java.base/sun.security.action=ALL-UNNAMED \
|
||||
--add-opens=java.base/sun.util.calendar=ALL-UNNAMED \
|
||||
--add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED \
|
||||
-Djdk.reflect.useDirectMethodHandle=false \
|
||||
-Dio.netty.tryReflectionSetAccessible=true"
|
||||
JAVA_HOME=$JAVA_17 mvn clean test
|
||||
|
||||
2
.github/workflows/lance-release-timer.yml
vendored
2
.github/workflows/lance-release-timer.yml
vendored
@@ -59,4 +59,4 @@ jobs:
|
||||
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
gh run list --workflow codex-update-lance-dependency.yml --limit 1 --json databaseId,url,displayTitle
|
||||
gh run list --workflow codex-update-lance-dependency.yml --limit 1 --json databaseId,htmlUrl,displayTitle
|
||||
|
||||
4
.github/workflows/license-header-check.yml
vendored
4
.github/workflows/license-header-check.yml
vendored
@@ -10,10 +10,6 @@ on:
|
||||
- nodejs/**
|
||||
- java/**
|
||||
- .github/workflows/license-header-check.yml
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
check-licenses:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
127
.github/workflows/nodejs.yml
vendored
127
.github/workflows/nodejs.yml
vendored
@@ -7,17 +7,10 @@ on:
|
||||
pull_request:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- Cargo.lock
|
||||
- rust-toolchain.toml
|
||||
- nodejs/**
|
||||
- rust/**
|
||||
- docs/src/js/**
|
||||
- .github/workflows/nodejs.yml
|
||||
- docker-compose.yml
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
@@ -42,17 +35,11 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- uses: pnpm/action-setup@v4
|
||||
- uses: actions/setup-node@v3
|
||||
with:
|
||||
version: 11.1.1
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
# pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
|
||||
# in October. The library itself still supports Node >= 18
|
||||
# (see test matrix below).
|
||||
node-version: 24
|
||||
cache: 'pnpm'
|
||||
cache-dependency-path: nodejs/pnpm-lock.yaml
|
||||
node-version: 20
|
||||
cache: 'npm'
|
||||
cache-dependency-path: nodejs/package-lock.json
|
||||
- uses: actions-rust-lang/setup-rust-toolchain@v1
|
||||
with:
|
||||
components: rustfmt, clippy
|
||||
@@ -67,13 +54,11 @@ jobs:
|
||||
run: cargo clippy --profile ci --all --all-features -- -D warnings
|
||||
- name: Lint Typescript
|
||||
run: |
|
||||
pnpm install --frozen-lockfile
|
||||
pnpm lint-ci
|
||||
npm ci
|
||||
npm run lint-ci
|
||||
- name: Lint examples
|
||||
working-directory: nodejs/examples
|
||||
# The `@lancedb/lancedb` dep points at file:../dist; pnpm errors if
|
||||
# that dir is missing, so create an empty one for lint-only runs.
|
||||
run: mkdir -p ../dist && pnpm install --frozen-lockfile && pnpm lint-ci
|
||||
run: npm ci && npm run lint-ci
|
||||
linux:
|
||||
name: Linux (NodeJS ${{ matrix.node-version }})
|
||||
timeout-minutes: 30
|
||||
@@ -90,77 +75,56 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- uses: pnpm/action-setup@v4
|
||||
- uses: actions/setup-node@v3
|
||||
with:
|
||||
version: 11.1.1
|
||||
- uses: actions/setup-node@v4
|
||||
name: Setup Node.js 24 for build
|
||||
with:
|
||||
# pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
|
||||
# in October. Build/install runs on Node 24; tests run on the
|
||||
# matrix version below using direct jest invocation.
|
||||
node-version: 24
|
||||
cache: 'pnpm'
|
||||
cache-dependency-path: nodejs/pnpm-lock.yaml
|
||||
node-version: ${{ matrix.node-version }}
|
||||
cache: 'npm'
|
||||
cache-dependency-path: nodejs/package-lock.json
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
npm install -g @napi-rs/cli
|
||||
- name: Build
|
||||
run: |
|
||||
pnpm install --frozen-lockfile
|
||||
# No `--` separator: pnpm forwards it literally, which would
|
||||
# make napi-rs treat `--profile ci` as a cargo passthrough arg.
|
||||
pnpm build:debug --profile ci
|
||||
pnpm tsc
|
||||
- name: Setup examples
|
||||
working-directory: nodejs/examples
|
||||
run: pnpm install --frozen-lockfile
|
||||
- name: Check docs
|
||||
run: |
|
||||
# We run this as part of the job because the binary needs to be built
|
||||
# first to export the types of the native code.
|
||||
set -e
|
||||
# `pnpm docs` would invoke pnpm's built-in `docs` command, not
|
||||
# the script — use `pnpm run docs`.
|
||||
pnpm run docs
|
||||
if ! git diff --exit-code -- ../ ':(exclude)Cargo.lock'; then
|
||||
echo "Docs need to be updated"
|
||||
echo "Run 'pnpm run docs', fix any warnings, and commit the changes."
|
||||
exit 1
|
||||
fi
|
||||
- uses: actions/setup-node@v4
|
||||
name: Setup Node.js ${{ matrix.node-version }} for test
|
||||
with:
|
||||
node-version: ${{ matrix.node-version }}
|
||||
npm ci
|
||||
npm run build:debug -- --profile ci
|
||||
npm run tsc
|
||||
- name: Setup localstack
|
||||
working-directory: .
|
||||
run: docker compose up --detach --wait
|
||||
- name: Test
|
||||
env:
|
||||
S3_TEST: "1"
|
||||
# Newer @smithy/core uses dynamic ESM imports.
|
||||
NODE_OPTIONS: "--experimental-vm-modules"
|
||||
# Invoke jest directly because pnpm 11 itself requires Node 22+
|
||||
# while the matrix tests on older Node versions.
|
||||
run: npx jest --verbose
|
||||
run: npm run test
|
||||
- name: Setup examples
|
||||
working-directory: nodejs/examples
|
||||
run: npm ci
|
||||
- name: Test examples
|
||||
working-directory: ./
|
||||
env:
|
||||
OPENAI_API_KEY: test
|
||||
OPENAI_BASE_URL: http://0.0.0.0:8000
|
||||
NODE_OPTIONS: "--experimental-vm-modules"
|
||||
run: |
|
||||
python ci/mock_openai.py &
|
||||
cd nodejs/examples
|
||||
npx jest --testEnvironment jest-environment-node-single-context --verbose
|
||||
npm test
|
||||
- name: Check docs
|
||||
run: |
|
||||
# We run this as part of the job because the binary needs to be built
|
||||
# first to export the types of the native code.
|
||||
set -e
|
||||
npm ci
|
||||
npm run docs
|
||||
if ! git diff --exit-code -- ../ ':(exclude)Cargo.lock'; then
|
||||
echo "Docs need to be updated"
|
||||
echo "Run 'npm run docs', fix any warnings, and commit the changes."
|
||||
exit 1
|
||||
fi
|
||||
macos:
|
||||
timeout-minutes: 30
|
||||
# macos-15 ships a newer linker; the older macos-14 linker fails to insert
|
||||
# branch islands when the debug cdylib's __text section exceeds the 128 MB
|
||||
# AArch64 B/BL branch range.
|
||||
runs-on: "macos-15"
|
||||
runs-on: "macos-14"
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
@@ -170,28 +134,21 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- uses: pnpm/action-setup@v4
|
||||
- uses: actions/setup-node@v3
|
||||
with:
|
||||
version: 11.1.1
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
# pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
|
||||
# in October.
|
||||
node-version: 24
|
||||
cache: 'pnpm'
|
||||
cache-dependency-path: nodejs/pnpm-lock.yaml
|
||||
- uses: dtolnay/rust-toolchain@stable
|
||||
node-version: 20
|
||||
cache: 'npm'
|
||||
cache-dependency-path: nodejs/package-lock.json
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
brew install protobuf
|
||||
npm install -g @napi-rs/cli
|
||||
- name: Build
|
||||
run: |
|
||||
pnpm install --frozen-lockfile
|
||||
# No `--` separator: pnpm forwards it literally, which would
|
||||
# make napi-rs treat `--profile ci` as a cargo passthrough arg.
|
||||
pnpm build:debug --profile ci
|
||||
pnpm tsc
|
||||
npm ci
|
||||
npm run build:debug -- --profile ci
|
||||
npm run tsc
|
||||
- name: Test
|
||||
run: |
|
||||
pnpm test
|
||||
npm run test
|
||||
|
||||
126
.github/workflows/npm-publish.yml
vendored
126
.github/workflows/npm-publish.yml
vendored
@@ -19,7 +19,6 @@ on:
|
||||
paths:
|
||||
- .github/workflows/npm-publish.yml
|
||||
- Cargo.toml # Change in dependency frequently breaks builds
|
||||
- Cargo.lock
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
@@ -98,6 +97,12 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
settings:
|
||||
- target: x86_64-apple-darwin
|
||||
host: macos-latest
|
||||
features: ","
|
||||
pre_build: |-
|
||||
brew install protobuf
|
||||
rustup target add x86_64-apple-darwin
|
||||
- target: aarch64-apple-darwin
|
||||
host: macos-latest
|
||||
features: fp16kernels
|
||||
@@ -125,22 +130,20 @@ jobs:
|
||||
pre_build: |-
|
||||
set -e &&
|
||||
apt-get update &&
|
||||
apt-get install -y protobuf-compiler pkg-config &&
|
||||
# The base image (manylinux2014-cross) sets TARGET_CC to the old
|
||||
# GCC 4.8 cross-compiler. aws-lc-sys checks TARGET_CC before CC,
|
||||
# so it picks up GCC even though the napi-rs image sets CC=clang.
|
||||
# Override to use the image's clang-18 which supports -fuse-ld=lld.
|
||||
export TARGET_CC=clang TARGET_CXX=clang++
|
||||
apt-get install -y protobuf-compiler pkg-config
|
||||
- target: x86_64-unknown-linux-musl
|
||||
# This one seems to need some extra memory
|
||||
host: ubuntu-2404-8x-x64
|
||||
# https://github.com/napi-rs/napi-rs/blob/main/alpine.Dockerfile
|
||||
docker: ghcr.io/napi-rs/napi-rs/nodejs-rust:lts-alpine
|
||||
features: fp16kernels
|
||||
pre_build: |-
|
||||
set -e &&
|
||||
sudo apt-get update &&
|
||||
sudo apt-get install -y protobuf-compiler pkg-config &&
|
||||
rustup target add x86_64-unknown-linux-musl &&
|
||||
export EXTRA_ARGS="-x"
|
||||
apk add protobuf-dev curl &&
|
||||
ln -s /usr/lib/gcc/x86_64-alpine-linux-musl/14.2.0/crtbeginS.o /usr/lib/crtbeginS.o &&
|
||||
ln -s /usr/lib/libgcc_s.so /usr/lib/libgcc.so &&
|
||||
CC=gcc &&
|
||||
CXX=g++
|
||||
- target: aarch64-unknown-linux-gnu
|
||||
host: ubuntu-2404-8x-x64
|
||||
# https://github.com/napi-rs/napi-rs/blob/main/debian-aarch64.Dockerfile
|
||||
@@ -150,20 +153,21 @@ jobs:
|
||||
set -e &&
|
||||
apt-get update &&
|
||||
apt-get install -y protobuf-compiler pkg-config &&
|
||||
export TARGET_CC=clang TARGET_CXX=clang++ &&
|
||||
# The manylinux2014 sysroot has glibc 2.17 headers which lack
|
||||
# AT_HWCAP2 (added in Linux 3.17). Define it for aws-lc-sys.
|
||||
export CFLAGS="$CFLAGS -DAT_HWCAP2=26" &&
|
||||
# https://github.com/aws/aws-lc-rs/issues/737#issuecomment-2725918627
|
||||
ln -s /usr/aarch64-unknown-linux-gnu/lib/gcc/aarch64-unknown-linux-gnu/4.8.5/crtbeginS.o /usr/aarch64-unknown-linux-gnu/aarch64-unknown-linux-gnu/sysroot/usr/lib/crtbeginS.o &&
|
||||
ln -s /usr/aarch64-unknown-linux-gnu/lib/gcc /usr/aarch64-unknown-linux-gnu/aarch64-unknown-linux-gnu/sysroot/usr/lib/gcc &&
|
||||
rustup target add aarch64-unknown-linux-gnu
|
||||
- target: aarch64-unknown-linux-musl
|
||||
host: ubuntu-2404-8x-x64
|
||||
# https://github.com/napi-rs/napi-rs/blob/main/alpine.Dockerfile
|
||||
docker: ghcr.io/napi-rs/napi-rs/nodejs-rust:lts-alpine
|
||||
features: ","
|
||||
pre_build: |-
|
||||
set -e &&
|
||||
sudo apt-get update &&
|
||||
sudo apt-get install -y protobuf-compiler &&
|
||||
apk add protobuf-dev &&
|
||||
rustup target add aarch64-unknown-linux-musl &&
|
||||
export EXTRA_ARGS="-x"
|
||||
export CC_aarch64_unknown_linux_musl=aarch64-linux-musl-gcc &&
|
||||
export CXX_aarch64_unknown_linux_musl=aarch64-linux-musl-g++
|
||||
name: build - ${{ matrix.settings.target }}
|
||||
runs-on: ${{ matrix.settings.host }}
|
||||
defaults:
|
||||
@@ -171,18 +175,13 @@ jobs:
|
||||
working-directory: nodejs
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 11.1.1
|
||||
- name: Setup node
|
||||
uses: actions/setup-node@v4
|
||||
if: ${{ !matrix.settings.docker }}
|
||||
with:
|
||||
# pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
|
||||
# in October.
|
||||
node-version: 24
|
||||
cache: pnpm
|
||||
cache-dependency-path: nodejs/pnpm-lock.yaml
|
||||
node-version: 20
|
||||
cache: npm
|
||||
cache-dependency-path: nodejs/package-lock.json
|
||||
- name: Install
|
||||
uses: dtolnay/rust-toolchain@stable
|
||||
if: ${{ !matrix.settings.docker }}
|
||||
@@ -199,18 +198,12 @@ jobs:
|
||||
.cargo-cache
|
||||
target/
|
||||
key: nodejs-${{ matrix.settings.target }}-cargo-${{ matrix.settings.host }}
|
||||
- name: Setup toolchain
|
||||
run: ${{ matrix.settings.setup }}
|
||||
if: ${{ matrix.settings.setup }}
|
||||
shell: bash
|
||||
- name: Install dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
- name: Install Zig
|
||||
uses: mlugg/setup-zig@v2
|
||||
if: ${{ contains(matrix.settings.target, 'musl') }}
|
||||
with:
|
||||
version: 0.14.1
|
||||
- name: Install cargo-zigbuild
|
||||
uses: taiki-e/install-action@v2
|
||||
if: ${{ contains(matrix.settings.target, 'musl') }}
|
||||
with:
|
||||
tool: cargo-zigbuild
|
||||
run: npm ci
|
||||
- name: Build in docker
|
||||
uses: addnab/docker-run-action@v3
|
||||
if: ${{ matrix.settings.docker }}
|
||||
@@ -223,24 +216,24 @@ jobs:
|
||||
run: |
|
||||
set -e
|
||||
${{ matrix.settings.pre_build }}
|
||||
npx napi build --platform --release \
|
||||
npx napi build --platform --release --no-const-enum \
|
||||
--features ${{ matrix.settings.features }} \
|
||||
--target ${{ matrix.settings.target }} \
|
||||
--dts ../lancedb/native.d.ts \
|
||||
--js ../lancedb/native.js \
|
||||
--strip \
|
||||
--output-dir dist/
|
||||
dist/
|
||||
- name: Build
|
||||
run: |
|
||||
${{ matrix.settings.pre_build }}
|
||||
npx napi build --platform --release \
|
||||
npx napi build --platform --release --no-const-enum \
|
||||
--features ${{ matrix.settings.features }} \
|
||||
--target ${{ matrix.settings.target }} \
|
||||
--dts ../lancedb/native.d.ts \
|
||||
--js ../lancedb/native.js \
|
||||
--strip \
|
||||
$EXTRA_ARGS \
|
||||
--output-dir dist/
|
||||
dist/
|
||||
if: ${{ !matrix.settings.docker }}
|
||||
shell: bash
|
||||
- name: Upload artifact
|
||||
@@ -253,7 +246,7 @@ jobs:
|
||||
# one to do the upload.
|
||||
- name: Make generic artifacts
|
||||
if: ${{ matrix.settings.target == 'aarch64-apple-darwin' }}
|
||||
run: pnpm tsc
|
||||
run: npm run tsc
|
||||
- name: Upload Generic Artifacts
|
||||
if: ${{ matrix.settings.target == 'aarch64-apple-darwin' }}
|
||||
uses: actions/upload-artifact@v4
|
||||
@@ -278,7 +271,7 @@ jobs:
|
||||
- target: x86_64-unknown-linux-gnu
|
||||
host: ubuntu-latest
|
||||
- target: aarch64-unknown-linux-gnu
|
||||
host: ubuntu-2404-8x-arm64
|
||||
host: buildjet-16vcpu-ubuntu-2204-arm
|
||||
node:
|
||||
- '20'
|
||||
runs-on: ${{ matrix.settings.host }}
|
||||
@@ -288,24 +281,14 @@ jobs:
|
||||
working-directory: nodejs
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 11.1.1
|
||||
- name: Setup Node.js 24 for install
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
# pnpm 11 requires Node >= 22.13; use 24 since 22 hits EOL
|
||||
# in October.
|
||||
node-version: 24
|
||||
cache: pnpm
|
||||
cache-dependency-path: nodejs/pnpm-lock.yaml
|
||||
- name: Install dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
- name: Setup Node.js ${{ matrix.node }} for test
|
||||
- name: Setup node
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: ${{ matrix.node }}
|
||||
cache: npm
|
||||
cache-dependency-path: nodejs/package-lock.json
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
- name: Download artifacts
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
@@ -326,9 +309,7 @@ jobs:
|
||||
- name: Move built files
|
||||
run: cp dist/native.d.ts dist/native.js dist/*.node lancedb/
|
||||
- name: Test bindings
|
||||
# Invoke jest directly because pnpm 11 itself requires Node 22+
|
||||
# while the matrix tests on older Node versions.
|
||||
run: npx jest --verbose
|
||||
run: npm test
|
||||
publish:
|
||||
name: Publish
|
||||
runs-on: ubuntu-latest
|
||||
@@ -340,19 +321,15 @@ jobs:
|
||||
- test-lancedb
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Setup pnpm
|
||||
uses: pnpm/action-setup@v4
|
||||
with:
|
||||
version: 11.1.1
|
||||
- name: Setup node
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 24
|
||||
cache: pnpm
|
||||
cache-dependency-path: nodejs/pnpm-lock.yaml
|
||||
node-version: 20
|
||||
cache: npm
|
||||
cache-dependency-path: nodejs/package-lock.json
|
||||
registry-url: "https://registry.npmjs.org"
|
||||
- name: Install dependencies
|
||||
run: pnpm install --frozen-lockfile
|
||||
run: npm ci
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: nodejs-dist
|
||||
@@ -372,20 +349,19 @@ jobs:
|
||||
- name: Display structure of downloaded files
|
||||
run: find dist && find nodejs-artifacts
|
||||
- name: Move artifacts
|
||||
run: pnpm exec napi artifacts -d nodejs-artifacts
|
||||
run: npx napi artifacts -d nodejs-artifacts
|
||||
- name: List packages
|
||||
run: find npm
|
||||
- name: Publish
|
||||
env:
|
||||
NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
|
||||
DRY_RUN: ${{ !startsWith(github.ref, 'refs/tags/v') }}
|
||||
run: |
|
||||
npm config set provenance true
|
||||
ARGS="--access public"
|
||||
if [[ $DRY_RUN == "true" ]]; then
|
||||
ARGS="$ARGS --dry-run"
|
||||
fi
|
||||
VERSION=$(node -p "require('./package.json').version")
|
||||
if [[ $VERSION == *-* ]]; then
|
||||
if [[ $GITHUB_REF =~ refs/tags/v(.*)-beta.* ]]; then
|
||||
ARGS="$ARGS --tag preview"
|
||||
fi
|
||||
npm publish $ARGS
|
||||
@@ -393,7 +369,7 @@ jobs:
|
||||
name: Report Workflow Failure
|
||||
runs-on: ubuntu-latest
|
||||
needs: [build-lancedb, test-lancedb, publish]
|
||||
if: always() && failure() && startsWith(github.ref, 'refs/tags/v')
|
||||
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
|
||||
40
.github/workflows/pypi-publish.yml
vendored
40
.github/workflows/pypi-publish.yml
vendored
@@ -9,21 +9,14 @@ on:
|
||||
paths:
|
||||
- .github/workflows/pypi-publish.yml
|
||||
- Cargo.toml # Change in dependency frequently breaks builds
|
||||
- Cargo.lock
|
||||
|
||||
env:
|
||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lancedb/"
|
||||
|
||||
jobs:
|
||||
linux:
|
||||
name: Python ${{ matrix.config.platform }} manylinux${{ matrix.config.manylinux }}
|
||||
timeout-minutes: 60
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
strategy:
|
||||
matrix:
|
||||
config:
|
||||
@@ -51,28 +44,28 @@ jobs:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.10"
|
||||
python-version: 3.8
|
||||
- uses: ./.github/workflows/build_linux_wheel
|
||||
with:
|
||||
python-minor-version: 10
|
||||
python-minor-version: 8
|
||||
args: "--release --strip ${{ matrix.config.extra_args }}"
|
||||
arm-build: ${{ matrix.config.platform == 'aarch64' }}
|
||||
manylinux: ${{ matrix.config.manylinux }}
|
||||
- uses: ./.github/workflows/upload_wheel
|
||||
if: startsWith(github.ref, 'refs/tags/python-v')
|
||||
with:
|
||||
pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
|
||||
fury_token: ${{ secrets.FURY_TOKEN }}
|
||||
mac:
|
||||
timeout-minutes: 90
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
runs-on: ${{ matrix.config.runner }}
|
||||
strategy:
|
||||
matrix:
|
||||
config:
|
||||
- target: x86_64-apple-darwin
|
||||
runner: macos-13
|
||||
- target: aarch64-apple-darwin
|
||||
runner: warp-macos-14-arm64-6x
|
||||
env:
|
||||
@@ -83,22 +76,20 @@ jobs:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.13"
|
||||
python-version: 3.12
|
||||
- uses: ./.github/workflows/build_mac_wheel
|
||||
with:
|
||||
python-minor-version: 10
|
||||
python-minor-version: 8
|
||||
args: "--release --strip --target ${{ matrix.config.target }} --features fp16kernels"
|
||||
- uses: ./.github/workflows/upload_wheel
|
||||
if: startsWith(github.ref, 'refs/tags/python-v')
|
||||
with:
|
||||
pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
|
||||
fury_token: ${{ secrets.FURY_TOKEN }}
|
||||
windows:
|
||||
timeout-minutes: 60
|
||||
permissions:
|
||||
id-token: write
|
||||
contents: read
|
||||
runs-on: windows-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -106,17 +97,18 @@ jobs:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.13"
|
||||
python-version: 3.12
|
||||
- uses: ./.github/workflows/build_windows_wheel
|
||||
with:
|
||||
python-minor-version: 10
|
||||
python-minor-version: 8
|
||||
args: "--release --strip"
|
||||
vcpkg_token: ${{ secrets.VCPKG_GITHUB_PACKAGES }}
|
||||
- uses: ./.github/workflows/upload_wheel
|
||||
if: startsWith(github.ref, 'refs/tags/python-v')
|
||||
with:
|
||||
pypi_token: ${{ secrets.LANCEDB_PYPI_API_TOKEN }}
|
||||
fury_token: ${{ secrets.FURY_TOKEN }}
|
||||
gh-release:
|
||||
if: startsWith(github.ref, 'refs/tags/python-v')
|
||||
@@ -191,7 +183,7 @@ jobs:
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
if: always() && failure() && startsWith(github.ref, 'refs/tags/python-v')
|
||||
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: ./.github/actions/create-failure-issue
|
||||
|
||||
67
.github/workflows/python.yml
vendored
67
.github/workflows/python.yml
vendored
@@ -7,18 +7,8 @@ on:
|
||||
pull_request:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- Cargo.lock
|
||||
- rust-toolchain.toml
|
||||
- python/**
|
||||
- rust/**
|
||||
- .github/workflows/python.yml
|
||||
- .github/workflows/build_linux_wheel/**
|
||||
- .github/workflows/build_mac_wheel/**
|
||||
- .github/workflows/build_windows_wheel/**
|
||||
- .github/workflows/run_tests/**
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
@@ -28,7 +18,7 @@ env:
|
||||
# Color output for pytest is off by default.
|
||||
PYTEST_ADDOPTS: "--color=yes"
|
||||
FORCE_COLOR: "1"
|
||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lance-format/ https://pypi.fury.io/lancedb/"
|
||||
PIP_EXTRA_INDEX_URL: "https://pypi.fury.io/lancedb/"
|
||||
RUST_BACKTRACE: "1"
|
||||
|
||||
jobs:
|
||||
@@ -46,9 +36,9 @@ jobs:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.13"
|
||||
python-version: "3.12"
|
||||
- name: Install ruff
|
||||
run: |
|
||||
pip install ruff==0.9.9
|
||||
@@ -59,8 +49,8 @@ jobs:
|
||||
|
||||
type-check:
|
||||
name: "Type Check"
|
||||
timeout-minutes: 60
|
||||
runs-on: ubuntu-2404-8x-x64
|
||||
timeout-minutes: 30
|
||||
runs-on: "ubuntu-22.04"
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
@@ -71,9 +61,9 @@ jobs:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.13"
|
||||
python-version: "3.12"
|
||||
- name: Install protobuf compiler
|
||||
run: |
|
||||
sudo apt update
|
||||
@@ -88,8 +78,8 @@ jobs:
|
||||
|
||||
doctest:
|
||||
name: "Doctest"
|
||||
timeout-minutes: 60
|
||||
runs-on: ubuntu-2404-8x-x64
|
||||
timeout-minutes: 30
|
||||
runs-on: "ubuntu-24.04"
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
@@ -100,9 +90,9 @@ jobs:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.13"
|
||||
python-version: "3.12"
|
||||
cache: "pip"
|
||||
- name: Install protobuf
|
||||
run: |
|
||||
@@ -110,7 +100,8 @@ jobs:
|
||||
sudo apt install -y protobuf-compiler
|
||||
- name: Install
|
||||
run: |
|
||||
pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests,dev,embeddings]
|
||||
pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests,dev,embeddings]
|
||||
pip install tantivy
|
||||
pip install mlx
|
||||
- name: Doctest
|
||||
run: pytest --doctest-modules python/lancedb
|
||||
@@ -119,7 +110,7 @@ jobs:
|
||||
timeout-minutes: 30
|
||||
strategy:
|
||||
matrix:
|
||||
python-minor-version: ["10", "13"]
|
||||
python-minor-version: ["9", "12"]
|
||||
runs-on: "ubuntu-24.04"
|
||||
defaults:
|
||||
run:
|
||||
@@ -135,7 +126,7 @@ jobs:
|
||||
sudo apt update
|
||||
sudo apt install -y protobuf-compiler
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: 3.${{ matrix.python-minor-version }}
|
||||
- uses: ./.github/workflows/build_linux_wheel
|
||||
@@ -152,9 +143,16 @@ jobs:
|
||||
- name: Delete wheels
|
||||
run: rm -rf target/wheels
|
||||
platform:
|
||||
name: "Mac"
|
||||
name: "Mac: ${{ matrix.config.name }}"
|
||||
timeout-minutes: 30
|
||||
runs-on: macos-14
|
||||
strategy:
|
||||
matrix:
|
||||
config:
|
||||
- name: x86
|
||||
runner: macos-13
|
||||
- name: Arm
|
||||
runner: macos-14
|
||||
runs-on: "${{ matrix.config.runner }}"
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
@@ -165,9 +163,9 @@ jobs:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.13"
|
||||
python-version: "3.12"
|
||||
- uses: ./.github/workflows/build_mac_wheel
|
||||
with:
|
||||
args: --profile ci
|
||||
@@ -194,9 +192,9 @@ jobs:
|
||||
fetch-depth: 0
|
||||
lfs: true
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.13"
|
||||
python-version: "3.12"
|
||||
- uses: ./.github/workflows/build_windows_wheel
|
||||
with:
|
||||
args: --profile ci
|
||||
@@ -205,7 +203,7 @@ jobs:
|
||||
- name: Delete wheels
|
||||
run: rm -rf target/wheels
|
||||
pydantic1x:
|
||||
timeout-minutes: 60
|
||||
timeout-minutes: 30
|
||||
runs-on: "ubuntu-24.04"
|
||||
defaults:
|
||||
run:
|
||||
@@ -221,13 +219,14 @@ jobs:
|
||||
sudo apt update
|
||||
sudo apt install -y protobuf-compiler
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
python-version: 3.9
|
||||
- name: Install lancedb
|
||||
run: |
|
||||
pip install "pydantic<2"
|
||||
pip install pyarrow==16
|
||||
pip install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests]
|
||||
pip install --extra-index-url https://pypi.fury.io/lancedb/ -e .[tests]
|
||||
pip install tantivy
|
||||
- name: Run tests
|
||||
run: pytest -m "not slow and not s3_test" -x -v --durations=30 python/tests
|
||||
|
||||
2
.github/workflows/run_tests/action.yml
vendored
2
.github/workflows/run_tests/action.yml
vendored
@@ -15,7 +15,7 @@ runs:
|
||||
- name: Install lancedb
|
||||
shell: bash
|
||||
run: |
|
||||
pip3 install --extra-index-url https://pypi.fury.io/lance-format/ --extra-index-url https://pypi.fury.io/lancedb/ $(ls target/wheels/lancedb-*.whl)[tests,dev]
|
||||
pip3 install --extra-index-url https://pypi.fury.io/lancedb/ $(ls target/wheels/lancedb-*.whl)[tests,dev]
|
||||
- name: Setup localstack for integration tests
|
||||
if: ${{ inputs.integration == 'true' }}
|
||||
shell: bash
|
||||
|
||||
71
.github/workflows/rust.yml
vendored
71
.github/workflows/rust.yml
vendored
@@ -7,17 +7,9 @@ on:
|
||||
pull_request:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- Cargo.lock
|
||||
- rust-toolchain.toml
|
||||
- deny.toml
|
||||
- rust/**
|
||||
- nodejs/Cargo.toml
|
||||
- python/Cargo.toml
|
||||
- .github/workflows/rust.yml
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
@@ -56,19 +48,6 @@ jobs:
|
||||
run: cargo fmt --all -- --check
|
||||
- name: Run clippy
|
||||
run: cargo clippy --profile ci --workspace --tests --all-features -- -D warnings
|
||||
- name: Run clippy (without remote feature)
|
||||
run: cargo clippy --profile ci --workspace --tests -- -D warnings
|
||||
|
||||
deny:
|
||||
# Supply-chain checks: advisories, licenses, banned crates, and source
|
||||
# restrictions. Configuration lives in `deny.toml` at the workspace root.
|
||||
timeout-minutes: 10
|
||||
runs-on: ubuntu-24.04
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: EmbarkStudios/cargo-deny-action@v2
|
||||
with:
|
||||
command: check advisories bans licenses sources
|
||||
|
||||
build-no-lock:
|
||||
runs-on: ubuntu-24.04
|
||||
@@ -119,9 +98,7 @@ jobs:
|
||||
lfs: true
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y protobuf-compiler libssl-dev
|
||||
run: sudo apt install -y protobuf-compiler libssl-dev
|
||||
- uses: rui314/setup-mold@v1
|
||||
- name: Make Swap
|
||||
run: |
|
||||
@@ -145,7 +122,7 @@ jobs:
|
||||
timeout-minutes: 30
|
||||
strategy:
|
||||
matrix:
|
||||
mac-runner: ["macos-14", "macos-15"]
|
||||
mac-runner: ["macos-13", "macos-14"]
|
||||
runs-on: "${{ matrix.mac-runner }}"
|
||||
defaults:
|
||||
run:
|
||||
@@ -190,13 +167,13 @@ jobs:
|
||||
- name: Build
|
||||
run: |
|
||||
$env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
|
||||
cargo build --profile ci --features aws,remote --tests --locked --target ${{ matrix.target }}
|
||||
cargo build --profile ci --features remote --tests --locked --target ${{ matrix.target }}
|
||||
- name: Run tests
|
||||
# Can only run tests when target matches host
|
||||
if: ${{ matrix.target == 'x86_64-pc-windows-msvc' }}
|
||||
run: |
|
||||
$env:VCPKG_ROOT = $env:VCPKG_INSTALLATION_ROOT
|
||||
cargo test --profile ci --features aws,remote --locked
|
||||
cargo test --profile ci --features remote --locked
|
||||
|
||||
msrv:
|
||||
# Check the minimum supported Rust version
|
||||
@@ -204,7 +181,7 @@ jobs:
|
||||
runs-on: ubuntu-24.04
|
||||
strategy:
|
||||
matrix:
|
||||
msrv: ["1.91.0"] # This should match up with rust-version in Cargo.toml
|
||||
msrv: ["1.78.0"] # This should match up with rust-version in Cargo.toml
|
||||
env:
|
||||
# Need up-to-date compilers for kernels
|
||||
CC: clang-18
|
||||
@@ -225,36 +202,14 @@ jobs:
|
||||
- name: Downgrade dependencies
|
||||
# These packages have newer requirements for MSRV
|
||||
run: |
|
||||
cargo update -p aws-sdk-bedrockruntime --precise 1.77.0
|
||||
cargo update -p aws-sdk-dynamodb --precise 1.68.0
|
||||
cargo update -p aws-config --precise 1.6.0
|
||||
cargo update -p aws-sdk-kms --precise 1.63.0
|
||||
cargo update -p aws-sdk-s3 --precise 1.79.0
|
||||
cargo update -p aws-sdk-sso --precise 1.62.0
|
||||
cargo update -p aws-sdk-ssooidc --precise 1.63.0
|
||||
cargo update -p aws-sdk-sts --precise 1.63.0
|
||||
# aws-runtime/sigv4/credential-types/types and the aws-smithy-*
|
||||
# crates bumped their MSRV to 1.91.1 in late 2026; pin to the last
|
||||
# 1.91.0-compatible versions. The order matters — each downgrade
|
||||
# only succeeds once everything that still pins it at a higher
|
||||
# version has itself been downgraded.
|
||||
cargo update -p aws-runtime --precise 1.5.12
|
||||
cargo update -p aws-types --precise 1.3.9
|
||||
cargo update -p aws-sigv4 --precise 1.3.5
|
||||
cargo update -p aws-credential-types --precise 1.2.8
|
||||
cargo update -p aws-smithy-checksums --precise 0.63.9
|
||||
cargo update -p aws-smithy-runtime --precise 1.9.3
|
||||
cargo update -p aws-smithy-http --precise 0.62.4
|
||||
cargo update -p aws-smithy-eventstream --precise 0.60.12
|
||||
cargo update -p aws-smithy-http-client --precise 1.1.3
|
||||
cargo update -p aws-smithy-observability --precise 0.1.4
|
||||
cargo update -p aws-smithy-query --precise 0.60.8
|
||||
cargo update -p aws-smithy-runtime-api --precise 1.9.1
|
||||
cargo update -p aws-smithy-async --precise 1.2.6
|
||||
cargo update -p aws-smithy-types --precise 1.3.5
|
||||
cargo update -p aws-smithy-xml --precise 0.60.11
|
||||
cargo update -p aws-sdk-bedrockruntime --precise 1.64.0
|
||||
cargo update -p aws-sdk-dynamodb --precise 1.55.0
|
||||
cargo update -p aws-config --precise 1.5.10
|
||||
cargo update -p aws-sdk-kms --precise 1.51.0
|
||||
cargo update -p aws-sdk-s3 --precise 1.65.0
|
||||
cargo update -p aws-sdk-sso --precise 1.50.0
|
||||
cargo update -p aws-sdk-ssooidc --precise 1.51.0
|
||||
cargo update -p aws-sdk-sts --precise 1.51.0
|
||||
cargo update -p home --precise 0.5.9
|
||||
- name: cargo +${{ matrix.msrv }} check
|
||||
env:
|
||||
RUSTUP_TOOLCHAIN: ${{ matrix.msrv }}
|
||||
run: cargo check --profile ci --workspace --tests --benches --all-features
|
||||
|
||||
@@ -3,9 +3,6 @@ name: Update package-lock.json
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@@ -3,9 +3,6 @@ name: Update NodeJs package-lock.json
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
31
.github/workflows/upload_wheel/action.yml
vendored
31
.github/workflows/upload_wheel/action.yml
vendored
@@ -2,6 +2,9 @@ name: upload-wheel
|
||||
|
||||
description: "Upload wheels to Pypi"
|
||||
inputs:
|
||||
pypi_token:
|
||||
required: true
|
||||
description: "release token for the repo"
|
||||
fury_token:
|
||||
required: true
|
||||
description: "release token for the fury repo"
|
||||
@@ -9,6 +12,12 @@ inputs:
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install twine
|
||||
python3 -m pip install --upgrade pkginfo
|
||||
- name: Choose repo
|
||||
shell: bash
|
||||
id: choose_repo
|
||||
@@ -18,17 +27,19 @@ runs:
|
||||
else
|
||||
echo "repo=pypi" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
- name: Publish to Fury
|
||||
if: steps.choose_repo.outputs.repo == 'fury'
|
||||
- name: Publish to PyPI
|
||||
shell: bash
|
||||
env:
|
||||
FURY_TOKEN: ${{ inputs.fury_token }}
|
||||
PYPI_TOKEN: ${{ inputs.pypi_token }}
|
||||
run: |
|
||||
WHEEL=$(ls target/wheels/lancedb-*.whl 2> /dev/null | head -n 1)
|
||||
echo "Uploading $WHEEL to Fury"
|
||||
curl -f -F package=@$WHEEL https://$FURY_TOKEN@push.fury.io/lancedb/
|
||||
- name: Publish to PyPI
|
||||
if: steps.choose_repo.outputs.repo == 'pypi'
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
packages-dir: target/wheels/
|
||||
if [[ ${{ steps.choose_repo.outputs.repo }} == fury ]]; then
|
||||
WHEEL=$(ls target/wheels/lancedb-*.whl 2> /dev/null | head -n 1)
|
||||
echo "Uploading $WHEEL to Fury"
|
||||
curl -f -F package=@$WHEEL https://$FURY_TOKEN@push.fury.io/lancedb/
|
||||
else
|
||||
twine upload --repository ${{ steps.choose_repo.outputs.repo }} \
|
||||
--username __token__ \
|
||||
--password $PYPI_TOKEN \
|
||||
target/wheels/lancedb-*.whl
|
||||
fi
|
||||
|
||||
25
AGENTS.md
25
AGENTS.md
@@ -17,30 +17,9 @@ Common commands:
|
||||
* Run tests: `cargo test --quiet --features remote --tests`
|
||||
* Run specific test: `cargo test --quiet --features remote -p <package_name> --test <test_name>`
|
||||
* Lint: `cargo clippy --quiet --features remote --tests --examples`
|
||||
* Format Rust: `cargo fmt --all`
|
||||
* Format Python: `ruff format .`
|
||||
* Lint Python: `ruff check .`
|
||||
* Bootstrap Python dev env: `cd python && uv run --extra tests --extra dev maturin develop --extras tests,dev`
|
||||
* Run Python tests: `cd python && uv run --extra tests pytest python/tests -vv --durations=10 -m "not slow and not s3_test"`
|
||||
* Run specific Python test: `cd python && uv run --extra tests pytest python/tests/<test_file>.py::<test_name> -q`
|
||||
* Format: `cargo fmt --all`
|
||||
|
||||
For Python validation, prefer the uv-managed environment declared by `python/uv.lock`.
|
||||
Do not treat system `python`, global `pytest`, or missing editable-install errors as
|
||||
final blockers; bootstrap or enter the uv environment instead. If `lancedb._lancedb`
|
||||
is missing or stale, or if Rust/PyO3 binding code changed, rebuild the Python
|
||||
extension with the bootstrap command above before running tests.
|
||||
|
||||
Before committing changes, run formatting for every language you touched. At minimum:
|
||||
|
||||
* Rust changes: run `cargo fmt --all`.
|
||||
* Python changes: run `ruff format .` and `ruff check .` from the repository root,
|
||||
and run targeted tests through `cd python && uv run ...`.
|
||||
* TypeScript changes: run the relevant `npm`/`pnpm` lint, format, build, and docs commands in `nodejs`.
|
||||
|
||||
Before creating a PR, make sure the PR title follows Conventional Commits, such as
|
||||
`fix: support nested field paths in native index creation` or
|
||||
`feat(python): add dataset multiprocessing support`. The semantic-release check uses the
|
||||
PR title and body as the merge commit message, so a non-conventional PR title will fail CI.
|
||||
Before committing changes, run formatting.
|
||||
|
||||
## Coding tips
|
||||
|
||||
|
||||
5123
Cargo.lock
generated
5123
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
71
Cargo.toml
71
Cargo.toml
@@ -1,60 +1,59 @@
|
||||
[workspace]
|
||||
members = ["rust/lancedb", "nodejs", "python"]
|
||||
members = ["rust/lancedb", "nodejs", "python", "java/core/lancedb-jni"]
|
||||
# Python package needs to be built by maturin.
|
||||
exclude = ["python"]
|
||||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
edition = "2024"
|
||||
edition = "2021"
|
||||
authors = ["LanceDB Devs <dev@lancedb.com>"]
|
||||
license = "Apache-2.0"
|
||||
repository = "https://github.com/lancedb/lancedb"
|
||||
description = "Serverless, low-latency vector database for AI applications"
|
||||
keywords = ["lancedb", "lance", "database", "vector", "search"]
|
||||
categories = ["database-implementations"]
|
||||
rust-version = "1.91.0"
|
||||
rust-version = "1.78.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=7.0.0-beta.13", default-features = false, "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=7.0.0-beta.13", default-features = false, "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=7.0.0-beta.13", default-features = false, "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=7.0.0-beta.13", "tag" = "v7.0.0-beta.13", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance = { "version" = "=1.0.0-beta.3", default-features = false, "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=1.0.0-beta.3", default-features = false, "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=1.0.0-beta.3", "features" = ["dir-aws", "dir-gcp", "dir-azure", "dir-oss", "rest"], "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=1.0.0-beta.3", "tag" = "v1.0.0-beta.3", "git" = "https://github.com/lance-format/lance.git" }
|
||||
ahash = "0.8"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "58.0.0", optional = false }
|
||||
arrow-array = "58.0.0"
|
||||
arrow-data = "58.0.0"
|
||||
arrow-ipc = "58.0.0"
|
||||
arrow-ord = "58.0.0"
|
||||
arrow-schema = "58.0.0"
|
||||
arrow-select = "58.0.0"
|
||||
arrow-cast = "58.0.0"
|
||||
arrow = { version = "56.2", optional = false }
|
||||
arrow-array = "56.2"
|
||||
arrow-data = "56.2"
|
||||
arrow-ipc = "56.2"
|
||||
arrow-ord = "56.2"
|
||||
arrow-schema = "56.2"
|
||||
arrow-select = "56.2"
|
||||
arrow-cast = "56.2"
|
||||
async-trait = "0"
|
||||
datafusion = { version = "53.0.0", default-features = false }
|
||||
datafusion-catalog = "53.0.0"
|
||||
datafusion-common = { version = "53.0.0", default-features = false }
|
||||
datafusion-execution = "53.0.0"
|
||||
datafusion-expr = "53.0.0"
|
||||
datafusion-functions = "53.0.0"
|
||||
datafusion-physical-plan = "53.0.0"
|
||||
datafusion-physical-expr = "53.0.0"
|
||||
datafusion-sql = "53.0.0"
|
||||
datafusion = { version = "50.1", default-features = false }
|
||||
datafusion-catalog = "50.1"
|
||||
datafusion-common = { version = "50.1", default-features = false }
|
||||
datafusion-execution = "50.1"
|
||||
datafusion-expr = "50.1"
|
||||
datafusion-physical-plan = "50.1"
|
||||
env_logger = "0.11"
|
||||
half = { "version" = "2.7.1", default-features = false, features = [
|
||||
half = { "version" = "2.6.0", default-features = false, features = [
|
||||
"num-traits",
|
||||
] }
|
||||
futures = "0"
|
||||
log = "0.4"
|
||||
moka = { version = "0.12", features = ["future"] }
|
||||
object_store = "0.13.2"
|
||||
object_store = "0.12.0"
|
||||
pin-project = "1.0.7"
|
||||
rand = "0.9"
|
||||
snafu = "0.8"
|
||||
|
||||
9
Makefile
9
Makefile
@@ -1,9 +0,0 @@
|
||||
.PHONY: licenses
|
||||
|
||||
licenses:
|
||||
cargo about generate about.hbs -o RUST_THIRD_PARTY_LICENSES.html -c about.toml
|
||||
cd python && cargo about generate ../about.hbs -o RUST_THIRD_PARTY_LICENSES.html -c ../about.toml
|
||||
cd python && uv sync --all-extras && uv tool run pip-licenses --python .venv/bin/python --format=markdown --with-urls --output-file=PYTHON_THIRD_PARTY_LICENSES.md
|
||||
cd nodejs && cargo about generate ../about.hbs -o RUST_THIRD_PARTY_LICENSES.html -c ../about.toml
|
||||
cd nodejs && npx license-checker --markdown --out NODEJS_THIRD_PARTY_LICENSES.md
|
||||
cd java && ./mvnw license:aggregate-add-third-party -q
|
||||
@@ -15,7 +15,7 @@
|
||||
|
||||
# **The Multimodal AI Lakehouse**
|
||||
|
||||
[**How to Install** ](#how-to-install) ✦ [**Detailed Documentation**](https://docs.lancedb.com) ✦ [**Tutorials and Recipes**](https://github.com/lancedb/vectordb-recipes/tree/main) ✦ [**Contributors**](#contributors)
|
||||
[**How to Install** ](#how-to-install) ✦ [**Detailed Documentation**](https://lancedb.github.io/lancedb/) ✦ [**Tutorials and Recipes**](https://github.com/lancedb/vectordb-recipes/tree/main) ✦ [**Contributors**](#contributors)
|
||||
|
||||
**The ultimate multimodal data platform for AI/ML applications.**
|
||||
|
||||
@@ -57,7 +57,7 @@ LanceDB is a central location where developers can build, train and analyze thei
|
||||
|
||||
## **How to Install**:
|
||||
|
||||
Follow the [Quickstart](https://docs.lancedb.com/quickstart) doc to set up LanceDB locally.
|
||||
Follow the [Quickstart](https://lancedb.com/docs/quickstart/) doc to set up LanceDB locally.
|
||||
|
||||
**API & SDK:** We also support Python, Typescript and Rust SDKs
|
||||
|
||||
@@ -66,7 +66,7 @@ Follow the [Quickstart](https://docs.lancedb.com/quickstart) doc to set up Lance
|
||||
| Python SDK | https://lancedb.github.io/lancedb/python/python/ |
|
||||
| Typescript SDK | https://lancedb.github.io/lancedb/js/globals/ |
|
||||
| Rust SDK | https://docs.rs/lancedb/latest/lancedb/index.html |
|
||||
| REST API | https://docs.lancedb.com/api-reference/rest |
|
||||
| REST API | https://docs.lancedb.com/api-reference/introduction |
|
||||
|
||||
## **Join Us and Contribute**
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
70
about.hbs
70
about.hbs
@@ -1,70 +0,0 @@
|
||||
<html>
|
||||
|
||||
<head>
|
||||
<style>
|
||||
@media (prefers-color-scheme: dark) {
|
||||
body {
|
||||
background: #333;
|
||||
color: white;
|
||||
}
|
||||
a {
|
||||
color: skyblue;
|
||||
}
|
||||
}
|
||||
.container {
|
||||
font-family: sans-serif;
|
||||
max-width: 800px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
.intro {
|
||||
text-align: center;
|
||||
}
|
||||
.licenses-list {
|
||||
list-style-type: none;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
.license-used-by {
|
||||
margin-top: -10px;
|
||||
}
|
||||
.license-text {
|
||||
max-height: 200px;
|
||||
overflow-y: scroll;
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<main class="container">
|
||||
<div class="intro">
|
||||
<h1>Third Party Licenses</h1>
|
||||
<p>This page lists the licenses of the projects used in cargo-about.</p>
|
||||
</div>
|
||||
|
||||
<h2>Overview of licenses:</h2>
|
||||
<ul class="licenses-overview">
|
||||
{{#each overview}}
|
||||
<li><a href="#{{id}}">{{name}}</a> ({{count}})</li>
|
||||
{{/each}}
|
||||
</ul>
|
||||
|
||||
<h2>All license text:</h2>
|
||||
<ul class="licenses-list">
|
||||
{{#each licenses}}
|
||||
<li class="license">
|
||||
<h3 id="{{id}}">{{name}}</h3>
|
||||
<h4>Used by:</h4>
|
||||
<ul class="license-used-by">
|
||||
{{#each used_by}}
|
||||
<li><a href="{{#if crate.repository}} {{crate.repository}} {{else}} https://crates.io/crates/{{crate.name}} {{/if}}">{{crate.name}} {{crate.version}}</a></li>
|
||||
{{/each}}
|
||||
</ul>
|
||||
<pre class="license-text">{{text}}</pre>
|
||||
</li>
|
||||
{{/each}}
|
||||
</ul>
|
||||
</main>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
18
about.toml
18
about.toml
@@ -1,18 +0,0 @@
|
||||
accepted = [
|
||||
"0BSD",
|
||||
"Apache-2.0",
|
||||
"Apache-2.0 WITH LLVM-exception",
|
||||
"BSD-2-Clause",
|
||||
"BSD-3-Clause",
|
||||
"BSL-1.0",
|
||||
"bzip2-1.0.6",
|
||||
"CC0-1.0",
|
||||
"CDDL-1.0",
|
||||
"CDLA-Permissive-2.0",
|
||||
"ISC",
|
||||
"MIT",
|
||||
"MPL-2.0",
|
||||
"OpenSSL",
|
||||
"Unicode-3.0",
|
||||
"Zlib",
|
||||
]
|
||||
@@ -3,7 +3,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import functools
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
@@ -27,7 +26,6 @@ SEMVER_RE = re.compile(
|
||||
)
|
||||
|
||||
|
||||
@functools.total_ordering
|
||||
@dataclass(frozen=True)
|
||||
class SemVer:
|
||||
major: int
|
||||
@@ -158,9 +156,7 @@ def read_current_version(repo_root: Path) -> str:
|
||||
|
||||
|
||||
def determine_latest_tag(tags: Iterable[TagInfo]) -> TagInfo:
|
||||
# Stable releases (no prerelease) are always preferred over pre-releases.
|
||||
# Within each group, standard semver ordering applies.
|
||||
return max(tags, key=lambda tag: (not tag.semver.prerelease, tag.semver))
|
||||
return max(tags, key=lambda tag: tag.semver)
|
||||
|
||||
|
||||
def write_outputs(args: argparse.Namespace, payload: dict) -> None:
|
||||
|
||||
@@ -16,7 +16,7 @@ check_command_exists() {
|
||||
}
|
||||
|
||||
if [[ ! -e ./lancedb ]]; then
|
||||
if [[ x${SOPHON_READ_TOKEN} != "x" ]]; then
|
||||
if [[ -v SOPHON_READ_TOKEN ]]; then
|
||||
INPUT="lancedb-linux-x64"
|
||||
gh release \
|
||||
--repo lancedb/lancedb \
|
||||
|
||||
@@ -229,29 +229,6 @@ def set_local_version():
|
||||
update_cargo_toml(line_updater)
|
||||
|
||||
|
||||
def update_lockfiles(version: str, fallback_to_git: bool = False):
|
||||
"""
|
||||
Update Cargo metadata and optionally fall back to using the git tag if the
|
||||
requested crates.io version is unavailable.
|
||||
"""
|
||||
try:
|
||||
print("Updating lockfiles...", file=sys.stderr, end="")
|
||||
run_command("cargo metadata > /dev/null")
|
||||
print(" done.", file=sys.stderr)
|
||||
except Exception as e:
|
||||
if fallback_to_git and "failed to select a version" in str(e):
|
||||
print(
|
||||
f" failed for crates.io v{version}, retrying with git tag...",
|
||||
file=sys.stderr,
|
||||
)
|
||||
set_preview_version(version)
|
||||
print("Updating lockfiles...", file=sys.stderr, end="")
|
||||
run_command("cargo metadata > /dev/null")
|
||||
print(" done.", file=sys.stderr)
|
||||
else:
|
||||
raise
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser(description="Set the version of the Lance package.")
|
||||
parser.add_argument(
|
||||
"version",
|
||||
@@ -267,7 +244,6 @@ if args.version == "stable":
|
||||
file=sys.stderr,
|
||||
)
|
||||
set_stable_version(latest_stable_version)
|
||||
update_lockfiles(latest_stable_version)
|
||||
elif args.version == "preview":
|
||||
latest_preview_version = get_latest_preview_version()
|
||||
print(
|
||||
@@ -275,10 +251,8 @@ elif args.version == "preview":
|
||||
file=sys.stderr,
|
||||
)
|
||||
set_preview_version(latest_preview_version)
|
||||
update_lockfiles(latest_preview_version)
|
||||
elif args.version == "local":
|
||||
set_local_version()
|
||||
update_lockfiles("local")
|
||||
else:
|
||||
# Parse the version number.
|
||||
version = args.version
|
||||
@@ -288,7 +262,9 @@ else:
|
||||
|
||||
if "beta" in version:
|
||||
set_preview_version(version)
|
||||
update_lockfiles(version)
|
||||
else:
|
||||
set_stable_version(version)
|
||||
update_lockfiles(version, fallback_to_git=True)
|
||||
|
||||
print("Updating lockfiles...", file=sys.stderr, end="")
|
||||
run_command("cargo metadata > /dev/null")
|
||||
print(" done.", file=sys.stderr)
|
||||
|
||||
196
deny.toml
196
deny.toml
@@ -1,196 +0,0 @@
|
||||
# cargo-deny configuration for LanceDB.
|
||||
#
|
||||
# Run locally with `cargo deny check`. See
|
||||
# https://embarkstudios.github.io/cargo-deny/ for the full reference.
|
||||
|
||||
# The set of target triples we care about. cargo-deny will only consider
|
||||
# dependencies that are used on at least one of these targets. Keeping this
|
||||
# explicit avoids noise from platform-specific crates (e.g. wasm, android,
|
||||
# ios) that we never actually ship.
|
||||
[graph]
|
||||
targets = [
|
||||
"x86_64-unknown-linux-gnu",
|
||||
"aarch64-unknown-linux-gnu",
|
||||
"x86_64-apple-darwin",
|
||||
"aarch64-apple-darwin",
|
||||
"x86_64-pc-windows-msvc",
|
||||
"aarch64-pc-windows-msvc",
|
||||
]
|
||||
all-features = true
|
||||
|
||||
[output]
|
||||
feature-depth = 1
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Advisories: security vulnerabilities and yanked crates.
|
||||
# ---------------------------------------------------------------------------
|
||||
[advisories]
|
||||
version = 2
|
||||
# Fail the check if any crate in the lockfile has been yanked from crates.io.
|
||||
# Yanked crates are a signal the author retracted the release (often due to
|
||||
# bugs or security issues) and should not be depended on.
|
||||
yanked = "deny"
|
||||
# Advisory IDs we have explicitly reviewed and chosen to accept. Every
|
||||
# entry must include a rationale and, where possible, an upstream issue
|
||||
# pointing to a fix. Revisit this list whenever dependencies are updated.
|
||||
ignore = [
|
||||
# rsa: Marvin Attack timing side-channel in PKCS#1 v1.5 decryption.
|
||||
# Reached only through opendal → reqsign → rsa. We do not use RSA
|
||||
# decryption in LanceDB ourselves; this is dormant in the signing path.
|
||||
# No fixed release exists upstream as of this writing.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2023-0071
|
||||
{ id = "RUSTSEC-2023-0071", reason = "rsa crate via opendal/reqsign; no fixed upstream release" },
|
||||
|
||||
# instant: unmaintained. Pulled in via backoff → instant. Upstream
|
||||
# recommends switching to `web-time`; fix has to come from backoff.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2024-0384
|
||||
{ id = "RUSTSEC-2024-0384", reason = "transitive via backoff; waiting on backoff replacement" },
|
||||
|
||||
# paste: unmaintained (author archived the repo). Used transitively by
|
||||
# datafusion and the arrow ecosystem; widespread, no drop-in replacement.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2024-0436
|
||||
{ id = "RUSTSEC-2024-0436", reason = "transitive via datafusion; awaiting ecosystem migration" },
|
||||
|
||||
# encoding: unmaintained. Reached through lindera-dictionary, which is
|
||||
# required by the native Lindera tokenizer path. Lindera has not migrated
|
||||
# off this crate yet.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2021-0153
|
||||
{ id = "RUSTSEC-2021-0153", reason = "transitive via lindera-dictionary for native Lindera tokenizer" },
|
||||
|
||||
# fast-float: unsound and unmaintained. Reached only through polars-arrow
|
||||
# from the optional Polars integration; replacement requires a Polars
|
||||
# dependency upgrade.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2024-0379
|
||||
{ id = "RUSTSEC-2024-0379", reason = "transitive via polars-arrow; waiting on Polars migration" },
|
||||
|
||||
# tantivy: segfault on malformed input due to missing bounds check.
|
||||
# Pulled in via lance for full-text search. We only feed tantivy
|
||||
# documents we construct ourselves, not attacker-controlled bytes.
|
||||
# Tracked for a lance dependency bump.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2025-0003
|
||||
{ id = "RUSTSEC-2025-0003", reason = "tantivy via lance; inputs are internally produced, not user-supplied bytes" },
|
||||
|
||||
# backoff: unmaintained. Reached only via async-openai. Replacement
|
||||
# requires async-openai to migrate (or us to drop async-openai).
|
||||
# https://rustsec.org/advisories/RUSTSEC-2025-0012
|
||||
{ id = "RUSTSEC-2025-0012", reason = "transitive via async-openai; waiting on upstream migration" },
|
||||
|
||||
# number_prefix: unmaintained. Transitive via indicatif → hf-hub.
|
||||
# No security impact, just maintenance status.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2025-0119
|
||||
{ id = "RUSTSEC-2025-0119", reason = "transitive via hf-hub/indicatif; cosmetic formatting crate" },
|
||||
|
||||
# bincode: unmaintained. Reached through lindera and lindera-dictionary,
|
||||
# which are required by the native Lindera tokenizer path. Lindera has not
|
||||
# migrated to another serialization format yet.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2025-0141
|
||||
{ id = "RUSTSEC-2025-0141", reason = "transitive via lindera/lindera-dictionary for native Lindera tokenizer" },
|
||||
|
||||
# lru: soundness issue in IterMut. Reached only through aws-sdk-s3 in
|
||||
# LanceDB's dev-dependency graph; LanceDB does not use that iterator
|
||||
# directly. Clearing this requires the AWS SDK chain to update lru.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2026-0002
|
||||
{ id = "RUSTSEC-2026-0002", reason = "transitive via aws-sdk-s3 dev-dependency; waiting on AWS SDK lru upgrade" },
|
||||
|
||||
# rustls-webpki 0.101.7 (old major line): name-constraint checks for
|
||||
# URI / wildcard names. Pulled in only via the legacy rustls 0.21 chain
|
||||
# from aws-smithy-http-client. The 0.103 line we actively use is patched.
|
||||
# Clearing the 0.101 copy requires the aws-sdk chain to migrate off
|
||||
# rustls 0.21.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2026-0098
|
||||
# https://rustsec.org/advisories/RUSTSEC-2026-0099
|
||||
{ id = "RUSTSEC-2026-0098", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" },
|
||||
{ id = "RUSTSEC-2026-0099", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" },
|
||||
|
||||
# rustls-webpki 0.101.7: reachable panic in CRL parsing. Same legacy
|
||||
# rustls 0.21 chain from aws-smithy-http-client as above. The 0.103 line
|
||||
# we actively use is upgraded to 0.103.13 which contains the fix.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2026-0104
|
||||
{ id = "RUSTSEC-2026-0104", reason = "only affects rustls-webpki 0.101 from legacy aws-smithy/rustls 0.21 chain" },
|
||||
|
||||
# rand 0.8.5: soundness issue only when ThreadRng reseeds inside a custom
|
||||
# logger. Reached through several transitive chains. LanceDB does not use
|
||||
# rand from a custom logger; upgrade once all pinned chains accept 0.8.6+.
|
||||
# https://rustsec.org/advisories/RUSTSEC-2026-0097
|
||||
{ id = "RUSTSEC-2026-0097", reason = "transitive rand 0.8.5; LanceDB does not call ThreadRng from custom logging" },
|
||||
]
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Licenses: only allow licenses we've reviewed as compatible with Apache-2.0.
|
||||
# ---------------------------------------------------------------------------
|
||||
[licenses]
|
||||
version = 2
|
||||
# SPDX identifiers for licenses that are compatible with our Apache-2.0
|
||||
# distribution. Additions require legal review.
|
||||
allow = [
|
||||
"Apache-2.0",
|
||||
"Apache-2.0 WITH LLVM-exception",
|
||||
"MIT",
|
||||
"BSD-2-Clause",
|
||||
"BSD-3-Clause",
|
||||
"ISC",
|
||||
"Unicode-3.0",
|
||||
"Unicode-DFS-2016",
|
||||
"Zlib",
|
||||
"CC0-1.0",
|
||||
"MPL-2.0",
|
||||
"BSL-1.0",
|
||||
"OpenSSL",
|
||||
# 0BSD ("BSD Zero Clause") is effectively public domain — no attribution
|
||||
# required. Pulled in by `mock_instant`.
|
||||
"0BSD",
|
||||
# bzip2-1.0.6 is the permissive upstream bzip2 license (BSD-like). Pulled
|
||||
# in by `libbz2-rs-sys`, the pure-Rust bzip2 implementation.
|
||||
"bzip2-1.0.6",
|
||||
# CDLA-Permissive-2.0 is a permissive data license used by `webpki-roots`
|
||||
# for the Mozilla CA root bundle. Data-only, distribution-compatible.
|
||||
"CDLA-Permissive-2.0",
|
||||
]
|
||||
confidence-threshold = 0.8
|
||||
# Crates whose license cannot be determined from Cargo metadata but whose
|
||||
# license we've manually confirmed from upstream. Keep this list minimal.
|
||||
[[licenses.clarify]]
|
||||
# polars-arrow-format omits the `license` field in its Cargo.toml, but the
|
||||
# upstream repo (pola-rs/polars-arrow-format) is dual-licensed Apache-2.0 OR
|
||||
# MIT. See https://github.com/pola-rs/polars-arrow-format/blob/main/LICENSE
|
||||
crate = "polars-arrow-format"
|
||||
expression = "Apache-2.0 OR MIT"
|
||||
license-files = []
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bans: disallow specific crates and flag dependency hygiene issues.
|
||||
# ---------------------------------------------------------------------------
|
||||
[bans]
|
||||
# Warn (not deny) on duplicate versions of the same crate. In a large
|
||||
# workspace like this one, duplicates are common and often unavoidable
|
||||
# transitively. We surface them to discourage growth, but don't fail CI.
|
||||
multiple-versions = "warn"
|
||||
# Wildcard version requirements (`foo = "*"`) are a footgun — they let any
|
||||
# future release in without review. Ban them outright.
|
||||
wildcards = "deny"
|
||||
# Internal workspace crates reference each other via `path = "..."`, which
|
||||
# cargo-deny sees as a wildcard version. That's fine for private workspace
|
||||
# members (not published to crates.io), so allow it specifically for paths.
|
||||
allow-wildcard-paths = true
|
||||
# Features that, if enabled, should cause the check to fail.
|
||||
deny = []
|
||||
# Crates to skip when checking for duplicate versions.
|
||||
skip = []
|
||||
# Similar to `skip`, but also skips the entire transitive subtree.
|
||||
skip-tree = []
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sources: restrict where crates can come from.
|
||||
# ---------------------------------------------------------------------------
|
||||
[sources]
|
||||
# Deny any registry other than the ones explicitly listed below.
|
||||
unknown-registry = "deny"
|
||||
# Deny any git dependency whose host isn't in the allow-list below. This
|
||||
# prevents accidental pulls from arbitrary forks.
|
||||
unknown-git = "deny"
|
||||
allow-registry = ["https://github.com/rust-lang/crates.io-index"]
|
||||
# Lance is developed in a sibling repo and pulled as a git dependency until
|
||||
# releases are cut to crates.io. Allow that specific host.
|
||||
allow-git = [
|
||||
"https://github.com/lance-format/lance",
|
||||
]
|
||||
@@ -1,7 +1,7 @@
|
||||
version: "3.9"
|
||||
services:
|
||||
localstack:
|
||||
image: localstack/localstack:4.0
|
||||
image: localstack/localstack:3.3
|
||||
ports:
|
||||
- 4566:4566
|
||||
environment:
|
||||
|
||||
@@ -1,27 +1,27 @@
|
||||
# Simple base dockerfile that supports basic dependencies required to run lance with FTS and Hybrid Search
|
||||
# Usage: docker build -t lancedb:latest -f Dockerfile .
|
||||
FROM python:3.12-slim-bookworm
|
||||
#Simple base dockerfile that supports basic dependencies required to run lance with FTS and Hybrid Search
|
||||
#Usage docker build -t lancedb:latest -f Dockerfile .
|
||||
FROM python:3.10-slim-buster
|
||||
|
||||
# Install build dependencies in a single layer
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
curl \
|
||||
build-essential \
|
||||
protobuf-compiler \
|
||||
git \
|
||||
ca-certificates && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Rust (pinned installer, non-interactive)
|
||||
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable --profile minimal
|
||||
# Install Rust
|
||||
RUN apt-get update && apt-get install -y curl build-essential && \
|
||||
curl https://sh.rustup.rs -sSf | sh -s -- -y
|
||||
|
||||
# Set the environment variable for Rust
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
|
||||
# Install protobuf compiler
|
||||
RUN apt-get install -y protobuf-compiler && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN apt-get -y update &&\
|
||||
apt-get -y upgrade && \
|
||||
apt-get -y install git
|
||||
|
||||
|
||||
# Verify installations
|
||||
RUN python --version && \
|
||||
rustc --version && \
|
||||
protoc --version
|
||||
|
||||
RUN pip install --no-cache-dir lancedb
|
||||
RUN pip install tantivy lancedb
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
# LanceDB Documentation
|
||||
|
||||
LanceDB docs are available at [docs.lancedb.com](https://docs.lancedb.com).
|
||||
LanceDB docs are deployed to https://lancedb.github.io/lancedb/.
|
||||
|
||||
The SDK docs are built and deployed automatically by [Github Actions](../.github/workflows/docs.yml)
|
||||
Docs is built and deployed automatically by [Github Actions](../.github/workflows/docs.yml)
|
||||
whenever a commit is pushed to the `main` branch. So it is possible for the docs to show
|
||||
unreleased features.
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ watch:
|
||||
theme:
|
||||
name: "material"
|
||||
logo: assets/logo.png
|
||||
favicon: assets/favicon.ico
|
||||
favicon: assets/logo.png
|
||||
palette:
|
||||
# Palette toggle for light mode
|
||||
- scheme: lancedb
|
||||
@@ -32,6 +32,8 @@ theme:
|
||||
- content.tooltips
|
||||
- toc.follow
|
||||
- navigation.top
|
||||
- navigation.tabs
|
||||
- navigation.tabs.sticky
|
||||
- navigation.footer
|
||||
- navigation.tracking
|
||||
- navigation.instant
|
||||
@@ -52,21 +54,14 @@ plugins:
|
||||
options:
|
||||
docstring_style: numpy
|
||||
heading_level: 3
|
||||
show_source: true
|
||||
show_symbol_type_in_heading: true
|
||||
show_signature_annotations: true
|
||||
show_root_heading: true
|
||||
show_docstring_examples: true
|
||||
show_docstring_attributes: false
|
||||
show_docstring_other_parameters: true
|
||||
show_symbol_type_heading: true
|
||||
show_labels: false
|
||||
show_if_no_docstring: true
|
||||
show_source: false
|
||||
members_order: source
|
||||
docstring_section_style: list
|
||||
signature_crossrefs: true
|
||||
separate_signature: true
|
||||
filters:
|
||||
- "!^_"
|
||||
import:
|
||||
# for cross references
|
||||
- https://arrow.apache.org/docs/objects.inv
|
||||
@@ -120,16 +115,14 @@ markdown_extensions:
|
||||
emoji_index: !!python/name:material.extensions.emoji.twemoji
|
||||
emoji_generator: !!python/name:material.extensions.emoji.to_svg
|
||||
- markdown.extensions.toc:
|
||||
toc_depth: 4
|
||||
permalink: true
|
||||
permalink_title: Anchor link to this section
|
||||
baselevel: 1
|
||||
permalink: ""
|
||||
|
||||
nav:
|
||||
- Documentation:
|
||||
- SDK Reference: index.md
|
||||
- API reference:
|
||||
- Overview: index.md
|
||||
- Python: python/python.md
|
||||
- Javascript/TypeScript: js/globals.md
|
||||
- Java: java/java.md
|
||||
- Rust: https://docs.rs/lancedb/latest/lancedb/index.html
|
||||
|
||||
extra_css:
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
mkdocs==1.6.1
|
||||
mkdocs==1.5.3
|
||||
mkdocs-jupyter==0.24.1
|
||||
mkdocs-material==9.6.23
|
||||
mkdocs-autorefs>=0.5,<=1.0
|
||||
mkdocstrings[python]>=0.24,<1.0
|
||||
griffe>=0.40,<1.0
|
||||
mkdocs-render-swagger-plugin>=0.1.0
|
||||
pydantic>=2.0,<3.0
|
||||
mkdocs-redirects>=1.2.0
|
||||
mkdocs-material==9.5.3
|
||||
mkdocs-autorefs<=1.0
|
||||
mkdocstrings[python]==0.25.2
|
||||
griffe
|
||||
mkdocs-render-swagger-plugin
|
||||
pydantic
|
||||
mkdocs-redirects
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 15 KiB |
@@ -1,111 +0,0 @@
|
||||
# VoyageAI Embeddings : Multimodal
|
||||
|
||||
VoyageAI embeddings can also be used to embed both text and image data, only some of the models support image data and you can check the list
|
||||
under [https://docs.voyageai.com/docs/multimodal-embeddings](https://docs.voyageai.com/docs/multimodal-embeddings)
|
||||
|
||||
Supported multimodal models:
|
||||
|
||||
- `voyage-multimodal-3` - 1024 dimensions (text + images)
|
||||
- `voyage-multimodal-3.5` - Flexible dimensions (256, 512, 1024 default, 2048). Supports text, images, and video.
|
||||
|
||||
### Video Support (voyage-multimodal-3.5)
|
||||
|
||||
The `voyage-multimodal-3.5` model supports video input through:
|
||||
- Video URLs (`.mp4`, `.webm`, `.mov`, `.avi`, `.mkv`, `.m4v`, `.gif`)
|
||||
- Video file paths
|
||||
|
||||
Constraints: Max 20MB video size.
|
||||
|
||||
Supported parameters (to be passed in `create` method) are:
|
||||
|
||||
| Parameter | Type | Default Value | Description |
|
||||
|---|---|-------------------------|-------------------------------------------|
|
||||
| `name` | `str` | `"voyage-multimodal-3"` | The model ID of the VoyageAI model to use |
|
||||
| `output_dimension` | `int` | `None` | Output dimension for voyage-multimodal-3.5. Valid: 256, 512, 1024, 2048 |
|
||||
|
||||
Usage Example:
|
||||
|
||||
```python
|
||||
import base64
|
||||
import os
|
||||
from io import BytesIO
|
||||
|
||||
import requests
|
||||
import lancedb
|
||||
from lancedb.pydantic import LanceModel, Vector
|
||||
from lancedb.embeddings import get_registry
|
||||
import pandas as pd
|
||||
|
||||
os.environ['VOYAGE_API_KEY'] = 'YOUR_VOYAGE_API_KEY'
|
||||
|
||||
db = lancedb.connect(".lancedb")
|
||||
func = get_registry().get("voyageai").create(name="voyage-multimodal-3")
|
||||
|
||||
|
||||
def image_to_base64(image_bytes: bytes):
|
||||
buffered = BytesIO(image_bytes)
|
||||
img_str = base64.b64encode(buffered.getvalue())
|
||||
return img_str.decode("utf-8")
|
||||
|
||||
|
||||
class Images(LanceModel):
|
||||
label: str
|
||||
image_uri: str = func.SourceField() # image uri as the source
|
||||
image_bytes: str = func.SourceField() # image bytes base64 encoded as the source
|
||||
vector: Vector(func.ndims()) = func.VectorField() # vector column
|
||||
vec_from_bytes: Vector(func.ndims()) = func.VectorField() # Another vector column
|
||||
|
||||
|
||||
if "images" in db.table_names():
|
||||
db.drop_table("images")
|
||||
table = db.create_table("images", schema=Images)
|
||||
labels = ["cat", "cat", "dog", "dog", "horse", "horse"]
|
||||
uris = [
|
||||
"http://farm1.staticflickr.com/53/167798175_7c7845bbbd_z.jpg",
|
||||
"http://farm1.staticflickr.com/134/332220238_da527d8140_z.jpg",
|
||||
"http://farm9.staticflickr.com/8387/8602747737_2e5c2a45d4_z.jpg",
|
||||
"http://farm5.staticflickr.com/4092/5017326486_1f46057f5f_z.jpg",
|
||||
"http://farm9.staticflickr.com/8216/8434969557_d37882c42d_z.jpg",
|
||||
"http://farm6.staticflickr.com/5142/5835678453_4f3a4edb45_z.jpg",
|
||||
]
|
||||
# get each uri as bytes
|
||||
images_bytes = [image_to_base64(requests.get(uri).content) for uri in uris]
|
||||
table.add(
|
||||
pd.DataFrame({"label": labels, "image_uri": uris, "image_bytes": images_bytes})
|
||||
)
|
||||
```
|
||||
Now we can search using text from both the default vector column and the custom vector column
|
||||
```python
|
||||
|
||||
# text search
|
||||
actual = table.search("man's best friend", "vec_from_bytes").limit(1).to_pydantic(Images)[0]
|
||||
print(actual.label) # prints "dog"
|
||||
|
||||
frombytes = (
|
||||
table.search("man's best friend", vector_column_name="vec_from_bytes")
|
||||
.limit(1)
|
||||
.to_pydantic(Images)[0]
|
||||
)
|
||||
print(frombytes.label)
|
||||
|
||||
```
|
||||
|
||||
Because we're using a multi-modal embedding function, we can also search using images
|
||||
|
||||
```python
|
||||
# image search
|
||||
query_image_uri = "http://farm1.staticflickr.com/200/467715466_ed4a31801f_z.jpg"
|
||||
image_bytes = requests.get(query_image_uri).content
|
||||
query_image = Image.open(BytesIO(image_bytes))
|
||||
actual = table.search(query_image, "vec_from_bytes").limit(1).to_pydantic(Images)[0]
|
||||
print(actual.label == "dog")
|
||||
|
||||
# image search using a custom vector column
|
||||
other = (
|
||||
table.search(query_image, vector_column_name="vec_from_bytes")
|
||||
.limit(1)
|
||||
.to_pydantic(Images)[0]
|
||||
)
|
||||
print(actual.label)
|
||||
|
||||
```
|
||||
@@ -1,62 +0,0 @@
|
||||
# VoyageAI Embeddings
|
||||
|
||||
Voyage AI provides cutting-edge embedding and rerankers.
|
||||
|
||||
|
||||
Using voyageai API requires voyageai package, which can be installed using `pip install voyageai`. Voyage AI embeddings are used to generate embeddings for text data. The embeddings can be used for various tasks like semantic search, clustering, and classification.
|
||||
You also need to set the `VOYAGE_API_KEY` environment variable to use the VoyageAI API.
|
||||
|
||||
Supported models are:
|
||||
|
||||
**Voyage-4 Series (Latest)**
|
||||
|
||||
- voyage-4 (1024 dims, general-purpose and multilingual retrieval, 320K batch tokens)
|
||||
- voyage-4-lite (1024 dims, optimized for latency and cost, 1M batch tokens)
|
||||
- voyage-4-large (1024 dims, best retrieval quality, 120K batch tokens)
|
||||
|
||||
**Voyage-3 Series**
|
||||
|
||||
- voyage-3
|
||||
- voyage-3-lite
|
||||
|
||||
**Domain-Specific Models**
|
||||
|
||||
- voyage-finance-2
|
||||
- voyage-multilingual-2
|
||||
- voyage-law-2
|
||||
- voyage-code-2
|
||||
|
||||
|
||||
Supported parameters (to be passed in `create` method) are:
|
||||
|
||||
| Parameter | Type | Default Value | Description |
|
||||
|---|---|--------|---------|
|
||||
| `name` | `str` | `None` | The model ID of the model to use. Supported base models for Text Embeddings: voyage-4, voyage-4-lite, voyage-4-large, voyage-3, voyage-3-lite, voyage-finance-2, voyage-multilingual-2, voyage-law-2, voyage-code-2 |
|
||||
| `input_type` | `str` | `None` | Type of the input text. Default to None. Other options: query, document. |
|
||||
| `truncation` | `bool` | `True` | Whether to truncate the input texts to fit within the context length. |
|
||||
|
||||
|
||||
Usage Example:
|
||||
|
||||
```python
|
||||
import lancedb
|
||||
from lancedb.pydantic import LanceModel, Vector
|
||||
from lancedb.embeddings import EmbeddingFunctionRegistry
|
||||
|
||||
voyageai = EmbeddingFunctionRegistry
|
||||
.get_instance()
|
||||
.get("voyageai")
|
||||
.create(name="voyage-3")
|
||||
|
||||
class TextModel(LanceModel):
|
||||
text: str = voyageai.SourceField()
|
||||
vector: Vector(voyageai.ndims()) = voyageai.VectorField()
|
||||
|
||||
data = [ { "text": "hello world" },
|
||||
{ "text": "goodbye world" }]
|
||||
|
||||
db = lancedb.connect("~/.lancedb")
|
||||
tbl = db.create_table("test", schema=TextModel, mode="overwrite")
|
||||
|
||||
tbl.add(data)
|
||||
```
|
||||
@@ -1,12 +1,7 @@
|
||||
# SDK Reference
|
||||
# API Reference
|
||||
|
||||
This site contains the API reference for the client SDKs supported by [LanceDB](https://lancedb.com).
|
||||
This page contains the API reference for the SDKs supported by the LanceDB team.
|
||||
|
||||
- [Python](python/python.md)
|
||||
- [JavaScript/TypeScript](js/globals.md)
|
||||
- [Java](java/java.md)
|
||||
- [Rust](https://docs.rs/lancedb/latest/lancedb/index.html)
|
||||
|
||||
!!! info "LanceDB Documentation"
|
||||
|
||||
If you're looking for the full documentation of LanceDB, visit [docs.lancedb.com](https://docs.lancedb.com).
|
||||
- [Rust](https://docs.rs/lancedb/latest/lancedb/index.html)
|
||||
@@ -1,499 +0,0 @@
|
||||
# Java SDK
|
||||
|
||||
The LanceDB Java SDK provides a convenient way to interact with LanceDB Cloud and Enterprise deployments using the Lance REST Namespace API.
|
||||
|
||||
!!! note
|
||||
The Java SDK currently only works for LanceDB remote database that connects to LanceDB Cloud and Enterprise.
|
||||
Local database support is a work in progress. Check [LANCEDB-2848](https://github.com/lancedb/lancedb/issues/2848) for the latest progress.
|
||||
|
||||
## Installation
|
||||
|
||||
Add the following dependency to your `pom.xml`:
|
||||
|
||||
```xml
|
||||
<dependency>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-core</artifactId>
|
||||
<version>0.29.1-beta.0</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Connecting to LanceDB Cloud
|
||||
|
||||
```java
|
||||
import com.lancedb.LanceDbNamespaceClientBuilder;
|
||||
import org.lance.namespace.LanceNamespace;
|
||||
|
||||
// If your DB url is db://example-db, then your database here is example-db
|
||||
LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
|
||||
.apiKey("your_lancedb_cloud_api_key")
|
||||
.database("your_database_name")
|
||||
.build();
|
||||
```
|
||||
|
||||
### Connecting to LanceDB Enterprise
|
||||
|
||||
For LanceDB Enterprise deployments with a custom endpoint:
|
||||
|
||||
```java
|
||||
LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
|
||||
.apiKey("your_lancedb_enterprise_api_key")
|
||||
.database("your_database_name")
|
||||
.endpoint("<your_enterprise_endpoint>")
|
||||
.build();
|
||||
```
|
||||
|
||||
### Configuration Options
|
||||
|
||||
| Method | Description | Required |
|
||||
|--------|-------------|----------|
|
||||
| `apiKey(String)` | LanceDB API key | Yes |
|
||||
| `database(String)` | Database name | Yes |
|
||||
| `endpoint(String)` | Custom endpoint URL for Enterprise deployments | No |
|
||||
| `region(String)` | AWS region (default: "us-east-1") | No |
|
||||
| `config(String, String)` | Additional configuration parameters | No |
|
||||
|
||||
## Metadata Operations
|
||||
|
||||
### Creating a Namespace Path
|
||||
|
||||
Namespace paths organize tables hierarchically. Create the desired namespace path before creating tables within it:
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.CreateNamespaceRequest;
|
||||
import org.lance.namespace.model.CreateNamespaceResponse;
|
||||
|
||||
// Create a child namespace path
|
||||
CreateNamespaceRequest request = new CreateNamespaceRequest();
|
||||
request.setId(Arrays.asList("my_namespace"));
|
||||
|
||||
CreateNamespaceResponse response = namespaceClient.createNamespace(request);
|
||||
```
|
||||
|
||||
You can also create nested namespace paths:
|
||||
|
||||
```java
|
||||
// Create a nested namespace path: parent/child
|
||||
CreateNamespaceRequest request = new CreateNamespaceRequest();
|
||||
request.setId(Arrays.asList("parent_namespace", "child_namespace"));
|
||||
|
||||
CreateNamespaceResponse response = namespaceClient.createNamespace(request);
|
||||
```
|
||||
|
||||
### Describing a Namespace Path
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.DescribeNamespaceRequest;
|
||||
import org.lance.namespace.model.DescribeNamespaceResponse;
|
||||
|
||||
DescribeNamespaceRequest request = new DescribeNamespaceRequest();
|
||||
request.setId(Arrays.asList("my_namespace"));
|
||||
|
||||
DescribeNamespaceResponse response = namespaceClient.describeNamespace(request);
|
||||
System.out.println("Namespace properties: " + response.getProperties());
|
||||
```
|
||||
|
||||
### Listing Namespace Paths
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.ListNamespacesRequest;
|
||||
import org.lance.namespace.model.ListNamespacesResponse;
|
||||
|
||||
// List all namespace paths at the root level
|
||||
ListNamespacesRequest request = new ListNamespacesRequest();
|
||||
request.setId(Arrays.asList()); // Empty for root
|
||||
|
||||
ListNamespacesResponse response = namespaceClient.listNamespaces(request);
|
||||
for (String ns : response.getNamespaces()) {
|
||||
System.out.println("Namespace path: " + ns);
|
||||
}
|
||||
|
||||
// List child namespace paths under a parent path
|
||||
ListNamespacesRequest childRequest = new ListNamespacesRequest();
|
||||
childRequest.setId(Arrays.asList("parent_namespace"));
|
||||
|
||||
ListNamespacesResponse childResponse = namespaceClient.listNamespaces(childRequest);
|
||||
```
|
||||
|
||||
### Listing Tables
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.ListTablesRequest;
|
||||
import org.lance.namespace.model.ListTablesResponse;
|
||||
|
||||
// List tables in a namespace path
|
||||
ListTablesRequest request = new ListTablesRequest();
|
||||
request.setId(Arrays.asList("my_namespace"));
|
||||
|
||||
ListTablesResponse response = namespaceClient.listTables(request);
|
||||
for (String table : response.getTables()) {
|
||||
System.out.println("Table: " + table);
|
||||
}
|
||||
```
|
||||
|
||||
### Dropping a Namespace Path
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.DropNamespaceRequest;
|
||||
import org.lance.namespace.model.DropNamespaceResponse;
|
||||
|
||||
DropNamespaceRequest request = new DropNamespaceRequest();
|
||||
request.setId(Arrays.asList("my_namespace"));
|
||||
|
||||
DropNamespaceResponse response = namespaceClient.dropNamespace(request);
|
||||
```
|
||||
|
||||
### Describing a Table
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.DescribeTableRequest;
|
||||
import org.lance.namespace.model.DescribeTableResponse;
|
||||
|
||||
DescribeTableRequest request = new DescribeTableRequest();
|
||||
request.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
|
||||
DescribeTableResponse response = namespaceClient.describeTable(request);
|
||||
System.out.println("Table version: " + response.getVersion());
|
||||
System.out.println("Schema fields: " + response.getSchema().getFields());
|
||||
```
|
||||
|
||||
### Dropping a Table
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.DropTableRequest;
|
||||
import org.lance.namespace.model.DropTableResponse;
|
||||
|
||||
DropTableRequest request = new DropTableRequest();
|
||||
request.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
|
||||
DropTableResponse response = namespaceClient.dropTable(request);
|
||||
```
|
||||
|
||||
## Writing Data
|
||||
|
||||
### Creating a Table
|
||||
|
||||
Tables are created within a namespace path by providing data in Apache Arrow IPC format:
|
||||
|
||||
```java
|
||||
import org.lance.namespace.LanceNamespace;
|
||||
import org.lance.namespace.model.CreateTableRequest;
|
||||
import org.lance.namespace.model.CreateTableResponse;
|
||||
import org.apache.arrow.memory.BufferAllocator;
|
||||
import org.apache.arrow.memory.RootAllocator;
|
||||
import org.apache.arrow.vector.IntVector;
|
||||
import org.apache.arrow.vector.VarCharVector;
|
||||
import org.apache.arrow.vector.VectorSchemaRoot;
|
||||
import org.apache.arrow.vector.complex.FixedSizeListVector;
|
||||
import org.apache.arrow.vector.Float4Vector;
|
||||
import org.apache.arrow.vector.ipc.ArrowStreamWriter;
|
||||
import org.apache.arrow.vector.types.FloatingPointPrecision;
|
||||
import org.apache.arrow.vector.types.pojo.ArrowType;
|
||||
import org.apache.arrow.vector.types.pojo.Field;
|
||||
import org.apache.arrow.vector.types.pojo.FieldType;
|
||||
import org.apache.arrow.vector.types.pojo.Schema;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.nio.channels.Channels;
|
||||
import java.util.Arrays;
|
||||
|
||||
// Create schema with id, name, and embedding fields
|
||||
Schema schema = new Schema(Arrays.asList(
|
||||
new Field("id", FieldType.nullable(new ArrowType.Int(32, true)), null),
|
||||
new Field("name", FieldType.nullable(new ArrowType.Utf8()), null),
|
||||
new Field("embedding",
|
||||
FieldType.nullable(new ArrowType.FixedSizeList(128)),
|
||||
Arrays.asList(new Field("item",
|
||||
FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)),
|
||||
null)))
|
||||
));
|
||||
|
||||
try (BufferAllocator allocator = new RootAllocator();
|
||||
VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
|
||||
|
||||
// Populate data
|
||||
root.setRowCount(3);
|
||||
IntVector idVector = (IntVector) root.getVector("id");
|
||||
VarCharVector nameVector = (VarCharVector) root.getVector("name");
|
||||
FixedSizeListVector embeddingVector = (FixedSizeListVector) root.getVector("embedding");
|
||||
Float4Vector embeddingData = (Float4Vector) embeddingVector.getDataVector();
|
||||
|
||||
for (int i = 0; i < 3; i++) {
|
||||
idVector.setSafe(i, i + 1);
|
||||
nameVector.setSafe(i, ("item_" + i).getBytes());
|
||||
embeddingVector.setNotNull(i);
|
||||
for (int j = 0; j < 128; j++) {
|
||||
embeddingData.setSafe(i * 128 + j, (float) i);
|
||||
}
|
||||
}
|
||||
idVector.setValueCount(3);
|
||||
nameVector.setValueCount(3);
|
||||
embeddingData.setValueCount(3 * 128);
|
||||
embeddingVector.setValueCount(3);
|
||||
|
||||
// Serialize to Arrow IPC format
|
||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||
try (ArrowStreamWriter writer = new ArrowStreamWriter(root, null, Channels.newChannel(out))) {
|
||||
writer.start();
|
||||
writer.writeBatch();
|
||||
writer.end();
|
||||
}
|
||||
byte[] tableData = out.toByteArray();
|
||||
|
||||
// Create a table in a namespace path
|
||||
CreateTableRequest request = new CreateTableRequest();
|
||||
request.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
CreateTableResponse response = namespaceClient.createTable(request, tableData);
|
||||
}
|
||||
```
|
||||
|
||||
### Insert
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.InsertIntoTableRequest;
|
||||
import org.lance.namespace.model.InsertIntoTableResponse;
|
||||
|
||||
// Prepare data in Arrow IPC format (similar to create table example)
|
||||
byte[] insertData = prepareArrowData();
|
||||
|
||||
InsertIntoTableRequest request = new InsertIntoTableRequest();
|
||||
request.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
request.setMode(InsertIntoTableRequest.ModeEnum.APPEND);
|
||||
|
||||
InsertIntoTableResponse response = namespaceClient.insertIntoTable(request, insertData);
|
||||
System.out.println("New version: " + response.getVersion());
|
||||
```
|
||||
|
||||
### Update
|
||||
|
||||
Update rows matching a predicate condition:
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.UpdateTableRequest;
|
||||
import org.lance.namespace.model.UpdateTableResponse;
|
||||
|
||||
UpdateTableRequest request = new UpdateTableRequest();
|
||||
request.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
|
||||
// Predicate to select rows to update
|
||||
request.setPredicate("id = 1");
|
||||
|
||||
// Set new values using SQL expressions as [column_name, expression] pairs
|
||||
request.setUpdates(Arrays.asList(
|
||||
Arrays.asList("name", "'updated_name'")
|
||||
));
|
||||
|
||||
UpdateTableResponse response = namespaceClient.updateTable(request);
|
||||
System.out.println("Updated rows: " + response.getUpdatedRows());
|
||||
```
|
||||
|
||||
### Delete
|
||||
|
||||
Delete rows matching a predicate condition:
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.DeleteFromTableRequest;
|
||||
import org.lance.namespace.model.DeleteFromTableResponse;
|
||||
|
||||
DeleteFromTableRequest request = new DeleteFromTableRequest();
|
||||
request.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
|
||||
// Predicate to select rows to delete
|
||||
request.setPredicate("id > 100");
|
||||
|
||||
DeleteFromTableResponse response = namespaceClient.deleteFromTable(request);
|
||||
System.out.println("New version: " + response.getVersion());
|
||||
```
|
||||
|
||||
### Merge Insert (Upsert)
|
||||
|
||||
Merge insert allows you to update existing rows and insert new rows in a single operation based on a key column:
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.MergeInsertIntoTableRequest;
|
||||
import org.lance.namespace.model.MergeInsertIntoTableResponse;
|
||||
|
||||
// Prepare data with rows to update (id=2,3) and new rows (id=4)
|
||||
byte[] mergeData = prepareArrowData(); // Contains rows with id=2,3,4
|
||||
|
||||
MergeInsertIntoTableRequest request = new MergeInsertIntoTableRequest();
|
||||
request.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
|
||||
// Match on the "id" column
|
||||
request.setOn("id");
|
||||
|
||||
// Update all columns when a matching row is found
|
||||
request.setWhenMatchedUpdateAll(true);
|
||||
|
||||
// Insert new rows when no match is found
|
||||
request.setWhenNotMatchedInsertAll(true);
|
||||
|
||||
MergeInsertIntoTableResponse response = namespaceClient.mergeInsertIntoTable(request, mergeData);
|
||||
|
||||
System.out.println("Updated rows: " + response.getNumUpdatedRows());
|
||||
System.out.println("Inserted rows: " + response.getNumInsertedRows());
|
||||
```
|
||||
|
||||
## Querying Data
|
||||
|
||||
### Counting Rows
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.CountTableRowsRequest;
|
||||
|
||||
CountTableRowsRequest request = new CountTableRowsRequest();
|
||||
request.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
|
||||
Long rowCount = namespaceClient.countTableRows(request);
|
||||
System.out.println("Row count: " + rowCount);
|
||||
```
|
||||
|
||||
### Vector Search
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.QueryTableRequest;
|
||||
import org.lance.namespace.model.QueryTableRequestVector;
|
||||
|
||||
QueryTableRequest query = new QueryTableRequest();
|
||||
query.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
query.setK(10); // Return top 10 results
|
||||
|
||||
// Set the query vector
|
||||
List<Float> queryVector = new ArrayList<>();
|
||||
for (int i = 0; i < 128; i++) {
|
||||
queryVector.add(1.0f);
|
||||
}
|
||||
QueryTableRequestVector vector = new QueryTableRequestVector();
|
||||
vector.setSingleVector(queryVector);
|
||||
query.setVector(vector);
|
||||
|
||||
// Specify columns to return
|
||||
query.setColumns(Arrays.asList("id", "name", "embedding"));
|
||||
|
||||
// Execute query - returns Arrow IPC format
|
||||
byte[] result = namespaceClient.queryTable(query);
|
||||
```
|
||||
|
||||
### Full Text Search
|
||||
|
||||
```java
|
||||
import org.lance.namespace.model.QueryTableRequest;
|
||||
import org.lance.namespace.model.QueryTableRequestFullTextQuery;
|
||||
import org.lance.namespace.model.StringFtsQuery;
|
||||
|
||||
QueryTableRequest query = new QueryTableRequest();
|
||||
query.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
query.setK(10);
|
||||
|
||||
// Set full text search query
|
||||
StringFtsQuery stringQuery = new StringFtsQuery();
|
||||
stringQuery.setQuery("search terms");
|
||||
stringQuery.setColumns(Arrays.asList("text_column"));
|
||||
|
||||
QueryTableRequestFullTextQuery fts = new QueryTableRequestFullTextQuery();
|
||||
fts.setStringQuery(stringQuery);
|
||||
query.setFullTextQuery(fts);
|
||||
|
||||
// Specify columns to return
|
||||
query.setColumns(Arrays.asList("id", "text_column"));
|
||||
|
||||
byte[] result = namespaceClient.queryTable(query);
|
||||
```
|
||||
|
||||
### Query with Filter
|
||||
|
||||
```java
|
||||
QueryTableRequest query = new QueryTableRequest();
|
||||
query.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
query.setK(10);
|
||||
query.setFilter("id > 50");
|
||||
query.setColumns(Arrays.asList("id", "name"));
|
||||
|
||||
byte[] result = namespaceClient.queryTable(query);
|
||||
```
|
||||
|
||||
### Query with Prefilter
|
||||
|
||||
```java
|
||||
QueryTableRequest query = new QueryTableRequest();
|
||||
query.setId(Arrays.asList("my_namespace", "my_table"));
|
||||
query.setK(5);
|
||||
query.setPrefilter(true); // Apply filter before vector search
|
||||
query.setFilter("category = 'electronics'");
|
||||
|
||||
// Set query vector
|
||||
QueryTableRequestVector vector = new QueryTableRequestVector();
|
||||
vector.setSingleVector(queryVector);
|
||||
query.setVector(vector);
|
||||
|
||||
byte[] result = namespaceClient.queryTable(query);
|
||||
```
|
||||
|
||||
### Reading Query Results
|
||||
|
||||
Query results are returned in Apache Arrow IPC file format. Here's how to read them:
|
||||
|
||||
```java
|
||||
import org.apache.arrow.vector.ipc.ArrowFileReader;
|
||||
import org.apache.arrow.vector.VectorSchemaRoot;
|
||||
import org.apache.arrow.memory.BufferAllocator;
|
||||
import org.apache.arrow.memory.RootAllocator;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.SeekableByteChannel;
|
||||
|
||||
// Helper class to read Arrow data from byte array
|
||||
class ByteArraySeekableByteChannel implements SeekableByteChannel {
|
||||
private final byte[] data;
|
||||
private long position = 0;
|
||||
private boolean isOpen = true;
|
||||
|
||||
public ByteArraySeekableByteChannel(byte[] data) {
|
||||
this.data = data;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int read(ByteBuffer dst) {
|
||||
int remaining = dst.remaining();
|
||||
int available = (int) (data.length - position);
|
||||
if (available <= 0) return -1;
|
||||
int toRead = Math.min(remaining, available);
|
||||
dst.put(data, (int) position, toRead);
|
||||
position += toRead;
|
||||
return toRead;
|
||||
}
|
||||
|
||||
@Override public long position() { return position; }
|
||||
@Override public SeekableByteChannel position(long newPosition) { position = newPosition; return this; }
|
||||
@Override public long size() { return data.length; }
|
||||
@Override public boolean isOpen() { return isOpen; }
|
||||
@Override public void close() { isOpen = false; }
|
||||
@Override public int write(ByteBuffer src) { throw new UnsupportedOperationException(); }
|
||||
@Override public SeekableByteChannel truncate(long size) { throw new UnsupportedOperationException(); }
|
||||
}
|
||||
|
||||
// Read query results
|
||||
byte[] queryResult = namespaceClient.queryTable(query);
|
||||
|
||||
try (BufferAllocator allocator = new RootAllocator();
|
||||
ArrowFileReader reader = new ArrowFileReader(
|
||||
new ByteArraySeekableByteChannel(queryResult), allocator)) {
|
||||
|
||||
for (int i = 0; i < reader.getRecordBlocks().size(); i++) {
|
||||
reader.loadRecordBatch(reader.getRecordBlocks().get(i));
|
||||
VectorSchemaRoot root = reader.getVectorSchemaRoot();
|
||||
|
||||
// Access data
|
||||
IntVector idVector = (IntVector) root.getVector("id");
|
||||
VarCharVector nameVector = (VarCharVector) root.getVector("name");
|
||||
|
||||
for (int row = 0; row < root.getRowCount(); row++) {
|
||||
int id = idVector.get(row);
|
||||
String name = new String(nameVector.get(row));
|
||||
System.out.println("Row " + row + ": id=" + id + ", name=" + name);
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
@@ -34,7 +34,7 @@ const results = await table.vectorSearch([0.1, 0.3]).limit(20).toArray();
|
||||
console.log(results);
|
||||
```
|
||||
|
||||
The [quickstart](https://docs.lancedb.com/quickstart/) contains more complete examples.
|
||||
The [quickstart](https://lancedb.github.io/lancedb/basic/) contains a more complete example.
|
||||
|
||||
## Development
|
||||
|
||||
|
||||
@@ -12,22 +12,20 @@ Typescript.
|
||||
* `src/`: Rust bindings source code
|
||||
* `lancedb/`: Typescript package source code
|
||||
* `__test__/`: Unit tests
|
||||
* `examples/`: A pnpm package with the examples shown in the documentation
|
||||
* `examples/`: An npm package with the examples shown in the documentation
|
||||
|
||||
## Development environment
|
||||
|
||||
To set up your development environment, you will need to install the following:
|
||||
|
||||
1. Node.js 22 or later (required by pnpm 11)
|
||||
2. [pnpm](https://pnpm.io/installation) 11 or later (or run via `corepack enable`,
|
||||
which uses the `packageManager` field in `package.json`)
|
||||
3. Rust's package manager, Cargo. Use [rustup](https://rustup.rs/) to install.
|
||||
4. [protoc](https://grpc.io/docs/protoc-installation/) (Protocol Buffers compiler)
|
||||
1. Node.js 14 or later
|
||||
2. Rust's package manager, Cargo. Use [rustup](https://rustup.rs/) to install.
|
||||
3. [protoc](https://grpc.io/docs/protoc-installation/) (Protocol Buffers compiler)
|
||||
|
||||
Initial setup:
|
||||
|
||||
```shell
|
||||
pnpm install
|
||||
npm install
|
||||
```
|
||||
|
||||
### Commit Hooks
|
||||
@@ -41,38 +39,38 @@ pre-commit install
|
||||
|
||||
## Development
|
||||
|
||||
Most common development commands can be run using the pnpm scripts.
|
||||
Most common development commands can be run using the npm scripts.
|
||||
|
||||
Build the package
|
||||
|
||||
```shell
|
||||
pnpm install
|
||||
pnpm build
|
||||
npm install
|
||||
npm run build
|
||||
```
|
||||
|
||||
Lint:
|
||||
|
||||
```shell
|
||||
pnpm lint
|
||||
npm run lint
|
||||
```
|
||||
|
||||
Format and fix lints:
|
||||
|
||||
```shell
|
||||
pnpm lint-fix
|
||||
npm run lint-fix
|
||||
```
|
||||
|
||||
Run tests:
|
||||
|
||||
```shell
|
||||
pnpm test
|
||||
npm test
|
||||
```
|
||||
|
||||
To run a single test:
|
||||
|
||||
```shell
|
||||
# Single file: table.test.ts
|
||||
pnpm test -- table.test.ts
|
||||
npm test -- table.test.ts
|
||||
# Single test: 'merge insert' in table.test.ts
|
||||
pnpm test -- table.test.ts --testNamePattern=merge\ insert
|
||||
npm test -- table.test.ts --testNamePattern=merge\ insert
|
||||
```
|
||||
|
||||
@@ -61,8 +61,8 @@ sharing the same data, deletion, and index files.
|
||||
* **options.sourceVersion?**: `number`
|
||||
The version of the source table to clone.
|
||||
|
||||
* **options.targetNamespacePath?**: `string`[]
|
||||
The namespace path for the target table (defaults to root namespace).
|
||||
* **options.targetNamespace?**: `string`[]
|
||||
The namespace for the target table (defaults to root namespace).
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -116,13 +116,13 @@ Creates a new empty Table
|
||||
|
||||
`Promise`<[`Table`](Table.md)>
|
||||
|
||||
#### createEmptyTable(name, schema, namespacePath, options)
|
||||
#### createEmptyTable(name, schema, namespace, options)
|
||||
|
||||
```ts
|
||||
abstract createEmptyTable(
|
||||
name,
|
||||
schema,
|
||||
namespacePath?,
|
||||
namespace?,
|
||||
options?): Promise<Table>
|
||||
```
|
||||
|
||||
@@ -136,8 +136,8 @@ Creates a new empty Table
|
||||
* **schema**: [`SchemaLike`](../type-aliases/SchemaLike.md)
|
||||
The schema of the table
|
||||
|
||||
* **namespacePath?**: `string`[]
|
||||
The namespace path to create the table in (defaults to root namespace)
|
||||
* **namespace?**: `string`[]
|
||||
The namespace to create the table in (defaults to root namespace)
|
||||
|
||||
* **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
|
||||
Additional options
|
||||
@@ -148,39 +148,12 @@ Creates a new empty Table
|
||||
|
||||
***
|
||||
|
||||
### createNamespace()
|
||||
|
||||
```ts
|
||||
abstract createNamespace(namespacePath, options?): Promise<CreateNamespaceResponse>
|
||||
```
|
||||
|
||||
Create a new namespace at the given path.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **namespacePath**: `string`[]
|
||||
The namespace path to create.
|
||||
|
||||
* **options?**: `Partial`<[`CreateNamespaceOptions`](../interfaces/CreateNamespaceOptions.md)>
|
||||
Creation `mode`
|
||||
("create" | "exist_ok" | "overwrite") and optional `properties`
|
||||
to attach to the namespace.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`CreateNamespaceResponse`](../interfaces/CreateNamespaceResponse.md)>
|
||||
|
||||
The properties of the
|
||||
created namespace and an optional transaction id.
|
||||
|
||||
***
|
||||
|
||||
### createTable()
|
||||
|
||||
#### createTable(options, namespacePath)
|
||||
#### createTable(options, namespace)
|
||||
|
||||
```ts
|
||||
abstract createTable(options, namespacePath?): Promise<Table>
|
||||
abstract createTable(options, namespace?): Promise<Table>
|
||||
```
|
||||
|
||||
Creates a new Table and initialize it with new data.
|
||||
@@ -190,8 +163,8 @@ Creates a new Table and initialize it with new data.
|
||||
* **options**: `object` & `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
|
||||
The options object.
|
||||
|
||||
* **namespacePath?**: `string`[]
|
||||
The namespace path to create the table in (defaults to root namespace)
|
||||
* **namespace?**: `string`[]
|
||||
The namespace to create the table in (defaults to root namespace)
|
||||
|
||||
##### Returns
|
||||
|
||||
@@ -224,13 +197,13 @@ Creates a new Table and initialize it with new data.
|
||||
|
||||
`Promise`<[`Table`](Table.md)>
|
||||
|
||||
#### createTable(name, data, namespacePath, options)
|
||||
#### createTable(name, data, namespace, options)
|
||||
|
||||
```ts
|
||||
abstract createTable(
|
||||
name,
|
||||
data,
|
||||
namespacePath?,
|
||||
namespace?,
|
||||
options?): Promise<Table>
|
||||
```
|
||||
|
||||
@@ -245,8 +218,8 @@ Creates a new Table and initialize it with new data.
|
||||
Non-empty Array of Records
|
||||
to be inserted into the table
|
||||
|
||||
* **namespacePath?**: `string`[]
|
||||
The namespace path to create the table in (defaults to root namespace)
|
||||
* **namespace?**: `string`[]
|
||||
The namespace to create the table in (defaults to root namespace)
|
||||
|
||||
* **options?**: `Partial`<[`CreateTableOptions`](../interfaces/CreateTableOptions.md)>
|
||||
Additional options
|
||||
@@ -257,29 +230,6 @@ Creates a new Table and initialize it with new data.
|
||||
|
||||
***
|
||||
|
||||
### describeNamespace()
|
||||
|
||||
```ts
|
||||
abstract describeNamespace(namespacePath): Promise<DescribeNamespaceResponse>
|
||||
```
|
||||
|
||||
Describe a namespace, returning its properties.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **namespacePath**: `string`[]
|
||||
The namespace path to describe, in
|
||||
parent → child order, e.g. `["analytics", "sales"]`.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`DescribeNamespaceResponse`](../interfaces/DescribeNamespaceResponse.md)>
|
||||
|
||||
The namespace's properties
|
||||
(may be undefined if the namespace has none).
|
||||
|
||||
***
|
||||
|
||||
### display()
|
||||
|
||||
```ts
|
||||
@@ -297,15 +247,15 @@ Return a brief description of the connection
|
||||
### dropAllTables()
|
||||
|
||||
```ts
|
||||
abstract dropAllTables(namespacePath?): Promise<void>
|
||||
abstract dropAllTables(namespace?): Promise<void>
|
||||
```
|
||||
|
||||
Drop all tables in the database.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **namespacePath?**: `string`[]
|
||||
The namespace path to drop tables from (defaults to root namespace).
|
||||
* **namespace?**: `string`[]
|
||||
The namespace to drop tables from (defaults to root namespace).
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -313,40 +263,10 @@ Drop all tables in the database.
|
||||
|
||||
***
|
||||
|
||||
### dropNamespace()
|
||||
|
||||
```ts
|
||||
abstract dropNamespace(namespacePath, options?): Promise<DropNamespaceResponse>
|
||||
```
|
||||
|
||||
Drop a namespace.
|
||||
|
||||
Use `behavior: "cascade"` to also drop everything contained in the
|
||||
namespace (sub-namespaces and tables). The default `"restrict"`
|
||||
behavior refuses to drop a non-empty namespace.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **namespacePath**: `string`[]
|
||||
The namespace path to drop.
|
||||
|
||||
* **options?**: `Partial`<[`DropNamespaceOptions`](../interfaces/DropNamespaceOptions.md)>
|
||||
`mode` ("skip" | "fail"
|
||||
for missing-namespace handling) and `behavior` ("restrict" | "cascade").
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`DropNamespaceResponse`](../interfaces/DropNamespaceResponse.md)>
|
||||
|
||||
Any properties returned by
|
||||
the server and an optional transaction id.
|
||||
|
||||
***
|
||||
|
||||
### dropTable()
|
||||
|
||||
```ts
|
||||
abstract dropTable(name, namespacePath?): Promise<void>
|
||||
abstract dropTable(name, namespace?): Promise<void>
|
||||
```
|
||||
|
||||
Drop an existing table.
|
||||
@@ -356,8 +276,8 @@ Drop an existing table.
|
||||
* **name**: `string`
|
||||
The name of the table to drop.
|
||||
|
||||
* **namespacePath?**: `string`[]
|
||||
The namespace path of the table (defaults to root namespace).
|
||||
* **namespace?**: `string`[]
|
||||
The namespace of the table (defaults to root namespace).
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -379,42 +299,12 @@ Return true if the connection has not been closed
|
||||
|
||||
***
|
||||
|
||||
### listNamespaces()
|
||||
|
||||
```ts
|
||||
abstract listNamespaces(namespacePath?, options?): Promise<ListNamespacesResponse>
|
||||
```
|
||||
|
||||
List the immediate child namespaces under the given parent.
|
||||
|
||||
Results may be paginated. To retrieve subsequent pages, pass the
|
||||
`pageToken` returned by a previous call.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **namespacePath?**: `string`[]
|
||||
The parent namespace path. Defaults
|
||||
to the root namespace if omitted.
|
||||
|
||||
* **options?**: `Partial`<[`ListNamespacesOptions`](../interfaces/ListNamespacesOptions.md)>
|
||||
Pagination options
|
||||
(`pageToken`, `limit`).
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`ListNamespacesResponse`](../interfaces/ListNamespacesResponse.md)>
|
||||
|
||||
Child namespace names and
|
||||
an optional token for fetching the next page.
|
||||
|
||||
***
|
||||
|
||||
### openTable()
|
||||
|
||||
```ts
|
||||
abstract openTable(
|
||||
name,
|
||||
namespacePath?,
|
||||
namespace?,
|
||||
options?): Promise<Table>
|
||||
```
|
||||
|
||||
@@ -425,8 +315,8 @@ Open a table in the database.
|
||||
* **name**: `string`
|
||||
The name of the table
|
||||
|
||||
* **namespacePath?**: `string`[]
|
||||
The namespace path of the table (defaults to root namespace)
|
||||
* **namespace?**: `string`[]
|
||||
The namespace of the table (defaults to root namespace)
|
||||
|
||||
* **options?**: `Partial`<[`OpenTableOptions`](../interfaces/OpenTableOptions.md)>
|
||||
Additional options
|
||||
@@ -437,39 +327,6 @@ Open a table in the database.
|
||||
|
||||
***
|
||||
|
||||
### renameTable()
|
||||
|
||||
```ts
|
||||
abstract renameTable(
|
||||
currentName,
|
||||
newName,
|
||||
options?): Promise<void>
|
||||
```
|
||||
|
||||
Rename a table.
|
||||
|
||||
Currently only supported by LanceDB Cloud. Local OSS connections and
|
||||
namespace-backed connections (via [connectNamespace](../functions/connectNamespace.md)) reject with
|
||||
a "not supported" error.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **currentName**: `string`
|
||||
The current name of the table.
|
||||
|
||||
* **newName**: `string`
|
||||
The new name for the table.
|
||||
|
||||
* **options?**: [`RenameTableOptions`](../interfaces/RenameTableOptions.md)
|
||||
Optional namespace paths. When
|
||||
`newNamespacePath` is omitted the table stays in `namespacePath`.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`void`>
|
||||
|
||||
***
|
||||
|
||||
### tableNames()
|
||||
|
||||
#### tableNames(options)
|
||||
@@ -492,10 +349,10 @@ Tables will be returned in lexicographical order.
|
||||
|
||||
`Promise`<`string`[]>
|
||||
|
||||
#### tableNames(namespacePath, options)
|
||||
#### tableNames(namespace, options)
|
||||
|
||||
```ts
|
||||
abstract tableNames(namespacePath?, options?): Promise<string[]>
|
||||
abstract tableNames(namespace?, options?): Promise<string[]>
|
||||
```
|
||||
|
||||
List all the table names in this database.
|
||||
@@ -504,8 +361,8 @@ Tables will be returned in lexicographical order.
|
||||
|
||||
##### Parameters
|
||||
|
||||
* **namespacePath?**: `string`[]
|
||||
The namespace path to list tables from (defaults to root namespace)
|
||||
* **namespace?**: `string`[]
|
||||
The namespace to list tables from (defaults to root namespace)
|
||||
|
||||
* **options?**: `Partial`<[`TableNamesOptions`](../interfaces/TableNamesOptions.md)>
|
||||
options to control the
|
||||
|
||||
@@ -147,7 +147,7 @@ A new PermutationBuilder instance
|
||||
#### Example
|
||||
|
||||
```ts
|
||||
builder.splitCalculated({ calculation: "user_id % 3" });
|
||||
builder.splitCalculated("user_id % 3");
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
@@ -343,30 +343,6 @@ This is useful for pagination.
|
||||
|
||||
***
|
||||
|
||||
### orderBy()
|
||||
|
||||
```ts
|
||||
orderBy(ordering): this
|
||||
```
|
||||
|
||||
Sort the results by the specified column(s).
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **ordering**: [`ColumnOrdering`](../interfaces/ColumnOrdering.md) \| [`ColumnOrdering`](../interfaces/ColumnOrdering.md)[]
|
||||
|
||||
#### Returns
|
||||
|
||||
`this`
|
||||
|
||||
This query builder.
|
||||
|
||||
#### Inherited from
|
||||
|
||||
`StandardQueryBase.orderBy`
|
||||
|
||||
***
|
||||
|
||||
### outputSchema()
|
||||
|
||||
```ts
|
||||
|
||||
@@ -1,173 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / Scannable
|
||||
|
||||
# Class: Scannable
|
||||
|
||||
A data source that can be scanned as a stream of Arrow `RecordBatch`es.
|
||||
|
||||
`Scannable` wraps the schema + optional row count + rescannable flag and
|
||||
a callback that yields batches one at a time. It is passed to consumers
|
||||
(e.g. `Table.add`, `createTable`, `mergeInsert` — follow-up work) that
|
||||
need to pull data without materializing the full dataset in JS memory.
|
||||
|
||||
Batches cross the JS↔Rust boundary as Arrow IPC Stream messages; a fresh
|
||||
writer serializes each batch, and the Rust side decodes it with
|
||||
`arrow_ipc::reader::StreamReader`. One batch is in flight at a time.
|
||||
|
||||
## Properties
|
||||
|
||||
### numRows
|
||||
|
||||
```ts
|
||||
readonly numRows: null | number;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### rescannable
|
||||
|
||||
```ts
|
||||
readonly rescannable: boolean;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### schema
|
||||
|
||||
```ts
|
||||
readonly schema: Schema<any>;
|
||||
```
|
||||
|
||||
## Methods
|
||||
|
||||
### fromFactory()
|
||||
|
||||
```ts
|
||||
static fromFactory(
|
||||
schema,
|
||||
factory,
|
||||
opts): Promise<Scannable>
|
||||
```
|
||||
|
||||
Build a Scannable from an explicit schema and a factory that returns a
|
||||
fresh batch iterator on each call.
|
||||
|
||||
The factory is invoked once per scan. Each iterator yields
|
||||
`RecordBatch`es matching the declared schema. Use this when you need
|
||||
direct control over the pull loop — for example, to wrap a streaming
|
||||
source whose batches are produced lazily.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **schema**: `Schema`<`any`>
|
||||
The Arrow schema of the produced batches.
|
||||
|
||||
* **factory**
|
||||
Called at the start of each scan to produce a batch
|
||||
iterator. Must be idempotent when `rescannable` is true.
|
||||
|
||||
* **opts**: [`ScannableOptions`](../interfaces/ScannableOptions.md) = `{}`
|
||||
Optional hints. `rescannable` defaults to `true`; set to
|
||||
`false` if calling `factory()` twice would not reproduce the same data.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Scannable`](Scannable.md)>
|
||||
|
||||
***
|
||||
|
||||
### fromIterable()
|
||||
|
||||
```ts
|
||||
static fromIterable(
|
||||
schema,
|
||||
iter,
|
||||
opts): Promise<Scannable>
|
||||
```
|
||||
|
||||
Build a Scannable from an iterable of `RecordBatch`es. `rescannable`
|
||||
defaults to `false`. Pass an explicit schema so the consumer can
|
||||
validate before any batch is pulled.
|
||||
|
||||
`opts.rescannable: true` is honest for replayable iterables (Arrays,
|
||||
Sets, or custom iterables whose `[Symbol.iterator]()` returns a fresh
|
||||
iterator each call). It is rejected for one-shot iterables (generators,
|
||||
async generators, or already-an-iterator inputs) because their
|
||||
`[Symbol.iterator]()` returns the same exhausted object on the second
|
||||
scan. For replayable sources outside this shape, use
|
||||
`fromFactory(schema, () => createIter(), { rescannable: true })`.
|
||||
|
||||
Note: when `opts.rescannable` is `true`, the constructor calls
|
||||
`[Symbol.iterator]()` once on the input to perform the structural check.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **schema**: `Schema`<`any`>
|
||||
|
||||
* **iter**: `Iterable`<`RecordBatch`<`any`>> \| `AsyncIterable`<`RecordBatch`<`any`>>
|
||||
|
||||
* **opts**: [`ScannableOptions`](../interfaces/ScannableOptions.md) = `{}`
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Scannable`](Scannable.md)>
|
||||
|
||||
***
|
||||
|
||||
### fromRecordBatchReader()
|
||||
|
||||
```ts
|
||||
static fromRecordBatchReader(reader, opts): Promise<Scannable>
|
||||
```
|
||||
|
||||
Build a Scannable from an Arrow `RecordBatchReader`. A reader can only
|
||||
be consumed once; `rescannable` defaults to `false`.
|
||||
|
||||
The reader must already be opened (via `.open()`) so its `.schema` is
|
||||
populated. `RecordBatchReader.from(...)` returns an unopened reader.
|
||||
|
||||
`opts.rescannable: true` is rejected because `RecordBatchReader` is a
|
||||
self-iterator (its `[Symbol.iterator]()` returns itself), and this
|
||||
constructor does not call `reader.reset()` between scans, so a second
|
||||
scan would always see an exhausted reader. For genuinely replayable
|
||||
sources, use
|
||||
`fromFactory(schema, () => openReader(), { rescannable: true })`,
|
||||
which mints a fresh reader on each scan.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **reader**: `RecordBatchReader`<`any`>
|
||||
|
||||
* **opts**: [`ScannableOptions`](../interfaces/ScannableOptions.md) = `{}`
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Scannable`](Scannable.md)>
|
||||
|
||||
***
|
||||
|
||||
### fromTable()
|
||||
|
||||
```ts
|
||||
static fromTable(table, opts): Promise<Scannable>
|
||||
```
|
||||
|
||||
Build a Scannable from an in-memory Arrow `Table`. Always rescannable;
|
||||
the table's batches are replayed on each scan.
|
||||
|
||||
The table's row count is authoritative: `opts.numRows` must either be
|
||||
omitted or equal to `table.numRows`. `opts.rescannable` of `false` is
|
||||
rejected because in-memory Tables are always rescannable.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **table**: `Table`<`any`>
|
||||
|
||||
* **opts**: [`ScannableOptions`](../interfaces/ScannableOptions.md) = `{}`
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<[`Scannable`](Scannable.md)>
|
||||
@@ -71,12 +71,11 @@ Add new columns with defined values.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **newColumnTransforms**: `Field`<`any`> \| `Field`<`any`>[] \| `Schema`<`any`> \| [`AddColumnsSql`](../interfaces/AddColumnsSql.md)[]
|
||||
Either:
|
||||
- An array of objects with column names and SQL expressions to calculate values
|
||||
- A single Arrow Field defining one column with its data type (column will be initialized with null values)
|
||||
- An array of Arrow Fields defining columns with their data types (columns will be initialized with null values)
|
||||
- An Arrow Schema defining columns with their data types (columns will be initialized with null values)
|
||||
* **newColumnTransforms**: [`AddColumnsSql`](../interfaces/AddColumnsSql.md)[]
|
||||
pairs of column names and
|
||||
the SQL expression to use to calculate the value of the new column. These
|
||||
expressions will be evaluated for each row in the table, and can
|
||||
reference existing columns in the table.
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -368,27 +367,6 @@ Use [Table.listIndices](Table.md#listindices) to find the names of the indices.
|
||||
|
||||
***
|
||||
|
||||
### initialStorageOptions()
|
||||
|
||||
```ts
|
||||
abstract initialStorageOptions(): Promise<undefined | null | Record<string, string>>
|
||||
```
|
||||
|
||||
Get the initial storage options that were passed in when opening this table.
|
||||
|
||||
For dynamically refreshed options (e.g., credential vending), use
|
||||
[Table.latestStorageOptions](Table.md#lateststorageoptions).
|
||||
|
||||
Warning: This is an internal API and the return value is subject to change.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`undefined` \| `null` \| `Record`<`string`, `string`>>
|
||||
|
||||
The storage options, or undefined if no storage options were configured.
|
||||
|
||||
***
|
||||
|
||||
### isOpen()
|
||||
|
||||
```ts
|
||||
@@ -403,28 +381,6 @@ Return true if the table has not been closed
|
||||
|
||||
***
|
||||
|
||||
### latestStorageOptions()
|
||||
|
||||
```ts
|
||||
abstract latestStorageOptions(): Promise<undefined | null | Record<string, string>>
|
||||
```
|
||||
|
||||
Get the latest storage options, refreshing from provider if configured.
|
||||
|
||||
This method is useful for credential vending scenarios where storage options
|
||||
may be refreshed dynamically. If no dynamic provider is configured, this
|
||||
returns the initial static options.
|
||||
|
||||
Warning: This is an internal API and the return value is subject to change.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`undefined` \| `null` \| `Record`<`string`, `string`>>
|
||||
|
||||
The storage options, or undefined if no storage options were configured.
|
||||
|
||||
***
|
||||
|
||||
### listIndices()
|
||||
|
||||
```ts
|
||||
@@ -485,7 +441,19 @@ Modeled after ``VACUUM`` in PostgreSQL.
|
||||
- Prune: Removes old versions of the dataset
|
||||
- Index: Optimizes the indices, adding new data to existing indices
|
||||
|
||||
The frequency an application should call optimize is based on the frequency of
|
||||
Experimental API
|
||||
----------------
|
||||
|
||||
The optimization process is undergoing active development and may change.
|
||||
Our goal with these changes is to improve the performance of optimization and
|
||||
reduce the complexity.
|
||||
|
||||
That being said, it is essential today to run optimize if you want the best
|
||||
performance. It should be stable and safe to use in production, but it our
|
||||
hope that the API may be simplified (or not even need to be called) in the
|
||||
future.
|
||||
|
||||
The frequency an application shoudl call optimize is based on the frequency of
|
||||
data modifications. If data is frequently added, deleted, or updated then
|
||||
optimize should be run frequently. A good rule of thumb is to run optimize if
|
||||
you have added or modified 100,000 or more records or run more than 20 data
|
||||
@@ -501,34 +469,6 @@ Modeled after ``VACUUM`` in PostgreSQL.
|
||||
|
||||
***
|
||||
|
||||
### prewarmData()
|
||||
|
||||
```ts
|
||||
abstract prewarmData(columns?): Promise<void>
|
||||
```
|
||||
|
||||
Prewarm one or more columns of data in the table.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **columns?**: `string`[]
|
||||
The columns to prewarm. If undefined, all columns are prewarmed.
|
||||
This will load the column data into the page cache so that future queries that
|
||||
read those columns avoid the initial cold-start latency. This call initiates
|
||||
prewarming and returns once the request is accepted; the warming itself may
|
||||
continue in the background. Calling it on already-prewarmed columns is a
|
||||
no-op on the server.
|
||||
Prewarming is generally useful for columns used in filters or projections.
|
||||
Large columns (e.g. high-dimensional vectors or binary data) may not be
|
||||
practical to prewarm.
|
||||
This feature is currently only supported on remote tables.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`void`>
|
||||
|
||||
***
|
||||
|
||||
### prewarmIndex()
|
||||
|
||||
```ts
|
||||
@@ -690,74 +630,6 @@ of the given query
|
||||
|
||||
***
|
||||
|
||||
### setLsmWriteSpec()
|
||||
|
||||
```ts
|
||||
abstract setLsmWriteSpec(spec): Promise<void>
|
||||
```
|
||||
|
||||
Install an [LsmWriteSpec](../interfaces/LsmWriteSpec.md) on this table, selecting Lance's MemWAL
|
||||
LSM-style write path for future `mergeInsert` calls.
|
||||
|
||||
`LsmWriteSpec` chooses one of three sharding strategies via `specType`:
|
||||
|
||||
- `"bucket"` — hash-bucket writes by the single-column unenforced primary
|
||||
key (`column` and `numBuckets` required).
|
||||
- `"identity"` — shard by the raw value of a scalar `column`.
|
||||
- `"unsharded"` — route every write to a single shard.
|
||||
|
||||
All variants require the table to have an unenforced primary key
|
||||
([Table#setUnenforcedPrimaryKey](Table.md#setunenforcedprimarykey)); bucket sharding additionally
|
||||
requires it to be the single column being bucketed.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **spec**: [`LsmWriteSpec`](../interfaces/LsmWriteSpec.md)
|
||||
The sharding spec to install.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`void`>
|
||||
|
||||
#### Example
|
||||
|
||||
```ts
|
||||
await table.setUnenforcedPrimaryKey("id");
|
||||
await table.setLsmWriteSpec({
|
||||
specType: "bucket",
|
||||
column: "id",
|
||||
numBuckets: 16,
|
||||
maintainedIndexes: ["id_idx"],
|
||||
});
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### setUnenforcedPrimaryKey()
|
||||
|
||||
```ts
|
||||
abstract setUnenforcedPrimaryKey(columns): Promise<void>
|
||||
```
|
||||
|
||||
Set the unenforced primary key for this table to a single column.
|
||||
|
||||
"Unenforced" means LanceDB does not check uniqueness on writes; the
|
||||
column is recorded in the schema as the primary key for use by features
|
||||
such as `merge_insert`. Only single-column primary keys are supported,
|
||||
and the key cannot be changed once set.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **columns**: `string` \| `string`[]
|
||||
The primary key column. A one-element
|
||||
array is also accepted; passing more than one column is rejected.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`void`>
|
||||
|
||||
***
|
||||
|
||||
### stats()
|
||||
|
||||
```ts
|
||||
@@ -833,11 +705,8 @@ Create a query that returns a subset of the rows in the table.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **rowIds**: readonly (`number` \| `bigint`)[]
|
||||
* **rowIds**: `number`[]
|
||||
The row ids of the rows to return.
|
||||
Row ids returned by `withRowId()` are `bigint`, so `bigint[]` is supported.
|
||||
For convenience / backwards compatibility, `number[]` is also accepted (for
|
||||
small row ids that fit in a safe integer).
|
||||
|
||||
#### Returns
|
||||
|
||||
@@ -861,23 +730,6 @@ Return the table as an arrow table
|
||||
|
||||
***
|
||||
|
||||
### unsetLsmWriteSpec()
|
||||
|
||||
```ts
|
||||
abstract unsetLsmWriteSpec(): Promise<void>
|
||||
```
|
||||
|
||||
Remove the [LsmWriteSpec](../interfaces/LsmWriteSpec.md) from this table, reverting to the standard
|
||||
`mergeInsert` write path.
|
||||
|
||||
Errors if no spec is currently set.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`void`>
|
||||
|
||||
***
|
||||
|
||||
### update()
|
||||
|
||||
#### update(opts)
|
||||
|
||||
@@ -498,30 +498,6 @@ This is useful for pagination.
|
||||
|
||||
***
|
||||
|
||||
### orderBy()
|
||||
|
||||
```ts
|
||||
orderBy(ordering): this
|
||||
```
|
||||
|
||||
Sort the results by the specified column(s).
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **ordering**: [`ColumnOrdering`](../interfaces/ColumnOrdering.md) \| [`ColumnOrdering`](../interfaces/ColumnOrdering.md)[]
|
||||
|
||||
#### Returns
|
||||
|
||||
`this`
|
||||
|
||||
This query builder.
|
||||
|
||||
#### Inherited from
|
||||
|
||||
`StandardQueryBase.orderBy`
|
||||
|
||||
***
|
||||
|
||||
### outputSchema()
|
||||
|
||||
```ts
|
||||
|
||||
@@ -1,131 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / connectNamespace
|
||||
|
||||
# Function: connectNamespace()
|
||||
|
||||
## connectNamespace(implName, config, options)
|
||||
|
||||
```ts
|
||||
function connectNamespace(
|
||||
implName,
|
||||
config,
|
||||
options?): Promise<Connection>
|
||||
```
|
||||
|
||||
Connect to a LanceDB database through a namespace.
|
||||
|
||||
Unlike [connect](connect.md), which routes by URI scheme (local path vs.
|
||||
`db://` cloud), `connectNamespace` always returns a namespace-backed
|
||||
connection. The `implName` selects the namespace implementation:
|
||||
|
||||
- `"dir"` — directory namespace, configured with [DirNamespaceConfig](../interfaces/DirNamespaceConfig.md).
|
||||
- `"rest"` — remote REST catalog, configured with [RestNamespaceConfig](../interfaces/RestNamespaceConfig.md).
|
||||
- Any other string — full module path for a custom implementation,
|
||||
configured with a free-form string-keyed `properties` map.
|
||||
|
||||
### Parameters
|
||||
|
||||
* **implName**: `"dir"`
|
||||
|
||||
* **config**: [`DirNamespaceConfig`](../interfaces/DirNamespaceConfig.md)
|
||||
|
||||
* **options?**: `Partial`<[`ConnectNamespaceOptions`](../interfaces/ConnectNamespaceOptions.md)>
|
||||
|
||||
### Returns
|
||||
|
||||
`Promise`<[`Connection`](../classes/Connection.md)>
|
||||
|
||||
### Examples
|
||||
|
||||
```ts
|
||||
const db = await connectNamespace("dir", { root: "/path/to/db" });
|
||||
await db.createTable("users", [{ id: 1 }]);
|
||||
```
|
||||
|
||||
```ts
|
||||
const db = await connectNamespace("rest", {
|
||||
uri: "https://catalog.example.com",
|
||||
headers: { "x-api-key": process.env.CATALOG_KEY ?? "" },
|
||||
});
|
||||
```
|
||||
|
||||
```ts
|
||||
const db = await connectNamespace("my.custom.Namespace", {
|
||||
endpoint: "...",
|
||||
});
|
||||
```
|
||||
|
||||
## connectNamespace(implName, config, options)
|
||||
|
||||
```ts
|
||||
function connectNamespace(
|
||||
implName,
|
||||
config,
|
||||
options?): Promise<Connection>
|
||||
```
|
||||
|
||||
Connect through the built-in REST namespace.
|
||||
|
||||
Configured with [RestNamespaceConfig](../interfaces/RestNamespaceConfig.md). See the function-level
|
||||
documentation above for the full surface, examples, and how this
|
||||
relates to [connect](connect.md).
|
||||
|
||||
### Parameters
|
||||
|
||||
* **implName**: `"rest"`
|
||||
|
||||
* **config**: [`RestNamespaceConfig`](../interfaces/RestNamespaceConfig.md)
|
||||
|
||||
* **options?**: `Partial`<[`ConnectNamespaceOptions`](../interfaces/ConnectNamespaceOptions.md)>
|
||||
|
||||
### Returns
|
||||
|
||||
`Promise`<[`Connection`](../classes/Connection.md)>
|
||||
|
||||
### Example
|
||||
|
||||
```ts
|
||||
const db = await connectNamespace("rest", {
|
||||
uri: "https://catalog.example.com",
|
||||
headers: { "x-api-key": process.env.CATALOG_KEY ?? "" },
|
||||
});
|
||||
```
|
||||
|
||||
## connectNamespace(implName, properties, options)
|
||||
|
||||
```ts
|
||||
function connectNamespace(
|
||||
implName,
|
||||
properties,
|
||||
options?): Promise<Connection>
|
||||
```
|
||||
|
||||
Connect through a custom namespace implementation by full module path,
|
||||
configured with a free-form string-keyed `properties` map. Use the
|
||||
typed overloads above for the built-in `"dir"` and `"rest"` impls.
|
||||
|
||||
See the function-level documentation above for examples and how this
|
||||
relates to [connect](connect.md).
|
||||
|
||||
### Parameters
|
||||
|
||||
* **implName**: `string`
|
||||
|
||||
* **properties**: `Record`<`string`, `string`>
|
||||
|
||||
* **options?**: `Partial`<[`ConnectNamespaceOptions`](../interfaces/ConnectNamespaceOptions.md)>
|
||||
|
||||
### Returns
|
||||
|
||||
`Promise`<[`Connection`](../classes/Connection.md)>
|
||||
|
||||
### Example
|
||||
|
||||
```ts
|
||||
const db = await connectNamespace("my.custom.Namespace", {
|
||||
endpoint: "...",
|
||||
});
|
||||
```
|
||||
@@ -32,7 +32,6 @@
|
||||
- [PhraseQuery](classes/PhraseQuery.md)
|
||||
- [Query](classes/Query.md)
|
||||
- [QueryBase](classes/QueryBase.md)
|
||||
- [Scannable](classes/Scannable.md)
|
||||
- [Session](classes/Session.md)
|
||||
- [StaticHeaderProvider](classes/StaticHeaderProvider.md)
|
||||
- [Table](classes/Table.md)
|
||||
@@ -51,19 +50,11 @@
|
||||
- [AlterColumnsResult](interfaces/AlterColumnsResult.md)
|
||||
- [ClientConfig](interfaces/ClientConfig.md)
|
||||
- [ColumnAlteration](interfaces/ColumnAlteration.md)
|
||||
- [ColumnOrdering](interfaces/ColumnOrdering.md)
|
||||
- [CompactionStats](interfaces/CompactionStats.md)
|
||||
- [ConnectNamespaceOptions](interfaces/ConnectNamespaceOptions.md)
|
||||
- [ConnectionOptions](interfaces/ConnectionOptions.md)
|
||||
- [CreateNamespaceOptions](interfaces/CreateNamespaceOptions.md)
|
||||
- [CreateNamespaceResponse](interfaces/CreateNamespaceResponse.md)
|
||||
- [CreateTableOptions](interfaces/CreateTableOptions.md)
|
||||
- [DeleteResult](interfaces/DeleteResult.md)
|
||||
- [DescribeNamespaceResponse](interfaces/DescribeNamespaceResponse.md)
|
||||
- [DirNamespaceConfig](interfaces/DirNamespaceConfig.md)
|
||||
- [DropColumnsResult](interfaces/DropColumnsResult.md)
|
||||
- [DropNamespaceOptions](interfaces/DropNamespaceOptions.md)
|
||||
- [DropNamespaceResponse](interfaces/DropNamespaceResponse.md)
|
||||
- [ExecutableQuery](interfaces/ExecutableQuery.md)
|
||||
- [FragmentStatistics](interfaces/FragmentStatistics.md)
|
||||
- [FragmentSummaryStats](interfaces/FragmentSummaryStats.md)
|
||||
@@ -78,19 +69,13 @@
|
||||
- [IvfFlatOptions](interfaces/IvfFlatOptions.md)
|
||||
- [IvfPqOptions](interfaces/IvfPqOptions.md)
|
||||
- [IvfRqOptions](interfaces/IvfRqOptions.md)
|
||||
- [ListNamespacesOptions](interfaces/ListNamespacesOptions.md)
|
||||
- [ListNamespacesResponse](interfaces/ListNamespacesResponse.md)
|
||||
- [LsmWriteSpec](interfaces/LsmWriteSpec.md)
|
||||
- [MergeResult](interfaces/MergeResult.md)
|
||||
- [OpenTableOptions](interfaces/OpenTableOptions.md)
|
||||
- [OptimizeOptions](interfaces/OptimizeOptions.md)
|
||||
- [OptimizeStats](interfaces/OptimizeStats.md)
|
||||
- [QueryExecutionOptions](interfaces/QueryExecutionOptions.md)
|
||||
- [RemovalStats](interfaces/RemovalStats.md)
|
||||
- [RenameTableOptions](interfaces/RenameTableOptions.md)
|
||||
- [RestNamespaceConfig](interfaces/RestNamespaceConfig.md)
|
||||
- [RetryConfig](interfaces/RetryConfig.md)
|
||||
- [ScannableOptions](interfaces/ScannableOptions.md)
|
||||
- [ShuffleOptions](interfaces/ShuffleOptions.md)
|
||||
- [SplitCalculatedOptions](interfaces/SplitCalculatedOptions.md)
|
||||
- [SplitHashOptions](interfaces/SplitHashOptions.md)
|
||||
@@ -105,7 +90,6 @@
|
||||
- [UpdateResult](interfaces/UpdateResult.md)
|
||||
- [Version](interfaces/Version.md)
|
||||
- [WriteExecutionOptions](interfaces/WriteExecutionOptions.md)
|
||||
- [WriteProgress](interfaces/WriteProgress.md)
|
||||
|
||||
## Type Aliases
|
||||
|
||||
@@ -123,7 +107,6 @@
|
||||
|
||||
- [RecordBatchIterator](functions/RecordBatchIterator.md)
|
||||
- [connect](functions/connect.md)
|
||||
- [connectNamespace](functions/connectNamespace.md)
|
||||
- [makeArrowTable](functions/makeArrowTable.md)
|
||||
- [packBits](functions/packBits.md)
|
||||
- [permutationBuilder](functions/permutationBuilder.md)
|
||||
|
||||
@@ -19,39 +19,3 @@ mode: "append" | "overwrite";
|
||||
If "append" (the default) then the new data will be added to the table
|
||||
|
||||
If "overwrite" then the new data will replace the existing data in the table.
|
||||
|
||||
***
|
||||
|
||||
### progress()
|
||||
|
||||
```ts
|
||||
progress: (progress) => void;
|
||||
```
|
||||
|
||||
Optional callback invoked periodically with write progress.
|
||||
|
||||
The callback is fired once per batch written and once more with
|
||||
`done: true` when the write completes. Calls are dispatched
|
||||
asynchronously to the JS event loop and never block the write — a slow
|
||||
callback will queue events rather than back-pressure the writer.
|
||||
|
||||
Errors thrown from the callback are logged with `console.warn` and
|
||||
swallowed — they do not abort the write.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **progress**: [`WriteProgress`](WriteProgress.md)
|
||||
|
||||
#### Returns
|
||||
|
||||
`void`
|
||||
|
||||
#### Example
|
||||
|
||||
```ts
|
||||
await table.add(data, {
|
||||
progress: (p) => {
|
||||
console.log(`${p.outputRows}/${p.totalRows ?? "?"} rows`);
|
||||
},
|
||||
});
|
||||
```
|
||||
|
||||
@@ -53,18 +53,3 @@ optional tlsConfig: TlsConfig;
|
||||
```ts
|
||||
optional userAgent: string;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### userId?
|
||||
|
||||
```ts
|
||||
optional userId: string;
|
||||
```
|
||||
|
||||
User identifier for tracking purposes.
|
||||
|
||||
This is sent as the `x-lancedb-user-id` header in requests to LanceDB Cloud/Enterprise.
|
||||
It can be set directly, or via the `LANCEDB_USER_ID` environment variable.
|
||||
Alternatively, set `LANCEDB_USER_ID_ENV_KEY` to specify another environment
|
||||
variable that contains the user ID value.
|
||||
|
||||
@@ -1,31 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / ColumnOrdering
|
||||
|
||||
# Interface: ColumnOrdering
|
||||
|
||||
## Properties
|
||||
|
||||
### ascending?
|
||||
|
||||
```ts
|
||||
optional ascending: boolean;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### columnName
|
||||
|
||||
```ts
|
||||
columnName: string;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### nullsFirst?
|
||||
|
||||
```ts
|
||||
optional nullsFirst: boolean;
|
||||
```
|
||||
@@ -1,54 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / ConnectNamespaceOptions
|
||||
|
||||
# Interface: ConnectNamespaceOptions
|
||||
|
||||
## Properties
|
||||
|
||||
### namespaceClientProperties?
|
||||
|
||||
```ts
|
||||
optional namespaceClientProperties: Record<string, string>;
|
||||
```
|
||||
|
||||
Extra properties for the backing namespace client.
|
||||
|
||||
***
|
||||
|
||||
### readConsistencyInterval?
|
||||
|
||||
```ts
|
||||
optional readConsistencyInterval: number;
|
||||
```
|
||||
|
||||
The interval, in seconds, at which to check for updates to the table
|
||||
from other processes. If None, then consistency is not checked. For
|
||||
performance reasons, this is the default. For strong consistency, set
|
||||
this to zero seconds. Then every read will check for updates from other
|
||||
processes. As a compromise, you can set this to a non-zero value for
|
||||
eventual consistency.
|
||||
|
||||
***
|
||||
|
||||
### session?
|
||||
|
||||
```ts
|
||||
optional session: Session;
|
||||
```
|
||||
|
||||
The session to use for this connection. Holds shared caches and other
|
||||
session-specific state.
|
||||
|
||||
***
|
||||
|
||||
### storageOptions?
|
||||
|
||||
```ts
|
||||
optional storageOptions: Record<string, string>;
|
||||
```
|
||||
|
||||
Configuration for object storage. The available options are described
|
||||
at https://docs.lancedb.com/storage/
|
||||
@@ -41,29 +41,6 @@ for testing purposes.
|
||||
|
||||
***
|
||||
|
||||
### manifestEnabled?
|
||||
|
||||
```ts
|
||||
optional manifestEnabled: boolean;
|
||||
```
|
||||
|
||||
(For LanceDB OSS only): use directory namespace manifests as the source
|
||||
of truth for table metadata. Existing directory-listed root tables are
|
||||
migrated into the manifest on access.
|
||||
|
||||
***
|
||||
|
||||
### namespaceClientProperties?
|
||||
|
||||
```ts
|
||||
optional namespaceClientProperties: Record<string, string>;
|
||||
```
|
||||
|
||||
(For LanceDB OSS only): extra properties for the backing namespace
|
||||
client used by manifest-enabled native connections.
|
||||
|
||||
***
|
||||
|
||||
### readConsistencyInterval?
|
||||
|
||||
```ts
|
||||
@@ -112,4 +89,4 @@ optional storageOptions: Record<string, string>;
|
||||
|
||||
(For LanceDB OSS only): configuration for object storage.
|
||||
|
||||
The available options are described at https://docs.lancedb.com/storage/
|
||||
The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / CreateNamespaceOptions
|
||||
|
||||
# Interface: CreateNamespaceOptions
|
||||
|
||||
## Properties
|
||||
|
||||
### mode?
|
||||
|
||||
```ts
|
||||
optional mode: "overwrite" | "create" | "exist_ok";
|
||||
```
|
||||
|
||||
Creation mode.
|
||||
|
||||
***
|
||||
|
||||
### properties?
|
||||
|
||||
```ts
|
||||
optional properties: Record<string, string>;
|
||||
```
|
||||
|
||||
Properties to set on the new namespace.
|
||||
@@ -1,23 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / CreateNamespaceResponse
|
||||
|
||||
# Interface: CreateNamespaceResponse
|
||||
|
||||
## Properties
|
||||
|
||||
### properties?
|
||||
|
||||
```ts
|
||||
optional properties: Record<string, string>;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### transactionId?
|
||||
|
||||
```ts
|
||||
optional transactionId: string;
|
||||
```
|
||||
@@ -97,4 +97,4 @@ Configuration for object storage.
|
||||
Options already set on the connection will be inherited by the table,
|
||||
but can be overridden here.
|
||||
|
||||
The available options are described at https://docs.lancedb.com/storage/
|
||||
The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
||||
|
||||
@@ -8,14 +8,6 @@
|
||||
|
||||
## Properties
|
||||
|
||||
### numDeletedRows
|
||||
|
||||
```ts
|
||||
numDeletedRows: number;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### version
|
||||
|
||||
```ts
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / DescribeNamespaceResponse
|
||||
|
||||
# Interface: DescribeNamespaceResponse
|
||||
|
||||
## Properties
|
||||
|
||||
### properties?
|
||||
|
||||
```ts
|
||||
optional properties: Record<string, string>;
|
||||
```
|
||||
@@ -1,47 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / DirNamespaceConfig
|
||||
|
||||
# Interface: DirNamespaceConfig
|
||||
|
||||
Configuration for the built-in directory namespace (`"dir"`).
|
||||
|
||||
The directory namespace stores tables under a single root path (local
|
||||
filesystem or object storage URI). See
|
||||
[https://docs.lancedb.com/namespaces](https://docs.lancedb.com/namespaces) for the documented surface;
|
||||
less-common knobs live under [DirNamespaceConfig.extraProperties](DirNamespaceConfig.md#extraproperties).
|
||||
|
||||
## Properties
|
||||
|
||||
### extraProperties?
|
||||
|
||||
```ts
|
||||
optional extraProperties: Record<string, string>;
|
||||
```
|
||||
|
||||
Additional raw properties passed verbatim to the namespace
|
||||
implementation (e.g. `storage.*`, `credential_vendor.*`). Typed
|
||||
fields above take precedence on key collision.
|
||||
|
||||
***
|
||||
|
||||
### manifestEnabled?
|
||||
|
||||
```ts
|
||||
optional manifestEnabled: boolean;
|
||||
```
|
||||
|
||||
Whether to maintain a namespace manifest at the root. Required for
|
||||
child namespaces. Defaults to true on the impl side.
|
||||
|
||||
***
|
||||
|
||||
### root
|
||||
|
||||
```ts
|
||||
root: string;
|
||||
```
|
||||
|
||||
Root path or URI containing the LanceDB tables.
|
||||
@@ -1,27 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / DropNamespaceOptions
|
||||
|
||||
# Interface: DropNamespaceOptions
|
||||
|
||||
## Properties
|
||||
|
||||
### behavior?
|
||||
|
||||
```ts
|
||||
optional behavior: "restrict" | "cascade";
|
||||
```
|
||||
|
||||
Refuse to drop if non-empty (restrict) or drop recursively (cascade).
|
||||
|
||||
***
|
||||
|
||||
### mode?
|
||||
|
||||
```ts
|
||||
optional mode: "fail" | "skip";
|
||||
```
|
||||
|
||||
Whether to skip if the namespace doesn't exist, or fail.
|
||||
@@ -1,23 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / DropNamespaceResponse
|
||||
|
||||
# Interface: DropNamespaceResponse
|
||||
|
||||
## Properties
|
||||
|
||||
### properties?
|
||||
|
||||
```ts
|
||||
optional properties: Record<string, string>;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### transactionId?
|
||||
|
||||
```ts
|
||||
optional transactionId: string[];
|
||||
```
|
||||
@@ -1,27 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / ListNamespacesOptions
|
||||
|
||||
# Interface: ListNamespacesOptions
|
||||
|
||||
## Properties
|
||||
|
||||
### limit?
|
||||
|
||||
```ts
|
||||
optional limit: number;
|
||||
```
|
||||
|
||||
An optional limit to the number of results to return.
|
||||
|
||||
***
|
||||
|
||||
### pageToken?
|
||||
|
||||
```ts
|
||||
optional pageToken: string;
|
||||
```
|
||||
|
||||
Token from a previous response for pagination.
|
||||
@@ -1,23 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / ListNamespacesResponse
|
||||
|
||||
# Interface: ListNamespacesResponse
|
||||
|
||||
## Properties
|
||||
|
||||
### namespaces
|
||||
|
||||
```ts
|
||||
namespaces: string[];
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### pageToken?
|
||||
|
||||
```ts
|
||||
optional pageToken: string;
|
||||
```
|
||||
@@ -1,64 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / LsmWriteSpec
|
||||
|
||||
# Interface: LsmWriteSpec
|
||||
|
||||
Specification selecting Lance's MemWAL LSM-style write path for
|
||||
`mergeInsert`.
|
||||
|
||||
`specType` is `"bucket"`, `"identity"`, or `"unsharded"`. For `"bucket"`,
|
||||
`column` and `numBuckets` are required; for `"identity"`, `column` is
|
||||
required.
|
||||
|
||||
## Properties
|
||||
|
||||
### column?
|
||||
|
||||
```ts
|
||||
optional column: string;
|
||||
```
|
||||
|
||||
Bucket and identity variants: the sharding column.
|
||||
|
||||
***
|
||||
|
||||
### maintainedIndexes?
|
||||
|
||||
```ts
|
||||
optional maintainedIndexes: string[];
|
||||
```
|
||||
|
||||
Names of indexes the MemWAL should keep up to date during writes.
|
||||
|
||||
***
|
||||
|
||||
### numBuckets?
|
||||
|
||||
```ts
|
||||
optional numBuckets: number;
|
||||
```
|
||||
|
||||
Bucket variant: the number of buckets, in `[1, 1024]`.
|
||||
|
||||
***
|
||||
|
||||
### specType
|
||||
|
||||
```ts
|
||||
specType: "bucket" | "identity" | "unsharded";
|
||||
```
|
||||
|
||||
One of `"bucket"`, `"identity"`, or `"unsharded"`.
|
||||
|
||||
***
|
||||
|
||||
### writerConfigDefaults?
|
||||
|
||||
```ts
|
||||
optional writerConfigDefaults: Record<string, string>;
|
||||
```
|
||||
|
||||
Default `ShardWriter` configuration recorded in the MemWAL index.
|
||||
@@ -8,14 +8,6 @@
|
||||
|
||||
## Properties
|
||||
|
||||
### numAttempts
|
||||
|
||||
```ts
|
||||
numAttempts: number;
|
||||
```
|
||||
|
||||
***
|
||||
|
||||
### numDeletedRows
|
||||
|
||||
```ts
|
||||
|
||||
@@ -42,4 +42,4 @@ Configuration for object storage.
|
||||
Options already set on the connection will be inherited by the table,
|
||||
but can be overridden here.
|
||||
|
||||
The available options are described at https://docs.lancedb.com/storage/
|
||||
The available options are described at https://lancedb.github.io/lancedb/guides/storage/
|
||||
|
||||
@@ -37,12 +37,3 @@ tbl.optimize({cleanupOlderThan: new Date()});
|
||||
```ts
|
||||
deleteUnverified: boolean;
|
||||
```
|
||||
|
||||
Because they may be part of an in-progress transaction, files newer than
|
||||
7 days old are not deleted by default. If you are sure that there are no
|
||||
in-progress transactions, then you can set this to true to delete all
|
||||
files older than `cleanupOlderThan`.
|
||||
|
||||
**WARNING**: This should only be set to true if you can guarantee that
|
||||
no other process is currently working on this dataset. Otherwise the
|
||||
dataset could be put into a corrupted state.
|
||||
|
||||
@@ -1,29 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / RenameTableOptions
|
||||
|
||||
# Interface: RenameTableOptions
|
||||
|
||||
## Properties
|
||||
|
||||
### namespacePath?
|
||||
|
||||
```ts
|
||||
optional namespacePath: string[];
|
||||
```
|
||||
|
||||
The namespace path of the table being renamed. Defaults to the root
|
||||
namespace (`[]`) when omitted.
|
||||
|
||||
***
|
||||
|
||||
### newNamespacePath?
|
||||
|
||||
```ts
|
||||
optional newNamespacePath: string[];
|
||||
```
|
||||
|
||||
The namespace path to move the table to as part of the rename. When
|
||||
omitted the table stays in `namespacePath`.
|
||||
@@ -1,47 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / RestNamespaceConfig
|
||||
|
||||
# Interface: RestNamespaceConfig
|
||||
|
||||
Configuration for the built-in REST namespace (`"rest"`).
|
||||
|
||||
The REST namespace talks to a remote catalog server over HTTP. See
|
||||
[https://docs.lancedb.com/namespaces](https://docs.lancedb.com/namespaces) for the documented surface;
|
||||
less-common knobs (TLS, metrics) live under
|
||||
[RestNamespaceConfig.extraProperties](RestNamespaceConfig.md#extraproperties).
|
||||
|
||||
## Properties
|
||||
|
||||
### extraProperties?
|
||||
|
||||
```ts
|
||||
optional extraProperties: Record<string, string>;
|
||||
```
|
||||
|
||||
Additional raw properties passed verbatim to the namespace
|
||||
implementation (e.g. `tls.*`, `ops_metrics_enabled`, `delimiter`).
|
||||
Typed fields above take precedence on key collision.
|
||||
|
||||
***
|
||||
|
||||
### headers?
|
||||
|
||||
```ts
|
||||
optional headers: Record<string, string>;
|
||||
```
|
||||
|
||||
HTTP headers forwarded with each request. Keys are passed through
|
||||
as-is (e.g. `"x-api-key"`, `"Authorization"`).
|
||||
|
||||
***
|
||||
|
||||
### uri
|
||||
|
||||
```ts
|
||||
uri: string;
|
||||
```
|
||||
|
||||
Catalog endpoint URL.
|
||||
@@ -1,29 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / ScannableOptions
|
||||
|
||||
# Interface: ScannableOptions
|
||||
|
||||
## Properties
|
||||
|
||||
### numRows?
|
||||
|
||||
```ts
|
||||
optional numRows: number;
|
||||
```
|
||||
|
||||
Hint about the number of rows. Not validated against the stream.
|
||||
|
||||
***
|
||||
|
||||
### rescannable?
|
||||
|
||||
```ts
|
||||
optional rescannable: boolean;
|
||||
```
|
||||
|
||||
Whether the source can be scanned more than once. Defaults to `true` for
|
||||
`fromTable` / `fromFactory` and `false` for `fromIterable` /
|
||||
`fromRecordBatchReader`.
|
||||
@@ -1,84 +0,0 @@
|
||||
[**@lancedb/lancedb**](../README.md) • **Docs**
|
||||
|
||||
***
|
||||
|
||||
[@lancedb/lancedb](../globals.md) / WriteProgress
|
||||
|
||||
# Interface: WriteProgress
|
||||
|
||||
Progress snapshot for a write operation, delivered to the `progress`
|
||||
callback passed to [Table.add](../classes/Table.md#add).
|
||||
|
||||
## Properties
|
||||
|
||||
### activeTasks
|
||||
|
||||
```ts
|
||||
activeTasks: number;
|
||||
```
|
||||
|
||||
Number of parallel write tasks currently in flight.
|
||||
|
||||
***
|
||||
|
||||
### done
|
||||
|
||||
```ts
|
||||
done: boolean;
|
||||
```
|
||||
|
||||
`true` for the final callback; `false` otherwise.
|
||||
|
||||
***
|
||||
|
||||
### elapsedSeconds
|
||||
|
||||
```ts
|
||||
elapsedSeconds: number;
|
||||
```
|
||||
|
||||
Wall-clock seconds since the write started.
|
||||
|
||||
***
|
||||
|
||||
### outputBytes
|
||||
|
||||
```ts
|
||||
outputBytes: number;
|
||||
```
|
||||
|
||||
Number of bytes written so far.
|
||||
|
||||
***
|
||||
|
||||
### outputRows
|
||||
|
||||
```ts
|
||||
outputRows: number;
|
||||
```
|
||||
|
||||
Number of rows written so far.
|
||||
|
||||
***
|
||||
|
||||
### totalRows?
|
||||
|
||||
```ts
|
||||
optional totalRows: number;
|
||||
```
|
||||
|
||||
Total rows expected, when the input source reports it.
|
||||
|
||||
Always set on the final callback (the one with `done: true`), falling
|
||||
back to the actual number of rows written when the source could not
|
||||
report a row count up front.
|
||||
|
||||
***
|
||||
|
||||
### totalTasks
|
||||
|
||||
```ts
|
||||
totalTasks: number;
|
||||
```
|
||||
|
||||
Total number of parallel write tasks (the write parallelism).
|
||||
@@ -52,7 +52,7 @@ new EmbeddingFunction<T, M>(): EmbeddingFunction<T, M>
|
||||
### computeQueryEmbeddings()
|
||||
|
||||
```ts
|
||||
computeQueryEmbeddings(data): Promise<number[] | Uint8Array | Float32Array | Float64Array>
|
||||
computeQueryEmbeddings(data): Promise<number[] | Float32Array | Float64Array>
|
||||
```
|
||||
|
||||
Compute the embeddings for a single query
|
||||
@@ -63,7 +63,7 @@ Compute the embeddings for a single query
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`number`[] \| `Uint8Array` \| `Float32Array` \| `Float64Array`>
|
||||
`Promise`<`number`[] \| `Float32Array` \| `Float64Array`>
|
||||
|
||||
***
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ new TextEmbeddingFunction<M>(): TextEmbeddingFunction<M>
|
||||
### computeQueryEmbeddings()
|
||||
|
||||
```ts
|
||||
computeQueryEmbeddings(data): Promise<number[] | Uint8Array | Float32Array | Float64Array>
|
||||
computeQueryEmbeddings(data): Promise<number[] | Float32Array | Float64Array>
|
||||
```
|
||||
|
||||
Compute the embeddings for a single query
|
||||
@@ -48,7 +48,7 @@ Compute the embeddings for a single query
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`number`[] \| `Uint8Array` \| `Float32Array` \| `Float64Array`>
|
||||
`Promise`<`number`[] \| `Float32Array` \| `Float64Array`>
|
||||
|
||||
#### Overrides
|
||||
|
||||
|
||||
@@ -7,10 +7,5 @@
|
||||
# Type Alias: IntoVector
|
||||
|
||||
```ts
|
||||
type IntoVector:
|
||||
| Float32Array
|
||||
| Float64Array
|
||||
| Uint8Array
|
||||
| number[]
|
||||
| Promise<Float32Array | Float64Array | Uint8Array | number[]>;
|
||||
type IntoVector: Float32Array | Float64Array | number[] | Promise<Float32Array | Float64Array | number[]>;
|
||||
```
|
||||
|
||||
@@ -30,26 +30,6 @@ is also an [asynchronous API client](#connections-asynchronous).
|
||||
|
||||
::: lancedb.table.Table
|
||||
|
||||
::: lancedb.table.FragmentStatistics
|
||||
|
||||
::: lancedb.table.FragmentSummaryStats
|
||||
|
||||
::: lancedb.table.Tags
|
||||
|
||||
## Expressions
|
||||
|
||||
Type-safe expression builder for filters and projections. Use these instead
|
||||
of raw SQL strings with [where][lancedb.query.LanceQueryBuilder.where] and
|
||||
[select][lancedb.query.LanceQueryBuilder.select].
|
||||
|
||||
::: lancedb.expr.Expr
|
||||
|
||||
::: lancedb.expr.col
|
||||
|
||||
::: lancedb.expr.lit
|
||||
|
||||
::: lancedb.expr.func
|
||||
|
||||
## Querying (Synchronous)
|
||||
|
||||
::: lancedb.query.Query
|
||||
@@ -78,14 +58,6 @@ of raw SQL strings with [where][lancedb.query.LanceQueryBuilder.where] and
|
||||
|
||||
::: lancedb.embeddings.open_clip.OpenClipEmbeddings
|
||||
|
||||
## Remote configuration
|
||||
|
||||
::: lancedb.remote.ClientConfig
|
||||
|
||||
::: lancedb.remote.TimeoutConfig
|
||||
|
||||
::: lancedb.remote.RetryConfig
|
||||
|
||||
## Context
|
||||
|
||||
::: lancedb.context.contextualize
|
||||
@@ -94,11 +66,11 @@ of raw SQL strings with [where][lancedb.query.LanceQueryBuilder.where] and
|
||||
|
||||
## Full text search
|
||||
|
||||
Use [lancedb.table.Table.create_fts_index][] for the synchronous API or
|
||||
[lancedb.table.AsyncTable.create_index][] with [lancedb.index.FTS][] for the
|
||||
asynchronous API.
|
||||
::: lancedb.fts.create_index
|
||||
|
||||
::: lancedb.index.FTS
|
||||
::: lancedb.fts.populate_index
|
||||
|
||||
::: lancedb.fts.search_index
|
||||
|
||||
## Utilities
|
||||
|
||||
@@ -143,8 +115,6 @@ Table hold your actual data as a collection of records / rows.
|
||||
|
||||
::: lancedb.table.AsyncTable
|
||||
|
||||
::: lancedb.table.AsyncTags
|
||||
|
||||
## Indices (Asynchronous)
|
||||
|
||||
Indices can be created on a table to speed up queries. This section
|
||||
@@ -166,14 +136,6 @@ lists the indices that LanceDb supports.
|
||||
|
||||
::: lancedb.index.IvfFlat
|
||||
|
||||
::: lancedb.index.IvfSq
|
||||
|
||||
::: lancedb.index.IvfRq
|
||||
|
||||
::: lancedb.index.HnswFlat
|
||||
|
||||
::: lancedb.table.IndexStatistics
|
||||
|
||||
## Querying (Asynchronous)
|
||||
|
||||
Queries allow you to return data from your database. Basic queries can be
|
||||
|
||||
@@ -85,26 +85,17 @@
|
||||
|
||||
/* Header gradient (only header area) */
|
||||
.md-header {
|
||||
background: linear-gradient(90deg, #e4d8f8 0%, #F0B7C1 45%, #E55A2B 100%);
|
||||
background: linear-gradient(90deg, #3B2E58 0%, #F0B7C1 45%, #E55A2B 100%);
|
||||
box-shadow: inset 0 1px 0 rgba(255,255,255,0.08), 0 1px 0 rgba(0,0,0,0.08);
|
||||
}
|
||||
|
||||
/* Improve brand title contrast on the lavender side */
|
||||
.md-header__title,
|
||||
.md-header__topic,
|
||||
.md-header__title .md-ellipsis,
|
||||
.md-header__topic .md-ellipsis {
|
||||
color: #2b1b3a;
|
||||
text-shadow: 0 1px 0 rgba(255, 255, 255, 0.25);
|
||||
}
|
||||
|
||||
/* Same colors as header for tabs (that hold the text) */
|
||||
.md-tabs {
|
||||
background: linear-gradient(90deg, #e4d8f8 0%, #F0B7C1 45%, #E55A2B 100%);
|
||||
background: linear-gradient(90deg, #3B2E58 0%, #F0B7C1 45%, #E55A2B 100%);
|
||||
}
|
||||
|
||||
/* Dark scheme variant */
|
||||
[data-md-color-scheme="slate"] .md-header,
|
||||
[data-md-color-scheme="slate"] .md-tabs {
|
||||
background: linear-gradient(90deg, #e4d8f8 0%, #F0B7C1 45%, #E55A2B 100%);
|
||||
background: linear-gradient(90deg, #3B2E58 0%, #F0B7C1 45%, #E55A2B 100%);
|
||||
}
|
||||
|
||||
@@ -1,71 +0,0 @@
|
||||
|
||||
List of third-party dependencies grouped by their license type.
|
||||
|
||||
Apache 2.0:
|
||||
|
||||
* error-prone annotations (com.google.errorprone:error_prone_annotations:2.28.0 - https://errorprone.info/error_prone_annotations)
|
||||
|
||||
Apache License 2.0:
|
||||
|
||||
* JsonNullable Jackson module (org.openapitools:jackson-databind-nullable:0.2.6 - https://github.com/OpenAPITools/jackson-databind-nullable)
|
||||
|
||||
Apache License V2.0:
|
||||
|
||||
* FlatBuffers Java API (com.google.flatbuffers:flatbuffers-java:23.5.26 - https://github.com/google/flatbuffers)
|
||||
|
||||
Apache License, Version 2.0:
|
||||
|
||||
* Apache Commons Codec (commons-codec:commons-codec:1.15 - https://commons.apache.org/proper/commons-codec/)
|
||||
* Apache HttpClient (org.apache.httpcomponents.client5:httpclient5:5.2.1 - https://hc.apache.org/httpcomponents-client-5.0.x/5.2.1/httpclient5/)
|
||||
* Apache HttpComponents Core HTTP/1.1 (org.apache.httpcomponents.core5:httpcore5:5.2 - https://hc.apache.org/httpcomponents-core-5.2.x/5.2/httpcore5/)
|
||||
* Apache HttpComponents Core HTTP/2 (org.apache.httpcomponents.core5:httpcore5-h2:5.2 - https://hc.apache.org/httpcomponents-core-5.2.x/5.2/httpcore5-h2/)
|
||||
* Arrow Format (org.apache.arrow:arrow-format:15.0.0 - https://arrow.apache.org/arrow-format/)
|
||||
* Arrow Java C Data Interface (org.apache.arrow:arrow-c-data:15.0.0 - https://arrow.apache.org/arrow-c-data/)
|
||||
* Arrow Java Dataset (org.apache.arrow:arrow-dataset:15.0.0 - https://arrow.apache.org/arrow-dataset/)
|
||||
* Arrow Memory - Core (org.apache.arrow:arrow-memory-core:15.0.0 - https://arrow.apache.org/arrow-memory/arrow-memory-core/)
|
||||
* Arrow Memory - Netty (org.apache.arrow:arrow-memory-netty:15.0.0 - https://arrow.apache.org/arrow-memory/arrow-memory-netty/)
|
||||
* Arrow Vectors (org.apache.arrow:arrow-vector:15.0.0 - https://arrow.apache.org/arrow-vector/)
|
||||
* Guava: Google Core Libraries for Java (com.google.guava:guava:33.3.1-jre - https://github.com/google/guava)
|
||||
* J2ObjC Annotations (com.google.j2objc:j2objc-annotations:3.0.0 - https://github.com/google/j2objc/)
|
||||
* Netty/Buffer (io.netty:netty-buffer:4.1.104.Final - https://netty.io/netty-buffer/)
|
||||
* Netty/Common (io.netty:netty-common:4.1.104.Final - https://netty.io/netty-common/)
|
||||
|
||||
Apache-2.0:
|
||||
|
||||
* Apache Commons Lang (org.apache.commons:commons-lang3:3.18.0 - https://commons.apache.org/proper/commons-lang/)
|
||||
* lance-namespace-apache-client (org.lance:lance-namespace-apache-client:0.4.5 - https://github.com/openapitools/openapi-generator)
|
||||
* lance-namespace-core (org.lance:lance-namespace-core:0.4.5 - https://lance.org/format/namespace/lance-namespace-core/)
|
||||
|
||||
EDL 1.0:
|
||||
|
||||
* Jakarta Activation API jar (jakarta.activation:jakarta.activation-api:1.2.2 - https://github.com/eclipse-ee4j/jaf/jakarta.activation-api)
|
||||
|
||||
Eclipse Distribution License - v 1.0:
|
||||
|
||||
* Eclipse Collections API (org.eclipse.collections:eclipse-collections-api:11.1.0 - https://github.com/eclipse/eclipse-collections/eclipse-collections-api)
|
||||
* Eclipse Collections Main Library (org.eclipse.collections:eclipse-collections:11.1.0 - https://github.com/eclipse/eclipse-collections/eclipse-collections)
|
||||
* Jakarta XML Binding API (jakarta.xml.bind:jakarta.xml.bind-api:2.3.3 - https://github.com/eclipse-ee4j/jaxb-api/jakarta.xml.bind-api)
|
||||
|
||||
Eclipse Public License - v 1.0:
|
||||
|
||||
* Eclipse Collections API (org.eclipse.collections:eclipse-collections-api:11.1.0 - https://github.com/eclipse/eclipse-collections/eclipse-collections-api)
|
||||
* Eclipse Collections Main Library (org.eclipse.collections:eclipse-collections:11.1.0 - https://github.com/eclipse/eclipse-collections/eclipse-collections)
|
||||
|
||||
The Apache Software License, Version 2.0:
|
||||
|
||||
* FindBugs-jsr305 (com.google.code.findbugs:jsr305:3.0.2 - http://findbugs.sourceforge.net/)
|
||||
* Guava InternalFutureFailureAccess and InternalFutures (com.google.guava:failureaccess:1.0.2 - https://github.com/google/guava/failureaccess)
|
||||
* Guava ListenableFuture only (com.google.guava:listenablefuture:9999.0-empty-to-avoid-conflict-with-guava - https://github.com/google/guava/listenablefuture)
|
||||
* Jackson datatype: JSR310 (com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.16.0 - https://github.com/FasterXML/jackson-modules-java8/jackson-datatype-jsr310)
|
||||
* Jackson module: Old JAXB Annotations (javax.xml.bind) (com.fasterxml.jackson.module:jackson-module-jaxb-annotations:2.17.1 - https://github.com/FasterXML/jackson-modules-base)
|
||||
* Jackson-annotations (com.fasterxml.jackson.core:jackson-annotations:2.16.0 - https://github.com/FasterXML/jackson)
|
||||
* Jackson-core (com.fasterxml.jackson.core:jackson-core:2.16.0 - https://github.com/FasterXML/jackson-core)
|
||||
* jackson-databind (com.fasterxml.jackson.core:jackson-databind:2.15.2 - https://github.com/FasterXML/jackson)
|
||||
* Jackson-JAXRS: base (com.fasterxml.jackson.jaxrs:jackson-jaxrs-base:2.17.1 - https://github.com/FasterXML/jackson-jaxrs-providers/jackson-jaxrs-base)
|
||||
* Jackson-JAXRS: JSON (com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider:2.17.1 - https://github.com/FasterXML/jackson-jaxrs-providers/jackson-jaxrs-json-provider)
|
||||
* JAR JNI Loader (org.questdb:jar-jni:1.1.1 - https://github.com/questdb/rust-maven-plugin)
|
||||
* Lance Core (org.lance:lance-core:2.0.0 - https://lance.org/)
|
||||
|
||||
The MIT License:
|
||||
|
||||
* Checker Qual (org.checkerframework:checker-qual:3.43.0 - https://checkerframework.org/)
|
||||
@@ -1,28 +0,0 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
.PHONY: build-lancedb
|
||||
build-lancedb:
|
||||
./mvnw spotless:apply -pl lancedb-core -am
|
||||
./mvnw install -pl lancedb-core -am
|
||||
|
||||
.PHONY: test-lancedb
|
||||
test-lancedb:
|
||||
# Requires LANCEDB_DB and LANCEDB_API_KEY environment variables
|
||||
./mvnw test -pl lancedb-core -P integration-tests
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
./mvnw clean
|
||||
|
||||
.PHONY: build
|
||||
build: build-lancedb
|
||||
@@ -1,4 +1,4 @@
|
||||
# LanceDB Java Enterprise Client
|
||||
# LanceDB Java SDK
|
||||
|
||||
## Configuration and Initialization
|
||||
|
||||
@@ -7,11 +7,10 @@
|
||||
For LanceDB Cloud, use the simplified builder API:
|
||||
|
||||
```java
|
||||
import com.lancedb.LanceDbNamespaceClientBuilder;
|
||||
import org.lance.namespace.LanceNamespace;
|
||||
import com.lancedb.lance.namespace.LanceRestNamespace;
|
||||
|
||||
// If your DB url is db://example-db, then your database here is example-db
|
||||
LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
|
||||
LanceRestNamespace namespace = LanceDBRestNamespaces.builder()
|
||||
.apiKey("your_lancedb_cloud_api_key")
|
||||
.database("your_database_name")
|
||||
.build();
|
||||
@@ -19,13 +18,13 @@ LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
|
||||
|
||||
### LanceDB Enterprise
|
||||
|
||||
For Enterprise deployments, use your custom endpoint:
|
||||
For Enterprise deployments, use your VPC endpoint:
|
||||
|
||||
```java
|
||||
LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
|
||||
LanceRestNamespace namespace = LanceDBRestNamespaces.builder()
|
||||
.apiKey("your_lancedb_enterprise_api_key")
|
||||
.database("your_database_name")
|
||||
.endpoint("<your_enterprise_endpoint>")
|
||||
.database("your-top-dir") // Your top level folder under your cloud bucket, e.g. s3://your-bucket/your-top-dir/
|
||||
.hostOverride("http://<vpc_endpoint_dns_name>:80")
|
||||
.build();
|
||||
```
|
||||
|
||||
@@ -34,11 +33,5 @@ LanceNamespace namespaceClient = LanceDbNamespaceClientBuilder.newBuilder()
|
||||
Build:
|
||||
|
||||
```shell
|
||||
./mvnw install -pl lancedb-core -am
|
||||
```
|
||||
|
||||
Run tests:
|
||||
|
||||
```shell
|
||||
./mvnw test -pl lancedb-core
|
||||
```
|
||||
./mvnw install
|
||||
```
|
||||
30
java/core/lancedb-jni/Cargo.toml
Normal file
30
java/core/lancedb-jni/Cargo.toml
Normal file
@@ -0,0 +1,30 @@
|
||||
[package]
|
||||
name = "lancedb-jni"
|
||||
description = "JNI bindings for LanceDB"
|
||||
# TODO modify lancedb/Cargo.toml for version and dependencies
|
||||
version = "0.10.0"
|
||||
edition.workspace = true
|
||||
repository.workspace = true
|
||||
readme.workspace = true
|
||||
license.workspace = true
|
||||
keywords.workspace = true
|
||||
categories.workspace = true
|
||||
publish = false
|
||||
|
||||
[lib]
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
lancedb = { path = "../../../rust/lancedb", default-features = false }
|
||||
lance = { workspace = true }
|
||||
arrow = { workspace = true, features = ["ffi"] }
|
||||
arrow-schema.workspace = true
|
||||
tokio = "1.46"
|
||||
jni = "0.21.1"
|
||||
snafu.workspace = true
|
||||
lazy_static.workspace = true
|
||||
serde = { version = "^1" }
|
||||
serde_json = { version = "1" }
|
||||
|
||||
[features]
|
||||
default = ["lancedb/default"]
|
||||
133
java/core/lancedb-jni/src/connection.rs
Normal file
133
java/core/lancedb-jni/src/connection.rs
Normal file
@@ -0,0 +1,133 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use crate::ffi::JNIEnvExt;
|
||||
use crate::traits::IntoJava;
|
||||
use crate::{Error, RT};
|
||||
use jni::objects::{JObject, JString, JValue};
|
||||
use jni::JNIEnv;
|
||||
pub const NATIVE_CONNECTION: &str = "nativeConnectionHandle";
|
||||
use crate::Result;
|
||||
use lancedb::connection::{connect, Connection};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct BlockingConnection {
|
||||
pub(crate) inner: Connection,
|
||||
}
|
||||
|
||||
impl BlockingConnection {
|
||||
pub fn create(dataset_uri: &str) -> Result<Self> {
|
||||
let inner = RT.block_on(connect(dataset_uri).execute())?;
|
||||
Ok(Self { inner })
|
||||
}
|
||||
|
||||
pub fn table_names(
|
||||
&self,
|
||||
start_after: Option<String>,
|
||||
limit: Option<i32>,
|
||||
) -> Result<Vec<String>> {
|
||||
let mut op = self.inner.table_names();
|
||||
if let Some(start_after) = start_after {
|
||||
op = op.start_after(start_after);
|
||||
}
|
||||
if let Some(limit) = limit {
|
||||
op = op.limit(limit as u32);
|
||||
}
|
||||
Ok(RT.block_on(op.execute())?)
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoJava for BlockingConnection {
|
||||
fn into_java<'a>(self, env: &mut JNIEnv<'a>) -> JObject<'a> {
|
||||
attach_native_connection(env, self)
|
||||
}
|
||||
}
|
||||
|
||||
fn attach_native_connection<'local>(
|
||||
env: &mut JNIEnv<'local>,
|
||||
connection: BlockingConnection,
|
||||
) -> JObject<'local> {
|
||||
let j_connection = create_java_connection_object(env);
|
||||
// This block sets a native Rust object (Connection) as a field in the Java object (j_Connection).
|
||||
// Caution: This creates a potential for memory leaks. The Rust object (Connection) is not
|
||||
// automatically garbage-collected by Java, and its memory will not be freed unless
|
||||
// explicitly handled.
|
||||
//
|
||||
// To prevent memory leaks, ensure the following:
|
||||
// 1. The Java object (`j_Connection`) should implement the `java.io.Closeable` interface.
|
||||
// 2. Users of this Java object should be instructed to always use it within a try-with-resources
|
||||
// statement (or manually call the `close()` method) to ensure that `self.close()` is invoked.
|
||||
match unsafe { env.set_rust_field(&j_connection, NATIVE_CONNECTION, connection) } {
|
||||
Ok(_) => j_connection,
|
||||
Err(err) => {
|
||||
env.throw_new(
|
||||
"java/lang/RuntimeException",
|
||||
format!("Failed to set native handle for Connection: {}", err),
|
||||
)
|
||||
.expect("Error throwing exception");
|
||||
JObject::null()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn create_java_connection_object<'a>(env: &mut JNIEnv<'a>) -> JObject<'a> {
|
||||
env.new_object("com/lancedb/lancedb/Connection", "()V", &[])
|
||||
.expect("Failed to create Java Lance Connection instance")
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "system" fn Java_com_lancedb_lancedb_Connection_releaseNativeConnection(
|
||||
mut env: JNIEnv,
|
||||
j_connection: JObject,
|
||||
) {
|
||||
let _: BlockingConnection = unsafe {
|
||||
env.take_rust_field(j_connection, NATIVE_CONNECTION)
|
||||
.expect("Failed to take native Connection handle")
|
||||
};
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "system" fn Java_com_lancedb_lancedb_Connection_connect<'local>(
|
||||
mut env: JNIEnv<'local>,
|
||||
_obj: JObject,
|
||||
dataset_uri_object: JString,
|
||||
) -> JObject<'local> {
|
||||
let dataset_uri: String = ok_or_throw!(env, env.get_string(&dataset_uri_object)).into();
|
||||
let blocking_connection = ok_or_throw!(env, BlockingConnection::create(&dataset_uri));
|
||||
blocking_connection.into_java(&mut env)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "system" fn Java_com_lancedb_lancedb_Connection_tableNames<'local>(
|
||||
mut env: JNIEnv<'local>,
|
||||
j_connection: JObject,
|
||||
start_after_obj: JObject, // Optional<String>
|
||||
limit_obj: JObject, // Optional<Integer>
|
||||
) -> JObject<'local> {
|
||||
ok_or_throw!(
|
||||
env,
|
||||
inner_table_names(&mut env, j_connection, start_after_obj, limit_obj)
|
||||
)
|
||||
}
|
||||
|
||||
fn inner_table_names<'local>(
|
||||
env: &mut JNIEnv<'local>,
|
||||
j_connection: JObject,
|
||||
start_after_obj: JObject, // Optional<String>
|
||||
limit_obj: JObject, // Optional<Integer>
|
||||
) -> Result<JObject<'local>> {
|
||||
let start_after = env.get_string_opt(&start_after_obj)?;
|
||||
let limit = env.get_int_opt(&limit_obj)?;
|
||||
let conn =
|
||||
unsafe { env.get_rust_field::<_, _, BlockingConnection>(j_connection, NATIVE_CONNECTION) }?;
|
||||
let table_names = conn.table_names(start_after, limit)?;
|
||||
drop(conn);
|
||||
let j_names = env.new_object("java/util/ArrayList", "()V", &[])?;
|
||||
for item in table_names {
|
||||
let jstr_item = env.new_string(item)?;
|
||||
let item_jobj = JObject::from(jstr_item);
|
||||
let item_gen = JValue::Object(&item_jobj);
|
||||
env.call_method(&j_names, "add", "(Ljava/lang/Object;)Z", &[item_gen])?;
|
||||
}
|
||||
Ok(j_names)
|
||||
}
|
||||
217
java/core/lancedb-jni/src/error.rs
Normal file
217
java/core/lancedb-jni/src/error.rs
Normal file
@@ -0,0 +1,217 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use std::str::Utf8Error;
|
||||
|
||||
use arrow_schema::ArrowError;
|
||||
use jni::errors::Error as JniError;
|
||||
use serde_json::Error as JsonError;
|
||||
use snafu::{Location, Snafu};
|
||||
|
||||
type BoxedError = Box<dyn std::error::Error + Send + Sync + 'static>;
|
||||
|
||||
/// Java Exception types
|
||||
pub enum JavaException {
|
||||
IllegalArgumentException,
|
||||
IOException,
|
||||
RuntimeException,
|
||||
}
|
||||
|
||||
impl JavaException {
|
||||
pub fn as_str(&self) -> &str {
|
||||
match self {
|
||||
Self::IllegalArgumentException => "java/lang/IllegalArgumentException",
|
||||
Self::IOException => "java/io/IOException",
|
||||
Self::RuntimeException => "java/lang/RuntimeException",
|
||||
}
|
||||
}
|
||||
}
|
||||
/// TODO(lu) change to lancedb-jni
|
||||
#[derive(Debug, Snafu)]
|
||||
#[snafu(visibility(pub))]
|
||||
pub enum Error {
|
||||
#[snafu(display("JNI error: {message}, {location}"))]
|
||||
Jni { message: String, location: Location },
|
||||
#[snafu(display("Invalid argument: {message}, {location}"))]
|
||||
InvalidArgument { message: String, location: Location },
|
||||
#[snafu(display("IO error: {source}, {location}"))]
|
||||
IO {
|
||||
source: BoxedError,
|
||||
location: Location,
|
||||
},
|
||||
#[snafu(display("Arrow error: {message}, {location}"))]
|
||||
Arrow { message: String, location: Location },
|
||||
#[snafu(display("Index error: {message}, {location}"))]
|
||||
Index { message: String, location: Location },
|
||||
#[snafu(display("JSON error: {message}, {location}"))]
|
||||
JSON { message: String, location: Location },
|
||||
#[snafu(display("Dataset at path {path} was not found, {location}"))]
|
||||
DatasetNotFound { path: String, location: Location },
|
||||
#[snafu(display("Dataset already exists: {uri}, {location}"))]
|
||||
DatasetAlreadyExists { uri: String, location: Location },
|
||||
#[snafu(display("Table '{name}' already exists"))]
|
||||
TableAlreadyExists { name: String },
|
||||
#[snafu(display("Table '{name}' was not found: {source}"))]
|
||||
TableNotFound {
|
||||
name: String,
|
||||
source: Box<dyn std::error::Error + Send + Sync>,
|
||||
},
|
||||
#[snafu(display("Invalid table name '{name}': {reason}"))]
|
||||
InvalidTableName { name: String, reason: String },
|
||||
#[snafu(display("Embedding function '{name}' was not found: {reason}, {location}"))]
|
||||
EmbeddingFunctionNotFound {
|
||||
name: String,
|
||||
reason: String,
|
||||
location: Location,
|
||||
},
|
||||
#[snafu(display("Other Lance error: {message}, {location}"))]
|
||||
OtherLance { message: String, location: Location },
|
||||
#[snafu(display("Other LanceDB error: {message}, {location}"))]
|
||||
OtherLanceDB { message: String, location: Location },
|
||||
}
|
||||
|
||||
impl Error {
|
||||
/// Throw as Java Exception
|
||||
pub fn throw(&self, env: &mut jni::JNIEnv) {
|
||||
match self {
|
||||
Self::InvalidArgument { .. }
|
||||
| Self::DatasetNotFound { .. }
|
||||
| Self::DatasetAlreadyExists { .. }
|
||||
| Self::TableAlreadyExists { .. }
|
||||
| Self::TableNotFound { .. }
|
||||
| Self::InvalidTableName { .. }
|
||||
| Self::EmbeddingFunctionNotFound { .. } => {
|
||||
self.throw_as(env, JavaException::IllegalArgumentException)
|
||||
}
|
||||
Self::IO { .. } | Self::Index { .. } => self.throw_as(env, JavaException::IOException),
|
||||
Self::Arrow { .. }
|
||||
| Self::JSON { .. }
|
||||
| Self::OtherLance { .. }
|
||||
| Self::OtherLanceDB { .. }
|
||||
| Self::Jni { .. } => self.throw_as(env, JavaException::RuntimeException),
|
||||
}
|
||||
}
|
||||
|
||||
/// Throw as an concrete Java Exception
|
||||
pub fn throw_as(&self, env: &mut jni::JNIEnv, exception: JavaException) {
|
||||
let message = &format!(
|
||||
"Error when throwing Java exception: {}:{}",
|
||||
exception.as_str(),
|
||||
self
|
||||
);
|
||||
env.throw_new(exception.as_str(), self.to_string())
|
||||
.expect(message);
|
||||
}
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
trait ToSnafuLocation {
|
||||
fn to_snafu_location(&'static self) -> snafu::Location;
|
||||
}
|
||||
|
||||
impl ToSnafuLocation for std::panic::Location<'static> {
|
||||
fn to_snafu_location(&'static self) -> snafu::Location {
|
||||
snafu::Location::new(self.file(), self.line(), self.column())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<JniError> for Error {
|
||||
#[track_caller]
|
||||
fn from(source: JniError) -> Self {
|
||||
Self::Jni {
|
||||
message: source.to_string(),
|
||||
location: std::panic::Location::caller().to_snafu_location(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Utf8Error> for Error {
|
||||
#[track_caller]
|
||||
fn from(source: Utf8Error) -> Self {
|
||||
Self::InvalidArgument {
|
||||
message: source.to_string(),
|
||||
location: std::panic::Location::caller().to_snafu_location(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ArrowError> for Error {
|
||||
#[track_caller]
|
||||
fn from(source: ArrowError) -> Self {
|
||||
Self::Arrow {
|
||||
message: source.to_string(),
|
||||
location: std::panic::Location::caller().to_snafu_location(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<JsonError> for Error {
|
||||
#[track_caller]
|
||||
fn from(source: JsonError) -> Self {
|
||||
Self::JSON {
|
||||
message: source.to_string(),
|
||||
location: std::panic::Location::caller().to_snafu_location(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<lance::Error> for Error {
|
||||
#[track_caller]
|
||||
fn from(source: lance::Error) -> Self {
|
||||
match source {
|
||||
lance::Error::DatasetNotFound {
|
||||
path,
|
||||
source: _,
|
||||
location,
|
||||
} => Self::DatasetNotFound { path, location },
|
||||
lance::Error::DatasetAlreadyExists { uri, location } => {
|
||||
Self::DatasetAlreadyExists { uri, location }
|
||||
}
|
||||
lance::Error::IO { source, location } => Self::IO { source, location },
|
||||
lance::Error::Arrow { message, location } => Self::Arrow { message, location },
|
||||
lance::Error::Index { message, location } => Self::Index { message, location },
|
||||
lance::Error::InvalidInput { source, location } => Self::InvalidArgument {
|
||||
message: source.to_string(),
|
||||
location,
|
||||
},
|
||||
_ => Self::OtherLance {
|
||||
message: source.to_string(),
|
||||
location: std::panic::Location::caller().to_snafu_location(),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<lancedb::Error> for Error {
|
||||
#[track_caller]
|
||||
fn from(source: lancedb::Error) -> Self {
|
||||
match source {
|
||||
lancedb::Error::InvalidTableName { name, reason } => {
|
||||
Self::InvalidTableName { name, reason }
|
||||
}
|
||||
lancedb::Error::InvalidInput { message } => Self::InvalidArgument {
|
||||
message,
|
||||
location: std::panic::Location::caller().to_snafu_location(),
|
||||
},
|
||||
lancedb::Error::TableNotFound { name, source } => Self::TableNotFound { name, source },
|
||||
lancedb::Error::TableAlreadyExists { name } => Self::TableAlreadyExists { name },
|
||||
lancedb::Error::EmbeddingFunctionNotFound { name, reason } => {
|
||||
Self::EmbeddingFunctionNotFound {
|
||||
name,
|
||||
reason,
|
||||
location: std::panic::Location::caller().to_snafu_location(),
|
||||
}
|
||||
}
|
||||
lancedb::Error::Arrow { source } => Self::Arrow {
|
||||
message: source.to_string(),
|
||||
location: std::panic::Location::caller().to_snafu_location(),
|
||||
},
|
||||
lancedb::Error::Lance { source } => Self::from(source),
|
||||
_ => Self::OtherLanceDB {
|
||||
message: source.to_string(),
|
||||
location: std::panic::Location::caller().to_snafu_location(),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
194
java/core/lancedb-jni/src/ffi.rs
Normal file
194
java/core/lancedb-jni/src/ffi.rs
Normal file
@@ -0,0 +1,194 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use core::slice;
|
||||
|
||||
use jni::objects::{JByteBuffer, JObjectArray, JString};
|
||||
use jni::sys::jobjectArray;
|
||||
use jni::{objects::JObject, JNIEnv};
|
||||
|
||||
use crate::error::{Error, Result};
|
||||
|
||||
/// TODO(lu) import from lance-jni without duplicate
|
||||
/// Extend JNIEnv with helper functions.
|
||||
pub trait JNIEnvExt {
|
||||
/// Get integers from Java List<Integer> object.
|
||||
fn get_integers(&mut self, obj: &JObject) -> Result<Vec<i32>>;
|
||||
|
||||
/// Get strings from Java List<String> object.
|
||||
#[allow(dead_code)]
|
||||
fn get_strings(&mut self, obj: &JObject) -> Result<Vec<String>>;
|
||||
|
||||
/// Get strings from Java String[] object.
|
||||
/// Note that get Option<Vec<String>> from Java Optional<String[]> just doesn't work.
|
||||
#[allow(unused)]
|
||||
fn get_strings_array(&mut self, obj: jobjectArray) -> Result<Vec<String>>;
|
||||
|
||||
/// Get Option<String> from Java Optional<String>.
|
||||
fn get_string_opt(&mut self, obj: &JObject) -> Result<Option<String>>;
|
||||
|
||||
/// Get Option<Vec<String>> from Java Optional<List<String>>.
|
||||
#[allow(unused)]
|
||||
fn get_strings_opt(&mut self, obj: &JObject) -> Result<Option<Vec<String>>>;
|
||||
|
||||
/// Get Option<i32> from Java Optional<Integer>.
|
||||
fn get_int_opt(&mut self, obj: &JObject) -> Result<Option<i32>>;
|
||||
|
||||
/// Get Option<Vec<i32>> from Java Optional<List<Integer>>.
|
||||
fn get_ints_opt(&mut self, obj: &JObject) -> Result<Option<Vec<i32>>>;
|
||||
|
||||
/// Get Option<i64> from Java Optional<Long>.
|
||||
#[allow(unused)]
|
||||
fn get_long_opt(&mut self, obj: &JObject) -> Result<Option<i64>>;
|
||||
|
||||
/// Get Option<u64> from Java Optional<Long>.
|
||||
#[allow(unused)]
|
||||
fn get_u64_opt(&mut self, obj: &JObject) -> Result<Option<u64>>;
|
||||
|
||||
/// Get Option<&[u8]> from Java Optional<ByteBuffer>.
|
||||
#[allow(unused)]
|
||||
fn get_bytes_opt(&mut self, obj: &JObject) -> Result<Option<&[u8]>>;
|
||||
|
||||
fn get_optional<T, F>(&mut self, obj: &JObject, f: F) -> Result<Option<T>>
|
||||
where
|
||||
F: FnOnce(&mut JNIEnv, &JObject) -> Result<T>;
|
||||
}
|
||||
|
||||
impl JNIEnvExt for JNIEnv<'_> {
|
||||
fn get_integers(&mut self, obj: &JObject) -> Result<Vec<i32>> {
|
||||
let list = self.get_list(obj)?;
|
||||
let mut iter = list.iter(self)?;
|
||||
let mut results = Vec::with_capacity(list.size(self)? as usize);
|
||||
while let Some(elem) = iter.next(self)? {
|
||||
let int_obj = self.call_method(elem, "intValue", "()I", &[])?;
|
||||
let int_value = int_obj.i()?;
|
||||
results.push(int_value);
|
||||
}
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
fn get_strings(&mut self, obj: &JObject) -> Result<Vec<String>> {
|
||||
let list = self.get_list(obj)?;
|
||||
let mut iter = list.iter(self)?;
|
||||
let mut results = Vec::with_capacity(list.size(self)? as usize);
|
||||
while let Some(elem) = iter.next(self)? {
|
||||
let jstr = JString::from(elem);
|
||||
let val = self.get_string(&jstr)?;
|
||||
results.push(val.to_str()?.to_string())
|
||||
}
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
fn get_strings_array(&mut self, obj: jobjectArray) -> Result<Vec<String>> {
|
||||
let jobject_array = unsafe { JObjectArray::from_raw(obj) };
|
||||
let array_len = self.get_array_length(&jobject_array)?;
|
||||
let mut res: Vec<String> = Vec::new();
|
||||
for i in 0..array_len {
|
||||
let item: JString = self.get_object_array_element(&jobject_array, i)?.into();
|
||||
res.push(self.get_string(&item)?.into());
|
||||
}
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
fn get_string_opt(&mut self, obj: &JObject) -> Result<Option<String>> {
|
||||
self.get_optional(obj, |env, inner_obj| {
|
||||
let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
|
||||
let java_string_obj = java_obj_gen.l()?;
|
||||
let jstr = JString::from(java_string_obj);
|
||||
let val = env.get_string(&jstr)?;
|
||||
Ok(val.to_str()?.to_string())
|
||||
})
|
||||
}
|
||||
|
||||
fn get_strings_opt(&mut self, obj: &JObject) -> Result<Option<Vec<String>>> {
|
||||
self.get_optional(obj, |env, inner_obj| {
|
||||
let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
|
||||
let java_list_obj = java_obj_gen.l()?;
|
||||
env.get_strings(&java_list_obj)
|
||||
})
|
||||
}
|
||||
|
||||
fn get_int_opt(&mut self, obj: &JObject) -> Result<Option<i32>> {
|
||||
self.get_optional(obj, |env, inner_obj| {
|
||||
let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
|
||||
let java_int_obj = java_obj_gen.l()?;
|
||||
let int_obj = env.call_method(java_int_obj, "intValue", "()I", &[])?;
|
||||
let int_value = int_obj.i()?;
|
||||
Ok(int_value)
|
||||
})
|
||||
}
|
||||
|
||||
fn get_ints_opt(&mut self, obj: &JObject) -> Result<Option<Vec<i32>>> {
|
||||
self.get_optional(obj, |env, inner_obj| {
|
||||
let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
|
||||
let java_list_obj = java_obj_gen.l()?;
|
||||
env.get_integers(&java_list_obj)
|
||||
})
|
||||
}
|
||||
|
||||
fn get_long_opt(&mut self, obj: &JObject) -> Result<Option<i64>> {
|
||||
self.get_optional(obj, |env, inner_obj| {
|
||||
let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
|
||||
let java_long_obj = java_obj_gen.l()?;
|
||||
let long_obj = env.call_method(java_long_obj, "longValue", "()J", &[])?;
|
||||
let long_value = long_obj.j()?;
|
||||
Ok(long_value)
|
||||
})
|
||||
}
|
||||
|
||||
fn get_u64_opt(&mut self, obj: &JObject) -> Result<Option<u64>> {
|
||||
self.get_optional(obj, |env, inner_obj| {
|
||||
let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
|
||||
let java_long_obj = java_obj_gen.l()?;
|
||||
let long_obj = env.call_method(java_long_obj, "longValue", "()J", &[])?;
|
||||
let long_value = long_obj.j()?;
|
||||
Ok(long_value as u64)
|
||||
})
|
||||
}
|
||||
|
||||
fn get_bytes_opt(&mut self, obj: &JObject) -> Result<Option<&[u8]>> {
|
||||
self.get_optional(obj, |env, inner_obj| {
|
||||
let java_obj_gen = env.call_method(inner_obj, "get", "()Ljava/lang/Object;", &[])?;
|
||||
let java_byte_buffer_obj = java_obj_gen.l()?;
|
||||
let j_byte_buffer = JByteBuffer::from(java_byte_buffer_obj);
|
||||
let raw_data = env.get_direct_buffer_address(&j_byte_buffer)?;
|
||||
let capacity = env.get_direct_buffer_capacity(&j_byte_buffer)?;
|
||||
let data = unsafe { slice::from_raw_parts(raw_data, capacity) };
|
||||
Ok(data)
|
||||
})
|
||||
}
|
||||
|
||||
fn get_optional<T, F>(&mut self, obj: &JObject, f: F) -> Result<Option<T>>
|
||||
where
|
||||
F: FnOnce(&mut JNIEnv, &JObject) -> Result<T>,
|
||||
{
|
||||
if obj.is_null() {
|
||||
return Ok(None);
|
||||
}
|
||||
let is_present = self.call_method(obj, "isPresent", "()Z", &[])?;
|
||||
if !is_present.z()? {
|
||||
// TODO(lu): put get java object into here cuz can only get java Object
|
||||
Ok(None)
|
||||
} else {
|
||||
f(self, obj).map(Some)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "system" fn Java_com_lancedb_lance_test_JniTestHelper_parseInts(
|
||||
mut env: JNIEnv,
|
||||
_obj: JObject,
|
||||
list_obj: JObject, // List<Integer>
|
||||
) {
|
||||
ok_or_throw_without_return!(env, env.get_integers(&list_obj));
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "system" fn Java_com_lancedb_lance_test_JniTestHelper_parseIntsOpt(
|
||||
mut env: JNIEnv,
|
||||
_obj: JObject,
|
||||
list_obj: JObject, // Optional<List<Integer>>
|
||||
) {
|
||||
ok_or_throw_without_return!(env, env.get_ints_opt(&list_obj));
|
||||
}
|
||||
57
java/core/lancedb-jni/src/lib.rs
Normal file
57
java/core/lancedb-jni/src/lib.rs
Normal file
@@ -0,0 +1,57 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
// TODO import from lance-jni without duplicate
|
||||
#[macro_export]
|
||||
macro_rules! ok_or_throw {
|
||||
($env:expr, $result:expr) => {
|
||||
match $result {
|
||||
Ok(value) => value,
|
||||
Err(err) => {
|
||||
Error::from(err).throw(&mut $env);
|
||||
return JObject::null();
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! ok_or_throw_without_return {
|
||||
($env:expr, $result:expr) => {
|
||||
match $result {
|
||||
Ok(value) => value,
|
||||
Err(err) => {
|
||||
Error::from(err).throw(&mut $env);
|
||||
return;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! ok_or_throw_with_return {
|
||||
($env:expr, $result:expr, $ret:expr) => {
|
||||
match $result {
|
||||
Ok(value) => value,
|
||||
Err(err) => {
|
||||
Error::from(err).throw(&mut $env);
|
||||
return $ret;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
mod connection;
|
||||
pub mod error;
|
||||
mod ffi;
|
||||
mod traits;
|
||||
|
||||
pub use error::{Error, Result};
|
||||
|
||||
lazy_static! {
|
||||
static ref RT: tokio::runtime::Runtime = tokio::runtime::Builder::new_multi_thread()
|
||||
.enable_all()
|
||||
.build()
|
||||
.expect("Failed to create tokio runtime");
|
||||
}
|
||||
114
java/core/lancedb-jni/src/traits.rs
Normal file
114
java/core/lancedb-jni/src/traits.rs
Normal file
@@ -0,0 +1,114 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use jni::objects::{JMap, JObject, JString, JValue};
|
||||
use jni::JNIEnv;
|
||||
|
||||
use crate::Result;
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub trait FromJObject<T> {
|
||||
fn extract(&self) -> Result<T>;
|
||||
}
|
||||
|
||||
/// Convert a Rust type into a Java Object.
|
||||
pub trait IntoJava {
|
||||
fn into_java<'a>(self, env: &mut JNIEnv<'a>) -> JObject<'a>;
|
||||
}
|
||||
|
||||
impl FromJObject<i32> for JObject<'_> {
|
||||
fn extract(&self) -> Result<i32> {
|
||||
Ok(JValue::from(self).i()?)
|
||||
}
|
||||
}
|
||||
|
||||
impl FromJObject<i64> for JObject<'_> {
|
||||
fn extract(&self) -> Result<i64> {
|
||||
Ok(JValue::from(self).j()?)
|
||||
}
|
||||
}
|
||||
|
||||
impl FromJObject<f32> for JObject<'_> {
|
||||
fn extract(&self) -> Result<f32> {
|
||||
Ok(JValue::from(self).f()?)
|
||||
}
|
||||
}
|
||||
|
||||
impl FromJObject<f64> for JObject<'_> {
|
||||
fn extract(&self) -> Result<f64> {
|
||||
Ok(JValue::from(self).d()?)
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub trait FromJString {
|
||||
fn extract(&self, env: &mut JNIEnv) -> Result<String>;
|
||||
}
|
||||
|
||||
impl FromJString for JString<'_> {
|
||||
fn extract(&self, env: &mut JNIEnv) -> Result<String> {
|
||||
Ok(env.get_string(self)?.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub trait JMapExt {
|
||||
#[allow(dead_code)]
|
||||
fn get_string(&self, env: &mut JNIEnv, key: &str) -> Result<Option<String>>;
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn get_i32(&self, env: &mut JNIEnv, key: &str) -> Result<Option<i32>>;
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn get_i64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<i64>>;
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn get_f32(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f32>>;
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn get_f64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f64>>;
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn get_map_value<T>(env: &mut JNIEnv, map: &JMap, key: &str) -> Result<Option<T>>
|
||||
where
|
||||
for<'a> JObject<'a>: FromJObject<T>,
|
||||
{
|
||||
let key_obj: JObject = env.new_string(key)?.into();
|
||||
if let Some(value) = map.get(env, &key_obj)? {
|
||||
if value.is_null() {
|
||||
Ok(None)
|
||||
} else {
|
||||
Ok(Some(value.extract()?))
|
||||
}
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
impl JMapExt for JMap<'_, '_, '_> {
|
||||
fn get_string(&self, env: &mut JNIEnv, key: &str) -> Result<Option<String>> {
|
||||
let key_obj: JObject = env.new_string(key)?.into();
|
||||
if let Some(value) = self.get(env, &key_obj)? {
|
||||
let value_str: JString = value.into();
|
||||
Ok(Some(value_str.extract(env)?))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
fn get_i32(&self, env: &mut JNIEnv, key: &str) -> Result<Option<i32>> {
|
||||
get_map_value(env, self, key)
|
||||
}
|
||||
|
||||
fn get_i64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<i64>> {
|
||||
get_map_value(env, self, key)
|
||||
}
|
||||
|
||||
fn get_f32(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f32>> {
|
||||
get_map_value(env, self, key)
|
||||
}
|
||||
|
||||
fn get_f64(&self, env: &mut JNIEnv, key: &str) -> Result<Option<f64>> {
|
||||
get_map_value(env, self, key)
|
||||
}
|
||||
}
|
||||
103
java/core/pom.xml
Normal file
103
java/core/pom.xml
Normal file
@@ -0,0 +1,103 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.22.4-beta.1</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
<artifactId>lancedb-core</artifactId>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>LanceDB Core</description>
|
||||
<packaging>jar</packaging>
|
||||
<properties>
|
||||
<rust.release.build>false</rust.release.build>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lance-namespace-core</artifactId>
|
||||
<version>0.0.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.arrow</groupId>
|
||||
<artifactId>arrow-vector</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.arrow</groupId>
|
||||
<artifactId>arrow-memory-netty</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.arrow</groupId>
|
||||
<artifactId>arrow-c-data</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.arrow</groupId>
|
||||
<artifactId>arrow-dataset</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.json</groupId>
|
||||
<artifactId>json</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.questdb</groupId>
|
||||
<artifactId>jar-jni</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.junit.jupiter</groupId>
|
||||
<artifactId>junit-jupiter</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<profiles>
|
||||
<profile>
|
||||
<id>build-jni</id>
|
||||
<activation>
|
||||
<activeByDefault>true</activeByDefault>
|
||||
</activation>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.questdb</groupId>
|
||||
<artifactId>rust-maven-plugin</artifactId>
|
||||
<version>1.1.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>lancedb-jni</id>
|
||||
<goals>
|
||||
<goal>build</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<path>lancedb-jni</path>
|
||||
<release>${rust.release.build}</release>
|
||||
<!-- Copy native libraries to target/classes for runtime access -->
|
||||
<copyTo>${project.build.directory}/classes/nativelib</copyTo>
|
||||
<copyWithPlatformDir>true</copyWithPlatformDir>
|
||||
</configuration>
|
||||
</execution>
|
||||
<execution>
|
||||
<id>lancedb-jni-test</id>
|
||||
<goals>
|
||||
<goal>test</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<path>lancedb-jni</path>
|
||||
<release>false</release>
|
||||
<verbosity>-v</verbosity>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</profile>
|
||||
</profiles>
|
||||
</project>
|
||||
108
java/core/src/main/java/com/lancedb/lancedb/Connection.java
Normal file
108
java/core/src/main/java/com/lancedb/lancedb/Connection.java
Normal file
@@ -0,0 +1,108 @@
|
||||
/*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package com.lancedb.lancedb;
|
||||
|
||||
import io.questdb.jar.jni.JarJniLoader;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
/** Represents LanceDB database. */
|
||||
public class Connection implements Closeable {
|
||||
static {
|
||||
JarJniLoader.loadLib(Connection.class, "/nativelib", "lancedb_jni");
|
||||
}
|
||||
|
||||
private long nativeConnectionHandle;
|
||||
|
||||
/** Connect to a LanceDB instance. */
|
||||
public static native Connection connect(String uri);
|
||||
|
||||
/**
|
||||
* Get the names of all tables in the database. The names are sorted in ascending order.
|
||||
*
|
||||
* @return the table names
|
||||
*/
|
||||
public List<String> tableNames() {
|
||||
return tableNames(Optional.empty(), Optional.empty());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the names of filtered tables in the database. The names are sorted in ascending order.
|
||||
*
|
||||
* @param limit The number of results to return.
|
||||
* @return the table names
|
||||
*/
|
||||
public List<String> tableNames(int limit) {
|
||||
return tableNames(Optional.empty(), Optional.of(limit));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the names of filtered tables in the database. The names are sorted in ascending order.
|
||||
*
|
||||
* @param startAfter If present, only return names that come lexicographically after the supplied
|
||||
* value. This can be combined with limit to implement pagination by setting this to the last
|
||||
* table name from the previous page.
|
||||
* @return the table names
|
||||
*/
|
||||
public List<String> tableNames(String startAfter) {
|
||||
return tableNames(Optional.of(startAfter), Optional.empty());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the names of filtered tables in the database. The names are sorted in ascending order.
|
||||
*
|
||||
* @param startAfter If present, only return names that come lexicographically after the supplied
|
||||
* value. This can be combined with limit to implement pagination by setting this to the last
|
||||
* table name from the previous page.
|
||||
* @param limit The number of results to return.
|
||||
* @return the table names
|
||||
*/
|
||||
public List<String> tableNames(String startAfter, int limit) {
|
||||
return tableNames(Optional.of(startAfter), Optional.of(limit));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the names of filtered tables in the database. The names are sorted in ascending order.
|
||||
*
|
||||
* @param startAfter If present, only return names that come lexicographically after the supplied
|
||||
* value. This can be combined with limit to implement pagination by setting this to the last
|
||||
* table name from the previous page.
|
||||
* @param limit The number of results to return.
|
||||
* @return the table names
|
||||
*/
|
||||
public native List<String> tableNames(Optional<String> startAfter, Optional<Integer> limit);
|
||||
|
||||
/**
|
||||
* Closes this connection and releases any system resources associated with it. If the connection
|
||||
* is already closed, then invoking this method has no effect.
|
||||
*/
|
||||
@Override
|
||||
public void close() {
|
||||
if (nativeConnectionHandle != 0) {
|
||||
releaseNativeConnection(nativeConnectionHandle);
|
||||
nativeConnectionHandle = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Native method to release the Lance connection resources associated with the given handle.
|
||||
*
|
||||
* @param handle The native handle to the connection resource.
|
||||
*/
|
||||
private native void releaseNativeConnection(long handle);
|
||||
|
||||
private Connection() {}
|
||||
}
|
||||
135
java/core/src/test/java/com/lancedb/lancedb/ConnectionTest.java
Normal file
135
java/core/src/test/java/com/lancedb/lancedb/ConnectionTest.java
Normal file
@@ -0,0 +1,135 @@
|
||||
/*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package com.lancedb.lancedb;
|
||||
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
|
||||
import java.net.URL;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
public class ConnectionTest {
|
||||
private static final String[] TABLE_NAMES = {
|
||||
"dataset_version", "new_empty_dataset", "test", "write_stream"
|
||||
};
|
||||
|
||||
@TempDir static Path tempDir; // Temporary directory for the tests
|
||||
private static URL lanceDbURL;
|
||||
|
||||
@BeforeAll
|
||||
static void setUp() {
|
||||
ClassLoader classLoader = ConnectionTest.class.getClassLoader();
|
||||
lanceDbURL = classLoader.getResource("example_db");
|
||||
}
|
||||
|
||||
@Test
|
||||
void emptyDB() {
|
||||
String databaseUri = tempDir.resolve("emptyDB").toString();
|
||||
try (Connection conn = Connection.connect(databaseUri)) {
|
||||
List<String> tableNames = conn.tableNames();
|
||||
assertTrue(tableNames.isEmpty());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void tableNames() {
|
||||
try (Connection conn = Connection.connect(lanceDbURL.toString())) {
|
||||
List<String> tableNames = conn.tableNames();
|
||||
assertEquals(4, tableNames.size());
|
||||
for (int i = 0; i < TABLE_NAMES.length; i++) {
|
||||
assertEquals(TABLE_NAMES[i], tableNames.get(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void tableNamesStartAfter() {
|
||||
try (Connection conn = Connection.connect(lanceDbURL.toString())) {
|
||||
assertTableNamesStartAfter(
|
||||
conn, TABLE_NAMES[0], 3, TABLE_NAMES[1], TABLE_NAMES[2], TABLE_NAMES[3]);
|
||||
assertTableNamesStartAfter(conn, TABLE_NAMES[1], 2, TABLE_NAMES[2], TABLE_NAMES[3]);
|
||||
assertTableNamesStartAfter(conn, TABLE_NAMES[2], 1, TABLE_NAMES[3]);
|
||||
assertTableNamesStartAfter(conn, TABLE_NAMES[3], 0);
|
||||
assertTableNamesStartAfter(
|
||||
conn, "a_dataset", 4, TABLE_NAMES[0], TABLE_NAMES[1], TABLE_NAMES[2], TABLE_NAMES[3]);
|
||||
assertTableNamesStartAfter(conn, "o_dataset", 2, TABLE_NAMES[2], TABLE_NAMES[3]);
|
||||
assertTableNamesStartAfter(conn, "v_dataset", 1, TABLE_NAMES[3]);
|
||||
assertTableNamesStartAfter(conn, "z_dataset", 0);
|
||||
}
|
||||
}
|
||||
|
||||
private void assertTableNamesStartAfter(
|
||||
Connection conn, String startAfter, int expectedSize, String... expectedNames) {
|
||||
List<String> tableNames = conn.tableNames(startAfter);
|
||||
assertEquals(expectedSize, tableNames.size());
|
||||
for (int i = 0; i < expectedNames.length; i++) {
|
||||
assertEquals(expectedNames[i], tableNames.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void tableNamesLimit() {
|
||||
try (Connection conn = Connection.connect(lanceDbURL.toString())) {
|
||||
for (int i = 0; i <= TABLE_NAMES.length; i++) {
|
||||
List<String> tableNames = conn.tableNames(i);
|
||||
assertEquals(i, tableNames.size());
|
||||
for (int j = 0; j < i; j++) {
|
||||
assertEquals(TABLE_NAMES[j], tableNames.get(j));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void tableNamesStartAfterLimit() {
|
||||
try (Connection conn = Connection.connect(lanceDbURL.toString())) {
|
||||
List<String> tableNames = conn.tableNames(TABLE_NAMES[0], 2);
|
||||
assertEquals(2, tableNames.size());
|
||||
assertEquals(TABLE_NAMES[1], tableNames.get(0));
|
||||
assertEquals(TABLE_NAMES[2], tableNames.get(1));
|
||||
tableNames = conn.tableNames(TABLE_NAMES[1], 1);
|
||||
assertEquals(1, tableNames.size());
|
||||
assertEquals(TABLE_NAMES[2], tableNames.get(0));
|
||||
tableNames = conn.tableNames(TABLE_NAMES[2], 2);
|
||||
assertEquals(1, tableNames.size());
|
||||
assertEquals(TABLE_NAMES[3], tableNames.get(0));
|
||||
tableNames = conn.tableNames(TABLE_NAMES[3], 2);
|
||||
assertEquals(0, tableNames.size());
|
||||
tableNames = conn.tableNames(TABLE_NAMES[0], 0);
|
||||
assertEquals(0, tableNames.size());
|
||||
|
||||
// Limit larger than the number of remaining tables
|
||||
tableNames = conn.tableNames(TABLE_NAMES[0], 10);
|
||||
assertEquals(3, tableNames.size());
|
||||
assertEquals(TABLE_NAMES[1], tableNames.get(0));
|
||||
assertEquals(TABLE_NAMES[2], tableNames.get(1));
|
||||
assertEquals(TABLE_NAMES[3], tableNames.get(2));
|
||||
|
||||
// Start after a value not in the list
|
||||
tableNames = conn.tableNames("non_existent_table", 2);
|
||||
assertEquals(2, tableNames.size());
|
||||
assertEquals(TABLE_NAMES[2], tableNames.get(0));
|
||||
assertEquals(TABLE_NAMES[3], tableNames.get(1));
|
||||
|
||||
// Start after the last table with a limit
|
||||
tableNames = conn.tableNames(TABLE_NAMES[3], 1);
|
||||
assertEquals(0, tableNames.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
Binary file not shown.
@@ -0,0 +1 @@
|
||||
$d51afd07-e3cd-4c76-9b9b-787e13fd55b0<62>=id <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*int3208name <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>*string08
|
||||
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user