mirror of
https://github.com/lancedb/lancedb.git
synced 2026-03-27 02:50:40 +00:00
Compare commits
43 Commits
python-v0.
...
python-v0.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d9e2d51f51 | ||
|
|
e081708cce | ||
|
|
2d60ea6938 | ||
|
|
dcb1443143 | ||
|
|
c0230f91d2 | ||
|
|
5d629c9ecb | ||
|
|
14973ac9d1 | ||
|
|
70cbee6293 | ||
|
|
02783bf440 | ||
|
|
4323ca0147 | ||
|
|
bd3dd6a8e5 | ||
|
|
3c1162612e | ||
|
|
53c7c560c9 | ||
|
|
de4f77800d | ||
|
|
b6ab721cf7 | ||
|
|
027d53500b | ||
|
|
9098f47e73 | ||
|
|
826a3e5ee9 | ||
|
|
9fac56252e | ||
|
|
c55ca20c1b | ||
|
|
5cdb15feef | ||
|
|
7a3eea927f | ||
|
|
5dd9b072d8 | ||
|
|
6dde379d44 | ||
|
|
55f09ef1cd | ||
|
|
e9d8651d18 | ||
|
|
071f467571 | ||
|
|
f83aa25119 | ||
|
|
0a8fe4d026 | ||
|
|
3ad7be9825 | ||
|
|
589041d842 | ||
|
|
2e4cd56ab1 | ||
|
|
6fd8586fa7 | ||
|
|
6329b57604 | ||
|
|
c51b13e70f | ||
|
|
0859312b83 | ||
|
|
a6e8ec8d48 | ||
|
|
bd2c6d0763 | ||
|
|
fbf4a53475 | ||
|
|
d3e15f3e17 | ||
|
|
9c017d8348 | ||
|
|
c3cc2530b7 | ||
|
|
571295b0d9 |
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.24.1"
|
||||
current_version = "0.26.2"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
2
.github/workflows/cargo-publish.yml
vendored
2
.github/workflows/cargo-publish.yml
vendored
@@ -42,7 +42,7 @@ jobs:
|
||||
name: Report Workflow Failure
|
||||
runs-on: ubuntu-latest
|
||||
needs: [build]
|
||||
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
|
||||
if: always() && failure() && startsWith(github.ref, 'refs/tags/v')
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
|
||||
173
.github/workflows/codex-fix-ci.yml
vendored
Normal file
173
.github/workflows/codex-fix-ci.yml
vendored
Normal file
@@ -0,0 +1,173 @@
|
||||
name: Codex Fix CI
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
workflow_run_url:
|
||||
description: "Failing CI workflow run URL (e.g., https://github.com/lancedb/lancedb/actions/runs/12345678)"
|
||||
required: true
|
||||
type: string
|
||||
branch:
|
||||
description: "Branch to fix (e.g., main, release/v2.0, or feature-branch)"
|
||||
required: true
|
||||
type: string
|
||||
guidelines:
|
||||
description: "Additional guidelines for the fix (optional)"
|
||||
required: false
|
||||
type: string
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
pull-requests: write
|
||||
actions: read
|
||||
|
||||
jobs:
|
||||
fix-ci:
|
||||
runs-on: warp-ubuntu-latest-x64-4x
|
||||
timeout-minutes: 60
|
||||
env:
|
||||
CC: clang
|
||||
CXX: clang++
|
||||
steps:
|
||||
- name: Show inputs
|
||||
run: |
|
||||
echo "workflow_run_url = ${{ inputs.workflow_run_url }}"
|
||||
echo "branch = ${{ inputs.branch }}"
|
||||
echo "guidelines = ${{ inputs.guidelines }}"
|
||||
|
||||
- name: Checkout Repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ inputs.branch }}
|
||||
fetch-depth: 0
|
||||
persist-credentials: true
|
||||
|
||||
- name: Set up Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 20
|
||||
|
||||
- name: Install Codex CLI
|
||||
run: npm install -g @openai/codex
|
||||
|
||||
- name: Install Rust toolchain
|
||||
uses: dtolnay/rust-toolchain@stable
|
||||
with:
|
||||
toolchain: stable
|
||||
components: clippy, rustfmt
|
||||
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y protobuf-compiler libssl-dev
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install Python dependencies
|
||||
run: |
|
||||
pip install maturin ruff pytest pyarrow pandas polars
|
||||
|
||||
- name: Set up Java
|
||||
uses: actions/setup-java@v4
|
||||
with:
|
||||
distribution: temurin
|
||||
java-version: '11'
|
||||
cache: maven
|
||||
|
||||
- name: Install Node.js dependencies for TypeScript bindings
|
||||
run: |
|
||||
cd nodejs
|
||||
npm ci
|
||||
|
||||
- name: Configure git user
|
||||
run: |
|
||||
git config user.name "lancedb automation"
|
||||
git config user.email "robot@lancedb.com"
|
||||
|
||||
- name: Run Codex to fix CI failure
|
||||
env:
|
||||
WORKFLOW_RUN_URL: ${{ inputs.workflow_run_url }}
|
||||
BRANCH: ${{ inputs.branch }}
|
||||
GUIDELINES: ${{ inputs.guidelines }}
|
||||
GITHUB_TOKEN: ${{ secrets.ROBOT_TOKEN }}
|
||||
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
|
||||
OPENAI_API_KEY: ${{ secrets.CODEX_TOKEN }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
cat <<EOF >/tmp/codex-prompt.txt
|
||||
You are running inside the lancedb repository on a GitHub Actions runner. Your task is to fix a CI failure.
|
||||
|
||||
Input parameters:
|
||||
- Failing workflow run URL: ${WORKFLOW_RUN_URL}
|
||||
- Branch to fix: ${BRANCH}
|
||||
- Additional guidelines: ${GUIDELINES:-"None provided"}
|
||||
|
||||
Follow these steps exactly:
|
||||
|
||||
1. Extract the run ID from the workflow URL. The URL format is https://github.com/lancedb/lancedb/actions/runs/<run_id>.
|
||||
|
||||
2. Use "gh run view <run_id> --json jobs,conclusion,name" to get information about the failed run.
|
||||
|
||||
3. Identify which jobs failed. For each failed job, use "gh run view <run_id> --job <job_id> --log-failed" to get the failure logs.
|
||||
|
||||
4. Analyze the failure logs to understand what went wrong. Common failures include:
|
||||
- Compilation errors
|
||||
- Test failures
|
||||
- Clippy warnings treated as errors
|
||||
- Formatting issues
|
||||
- Dependency issues
|
||||
|
||||
5. Based on the analysis, fix the issues in the codebase:
|
||||
- For compilation errors: Fix the code that doesn't compile
|
||||
- For test failures: Fix the failing tests or the code they test
|
||||
- For clippy warnings: Apply the suggested fixes
|
||||
- For formatting issues: Run "cargo fmt --all"
|
||||
- For other issues: Apply appropriate fixes
|
||||
|
||||
6. After making fixes, verify them locally:
|
||||
- Run "cargo fmt --all" to ensure formatting is correct
|
||||
- Run "cargo clippy --workspace --tests --all-features -- -D warnings" to check for issues
|
||||
- Run ONLY the specific failing tests to confirm they pass now:
|
||||
- For Rust test failures: Run the specific test with "cargo test -p <crate> <test_name>"
|
||||
- For Python test failures: Build with "cd python && maturin develop" then run "pytest <specific_test_file>::<test_name>"
|
||||
- For Java test failures: Run "cd java && mvn test -Dtest=<TestClass>#<testMethod>"
|
||||
- For TypeScript test failures: Run "cd nodejs && npm run build && npm test -- --testNamePattern='<test_name>'"
|
||||
- Do NOT run the full test suite - only run the tests that were failing
|
||||
|
||||
7. If additional guidelines were provided above, follow them as well.
|
||||
|
||||
8. Inspect "git status --short" and "git diff" to review your changes.
|
||||
|
||||
9. Create a fix branch: "git checkout -b codex/fix-ci-<run_id>".
|
||||
|
||||
10. Stage all changes with "git add -A" and commit with message "fix: resolve CI failures from run <run_id>".
|
||||
|
||||
11. Push the branch: "git push origin codex/fix-ci-<run_id>". If the remote branch exists, delete it first with "gh api -X DELETE repos/lancedb/lancedb/git/refs/heads/codex/fix-ci-<run_id>" then push. Do NOT use "git push --force" or "git push -f".
|
||||
|
||||
12. Create a pull request targeting "${BRANCH}":
|
||||
- Title: "ci: <short summary describing the fix>" (e.g., "ci: fix clippy warnings in lancedb" or "ci: resolve test flakiness in vector search")
|
||||
- First, write the PR body to /tmp/pr-body.md using a heredoc (cat <<'PREOF' > /tmp/pr-body.md). The body should include:
|
||||
- Link to the failing workflow run
|
||||
- Summary of what failed
|
||||
- Description of the fixes applied
|
||||
- Then run "gh pr create --base ${BRANCH} --body-file /tmp/pr-body.md".
|
||||
|
||||
13. Display the new PR URL, "git status --short", and a summary of what was fixed.
|
||||
|
||||
Constraints:
|
||||
- Use bash commands for all operations.
|
||||
- Do not merge the PR.
|
||||
- Do not modify GitHub workflow files unless they are the cause of the failure.
|
||||
- If any command fails, diagnose and attempt to fix the issue instead of aborting immediately.
|
||||
- If you cannot fix the issue automatically, create the PR anyway with a clear explanation of what you tried and what remains to be fixed.
|
||||
- The "GH_TOKEN" environment variable is set; use the "gh" CLI for GitHub-related operations.
|
||||
EOF
|
||||
|
||||
printenv OPENAI_API_KEY | codex login --with-api-key
|
||||
codex --config shell_environment_policy.ignore_default_excludes=true exec --dangerously-bypass-approvals-and-sandbox "$(cat /tmp/codex-prompt.txt)"
|
||||
@@ -86,16 +86,17 @@ jobs:
|
||||
You are running inside the lancedb repository on a GitHub Actions runner. Update the Lance dependency to version ${VERSION} and prepare a pull request for maintainers to review.
|
||||
|
||||
Follow these steps exactly:
|
||||
1. Use script "ci/set_lance_version.py" to update Lance dependencies. The script already refreshes Cargo metadata, so allow it to finish even if it takes time.
|
||||
2. Run "cargo clippy --workspace --tests --all-features -- -D warnings". If diagnostics appear, fix them yourself and rerun clippy until it exits cleanly. Do not skip any warnings.
|
||||
3. After clippy succeeds, run "cargo fmt --all" to format the workspace.
|
||||
4. Ensure the repository is clean except for intentional changes. Inspect "git status --short" and "git diff" to confirm the dependency update and any required fixes.
|
||||
5. Create and switch to a new branch named "${BRANCH_NAME}" (replace any duplicated hyphens if necessary).
|
||||
6. Stage all relevant files with "git add -A". Commit using the message "${COMMIT_TYPE}: update lance dependency to v${VERSION}".
|
||||
7. Push the branch to origin. If the branch already exists, force-push your changes.
|
||||
8. env "GH_TOKEN" is available, use "gh" tools for github related operations like creating pull request.
|
||||
9. Create a pull request targeting "main" with title "${COMMIT_TYPE}: update lance dependency to v${VERSION}". First, write the PR body to /tmp/pr-body.md using a heredoc (cat <<'EOF' > /tmp/pr-body.md). The body should summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}). Then run "gh pr create --body-file /tmp/pr-body.md".
|
||||
10. After creating the PR, display the PR URL, "git status --short", and a concise summary of the commands run and their results.
|
||||
1. Use script "ci/set_lance_version.py" to update Lance Rust dependencies. The script already refreshes Cargo metadata, so allow it to finish even if it takes time.
|
||||
2. Update the Java lance-core dependency version in "java/pom.xml": change the "<lance-core.version>...</lance-core.version>" property to "${VERSION}".
|
||||
3. Run "cargo clippy --workspace --tests --all-features -- -D warnings". If diagnostics appear, fix them yourself and rerun clippy until it exits cleanly. Do not skip any warnings.
|
||||
4. After clippy succeeds, run "cargo fmt --all" to format the workspace.
|
||||
5. Ensure the repository is clean except for intentional changes. Inspect "git status --short" and "git diff" to confirm the dependency update and any required fixes.
|
||||
6. Create and switch to a new branch named "${BRANCH_NAME}" (replace any duplicated hyphens if necessary).
|
||||
7. Stage all relevant files with "git add -A". Commit using the message "${COMMIT_TYPE}: update lance dependency to v${VERSION}".
|
||||
8. Push the branch to origin. If the remote branch already exists, delete it first with "gh api -X DELETE repos/lancedb/lancedb/git/refs/heads/${BRANCH_NAME}" then push with "git push origin ${BRANCH_NAME}". Do NOT use "git push --force" or "git push -f".
|
||||
9. The "GH_TOKEN" environment variable is set; use the "gh" CLI for GitHub-related operations such as creating the pull request.
|
||||
10. Create a pull request targeting "main" with title "${COMMIT_TYPE}: update lance dependency to v${VERSION}". First, write the PR body to /tmp/pr-body.md using a heredoc (cat <<'EOF' > /tmp/pr-body.md). The body should summarize the dependency bump, clippy/fmt verification, and link the triggering tag (${TAG}). Then run "gh pr create --body-file /tmp/pr-body.md".
|
||||
11. After creating the PR, display the PR URL, "git status --short", and a concise summary of the commands run and their results.
|
||||
|
||||
Constraints:
|
||||
- Use bash commands; avoid modifying GitHub workflow files other than through the scripted task above.
|
||||
|
||||
1
.github/workflows/nodejs.yml
vendored
1
.github/workflows/nodejs.yml
vendored
@@ -8,6 +8,7 @@ on:
|
||||
paths:
|
||||
- Cargo.toml
|
||||
- nodejs/**
|
||||
- docs/src/js/**
|
||||
- .github/workflows/nodejs.yml
|
||||
- docker-compose.yml
|
||||
|
||||
|
||||
6
.github/workflows/npm-publish.yml
vendored
6
.github/workflows/npm-publish.yml
vendored
@@ -318,7 +318,7 @@ jobs:
|
||||
- name: Setup node
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 20
|
||||
node-version: 24
|
||||
cache: npm
|
||||
cache-dependency-path: nodejs/package-lock.json
|
||||
registry-url: "https://registry.npmjs.org"
|
||||
@@ -348,9 +348,9 @@ jobs:
|
||||
run: find npm
|
||||
- name: Publish
|
||||
env:
|
||||
NODE_AUTH_TOKEN: ${{ secrets.LANCEDB_NPM_REGISTRY_TOKEN }}
|
||||
DRY_RUN: ${{ !startsWith(github.ref, 'refs/tags/v') }}
|
||||
run: |
|
||||
npm config set provenance true
|
||||
ARGS="--access public"
|
||||
if [[ $DRY_RUN == "true" ]]; then
|
||||
ARGS="$ARGS --dry-run"
|
||||
@@ -363,7 +363,7 @@ jobs:
|
||||
name: Report Workflow Failure
|
||||
runs-on: ubuntu-latest
|
||||
needs: [build-lancedb, test-lancedb, publish]
|
||||
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
|
||||
if: always() && failure() && startsWith(github.ref, 'refs/tags/v')
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
|
||||
2
.github/workflows/pypi-publish.yml
vendored
2
.github/workflows/pypi-publish.yml
vendored
@@ -181,7 +181,7 @@ jobs:
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
if: always() && (github.event_name == 'release' || github.event_name == 'workflow_dispatch')
|
||||
if: always() && failure() && startsWith(github.ref, 'refs/tags/python-v')
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: ./.github/actions/create-failure-issue
|
||||
|
||||
853
Cargo.lock
generated
853
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
59
Cargo.toml
59
Cargo.toml
@@ -15,39 +15,40 @@ categories = ["database-implementations"]
|
||||
rust-version = "1.88.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
lance = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-core = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datagen = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-file = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-io = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-index = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-linalg = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-namespace-impls = { "version" = "=1.0.4", default-features = false, "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-table = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-testing = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-datafusion = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-encoding = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance-arrow = { "version" = "=1.0.4", "tag" = "v1.0.4", "git" = "https://github.com/lance-format/lance.git" }
|
||||
lance = { "version" = "=2.0.1", default-features = false }
|
||||
lance-core = "=2.0.1"
|
||||
lance-datagen = "=2.0.1"
|
||||
lance-file = "=2.0.1"
|
||||
lance-io = { "version" = "=2.0.1", default-features = false }
|
||||
lance-index = "=2.0.1"
|
||||
lance-linalg = "=2.0.1"
|
||||
lance-namespace = "=2.0.1"
|
||||
lance-namespace-impls = { "version" = "=2.0.1", default-features = false }
|
||||
lance-table = "=2.0.1"
|
||||
lance-testing = "=2.0.1"
|
||||
lance-datafusion = "=2.0.1"
|
||||
lance-encoding = "=2.0.1"
|
||||
lance-arrow = "=2.0.1"
|
||||
ahash = "0.8"
|
||||
# Note that this one does not include pyarrow
|
||||
arrow = { version = "56.2", optional = false }
|
||||
arrow-array = "56.2"
|
||||
arrow-data = "56.2"
|
||||
arrow-ipc = "56.2"
|
||||
arrow-ord = "56.2"
|
||||
arrow-schema = "56.2"
|
||||
arrow-select = "56.2"
|
||||
arrow-cast = "56.2"
|
||||
arrow = { version = "57.2", optional = false }
|
||||
arrow-array = "57.2"
|
||||
arrow-data = "57.2"
|
||||
arrow-ipc = "57.2"
|
||||
arrow-ord = "57.2"
|
||||
arrow-schema = "57.2"
|
||||
arrow-select = "57.2"
|
||||
arrow-cast = "57.2"
|
||||
async-trait = "0"
|
||||
datafusion = { version = "50.1", default-features = false }
|
||||
datafusion-catalog = "50.1"
|
||||
datafusion-common = { version = "50.1", default-features = false }
|
||||
datafusion-execution = "50.1"
|
||||
datafusion-expr = "50.1"
|
||||
datafusion-physical-plan = "50.1"
|
||||
datafusion = { version = "51.0", default-features = false }
|
||||
datafusion-catalog = "51.0"
|
||||
datafusion-common = { version = "51.0", default-features = false }
|
||||
datafusion-execution = "51.0"
|
||||
datafusion-expr = "51.0"
|
||||
datafusion-physical-plan = "51.0"
|
||||
datafusion-physical-expr = "51.0"
|
||||
env_logger = "0.11"
|
||||
half = { "version" = "2.6.0", default-features = false, features = [
|
||||
half = { "version" = "2.7.1", default-features = false, features = [
|
||||
"num-traits",
|
||||
] }
|
||||
futures = "0"
|
||||
|
||||
9
Makefile
Normal file
9
Makefile
Normal file
@@ -0,0 +1,9 @@
|
||||
.PHONY: licenses
|
||||
|
||||
licenses:
|
||||
cargo about generate about.hbs -o RUST_THIRD_PARTY_LICENSES.html -c about.toml
|
||||
cd python && cargo about generate ../about.hbs -o RUST_THIRD_PARTY_LICENSES.html -c ../about.toml
|
||||
cd python && uv sync --all-extras && uv tool run pip-licenses --python .venv/bin/python --format=markdown --with-urls --output-file=PYTHON_THIRD_PARTY_LICENSES.md
|
||||
cd nodejs && cargo about generate ../about.hbs -o RUST_THIRD_PARTY_LICENSES.html -c ../about.toml
|
||||
cd nodejs && npx license-checker --markdown --out NODEJS_THIRD_PARTY_LICENSES.md
|
||||
cd java && ./mvnw license:aggregate-add-third-party -q
|
||||
15276
RUST_THIRD_PARTY_LICENSES.html
Normal file
15276
RUST_THIRD_PARTY_LICENSES.html
Normal file
File diff suppressed because it is too large
Load Diff
70
about.hbs
Normal file
70
about.hbs
Normal file
@@ -0,0 +1,70 @@
|
||||
<html>
|
||||
|
||||
<head>
|
||||
<style>
|
||||
@media (prefers-color-scheme: dark) {
|
||||
body {
|
||||
background: #333;
|
||||
color: white;
|
||||
}
|
||||
a {
|
||||
color: skyblue;
|
||||
}
|
||||
}
|
||||
.container {
|
||||
font-family: sans-serif;
|
||||
max-width: 800px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
.intro {
|
||||
text-align: center;
|
||||
}
|
||||
.licenses-list {
|
||||
list-style-type: none;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
.license-used-by {
|
||||
margin-top: -10px;
|
||||
}
|
||||
.license-text {
|
||||
max-height: 200px;
|
||||
overflow-y: scroll;
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<main class="container">
|
||||
<div class="intro">
|
||||
<h1>Third Party Licenses</h1>
|
||||
<p>This page lists the licenses of the third-party projects used in LanceDB.</p>
|
||||
</div>
|
||||
|
||||
<h2>Overview of licenses:</h2>
|
||||
<ul class="licenses-overview">
|
||||
{{#each overview}}
|
||||
<li><a href="#{{id}}">{{name}}</a> ({{count}})</li>
|
||||
{{/each}}
|
||||
</ul>
|
||||
|
||||
<h2>All license text:</h2>
|
||||
<ul class="licenses-list">
|
||||
{{#each licenses}}
|
||||
<li class="license">
|
||||
<h3 id="{{id}}">{{name}}</h3>
|
||||
<h4>Used by:</h4>
|
||||
<ul class="license-used-by">
|
||||
{{#each used_by}}
|
||||
<li><a href="{{#if crate.repository}} {{crate.repository}} {{else}} https://crates.io/crates/{{crate.name}} {{/if}}">{{crate.name}} {{crate.version}}</a></li>
|
||||
{{/each}}
|
||||
</ul>
|
||||
<pre class="license-text">{{text}}</pre>
|
||||
</li>
|
||||
{{/each}}
|
||||
</ul>
|
||||
</main>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
18
about.toml
Normal file
18
about.toml
Normal file
@@ -0,0 +1,18 @@
|
||||
accepted = [
|
||||
"0BSD",
|
||||
"Apache-2.0",
|
||||
"Apache-2.0 WITH LLVM-exception",
|
||||
"BSD-2-Clause",
|
||||
"BSD-3-Clause",
|
||||
"BSL-1.0",
|
||||
"bzip2-1.0.6",
|
||||
"CC0-1.0",
|
||||
"CDDL-1.0",
|
||||
"CDLA-Permissive-2.0",
|
||||
"ISC",
|
||||
"MIT",
|
||||
"MPL-2.0",
|
||||
"OpenSSL",
|
||||
"Unicode-3.0",
|
||||
"Zlib",
|
||||
]
|
||||
@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
|
||||
<dependency>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-core</artifactId>
|
||||
<version>0.24.1</version>
|
||||
<version>0.26.2</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
|
||||
@@ -367,6 +367,27 @@ Use [Table.listIndices](Table.md#listindices) to find the names of the indices.
|
||||
|
||||
***
|
||||
|
||||
### initialStorageOptions()
|
||||
|
||||
```ts
|
||||
abstract initialStorageOptions(): Promise<undefined | null | Record<string, string>>
|
||||
```
|
||||
|
||||
Get the initial storage options that were passed in when opening this table.
|
||||
|
||||
For dynamically refreshed options (e.g., credential vending), use
|
||||
[Table.latestStorageOptions](Table.md#lateststorageoptions).
|
||||
|
||||
Warning: This is an internal API and the return value is subject to change.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`undefined` \| `null` \| `Record`<`string`, `string`>>
|
||||
|
||||
The storage options, or undefined if no storage options were configured.
|
||||
|
||||
***
|
||||
|
||||
### isOpen()
|
||||
|
||||
```ts
|
||||
@@ -381,6 +402,28 @@ Return true if the table has not been closed
|
||||
|
||||
***
|
||||
|
||||
### latestStorageOptions()
|
||||
|
||||
```ts
|
||||
abstract latestStorageOptions(): Promise<undefined | null | Record<string, string>>
|
||||
```
|
||||
|
||||
Get the latest storage options, refreshing from provider if configured.
|
||||
|
||||
This method is useful for credential vending scenarios where storage options
|
||||
may be refreshed dynamically. If no dynamic provider is configured, this
|
||||
returns the initial static options.
|
||||
|
||||
Warning: This is an internal API and the return value is subject to change.
|
||||
|
||||
#### Returns
|
||||
|
||||
`Promise`<`undefined` \| `null` \| `Record`<`string`, `string`>>
|
||||
|
||||
The storage options, or undefined if no storage options were configured.
|
||||
|
||||
***
|
||||
|
||||
### listIndices()
|
||||
|
||||
```ts
|
||||
@@ -705,8 +748,11 @@ Create a query that returns a subset of the rows in the table.
|
||||
|
||||
#### Parameters
|
||||
|
||||
* **rowIds**: `number`[]
|
||||
* **rowIds**: readonly (`number` \| `bigint`)[]
|
||||
The row ids of the rows to return.
|
||||
Row ids returned by `withRowId()` are `bigint`, so `bigint[]` is supported.
|
||||
For convenience / backwards compatibility, `number[]` is also accepted (for
|
||||
small row ids that fit in a safe integer).
|
||||
|
||||
#### Returns
|
||||
|
||||
|
||||
71
java/JAVA_THIRD_PARTY_LICENSES.md
Normal file
71
java/JAVA_THIRD_PARTY_LICENSES.md
Normal file
@@ -0,0 +1,71 @@
|
||||
|
||||
List of third-party dependencies grouped by their license type.
|
||||
|
||||
Apache 2.0:
|
||||
|
||||
* error-prone annotations (com.google.errorprone:error_prone_annotations:2.28.0 - https://errorprone.info/error_prone_annotations)
|
||||
|
||||
Apache License 2.0:
|
||||
|
||||
* JsonNullable Jackson module (org.openapitools:jackson-databind-nullable:0.2.6 - https://github.com/OpenAPITools/jackson-databind-nullable)
|
||||
|
||||
Apache License V2.0:
|
||||
|
||||
* FlatBuffers Java API (com.google.flatbuffers:flatbuffers-java:23.5.26 - https://github.com/google/flatbuffers)
|
||||
|
||||
Apache License, Version 2.0:
|
||||
|
||||
* Apache Commons Codec (commons-codec:commons-codec:1.15 - https://commons.apache.org/proper/commons-codec/)
|
||||
* Apache HttpClient (org.apache.httpcomponents.client5:httpclient5:5.2.1 - https://hc.apache.org/httpcomponents-client-5.0.x/5.2.1/httpclient5/)
|
||||
* Apache HttpComponents Core HTTP/1.1 (org.apache.httpcomponents.core5:httpcore5:5.2 - https://hc.apache.org/httpcomponents-core-5.2.x/5.2/httpcore5/)
|
||||
* Apache HttpComponents Core HTTP/2 (org.apache.httpcomponents.core5:httpcore5-h2:5.2 - https://hc.apache.org/httpcomponents-core-5.2.x/5.2/httpcore5-h2/)
|
||||
* Arrow Format (org.apache.arrow:arrow-format:15.0.0 - https://arrow.apache.org/arrow-format/)
|
||||
* Arrow Java C Data Interface (org.apache.arrow:arrow-c-data:15.0.0 - https://arrow.apache.org/arrow-c-data/)
|
||||
* Arrow Java Dataset (org.apache.arrow:arrow-dataset:15.0.0 - https://arrow.apache.org/arrow-dataset/)
|
||||
* Arrow Memory - Core (org.apache.arrow:arrow-memory-core:15.0.0 - https://arrow.apache.org/arrow-memory/arrow-memory-core/)
|
||||
* Arrow Memory - Netty (org.apache.arrow:arrow-memory-netty:15.0.0 - https://arrow.apache.org/arrow-memory/arrow-memory-netty/)
|
||||
* Arrow Vectors (org.apache.arrow:arrow-vector:15.0.0 - https://arrow.apache.org/arrow-vector/)
|
||||
* Guava: Google Core Libraries for Java (com.google.guava:guava:33.3.1-jre - https://github.com/google/guava)
|
||||
* J2ObjC Annotations (com.google.j2objc:j2objc-annotations:3.0.0 - https://github.com/google/j2objc/)
|
||||
* Netty/Buffer (io.netty:netty-buffer:4.1.104.Final - https://netty.io/netty-buffer/)
|
||||
* Netty/Common (io.netty:netty-common:4.1.104.Final - https://netty.io/netty-common/)
|
||||
|
||||
Apache-2.0:
|
||||
|
||||
* Apache Commons Lang (org.apache.commons:commons-lang3:3.18.0 - https://commons.apache.org/proper/commons-lang/)
|
||||
* lance-namespace-apache-client (org.lance:lance-namespace-apache-client:0.4.5 - https://github.com/openapitools/openapi-generator)
|
||||
* lance-namespace-core (org.lance:lance-namespace-core:0.4.5 - https://lance.org/format/namespace/lance-namespace-core/)
|
||||
|
||||
EDL 1.0:
|
||||
|
||||
* Jakarta Activation API jar (jakarta.activation:jakarta.activation-api:1.2.2 - https://github.com/eclipse-ee4j/jaf/jakarta.activation-api)
|
||||
|
||||
Eclipse Distribution License - v 1.0:
|
||||
|
||||
* Eclipse Collections API (org.eclipse.collections:eclipse-collections-api:11.1.0 - https://github.com/eclipse/eclipse-collections/eclipse-collections-api)
|
||||
* Eclipse Collections Main Library (org.eclipse.collections:eclipse-collections:11.1.0 - https://github.com/eclipse/eclipse-collections/eclipse-collections)
|
||||
* Jakarta XML Binding API (jakarta.xml.bind:jakarta.xml.bind-api:2.3.3 - https://github.com/eclipse-ee4j/jaxb-api/jakarta.xml.bind-api)
|
||||
|
||||
Eclipse Public License - v 1.0:
|
||||
|
||||
* Eclipse Collections API (org.eclipse.collections:eclipse-collections-api:11.1.0 - https://github.com/eclipse/eclipse-collections/eclipse-collections-api)
|
||||
* Eclipse Collections Main Library (org.eclipse.collections:eclipse-collections:11.1.0 - https://github.com/eclipse/eclipse-collections/eclipse-collections)
|
||||
|
||||
The Apache Software License, Version 2.0:
|
||||
|
||||
* FindBugs-jsr305 (com.google.code.findbugs:jsr305:3.0.2 - http://findbugs.sourceforge.net/)
|
||||
* Guava InternalFutureFailureAccess and InternalFutures (com.google.guava:failureaccess:1.0.2 - https://github.com/google/guava/failureaccess)
|
||||
* Guava ListenableFuture only (com.google.guava:listenablefuture:9999.0-empty-to-avoid-conflict-with-guava - https://github.com/google/guava/listenablefuture)
|
||||
* Jackson datatype: JSR310 (com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.16.0 - https://github.com/FasterXML/jackson-modules-java8/jackson-datatype-jsr310)
|
||||
* Jackson module: Old JAXB Annotations (javax.xml.bind) (com.fasterxml.jackson.module:jackson-module-jaxb-annotations:2.17.1 - https://github.com/FasterXML/jackson-modules-base)
|
||||
* Jackson-annotations (com.fasterxml.jackson.core:jackson-annotations:2.16.0 - https://github.com/FasterXML/jackson)
|
||||
* Jackson-core (com.fasterxml.jackson.core:jackson-core:2.16.0 - https://github.com/FasterXML/jackson-core)
|
||||
* jackson-databind (com.fasterxml.jackson.core:jackson-databind:2.15.2 - https://github.com/FasterXML/jackson)
|
||||
* Jackson-JAXRS: base (com.fasterxml.jackson.jaxrs:jackson-jaxrs-base:2.17.1 - https://github.com/FasterXML/jackson-jaxrs-providers/jackson-jaxrs-base)
|
||||
* Jackson-JAXRS: JSON (com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider:2.17.1 - https://github.com/FasterXML/jackson-jaxrs-providers/jackson-jaxrs-json-provider)
|
||||
* JAR JNI Loader (org.questdb:jar-jni:1.1.1 - https://github.com/questdb/rust-maven-plugin)
|
||||
* Lance Core (org.lance:lance-core:2.0.0 - https://lance.org/)
|
||||
|
||||
The MIT License:
|
||||
|
||||
* Checker Qual (org.checkerframework:checker-qual:3.43.0 - https://checkerframework.org/)
|
||||
71
java/lancedb-core/JAVA_THIRD_PARTY_LICENSES.md
Normal file
71
java/lancedb-core/JAVA_THIRD_PARTY_LICENSES.md
Normal file
@@ -0,0 +1,71 @@
|
||||
|
||||
List of third-party dependencies grouped by their license type.
|
||||
|
||||
Apache 2.0:
|
||||
|
||||
* error-prone annotations (com.google.errorprone:error_prone_annotations:2.28.0 - https://errorprone.info/error_prone_annotations)
|
||||
|
||||
Apache License 2.0:
|
||||
|
||||
* JsonNullable Jackson module (org.openapitools:jackson-databind-nullable:0.2.6 - https://github.com/OpenAPITools/jackson-databind-nullable)
|
||||
|
||||
Apache License V2.0:
|
||||
|
||||
* FlatBuffers Java API (com.google.flatbuffers:flatbuffers-java:23.5.26 - https://github.com/google/flatbuffers)
|
||||
|
||||
Apache License, Version 2.0:
|
||||
|
||||
* Apache Commons Codec (commons-codec:commons-codec:1.15 - https://commons.apache.org/proper/commons-codec/)
|
||||
* Apache HttpClient (org.apache.httpcomponents.client5:httpclient5:5.2.1 - https://hc.apache.org/httpcomponents-client-5.0.x/5.2.1/httpclient5/)
|
||||
* Apache HttpComponents Core HTTP/1.1 (org.apache.httpcomponents.core5:httpcore5:5.2 - https://hc.apache.org/httpcomponents-core-5.2.x/5.2/httpcore5/)
|
||||
* Apache HttpComponents Core HTTP/2 (org.apache.httpcomponents.core5:httpcore5-h2:5.2 - https://hc.apache.org/httpcomponents-core-5.2.x/5.2/httpcore5-h2/)
|
||||
* Arrow Format (org.apache.arrow:arrow-format:15.0.0 - https://arrow.apache.org/arrow-format/)
|
||||
* Arrow Java C Data Interface (org.apache.arrow:arrow-c-data:15.0.0 - https://arrow.apache.org/arrow-c-data/)
|
||||
* Arrow Java Dataset (org.apache.arrow:arrow-dataset:15.0.0 - https://arrow.apache.org/arrow-dataset/)
|
||||
* Arrow Memory - Core (org.apache.arrow:arrow-memory-core:15.0.0 - https://arrow.apache.org/arrow-memory/arrow-memory-core/)
|
||||
* Arrow Memory - Netty (org.apache.arrow:arrow-memory-netty:15.0.0 - https://arrow.apache.org/arrow-memory/arrow-memory-netty/)
|
||||
* Arrow Vectors (org.apache.arrow:arrow-vector:15.0.0 - https://arrow.apache.org/arrow-vector/)
|
||||
* Guava: Google Core Libraries for Java (com.google.guava:guava:33.3.1-jre - https://github.com/google/guava)
|
||||
* J2ObjC Annotations (com.google.j2objc:j2objc-annotations:3.0.0 - https://github.com/google/j2objc/)
|
||||
* Netty/Buffer (io.netty:netty-buffer:4.1.104.Final - https://netty.io/netty-buffer/)
|
||||
* Netty/Common (io.netty:netty-common:4.1.104.Final - https://netty.io/netty-common/)
|
||||
|
||||
Apache-2.0:
|
||||
|
||||
* Apache Commons Lang (org.apache.commons:commons-lang3:3.18.0 - https://commons.apache.org/proper/commons-lang/)
|
||||
* lance-namespace-apache-client (org.lance:lance-namespace-apache-client:0.4.5 - https://github.com/openapitools/openapi-generator)
|
||||
* lance-namespace-core (org.lance:lance-namespace-core:0.4.5 - https://lance.org/format/namespace/lance-namespace-core/)
|
||||
|
||||
EDL 1.0:
|
||||
|
||||
* Jakarta Activation API jar (jakarta.activation:jakarta.activation-api:1.2.2 - https://github.com/eclipse-ee4j/jaf/jakarta.activation-api)
|
||||
|
||||
Eclipse Distribution License - v 1.0:
|
||||
|
||||
* Eclipse Collections API (org.eclipse.collections:eclipse-collections-api:11.1.0 - https://github.com/eclipse/eclipse-collections/eclipse-collections-api)
|
||||
* Eclipse Collections Main Library (org.eclipse.collections:eclipse-collections:11.1.0 - https://github.com/eclipse/eclipse-collections/eclipse-collections)
|
||||
* Jakarta XML Binding API (jakarta.xml.bind:jakarta.xml.bind-api:2.3.3 - https://github.com/eclipse-ee4j/jaxb-api/jakarta.xml.bind-api)
|
||||
|
||||
Eclipse Public License - v 1.0:
|
||||
|
||||
* Eclipse Collections API (org.eclipse.collections:eclipse-collections-api:11.1.0 - https://github.com/eclipse/eclipse-collections/eclipse-collections-api)
|
||||
* Eclipse Collections Main Library (org.eclipse.collections:eclipse-collections:11.1.0 - https://github.com/eclipse/eclipse-collections/eclipse-collections)
|
||||
|
||||
The Apache Software License, Version 2.0:
|
||||
|
||||
* FindBugs-jsr305 (com.google.code.findbugs:jsr305:3.0.2 - http://findbugs.sourceforge.net/)
|
||||
* Guava InternalFutureFailureAccess and InternalFutures (com.google.guava:failureaccess:1.0.2 - https://github.com/google/guava/failureaccess)
|
||||
* Guava ListenableFuture only (com.google.guava:listenablefuture:9999.0-empty-to-avoid-conflict-with-guava - https://github.com/google/guava/listenablefuture)
|
||||
* Jackson datatype: JSR310 (com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.16.0 - https://github.com/FasterXML/jackson-modules-java8/jackson-datatype-jsr310)
|
||||
* Jackson module: Old JAXB Annotations (javax.xml.bind) (com.fasterxml.jackson.module:jackson-module-jaxb-annotations:2.17.1 - https://github.com/FasterXML/jackson-modules-base)
|
||||
* Jackson-annotations (com.fasterxml.jackson.core:jackson-annotations:2.16.0 - https://github.com/FasterXML/jackson)
|
||||
* Jackson-core (com.fasterxml.jackson.core:jackson-core:2.16.0 - https://github.com/FasterXML/jackson-core)
|
||||
* jackson-databind (com.fasterxml.jackson.core:jackson-databind:2.15.2 - https://github.com/FasterXML/jackson)
|
||||
* Jackson-JAXRS: base (com.fasterxml.jackson.jaxrs:jackson-jaxrs-base:2.17.1 - https://github.com/FasterXML/jackson-jaxrs-providers/jackson-jaxrs-base)
|
||||
* Jackson-JAXRS: JSON (com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider:2.17.1 - https://github.com/FasterXML/jackson-jaxrs-providers/jackson-jaxrs-json-provider)
|
||||
* JAR JNI Loader (org.questdb:jar-jni:1.1.1 - https://github.com/questdb/rust-maven-plugin)
|
||||
* Lance Core (org.lance:lance-core:2.0.0 - https://lance.org/)
|
||||
|
||||
The MIT License:
|
||||
|
||||
* Checker Qual (org.checkerframework:checker-qual:3.43.0 - https://checkerframework.org/)
|
||||
@@ -8,7 +8,7 @@
|
||||
<parent>
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.24.1-final.0</version>
|
||||
<version>0.26.2-final.0</version>
|
||||
<relativePath>../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
|
||||
20
java/pom.xml
20
java/pom.xml
@@ -6,7 +6,7 @@
|
||||
|
||||
<groupId>com.lancedb</groupId>
|
||||
<artifactId>lancedb-parent</artifactId>
|
||||
<version>0.24.1-final.0</version>
|
||||
<version>0.26.2-final.0</version>
|
||||
<packaging>pom</packaging>
|
||||
<name>${project.artifactId}</name>
|
||||
<description>LanceDB Java SDK Parent POM</description>
|
||||
@@ -28,7 +28,7 @@
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<arrow.version>15.0.0</arrow.version>
|
||||
<lance-core.version>1.0.4</lance-core.version>
|
||||
<lance-core.version>2.0.1</lance-core.version>
|
||||
<spotless.skip>false</spotless.skip>
|
||||
<spotless.version>2.30.0</spotless.version>
|
||||
<spotless.java.googlejavaformat.version>1.7</spotless.java.googlejavaformat.version>
|
||||
@@ -160,6 +160,19 @@
|
||||
<groupId>com.diffplug.spotless</groupId>
|
||||
<artifactId>spotless-maven-plugin</artifactId>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.codehaus.mojo</groupId>
|
||||
<artifactId>license-maven-plugin</artifactId>
|
||||
<version>2.4.0</version>
|
||||
<configuration>
|
||||
<outputDirectory>${project.basedir}</outputDirectory>
|
||||
<thirdPartyFilename>JAVA_THIRD_PARTY_LICENSES.md</thirdPartyFilename>
|
||||
<fileTemplate>/org/codehaus/mojo/license/third-party-file-groupByLicense.ftl</fileTemplate>
|
||||
<includedScopes>compile,runtime</includedScopes>
|
||||
<excludedScopes>test,provided</excludedScopes>
|
||||
<sortArtifactByName>true</sortArtifactByName>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
<pluginManagement>
|
||||
<plugins>
|
||||
@@ -292,11 +305,12 @@
|
||||
<plugin>
|
||||
<groupId>org.sonatype.central</groupId>
|
||||
<artifactId>central-publishing-maven-plugin</artifactId>
|
||||
<version>0.4.0</version>
|
||||
<version>0.8.0</version>
|
||||
<extensions>true</extensions>
|
||||
<configuration>
|
||||
<publishingServerId>ossrh</publishingServerId>
|
||||
<tokenAuth>true</tokenAuth>
|
||||
<autoPublish>true</autoPublish>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "lancedb-nodejs"
|
||||
edition.workspace = true
|
||||
version = "0.24.1"
|
||||
version = "0.26.2"
|
||||
license.workspace = true
|
||||
description.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
668
nodejs/NODEJS_THIRD_PARTY_LICENSES.md
Normal file
668
nodejs/NODEJS_THIRD_PARTY_LICENSES.md
Normal file
@@ -0,0 +1,668 @@
|
||||
[@75lb/deep-merge@1.1.2](https://github.com/75lb/deep-merge) - MIT
|
||||
[@aashutoshrathi/word-wrap@1.2.6](https://github.com/aashutoshrathi/word-wrap) - MIT
|
||||
[@ampproject/remapping@2.2.1](https://github.com/ampproject/remapping) - Apache-2.0
|
||||
[@aws-crypto/crc32@3.0.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
|
||||
[@aws-crypto/crc32c@3.0.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
|
||||
[@aws-crypto/ie11-detection@3.0.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
|
||||
[@aws-crypto/sha1-browser@3.0.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
|
||||
[@aws-crypto/sha256-browser@3.0.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
|
||||
[@aws-crypto/sha256-browser@5.2.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
|
||||
[@aws-crypto/sha256-js@3.0.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
|
||||
[@aws-crypto/sha256-js@5.2.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
|
||||
[@aws-crypto/supports-web-crypto@3.0.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
|
||||
[@aws-crypto/supports-web-crypto@5.2.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
|
||||
[@aws-crypto/util@3.0.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
|
||||
[@aws-crypto/util@5.2.0](https://github.com/aws/aws-sdk-js-crypto-helpers) - Apache-2.0
|
||||
[@aws-sdk/client-dynamodb@3.602.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/client-kms@3.549.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/client-s3@3.550.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/client-sso-oidc@3.549.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/client-sso-oidc@3.600.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/client-sso@3.549.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/client-sso@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/client-sts@3.549.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/client-sts@3.600.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/core@3.549.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/core@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/credential-provider-env@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/credential-provider-env@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/credential-provider-http@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/credential-provider-http@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/credential-provider-ini@3.549.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/credential-provider-ini@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/credential-provider-node@3.549.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/credential-provider-node@3.600.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/credential-provider-process@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/credential-provider-process@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/credential-provider-sso@3.549.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/credential-provider-sso@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/credential-provider-web-identity@3.549.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/credential-provider-web-identity@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/endpoint-cache@3.572.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/middleware-bucket-endpoint@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/middleware-endpoint-discovery@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/middleware-expect-continue@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/middleware-flexible-checksums@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/middleware-host-header@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/middleware-host-header@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/middleware-location-constraint@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/middleware-logger@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/middleware-logger@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/middleware-recursion-detection@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/middleware-recursion-detection@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/middleware-sdk-s3@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/middleware-signing@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/middleware-ssec@3.537.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/middleware-user-agent@3.540.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/middleware-user-agent@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/region-config-resolver@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/region-config-resolver@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/signature-v4-multi-region@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/token-providers@3.549.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/token-providers@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/types@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/types@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/util-arn-parser@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/util-endpoints@3.540.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/util-endpoints@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/util-locate-window@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/util-user-agent-browser@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/util-user-agent-browser@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/util-user-agent-node@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/util-user-agent-node@3.598.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/util-utf8-browser@3.259.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@aws-sdk/xml-builder@3.535.0](https://github.com/aws/aws-sdk-js-v3) - Apache-2.0
|
||||
[@babel/code-frame@7.26.2](https://github.com/babel/babel) - MIT
|
||||
[@babel/compat-data@7.23.5](https://github.com/babel/babel) - MIT
|
||||
[@babel/core@7.23.7](https://github.com/babel/babel) - MIT
|
||||
[@babel/generator@7.23.6](https://github.com/babel/babel) - MIT
|
||||
[@babel/helper-compilation-targets@7.23.6](https://github.com/babel/babel) - MIT
|
||||
[@babel/helper-environment-visitor@7.22.20](https://github.com/babel/babel) - MIT
|
||||
[@babel/helper-function-name@7.23.0](https://github.com/babel/babel) - MIT
|
||||
[@babel/helper-hoist-variables@7.22.5](https://github.com/babel/babel) - MIT
|
||||
[@babel/helper-module-imports@7.22.15](https://github.com/babel/babel) - MIT
|
||||
[@babel/helper-module-transforms@7.23.3](https://github.com/babel/babel) - MIT
|
||||
[@babel/helper-plugin-utils@7.22.5](https://github.com/babel/babel) - MIT
|
||||
[@babel/helper-simple-access@7.22.5](https://github.com/babel/babel) - MIT
|
||||
[@babel/helper-split-export-declaration@7.22.6](https://github.com/babel/babel) - MIT
|
||||
[@babel/helper-string-parser@7.25.9](https://github.com/babel/babel) - MIT
|
||||
[@babel/helper-validator-identifier@7.25.9](https://github.com/babel/babel) - MIT
|
||||
[@babel/helper-validator-option@7.23.5](https://github.com/babel/babel) - MIT
|
||||
[@babel/helpers@7.27.0](https://github.com/babel/babel) - MIT
|
||||
[@babel/parser@7.27.0](https://github.com/babel/babel) - MIT
|
||||
[@babel/plugin-syntax-async-generators@7.8.4](https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-async-generators) - MIT
|
||||
[@babel/plugin-syntax-bigint@7.8.3](https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-bigint) - MIT
|
||||
[@babel/plugin-syntax-class-properties@7.12.13](https://github.com/babel/babel) - MIT
|
||||
[@babel/plugin-syntax-import-meta@7.10.4](https://github.com/babel/babel) - MIT
|
||||
[@babel/plugin-syntax-json-strings@7.8.3](https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-json-strings) - MIT
|
||||
[@babel/plugin-syntax-jsx@7.23.3](https://github.com/babel/babel) - MIT
|
||||
[@babel/plugin-syntax-logical-assignment-operators@7.10.4](https://github.com/babel/babel) - MIT
|
||||
[@babel/plugin-syntax-nullish-coalescing-operator@7.8.3](https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-nullish-coalescing-operator) - MIT
|
||||
[@babel/plugin-syntax-numeric-separator@7.10.4](https://github.com/babel/babel) - MIT
|
||||
[@babel/plugin-syntax-object-rest-spread@7.8.3](https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-object-rest-spread) - MIT
|
||||
[@babel/plugin-syntax-optional-catch-binding@7.8.3](https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-optional-catch-binding) - MIT
|
||||
[@babel/plugin-syntax-optional-chaining@7.8.3](https://github.com/babel/babel/tree/master/packages/babel-plugin-syntax-optional-chaining) - MIT
|
||||
[@babel/plugin-syntax-top-level-await@7.14.5](https://github.com/babel/babel) - MIT
|
||||
[@babel/plugin-syntax-typescript@7.23.3](https://github.com/babel/babel) - MIT
|
||||
[@babel/template@7.27.0](https://github.com/babel/babel) - MIT
|
||||
[@babel/traverse@7.23.7](https://github.com/babel/babel) - MIT
|
||||
[@babel/types@7.27.0](https://github.com/babel/babel) - MIT
|
||||
[@bcoe/v8-coverage@0.2.3](https://github.com/demurgos/v8-coverage) - MIT
|
||||
[@biomejs/biome@1.8.3](https://github.com/biomejs/biome) - MIT OR Apache-2.0
|
||||
[@biomejs/cli-darwin-arm64@1.8.3](https://github.com/biomejs/biome) - MIT OR Apache-2.0
|
||||
[@eslint-community/eslint-utils@4.4.0](https://github.com/eslint-community/eslint-utils) - MIT
|
||||
[@eslint-community/regexpp@4.10.0](https://github.com/eslint-community/regexpp) - MIT
|
||||
[@eslint/eslintrc@2.1.4](https://github.com/eslint/eslintrc) - MIT
|
||||
[@eslint/js@8.57.0](https://github.com/eslint/eslint) - MIT
|
||||
[@huggingface/jinja@0.3.2](https://github.com/huggingface/huggingface.js) - MIT
|
||||
[@huggingface/transformers@3.0.2](https://github.com/huggingface/transformers.js) - Apache-2.0
|
||||
[@humanwhocodes/config-array@0.11.14](https://github.com/humanwhocodes/config-array) - Apache-2.0
|
||||
[@humanwhocodes/module-importer@1.0.1](https://github.com/humanwhocodes/module-importer) - Apache-2.0
|
||||
[@humanwhocodes/object-schema@2.0.2](https://github.com/humanwhocodes/object-schema) - BSD-3-Clause
|
||||
[@img/sharp-darwin-arm64@0.33.5](https://github.com/lovell/sharp) - Apache-2.0
|
||||
[@img/sharp-libvips-darwin-arm64@1.0.4](https://github.com/lovell/sharp-libvips) - LGPL-3.0-or-later
|
||||
[@isaacs/cliui@8.0.2](https://github.com/yargs/cliui) - ISC
|
||||
[@isaacs/fs-minipass@4.0.1](https://github.com/npm/fs-minipass) - ISC
|
||||
[@istanbuljs/load-nyc-config@1.1.0](https://github.com/istanbuljs/load-nyc-config) - ISC
|
||||
[@istanbuljs/schema@0.1.3](https://github.com/istanbuljs/schema) - MIT
|
||||
[@jest/console@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[@jest/core@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[@jest/environment@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[@jest/expect-utils@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[@jest/expect@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[@jest/fake-timers@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[@jest/globals@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[@jest/reporters@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[@jest/schemas@29.6.3](https://github.com/jestjs/jest) - MIT
|
||||
[@jest/source-map@29.6.3](https://github.com/jestjs/jest) - MIT
|
||||
[@jest/test-result@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[@jest/test-sequencer@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[@jest/transform@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[@jest/types@29.6.3](https://github.com/jestjs/jest) - MIT
|
||||
[@jridgewell/gen-mapping@0.3.3](https://github.com/jridgewell/gen-mapping) - MIT
|
||||
[@jridgewell/resolve-uri@3.1.1](https://github.com/jridgewell/resolve-uri) - MIT
|
||||
[@jridgewell/set-array@1.1.2](https://github.com/jridgewell/set-array) - MIT
|
||||
[@jridgewell/sourcemap-codec@1.4.15](https://github.com/jridgewell/sourcemap-codec) - MIT
|
||||
[@jridgewell/trace-mapping@0.3.22](https://github.com/jridgewell/trace-mapping) - MIT
|
||||
[@lancedb/lancedb@0.26.2](https://github.com/lancedb/lancedb) - Apache-2.0
|
||||
[@napi-rs/cli@2.18.3](https://github.com/napi-rs/napi-rs) - MIT
|
||||
[@nodelib/fs.scandir@2.1.5](https://github.com/nodelib/nodelib/tree/master/packages/fs/fs.scandir) - MIT
|
||||
[@nodelib/fs.stat@2.0.5](https://github.com/nodelib/nodelib/tree/master/packages/fs/fs.stat) - MIT
|
||||
[@nodelib/fs.walk@1.2.8](https://github.com/nodelib/nodelib/tree/master/packages/fs/fs.walk) - MIT
|
||||
[@pkgjs/parseargs@0.11.0](https://github.com/pkgjs/parseargs) - MIT
|
||||
[@protobufjs/aspromise@1.1.2](https://github.com/dcodeIO/protobuf.js) - BSD-3-Clause
|
||||
[@protobufjs/base64@1.1.2](https://github.com/dcodeIO/protobuf.js) - BSD-3-Clause
|
||||
[@protobufjs/codegen@2.0.4](https://github.com/dcodeIO/protobuf.js) - BSD-3-Clause
|
||||
[@protobufjs/eventemitter@1.1.0](https://github.com/dcodeIO/protobuf.js) - BSD-3-Clause
|
||||
[@protobufjs/fetch@1.1.0](https://github.com/dcodeIO/protobuf.js) - BSD-3-Clause
|
||||
[@protobufjs/float@1.0.2](https://github.com/dcodeIO/protobuf.js) - BSD-3-Clause
|
||||
[@protobufjs/inquire@1.1.0](https://github.com/dcodeIO/protobuf.js) - BSD-3-Clause
|
||||
[@protobufjs/path@1.1.2](https://github.com/dcodeIO/protobuf.js) - BSD-3-Clause
|
||||
[@protobufjs/pool@1.1.0](https://github.com/dcodeIO/protobuf.js) - BSD-3-Clause
|
||||
[@protobufjs/utf8@1.1.0](https://github.com/dcodeIO/protobuf.js) - BSD-3-Clause
|
||||
[@shikijs/core@1.10.3](https://github.com/shikijs/shiki) - MIT
|
||||
[@sinclair/typebox@0.27.8](https://github.com/sinclairzx81/typebox) - MIT
|
||||
[@sinonjs/commons@3.0.1](https://github.com/sinonjs/commons) - BSD-3-Clause
|
||||
[@sinonjs/fake-timers@10.3.0](https://github.com/sinonjs/fake-timers) - BSD-3-Clause
|
||||
[@smithy/abort-controller@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/abort-controller@3.1.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/chunked-blob-reader-native@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/chunked-blob-reader@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/config-resolver@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/config-resolver@3.0.3](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/core@1.4.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/core@2.2.3](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/credential-provider-imds@2.3.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/credential-provider-imds@3.1.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/eventstream-codec@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/eventstream-serde-browser@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/eventstream-serde-config-resolver@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/eventstream-serde-node@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/eventstream-serde-universal@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/fetch-http-handler@2.5.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/fetch-http-handler@3.1.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/hash-blob-browser@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/hash-node@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/hash-node@3.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/hash-stream-node@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/invalid-dependency@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/invalid-dependency@3.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/is-array-buffer@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/is-array-buffer@3.0.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/md5-js@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/middleware-content-length@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/middleware-content-length@3.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/middleware-endpoint@2.5.1](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/middleware-endpoint@3.0.3](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/middleware-retry@2.3.1](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/middleware-retry@3.0.6](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/middleware-serde@2.3.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/middleware-serde@3.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/middleware-stack@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/middleware-stack@3.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/node-config-provider@2.3.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/node-config-provider@3.1.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/node-http-handler@2.5.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/node-http-handler@3.1.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/property-provider@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/property-provider@3.1.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/protocol-http@3.3.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/protocol-http@4.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/querystring-builder@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/querystring-builder@3.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/querystring-parser@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/querystring-parser@3.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/service-error-classification@2.1.5](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/service-error-classification@3.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/shared-ini-file-loader@2.4.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/shared-ini-file-loader@3.1.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/signature-v4@2.2.1](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/signature-v4@3.1.1](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/smithy-client@2.5.1](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/smithy-client@3.1.4](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/types@2.12.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/types@3.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/url-parser@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/url-parser@3.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-base64@2.3.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-base64@3.0.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-body-length-browser@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-body-length-browser@3.0.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-body-length-node@2.3.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-body-length-node@3.0.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-buffer-from@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-buffer-from@3.0.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-config-provider@2.3.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-config-provider@3.0.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-defaults-mode-browser@2.2.1](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-defaults-mode-browser@3.0.6](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-defaults-mode-node@2.3.1](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-defaults-mode-node@3.0.6](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-endpoints@1.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-endpoints@2.0.3](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-hex-encoding@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-hex-encoding@3.0.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-middleware@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-middleware@3.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-retry@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-retry@3.0.2](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-stream@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-stream@3.0.4](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-uri-escape@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-uri-escape@3.0.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-utf8@2.3.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-utf8@3.0.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-waiter@2.2.0](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@smithy/util-waiter@3.1.1](https://github.com/awslabs/smithy-typescript) - Apache-2.0
|
||||
[@swc/helpers@0.5.12](https://github.com/swc-project/swc) - Apache-2.0
|
||||
[@types/axios@0.14.0](https://github.com/mzabriskie/axios) - MIT
|
||||
[@types/babel__core@7.20.5](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/babel__generator@7.6.8](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/babel__template@7.4.4](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/babel__traverse@7.20.5](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/command-line-args@5.2.3](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/command-line-usage@5.0.2](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/command-line-usage@5.0.4](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/graceful-fs@4.1.9](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/hast@3.0.4](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/istanbul-lib-coverage@2.0.6](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/istanbul-lib-report@3.0.3](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/istanbul-reports@3.0.4](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/jest@29.5.12](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/json-schema@7.0.15](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/node-fetch@2.6.11](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/node@18.19.26](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/node@20.16.10](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/node@20.17.9](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/node@22.7.4](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/semver@7.5.6](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/stack-utils@2.0.3](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/tmp@0.2.6](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/unist@3.0.2](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/yargs-parser@21.0.3](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@types/yargs@17.0.32](https://github.com/DefinitelyTyped/DefinitelyTyped) - MIT
|
||||
[@typescript-eslint/eslint-plugin@7.1.0](https://github.com/typescript-eslint/typescript-eslint) - MIT
|
||||
[@typescript-eslint/parser@7.1.0](https://github.com/typescript-eslint/typescript-eslint) - BSD-2-Clause
|
||||
[@typescript-eslint/scope-manager@7.1.0](https://github.com/typescript-eslint/typescript-eslint) - MIT
|
||||
[@typescript-eslint/type-utils@7.1.0](https://github.com/typescript-eslint/typescript-eslint) - MIT
|
||||
[@typescript-eslint/types@7.1.0](https://github.com/typescript-eslint/typescript-eslint) - MIT
|
||||
[@typescript-eslint/typescript-estree@7.1.0](https://github.com/typescript-eslint/typescript-eslint) - BSD-2-Clause
|
||||
[@typescript-eslint/utils@7.1.0](https://github.com/typescript-eslint/typescript-eslint) - MIT
|
||||
[@typescript-eslint/visitor-keys@7.1.0](https://github.com/typescript-eslint/typescript-eslint) - MIT
|
||||
[@ungap/structured-clone@1.2.0](https://github.com/ungap/structured-clone) - ISC
|
||||
[abort-controller@3.0.0](https://github.com/mysticatea/abort-controller) - MIT
|
||||
[acorn-jsx@5.3.2](https://github.com/acornjs/acorn-jsx) - MIT
|
||||
[acorn@8.11.3](https://github.com/acornjs/acorn) - MIT
|
||||
[agentkeepalive@4.5.0](https://github.com/node-modules/agentkeepalive) - MIT
|
||||
[ajv@6.12.6](https://github.com/ajv-validator/ajv) - MIT
|
||||
[ansi-escapes@4.3.2](https://github.com/sindresorhus/ansi-escapes) - MIT
|
||||
[ansi-regex@5.0.1](https://github.com/chalk/ansi-regex) - MIT
|
||||
[ansi-regex@6.1.0](https://github.com/chalk/ansi-regex) - MIT
|
||||
[ansi-styles@4.3.0](https://github.com/chalk/ansi-styles) - MIT
|
||||
[ansi-styles@5.2.0](https://github.com/chalk/ansi-styles) - MIT
|
||||
[ansi-styles@6.2.1](https://github.com/chalk/ansi-styles) - MIT
|
||||
[anymatch@3.1.3](https://github.com/micromatch/anymatch) - ISC
|
||||
[apache-arrow@15.0.0](https://github.com/apache/arrow) - Apache-2.0
|
||||
[apache-arrow@16.0.0](https://github.com/apache/arrow) - Apache-2.0
|
||||
[apache-arrow@17.0.0](https://github.com/apache/arrow) - Apache-2.0
|
||||
[apache-arrow@18.0.0](https://github.com/apache/arrow) - Apache-2.0
|
||||
[argparse@1.0.10](https://github.com/nodeca/argparse) - MIT
|
||||
[argparse@2.0.1](https://github.com/nodeca/argparse) - Python-2.0
|
||||
[array-back@3.1.0](https://github.com/75lb/array-back) - MIT
|
||||
[array-back@6.2.2](https://github.com/75lb/array-back) - MIT
|
||||
[array-union@2.1.0](https://github.com/sindresorhus/array-union) - MIT
|
||||
[asynckit@0.4.0](https://github.com/alexindigo/asynckit) - MIT
|
||||
[axios@1.8.4](https://github.com/axios/axios) - MIT
|
||||
[babel-jest@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[babel-plugin-istanbul@6.1.1](https://github.com/istanbuljs/babel-plugin-istanbul) - BSD-3-Clause
|
||||
[babel-plugin-jest-hoist@29.6.3](https://github.com/jestjs/jest) - MIT
|
||||
[babel-preset-current-node-syntax@1.0.1](https://github.com/nicolo-ribaudo/babel-preset-current-node-syntax) - MIT
|
||||
[babel-preset-jest@29.6.3](https://github.com/jestjs/jest) - MIT
|
||||
[balanced-match@1.0.2](https://github.com/juliangruber/balanced-match) - MIT
|
||||
[base-64@0.1.0](https://github.com/mathiasbynens/base64) - MIT
|
||||
[bowser@2.11.0](https://github.com/lancedikson/bowser) - MIT
|
||||
[brace-expansion@1.1.11](https://github.com/juliangruber/brace-expansion) - MIT
|
||||
[brace-expansion@2.0.1](https://github.com/juliangruber/brace-expansion) - MIT
|
||||
[braces@3.0.3](https://github.com/micromatch/braces) - MIT
|
||||
[browserslist@4.22.2](https://github.com/browserslist/browserslist) - MIT
|
||||
[bs-logger@0.2.6](https://github.com/huafu/bs-logger) - MIT
|
||||
[bser@2.1.1](https://github.com/facebook/watchman) - Apache-2.0
|
||||
[buffer-from@1.1.2](https://github.com/LinusU/buffer-from) - MIT
|
||||
[callsites@3.1.0](https://github.com/sindresorhus/callsites) - MIT
|
||||
[camelcase@5.3.1](https://github.com/sindresorhus/camelcase) - MIT
|
||||
[camelcase@6.3.0](https://github.com/sindresorhus/camelcase) - MIT
|
||||
[caniuse-lite@1.0.30001579](https://github.com/browserslist/caniuse-lite) - CC-BY-4.0
|
||||
[chalk-template@0.4.0](https://github.com/chalk/chalk-template) - MIT
|
||||
[chalk@4.1.2](https://github.com/chalk/chalk) - MIT
|
||||
[char-regex@1.0.2](https://github.com/Richienb/char-regex) - MIT
|
||||
[charenc@0.0.2](https://github.com/pvorb/node-charenc) - BSD-3-Clause
|
||||
[chownr@3.0.0](https://github.com/isaacs/chownr) - BlueOak-1.0.0
|
||||
[ci-info@3.9.0](https://github.com/watson/ci-info) - MIT
|
||||
[cjs-module-lexer@1.2.3](https://github.com/nodejs/cjs-module-lexer) - MIT
|
||||
[cliui@8.0.1](https://github.com/yargs/cliui) - ISC
|
||||
[co@4.6.0](https://github.com/tj/co) - MIT
|
||||
[collect-v8-coverage@1.0.2](https://github.com/SimenB/collect-v8-coverage) - MIT
|
||||
[color-convert@2.0.1](https://github.com/Qix-/color-convert) - MIT
|
||||
[color-name@1.1.4](https://github.com/colorjs/color-name) - MIT
|
||||
[color-string@1.9.1](https://github.com/Qix-/color-string) - MIT
|
||||
[color@4.2.3](https://github.com/Qix-/color) - MIT
|
||||
[combined-stream@1.0.8](https://github.com/felixge/node-combined-stream) - MIT
|
||||
[command-line-args@5.2.1](https://github.com/75lb/command-line-args) - MIT
|
||||
[command-line-usage@7.0.1](https://github.com/75lb/command-line-usage) - MIT
|
||||
[concat-map@0.0.1](https://github.com/substack/node-concat-map) - MIT
|
||||
[convert-source-map@2.0.0](https://github.com/thlorenz/convert-source-map) - MIT
|
||||
[create-jest@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[cross-spawn@7.0.6](https://github.com/moxystudio/node-cross-spawn) - MIT
|
||||
[crypt@0.0.2](https://github.com/pvorb/node-crypt) - BSD-3-Clause
|
||||
[debug@4.3.4](https://github.com/debug-js/debug) - MIT
|
||||
[dedent@1.5.1](https://github.com/dmnd/dedent) - MIT
|
||||
[deep-is@0.1.4](https://github.com/thlorenz/deep-is) - MIT
|
||||
[deepmerge@4.3.1](https://github.com/TehShrike/deepmerge) - MIT
|
||||
[delayed-stream@1.0.0](https://github.com/felixge/node-delayed-stream) - MIT
|
||||
[detect-libc@2.0.3](https://github.com/lovell/detect-libc) - Apache-2.0
|
||||
[detect-newline@3.1.0](https://github.com/sindresorhus/detect-newline) - MIT
|
||||
[diff-sequences@29.6.3](https://github.com/jestjs/jest) - MIT
|
||||
[digest-fetch@1.3.0](https://github.com/devfans/digest-fetch) - ISC
|
||||
[dir-glob@3.0.1](https://github.com/kevva/dir-glob) - MIT
|
||||
[doctrine@3.0.0](https://github.com/eslint/doctrine) - Apache-2.0
|
||||
[eastasianwidth@0.2.0](https://github.com/komagata/eastasianwidth) - MIT
|
||||
[electron-to-chromium@1.4.642](https://github.com/kilian/electron-to-chromium) - ISC
|
||||
[emittery@0.13.1](https://github.com/sindresorhus/emittery) - MIT
|
||||
[emoji-regex@8.0.0](https://github.com/mathiasbynens/emoji-regex) - MIT
|
||||
[emoji-regex@9.2.2](https://github.com/mathiasbynens/emoji-regex) - MIT
|
||||
[entities@4.5.0](https://github.com/fb55/entities) - BSD-2-Clause
|
||||
[error-ex@1.3.2](https://github.com/qix-/node-error-ex) - MIT
|
||||
[escalade@3.1.1](https://github.com/lukeed/escalade) - MIT
|
||||
[escape-string-regexp@2.0.0](https://github.com/sindresorhus/escape-string-regexp) - MIT
|
||||
[escape-string-regexp@4.0.0](https://github.com/sindresorhus/escape-string-regexp) - MIT
|
||||
[eslint-scope@7.2.2](https://github.com/eslint/eslint-scope) - BSD-2-Clause
|
||||
[eslint-visitor-keys@3.4.3](https://github.com/eslint/eslint-visitor-keys) - Apache-2.0
|
||||
[eslint@8.57.0](https://github.com/eslint/eslint) - MIT
|
||||
[espree@9.6.1](https://github.com/eslint/espree) - BSD-2-Clause
|
||||
[esprima@4.0.1](https://github.com/jquery/esprima) - BSD-2-Clause
|
||||
[esquery@1.5.0](https://github.com/estools/esquery) - BSD-3-Clause
|
||||
[esrecurse@4.3.0](https://github.com/estools/esrecurse) - BSD-2-Clause
|
||||
[estraverse@5.3.0](https://github.com/estools/estraverse) - BSD-2-Clause
|
||||
[esutils@2.0.3](https://github.com/estools/esutils) - BSD-2-Clause
|
||||
[event-target-shim@5.0.1](https://github.com/mysticatea/event-target-shim) - MIT
|
||||
[execa@5.1.1](https://github.com/sindresorhus/execa) - MIT
|
||||
[exit@0.1.2](https://github.com/cowboy/node-exit) - MIT
|
||||
[expect@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[fast-deep-equal@3.1.3](https://github.com/epoberezkin/fast-deep-equal) - MIT
|
||||
[fast-glob@3.3.2](https://github.com/mrmlnc/fast-glob) - MIT
|
||||
[fast-json-stable-stringify@2.1.0](https://github.com/epoberezkin/fast-json-stable-stringify) - MIT
|
||||
[fast-levenshtein@2.0.6](https://github.com/hiddentao/fast-levenshtein) - MIT
|
||||
[fast-xml-parser@4.2.5](https://github.com/NaturalIntelligence/fast-xml-parser) - MIT
|
||||
[fastq@1.16.0](https://github.com/mcollina/fastq) - ISC
|
||||
[fb-watchman@2.0.2](https://github.com/facebook/watchman) - Apache-2.0
|
||||
[file-entry-cache@6.0.1](https://github.com/royriojas/file-entry-cache) - MIT
|
||||
[fill-range@7.1.1](https://github.com/jonschlinkert/fill-range) - MIT
|
||||
[find-replace@3.0.0](https://github.com/75lb/find-replace) - MIT
|
||||
[find-up@4.1.0](https://github.com/sindresorhus/find-up) - MIT
|
||||
[find-up@5.0.0](https://github.com/sindresorhus/find-up) - MIT
|
||||
[flat-cache@3.2.0](https://github.com/jaredwray/flat-cache) - MIT
|
||||
[flatbuffers@1.12.0](https://github.com/google/flatbuffers) - Apache*
|
||||
[flatbuffers@23.5.26](https://github.com/google/flatbuffers) - Apache*
|
||||
[flatbuffers@24.3.25](https://github.com/google/flatbuffers) - Apache-2.0
|
||||
[flatted@3.2.9](https://github.com/WebReflection/flatted) - ISC
|
||||
[follow-redirects@1.15.6](https://github.com/follow-redirects/follow-redirects) - MIT
|
||||
[foreground-child@3.3.0](https://github.com/tapjs/foreground-child) - ISC
|
||||
[form-data-encoder@1.7.2](https://github.com/octet-stream/form-data-encoder) - MIT
|
||||
[form-data@4.0.0](https://github.com/form-data/form-data) - MIT
|
||||
[formdata-node@4.4.1](https://github.com/octet-stream/form-data) - MIT
|
||||
[fs.realpath@1.0.0](https://github.com/isaacs/fs.realpath) - ISC
|
||||
[fsevents@2.3.3](https://github.com/fsevents/fsevents) - MIT
|
||||
[function-bind@1.1.2](https://github.com/Raynos/function-bind) - MIT
|
||||
[gensync@1.0.0-beta.2](https://github.com/loganfsmyth/gensync) - MIT
|
||||
[get-caller-file@2.0.5](https://github.com/stefanpenner/get-caller-file) - ISC
|
||||
[get-package-type@0.1.0](https://github.com/cfware/get-package-type) - MIT
|
||||
[get-stream@6.0.1](https://github.com/sindresorhus/get-stream) - MIT
|
||||
[glob-parent@5.1.2](https://github.com/gulpjs/glob-parent) - ISC
|
||||
[glob-parent@6.0.2](https://github.com/gulpjs/glob-parent) - ISC
|
||||
[glob@10.4.5](https://github.com/isaacs/node-glob) - ISC
|
||||
[glob@7.2.3](https://github.com/isaacs/node-glob) - ISC
|
||||
[globals@11.12.0](https://github.com/sindresorhus/globals) - MIT
|
||||
[globals@13.24.0](https://github.com/sindresorhus/globals) - MIT
|
||||
[globby@11.1.0](https://github.com/sindresorhus/globby) - MIT
|
||||
[graceful-fs@4.2.11](https://github.com/isaacs/node-graceful-fs) - ISC
|
||||
[graphemer@1.4.0](https://github.com/flmnt/graphemer) - MIT
|
||||
[guid-typescript@1.0.9](https://github.com/NicolasDeveloper/guid-typescript) - ISC
|
||||
[has-flag@4.0.0](https://github.com/sindresorhus/has-flag) - MIT
|
||||
[hasown@2.0.0](https://github.com/inspect-js/hasOwn) - MIT
|
||||
[html-escaper@2.0.2](https://github.com/WebReflection/html-escaper) - MIT
|
||||
[human-signals@2.1.0](https://github.com/ehmicky/human-signals) - Apache-2.0
|
||||
[humanize-ms@1.2.1](https://github.com/node-modules/humanize-ms) - MIT
|
||||
[ignore@5.3.0](https://github.com/kaelzhang/node-ignore) - MIT
|
||||
[import-fresh@3.3.0](https://github.com/sindresorhus/import-fresh) - MIT
|
||||
[import-local@3.1.0](https://github.com/sindresorhus/import-local) - MIT
|
||||
[imurmurhash@0.1.4](https://github.com/jensyt/imurmurhash-js) - MIT
|
||||
[inflight@1.0.6](https://github.com/npm/inflight) - ISC
|
||||
[inherits@2.0.4](https://github.com/isaacs/inherits) - ISC
|
||||
[interpret@1.4.0](https://github.com/gulpjs/interpret) - MIT
|
||||
[is-arrayish@0.2.1](https://github.com/qix-/node-is-arrayish) - MIT
|
||||
[is-arrayish@0.3.2](https://github.com/qix-/node-is-arrayish) - MIT
|
||||
[is-buffer@1.1.6](https://github.com/feross/is-buffer) - MIT
|
||||
[is-core-module@2.13.1](https://github.com/inspect-js/is-core-module) - MIT
|
||||
[is-extglob@2.1.1](https://github.com/jonschlinkert/is-extglob) - MIT
|
||||
[is-fullwidth-code-point@3.0.0](https://github.com/sindresorhus/is-fullwidth-code-point) - MIT
|
||||
[is-generator-fn@2.1.0](https://github.com/sindresorhus/is-generator-fn) - MIT
|
||||
[is-glob@4.0.3](https://github.com/micromatch/is-glob) - MIT
|
||||
[is-number@7.0.0](https://github.com/jonschlinkert/is-number) - MIT
|
||||
[is-path-inside@3.0.3](https://github.com/sindresorhus/is-path-inside) - MIT
|
||||
[is-stream@2.0.1](https://github.com/sindresorhus/is-stream) - MIT
|
||||
[isexe@2.0.0](https://github.com/isaacs/isexe) - ISC
|
||||
[istanbul-lib-coverage@3.2.2](https://github.com/istanbuljs/istanbuljs) - BSD-3-Clause
|
||||
[istanbul-lib-instrument@5.2.1](https://github.com/istanbuljs/istanbuljs) - BSD-3-Clause
|
||||
[istanbul-lib-instrument@6.0.1](https://github.com/istanbuljs/istanbuljs) - BSD-3-Clause
|
||||
[istanbul-lib-report@3.0.1](https://github.com/istanbuljs/istanbuljs) - BSD-3-Clause
|
||||
[istanbul-lib-source-maps@4.0.1](https://github.com/istanbuljs/istanbuljs) - BSD-3-Clause
|
||||
[istanbul-reports@3.1.6](https://github.com/istanbuljs/istanbuljs) - BSD-3-Clause
|
||||
[jackspeak@3.4.3](https://github.com/isaacs/jackspeak) - BlueOak-1.0.0
|
||||
[jest-changed-files@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[jest-circus@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[jest-cli@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[jest-config@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[jest-diff@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[jest-docblock@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[jest-each@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[jest-environment-node@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[jest-get-type@29.6.3](https://github.com/jestjs/jest) - MIT
|
||||
[jest-haste-map@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[jest-leak-detector@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[jest-matcher-utils@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[jest-message-util@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[jest-mock@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[jest-pnp-resolver@1.2.3](https://github.com/arcanis/jest-pnp-resolver) - MIT
|
||||
[jest-regex-util@29.6.3](https://github.com/jestjs/jest) - MIT
|
||||
[jest-resolve-dependencies@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[jest-resolve@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[jest-runner@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[jest-runtime@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[jest-snapshot@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[jest-util@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[jest-validate@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[jest-watcher@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[jest-worker@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[jest@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[js-tokens@4.0.0](https://github.com/lydell/js-tokens) - MIT
|
||||
[js-yaml@3.14.1](https://github.com/nodeca/js-yaml) - MIT
|
||||
[js-yaml@4.1.0](https://github.com/nodeca/js-yaml) - MIT
|
||||
[jsesc@2.5.2](https://github.com/mathiasbynens/jsesc) - MIT
|
||||
[json-bignum@0.0.3](https://github.com/datalanche/json-bignum) - MIT
|
||||
[json-buffer@3.0.1](https://github.com/dominictarr/json-buffer) - MIT
|
||||
[json-parse-even-better-errors@2.3.1](https://github.com/npm/json-parse-even-better-errors) - MIT
|
||||
[json-schema-traverse@0.4.1](https://github.com/epoberezkin/json-schema-traverse) - MIT
|
||||
[json-stable-stringify-without-jsonify@1.0.1](https://github.com/samn/json-stable-stringify) - MIT
|
||||
[json5@2.2.3](https://github.com/json5/json5) - MIT
|
||||
[keyv@4.5.4](https://github.com/jaredwray/keyv) - MIT
|
||||
[kleur@3.0.3](https://github.com/lukeed/kleur) - MIT
|
||||
[leven@3.1.0](https://github.com/sindresorhus/leven) - MIT
|
||||
[levn@0.4.1](https://github.com/gkz/levn) - MIT
|
||||
[lines-and-columns@1.2.4](https://github.com/eventualbuddha/lines-and-columns) - MIT
|
||||
[linkify-it@5.0.0](https://github.com/markdown-it/linkify-it) - MIT
|
||||
[locate-path@5.0.0](https://github.com/sindresorhus/locate-path) - MIT
|
||||
[locate-path@6.0.0](https://github.com/sindresorhus/locate-path) - MIT
|
||||
[lodash.camelcase@4.3.0](https://github.com/lodash/lodash) - MIT
|
||||
[lodash.memoize@4.1.2](https://github.com/lodash/lodash) - MIT
|
||||
[lodash.merge@4.6.2](https://github.com/lodash/lodash) - MIT
|
||||
[lodash@4.17.21](https://github.com/lodash/lodash) - MIT
|
||||
[long@5.2.3](https://github.com/dcodeIO/long.js) - Apache-2.0
|
||||
[lru-cache@10.4.3](https://github.com/isaacs/node-lru-cache) - ISC
|
||||
[lru-cache@5.1.1](https://github.com/isaacs/node-lru-cache) - ISC
|
||||
[lunr@2.3.9](https://github.com/olivernn/lunr.js) - MIT
|
||||
[make-dir@4.0.0](https://github.com/sindresorhus/make-dir) - MIT
|
||||
[make-error@1.3.6](https://github.com/JsCommunity/make-error) - ISC
|
||||
[makeerror@1.0.12](https://github.com/daaku/nodejs-makeerror) - BSD-3-Clause
|
||||
[markdown-it@14.1.0](https://github.com/markdown-it/markdown-it) - MIT
|
||||
[md5@2.3.0](https://github.com/pvorb/node-md5) - BSD-3-Clause
|
||||
[mdurl@2.0.0](https://github.com/markdown-it/mdurl) - MIT
|
||||
[merge-stream@2.0.0](https://github.com/grncdr/merge-stream) - MIT
|
||||
[merge2@1.4.1](https://github.com/teambition/merge2) - MIT
|
||||
[micromatch@4.0.8](https://github.com/micromatch/micromatch) - MIT
|
||||
[mime-db@1.52.0](https://github.com/jshttp/mime-db) - MIT
|
||||
[mime-types@2.1.35](https://github.com/jshttp/mime-types) - MIT
|
||||
[mimic-fn@2.1.0](https://github.com/sindresorhus/mimic-fn) - MIT
|
||||
[minimatch@3.1.2](https://github.com/isaacs/minimatch) - ISC
|
||||
[minimatch@9.0.3](https://github.com/isaacs/minimatch) - ISC
|
||||
[minimatch@9.0.5](https://github.com/isaacs/minimatch) - ISC
|
||||
[minimist@1.2.8](https://github.com/minimistjs/minimist) - MIT
|
||||
[minipass@7.1.2](https://github.com/isaacs/minipass) - ISC
|
||||
[minizlib@3.0.1](https://github.com/isaacs/minizlib) - MIT
|
||||
[mkdirp@3.0.1](https://github.com/isaacs/node-mkdirp) - MIT
|
||||
[mnemonist@0.38.3](https://github.com/yomguithereal/mnemonist) - MIT
|
||||
[ms@2.1.2](https://github.com/zeit/ms) - MIT
|
||||
[ms@2.1.3](https://github.com/vercel/ms) - MIT
|
||||
[natural-compare@1.4.0](https://github.com/litejs/natural-compare-lite) - MIT
|
||||
[node-domexception@1.0.0](https://github.com/jimmywarting/node-domexception) - MIT
|
||||
[node-fetch@2.7.0](https://github.com/bitinn/node-fetch) - MIT
|
||||
[node-int64@0.4.0](https://github.com/broofa/node-int64) - MIT
|
||||
[node-releases@2.0.14](https://github.com/chicoxyzzy/node-releases) - MIT
|
||||
[normalize-path@3.0.0](https://github.com/jonschlinkert/normalize-path) - MIT
|
||||
[npm-run-path@4.0.1](https://github.com/sindresorhus/npm-run-path) - MIT
|
||||
[obliterator@1.6.1](https://github.com/yomguithereal/obliterator) - MIT
|
||||
[once@1.4.0](https://github.com/isaacs/once) - ISC
|
||||
[onetime@5.1.2](https://github.com/sindresorhus/onetime) - MIT
|
||||
[onnxruntime-common@1.19.2](https://github.com/Microsoft/onnxruntime) - MIT
|
||||
[onnxruntime-common@1.20.0-dev.20241016-2b8fc5529b](https://github.com/Microsoft/onnxruntime) - MIT
|
||||
[onnxruntime-node@1.19.2](https://github.com/Microsoft/onnxruntime) - MIT
|
||||
[onnxruntime-web@1.21.0-dev.20241024-d9ca84ef96](https://github.com/Microsoft/onnxruntime) - MIT
|
||||
[openai@4.29.2](https://github.com/openai/openai-node) - Apache-2.0
|
||||
[optionator@0.9.3](https://github.com/gkz/optionator) - MIT
|
||||
[p-limit@2.3.0](https://github.com/sindresorhus/p-limit) - MIT
|
||||
[p-limit@3.1.0](https://github.com/sindresorhus/p-limit) - MIT
|
||||
[p-locate@4.1.0](https://github.com/sindresorhus/p-locate) - MIT
|
||||
[p-locate@5.0.0](https://github.com/sindresorhus/p-locate) - MIT
|
||||
[p-try@2.2.0](https://github.com/sindresorhus/p-try) - MIT
|
||||
[package-json-from-dist@1.0.1](https://github.com/isaacs/package-json-from-dist) - BlueOak-1.0.0
|
||||
[parent-module@1.0.1](https://github.com/sindresorhus/parent-module) - MIT
|
||||
[parse-json@5.2.0](https://github.com/sindresorhus/parse-json) - MIT
|
||||
[path-exists@4.0.0](https://github.com/sindresorhus/path-exists) - MIT
|
||||
[path-is-absolute@1.0.1](https://github.com/sindresorhus/path-is-absolute) - MIT
|
||||
[path-key@3.1.1](https://github.com/sindresorhus/path-key) - MIT
|
||||
[path-parse@1.0.7](https://github.com/jbgutierrez/path-parse) - MIT
|
||||
[path-scurry@1.11.1](https://github.com/isaacs/path-scurry) - BlueOak-1.0.0
|
||||
[path-type@4.0.0](https://github.com/sindresorhus/path-type) - MIT
|
||||
[picocolors@1.0.0](https://github.com/alexeyraspopov/picocolors) - ISC
|
||||
[picomatch@2.3.1](https://github.com/micromatch/picomatch) - MIT
|
||||
[pirates@4.0.6](https://github.com/danez/pirates) - MIT
|
||||
[pkg-dir@4.2.0](https://github.com/sindresorhus/pkg-dir) - MIT
|
||||
[platform@1.3.6](https://github.com/bestiejs/platform.js) - MIT
|
||||
[prelude-ls@1.2.1](https://github.com/gkz/prelude-ls) - MIT
|
||||
[pretty-format@29.7.0](https://github.com/jestjs/jest) - MIT
|
||||
[prompts@2.4.2](https://github.com/terkelg/prompts) - MIT
|
||||
[protobufjs@7.4.0](https://github.com/protobufjs/protobuf.js) - BSD-3-Clause
|
||||
[proxy-from-env@1.1.0](https://github.com/Rob--W/proxy-from-env) - MIT
|
||||
[punycode.js@2.3.1](https://github.com/mathiasbynens/punycode.js) - MIT
|
||||
[punycode@2.3.1](https://github.com/mathiasbynens/punycode.js) - MIT
|
||||
[pure-rand@6.0.4](https://github.com/dubzzz/pure-rand) - MIT
|
||||
[queue-microtask@1.2.3](https://github.com/feross/queue-microtask) - MIT
|
||||
[react-is@18.2.0](https://github.com/facebook/react) - MIT
|
||||
[rechoir@0.6.2](https://github.com/tkellen/node-rechoir) - MIT
|
||||
[reflect-metadata@0.2.2](https://github.com/rbuckton/reflect-metadata) - Apache-2.0
|
||||
[require-directory@2.1.1](https://github.com/troygoode/node-require-directory) - MIT
|
||||
[resolve-cwd@3.0.0](https://github.com/sindresorhus/resolve-cwd) - MIT
|
||||
[resolve-from@4.0.0](https://github.com/sindresorhus/resolve-from) - MIT
|
||||
[resolve-from@5.0.0](https://github.com/sindresorhus/resolve-from) - MIT
|
||||
[resolve.exports@2.0.2](https://github.com/lukeed/resolve.exports) - MIT
|
||||
[resolve@1.22.8](https://github.com/browserify/resolve) - MIT
|
||||
[reusify@1.0.4](https://github.com/mcollina/reusify) - MIT
|
||||
[rimraf@3.0.2](https://github.com/isaacs/rimraf) - ISC
|
||||
[rimraf@5.0.10](https://github.com/isaacs/rimraf) - ISC
|
||||
[run-parallel@1.2.0](https://github.com/feross/run-parallel) - MIT
|
||||
[semver@6.3.1](https://github.com/npm/node-semver) - ISC
|
||||
[semver@7.6.3](https://github.com/npm/node-semver) - ISC
|
||||
[sharp@0.33.5](https://github.com/lovell/sharp) - Apache-2.0
|
||||
[shebang-command@2.0.0](https://github.com/kevva/shebang-command) - MIT
|
||||
[shebang-regex@3.0.0](https://github.com/sindresorhus/shebang-regex) - MIT
|
||||
[shelljs@0.8.5](https://github.com/shelljs/shelljs) - BSD-3-Clause
|
||||
[shiki@1.10.3](https://github.com/shikijs/shiki) - MIT
|
||||
[shx@0.3.4](https://github.com/shelljs/shx) - MIT
|
||||
[signal-exit@3.0.7](https://github.com/tapjs/signal-exit) - ISC
|
||||
[signal-exit@4.1.0](https://github.com/tapjs/signal-exit) - ISC
|
||||
[simple-swizzle@0.2.2](https://github.com/qix-/node-simple-swizzle) - MIT
|
||||
[sisteransi@1.0.5](https://github.com/terkelg/sisteransi) - MIT
|
||||
[slash@3.0.0](https://github.com/sindresorhus/slash) - MIT
|
||||
[source-map-support@0.5.13](https://github.com/evanw/node-source-map-support) - MIT
|
||||
[source-map@0.6.1](https://github.com/mozilla/source-map) - BSD-3-Clause
|
||||
[sprintf-js@1.0.3](https://github.com/alexei/sprintf.js) - BSD-3-Clause
|
||||
[stack-utils@2.0.6](https://github.com/tapjs/stack-utils) - MIT
|
||||
[stream-read-all@3.0.1](https://github.com/75lb/stream-read-all) - MIT
|
||||
[string-length@4.0.2](https://github.com/sindresorhus/string-length) - MIT
|
||||
[string-width@4.2.3](https://github.com/sindresorhus/string-width) - MIT
|
||||
[string-width@5.1.2](https://github.com/sindresorhus/string-width) - MIT
|
||||
[strip-ansi@6.0.1](https://github.com/chalk/strip-ansi) - MIT
|
||||
[strip-ansi@7.1.0](https://github.com/chalk/strip-ansi) - MIT
|
||||
[strip-bom@4.0.0](https://github.com/sindresorhus/strip-bom) - MIT
|
||||
[strip-final-newline@2.0.0](https://github.com/sindresorhus/strip-final-newline) - MIT
|
||||
[strip-json-comments@3.1.1](https://github.com/sindresorhus/strip-json-comments) - MIT
|
||||
[strnum@1.0.5](https://github.com/NaturalIntelligence/strnum) - MIT
|
||||
[supports-color@7.2.0](https://github.com/chalk/supports-color) - MIT
|
||||
[supports-color@8.1.1](https://github.com/chalk/supports-color) - MIT
|
||||
[supports-preserve-symlinks-flag@1.0.0](https://github.com/inspect-js/node-supports-preserve-symlinks-flag) - MIT
|
||||
[table-layout@3.0.2](https://github.com/75lb/table-layout) - MIT
|
||||
[tar@7.4.3](https://github.com/isaacs/node-tar) - ISC
|
||||
[test-exclude@6.0.0](https://github.com/istanbuljs/test-exclude) - ISC
|
||||
[text-table@0.2.0](https://github.com/substack/text-table) - MIT
|
||||
[tmp@0.2.3](https://github.com/raszi/node-tmp) - MIT
|
||||
[tmpl@1.0.5](https://github.com/daaku/nodejs-tmpl) - BSD-3-Clause
|
||||
[to-regex-range@5.0.1](https://github.com/micromatch/to-regex-range) - MIT
|
||||
[tr46@0.0.3](https://github.com/Sebmaster/tr46.js) - MIT
|
||||
[ts-api-utils@1.0.3](https://github.com/JoshuaKGoldberg/ts-api-utils) - MIT
|
||||
[ts-jest@29.1.2](https://github.com/kulshekhar/ts-jest) - MIT
|
||||
[tslib@1.14.1](https://github.com/Microsoft/tslib) - 0BSD
|
||||
[tslib@2.6.2](https://github.com/Microsoft/tslib) - 0BSD
|
||||
[type-check@0.4.0](https://github.com/gkz/type-check) - MIT
|
||||
[type-detect@4.0.8](https://github.com/chaijs/type-detect) - MIT
|
||||
[type-fest@0.20.2](https://github.com/sindresorhus/type-fest) - (MIT OR CC0-1.0)
|
||||
[type-fest@0.21.3](https://github.com/sindresorhus/type-fest) - (MIT OR CC0-1.0)
|
||||
[typedoc-plugin-markdown@4.2.1](https://github.com/typedoc2md/typedoc-plugin-markdown) - MIT
|
||||
[typedoc@0.26.4](https://github.com/TypeStrong/TypeDoc) - Apache-2.0
|
||||
[typescript-eslint@7.1.0](https://github.com/typescript-eslint/typescript-eslint) - MIT
|
||||
[typescript@5.5.4](https://github.com/Microsoft/TypeScript) - Apache-2.0
|
||||
[typical@4.0.0](https://github.com/75lb/typical) - MIT
|
||||
[typical@7.1.1](https://github.com/75lb/typical) - MIT
|
||||
[uc.micro@2.1.0](https://github.com/markdown-it/uc.micro) - MIT
|
||||
[undici-types@5.26.5](https://github.com/nodejs/undici) - MIT
|
||||
[undici-types@6.19.8](https://github.com/nodejs/undici) - MIT
|
||||
[update-browserslist-db@1.0.13](https://github.com/browserslist/update-db) - MIT
|
||||
[uri-js@4.4.1](https://github.com/garycourt/uri-js) - BSD-2-Clause
|
||||
[uuid@9.0.1](https://github.com/uuidjs/uuid) - MIT
|
||||
[v8-to-istanbul@9.2.0](https://github.com/istanbuljs/v8-to-istanbul) - ISC
|
||||
[walker@1.0.8](https://github.com/daaku/nodejs-walker) - Apache-2.0
|
||||
[web-streams-polyfill@3.3.3](https://github.com/MattiasBuelens/web-streams-polyfill) - MIT
|
||||
[web-streams-polyfill@4.0.0-beta.3](https://github.com/MattiasBuelens/web-streams-polyfill) - MIT
|
||||
[webidl-conversions@3.0.1](https://github.com/jsdom/webidl-conversions) - BSD-2-Clause
|
||||
[whatwg-url@5.0.0](https://github.com/jsdom/whatwg-url) - MIT
|
||||
[which@2.0.2](https://github.com/isaacs/node-which) - ISC
|
||||
[wordwrapjs@5.1.0](https://github.com/75lb/wordwrapjs) - MIT
|
||||
[wrap-ansi@7.0.0](https://github.com/chalk/wrap-ansi) - MIT
|
||||
[wrap-ansi@8.1.0](https://github.com/chalk/wrap-ansi) - MIT
|
||||
[wrappy@1.0.2](https://github.com/npm/wrappy) - ISC
|
||||
[write-file-atomic@4.0.2](https://github.com/npm/write-file-atomic) - ISC
|
||||
[y18n@5.0.8](https://github.com/yargs/y18n) - ISC
|
||||
[yallist@3.1.1](https://github.com/isaacs/yallist) - ISC
|
||||
[yallist@5.0.0](https://github.com/isaacs/yallist) - BlueOak-1.0.0
|
||||
[yaml@2.4.5](https://github.com/eemeli/yaml) - ISC
|
||||
[yargs-parser@21.1.1](https://github.com/yargs/yargs-parser) - ISC
|
||||
[yargs@17.7.2](https://github.com/yargs/yargs) - MIT
|
||||
[yocto-queue@0.1.0](https://github.com/sindresorhus/yocto-queue) - MIT
|
||||
14607
nodejs/RUST_THIRD_PARTY_LICENSES.html
Normal file
14607
nodejs/RUST_THIRD_PARTY_LICENSES.html
Normal file
File diff suppressed because it is too large
Load Diff
@@ -312,6 +312,66 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
|
||||
expect(res.getChild("id")?.toJSON()).toEqual([2, 3]);
|
||||
});
|
||||
|
||||
it("should support takeRowIds with bigint array", async () => {
|
||||
await table.add([{ id: 1 }, { id: 2 }, { id: 3 }]);
|
||||
// Get actual row IDs using withRowId()
|
||||
const allRows = await table.query().withRowId().toArray();
|
||||
const rowIds = allRows.map((row) => row._rowid) as bigint[];
|
||||
|
||||
// Verify row IDs are bigint
|
||||
expect(typeof rowIds[0]).toBe("bigint");
|
||||
|
||||
// Use takeRowIds with bigint array (the main use case from issue #2722)
|
||||
const res = await table.takeRowIds([rowIds[0], rowIds[2]]).toArray();
|
||||
expect(res.map((r) => r.id)).toEqual([1, 3]);
|
||||
});
|
||||
|
||||
it("should support takeRowIds with number array for backwards compatibility", async () => {
|
||||
await table.add([{ id: 1 }, { id: 2 }, { id: 3 }]);
|
||||
// Small row IDs can be passed as numbers
|
||||
const res = await table.takeRowIds([0, 2]).toArray();
|
||||
expect(res.map((r) => r.id)).toEqual([1, 3]);
|
||||
});
|
||||
|
||||
it("should support takeRowIds with mixed bigint and number array", async () => {
|
||||
await table.add([{ id: 1 }, { id: 2 }, { id: 3 }]);
|
||||
// Mixed array of bigint and number
|
||||
const res = await table.takeRowIds([0n, 1, 2n]).toArray();
|
||||
expect(res.map((r) => r.id)).toEqual([1, 2, 3]);
|
||||
});
|
||||
|
||||
it("should throw for non-integer number in takeRowIds", () => {
|
||||
expect(() => table.takeRowIds([1.5])).toThrow(
|
||||
"Row id must be an integer (or bigint)",
|
||||
);
|
||||
expect(() => table.takeRowIds([0, 1.1, 2])).toThrow(
|
||||
"Row id must be an integer (or bigint)",
|
||||
);
|
||||
});
|
||||
|
||||
it("should throw for negative number in takeRowIds", () => {
|
||||
expect(() => table.takeRowIds([-1])).toThrow("Row id cannot be negative");
|
||||
expect(() => table.takeRowIds([0, -5, 2])).toThrow(
|
||||
"Row id cannot be negative",
|
||||
);
|
||||
});
|
||||
|
||||
it("should throw for unsafe large number in takeRowIds", () => {
|
||||
// Number.MAX_SAFE_INTEGER + 1 is not safe
|
||||
const unsafeNumber = Number.MAX_SAFE_INTEGER + 1;
|
||||
expect(() => table.takeRowIds([unsafeNumber])).toThrow(
|
||||
"Row id is too large for number; use bigint instead",
|
||||
);
|
||||
});
|
||||
|
||||
it("should reject negative bigint in takeRowIds", async () => {
|
||||
await table.add([{ id: 1 }]);
|
||||
// Negative bigint should be rejected by the Rust layer
|
||||
expect(() => {
|
||||
table.takeRowIds([-1n]);
|
||||
}).toThrow("Row id cannot be negative");
|
||||
});
|
||||
|
||||
it("should return the table as an instance of an arrow table", async () => {
|
||||
const arrowTbl = await table.toArrow();
|
||||
expect(arrowTbl).toBeInstanceOf(ArrowTable);
|
||||
@@ -1520,9 +1580,9 @@ describe("when optimizing a dataset", () => {
|
||||
|
||||
it("delete unverified", async () => {
|
||||
const version = await table.version();
|
||||
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${
|
||||
version - 1
|
||||
}.manifest`;
|
||||
const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${String(
|
||||
18446744073709551615n - (BigInt(version) - 1n),
|
||||
).padStart(20, "0")}.manifest`;
|
||||
fs.rmSync(versionFile);
|
||||
|
||||
let stats = await table.optimize({ deleteUnverified: false });
|
||||
|
||||
@@ -347,9 +347,13 @@ export abstract class Table {
|
||||
/**
|
||||
* Create a query that returns a subset of the rows in the table.
|
||||
* @param rowIds The row ids of the rows to return.
|
||||
*
|
||||
* Row ids returned by `withRowId()` are `bigint`, so `bigint[]` is supported.
|
||||
* For convenience / backwards compatibility, `number[]` is also accepted (for
|
||||
* small row ids that fit in a safe integer).
|
||||
* @returns A builder that can be used to parameterize the query.
|
||||
*/
|
||||
abstract takeRowIds(rowIds: number[]): TakeQuery;
|
||||
abstract takeRowIds(rowIds: readonly (bigint | number)[]): TakeQuery;
|
||||
|
||||
/**
|
||||
* Create a search query to find the nearest neighbors
|
||||
@@ -538,6 +542,35 @@ export abstract class Table {
|
||||
*
|
||||
*/
|
||||
abstract stats(): Promise<TableStatistics>;
|
||||
|
||||
/**
|
||||
* Get the initial storage options that were passed in when opening this table.
|
||||
*
|
||||
* For dynamically refreshed options (e.g., credential vending), use
|
||||
* {@link Table.latestStorageOptions}.
|
||||
*
|
||||
* Warning: This is an internal API and the return value is subject to change.
|
||||
*
|
||||
* @returns The storage options, or undefined if no storage options were configured.
|
||||
*/
|
||||
abstract initialStorageOptions(): Promise<
|
||||
Record<string, string> | null | undefined
|
||||
>;
|
||||
|
||||
/**
|
||||
* Get the latest storage options, refreshing from provider if configured.
|
||||
*
|
||||
* This method is useful for credential vending scenarios where storage options
|
||||
* may be refreshed dynamically. If no dynamic provider is configured, this
|
||||
* returns the initial static options.
|
||||
*
|
||||
* Warning: This is an internal API and the return value is subject to change.
|
||||
*
|
||||
* @returns The storage options, or undefined if no storage options were configured.
|
||||
*/
|
||||
abstract latestStorageOptions(): Promise<
|
||||
Record<string, string> | null | undefined
|
||||
>;
|
||||
}
|
||||
|
||||
export class LocalTable extends Table {
|
||||
@@ -686,8 +719,24 @@ export class LocalTable extends Table {
|
||||
return new TakeQuery(this.inner.takeOffsets(offsets));
|
||||
}
|
||||
|
||||
takeRowIds(rowIds: number[]): TakeQuery {
|
||||
return new TakeQuery(this.inner.takeRowIds(rowIds));
|
||||
takeRowIds(rowIds: readonly (bigint | number)[]): TakeQuery {
|
||||
const ids = rowIds.map((id) => {
|
||||
if (typeof id === "bigint") {
|
||||
return id;
|
||||
}
|
||||
if (!Number.isInteger(id)) {
|
||||
throw new Error("Row id must be an integer (or bigint)");
|
||||
}
|
||||
if (id < 0) {
|
||||
throw new Error("Row id cannot be negative");
|
||||
}
|
||||
if (!Number.isSafeInteger(id)) {
|
||||
throw new Error("Row id is too large for number; use bigint instead");
|
||||
}
|
||||
return BigInt(id);
|
||||
});
|
||||
|
||||
return new TakeQuery(this.inner.takeRowIds(ids));
|
||||
}
|
||||
|
||||
query(): Query {
|
||||
@@ -858,6 +907,18 @@ export class LocalTable extends Table {
|
||||
return await this.inner.stats();
|
||||
}
|
||||
|
||||
async initialStorageOptions(): Promise<
|
||||
Record<string, string> | null | undefined
|
||||
> {
|
||||
return await this.inner.initialStorageOptions();
|
||||
}
|
||||
|
||||
async latestStorageOptions(): Promise<
|
||||
Record<string, string> | null | undefined
|
||||
> {
|
||||
return await this.inner.latestStorageOptions();
|
||||
}
|
||||
|
||||
mergeInsert(on: string | string[]): MergeInsertBuilder {
|
||||
on = Array.isArray(on) ? on : [on];
|
||||
return new MergeInsertBuilder(this.inner.mergeInsert(on), this.schema());
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-arm64",
|
||||
"version": "0.24.1",
|
||||
"version": "0.26.2",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.darwin-arm64.node",
|
||||
@@ -8,5 +8,9 @@
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/lancedb/lancedb"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
# `@lancedb/lancedb-darwin-x64`
|
||||
|
||||
This is the **x86_64-apple-darwin** binary for `@lancedb/lancedb`
|
||||
@@ -1,12 +0,0 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-darwin-x64",
|
||||
"version": "0.24.1",
|
||||
"os": ["darwin"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.darwin-x64.node",
|
||||
"files": ["lancedb.darwin-x64.node"],
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-gnu",
|
||||
"version": "0.24.1",
|
||||
"version": "0.26.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-gnu.node",
|
||||
@@ -9,5 +9,9 @@
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
},
|
||||
"libc": ["glibc"]
|
||||
"libc": ["glibc"],
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/lancedb/lancedb"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-arm64-musl",
|
||||
"version": "0.24.1",
|
||||
"version": "0.26.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["arm64"],
|
||||
"main": "lancedb.linux-arm64-musl.node",
|
||||
@@ -9,5 +9,9 @@
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
},
|
||||
"libc": ["musl"]
|
||||
"libc": ["musl"],
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/lancedb/lancedb"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-gnu",
|
||||
"version": "0.24.1",
|
||||
"version": "0.26.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-gnu.node",
|
||||
@@ -9,5 +9,9 @@
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
},
|
||||
"libc": ["glibc"]
|
||||
"libc": ["glibc"],
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/lancedb/lancedb"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-linux-x64-musl",
|
||||
"version": "0.24.1",
|
||||
"version": "0.26.2",
|
||||
"os": ["linux"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.linux-x64-musl.node",
|
||||
@@ -9,5 +9,9 @@
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
},
|
||||
"libc": ["musl"]
|
||||
"libc": ["musl"],
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/lancedb/lancedb"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-arm64-msvc",
|
||||
"version": "0.24.1",
|
||||
"version": "0.26.2",
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
@@ -14,5 +14,9 @@
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/lancedb/lancedb"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb-win32-x64-msvc",
|
||||
"version": "0.24.1",
|
||||
"version": "0.26.2",
|
||||
"os": ["win32"],
|
||||
"cpu": ["x64"],
|
||||
"main": "lancedb.win32-x64-msvc.node",
|
||||
@@ -8,5 +8,9 @@
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
"node": ">= 18"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/lancedb/lancedb"
|
||||
}
|
||||
}
|
||||
|
||||
4
nodejs/package-lock.json
generated
4
nodejs/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.24.1",
|
||||
"version": "0.26.2",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@lancedb/lancedb",
|
||||
"version": "0.24.1",
|
||||
"version": "0.26.2",
|
||||
"cpu": [
|
||||
"x64",
|
||||
"arm64"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
"ann"
|
||||
],
|
||||
"private": false,
|
||||
"version": "0.24.1",
|
||||
"version": "0.26.2",
|
||||
"main": "dist/index.js",
|
||||
"exports": {
|
||||
".": "./dist/index.js",
|
||||
@@ -25,7 +25,6 @@
|
||||
"triples": {
|
||||
"defaults": false,
|
||||
"additional": [
|
||||
"x86_64-apple-darwin",
|
||||
"aarch64-apple-darwin",
|
||||
"x86_64-unknown-linux-gnu",
|
||||
"aarch64-unknown-linux-gnu",
|
||||
@@ -37,6 +36,10 @@
|
||||
}
|
||||
},
|
||||
"license": "Apache-2.0",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/lancedb/lancedb"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@aws-sdk/client-dynamodb": "^3.33.0",
|
||||
"@aws-sdk/client-kms": "^3.33.0",
|
||||
|
||||
@@ -13,6 +13,7 @@ use crate::header::JsHeaderProvider;
|
||||
use crate::table::Table;
|
||||
use crate::ConnectionOptions;
|
||||
use lancedb::connection::{ConnectBuilder, Connection as LanceDBConnection};
|
||||
|
||||
use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema};
|
||||
|
||||
#[napi]
|
||||
|
||||
@@ -166,6 +166,19 @@ impl Table {
|
||||
Ok(stats.into())
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn initial_storage_options(&self) -> napi::Result<Option<HashMap<String, String>>> {
|
||||
Ok(self.inner_ref()?.initial_storage_options().await)
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn latest_storage_options(&self) -> napi::Result<Option<HashMap<String, String>>> {
|
||||
self.inner_ref()?
|
||||
.latest_storage_options()
|
||||
.await
|
||||
.default_error()
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub async fn update(
|
||||
&self,
|
||||
@@ -208,18 +221,24 @@ impl Table {
|
||||
}
|
||||
|
||||
#[napi(catch_unwind)]
|
||||
pub fn take_row_ids(&self, row_ids: Vec<i64>) -> napi::Result<TakeQuery> {
|
||||
pub fn take_row_ids(&self, row_ids: Vec<BigInt>) -> napi::Result<TakeQuery> {
|
||||
Ok(TakeQuery::new(
|
||||
self.inner_ref()?.take_row_ids(
|
||||
row_ids
|
||||
.into_iter()
|
||||
.map(|o| {
|
||||
u64::try_from(o).map_err(|e| {
|
||||
napi::Error::from_reason(format!(
|
||||
"Failed to convert row id to u64: {}",
|
||||
e
|
||||
.map(|id| {
|
||||
let (negative, value, lossless) = id.get_u64();
|
||||
if negative {
|
||||
Err(napi::Error::from_reason(
|
||||
"Row id cannot be negative".to_string(),
|
||||
))
|
||||
})
|
||||
} else if !lossless {
|
||||
Err(napi::Error::from_reason(
|
||||
"Row id is too large to fit in u64".to_string(),
|
||||
))
|
||||
} else {
|
||||
Ok(value)
|
||||
}
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?,
|
||||
),
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[tool.bumpversion]
|
||||
current_version = "0.28.0-beta.0"
|
||||
current_version = "0.30.0-beta.0"
|
||||
parse = """(?x)
|
||||
(?P<major>0|[1-9]\\d*)\\.
|
||||
(?P<minor>0|[1-9]\\d*)\\.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb-python"
|
||||
version = "0.28.0-beta.0"
|
||||
version = "0.30.0-beta.0"
|
||||
edition.workspace = true
|
||||
description = "Python bindings for LanceDB"
|
||||
license.workspace = true
|
||||
@@ -14,15 +14,15 @@ name = "_lancedb"
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
arrow = { version = "56.2", features = ["pyarrow"] }
|
||||
arrow = { version = "57.2", features = ["pyarrow"] }
|
||||
async-trait = "0.1"
|
||||
lancedb = { path = "../rust/lancedb", default-features = false }
|
||||
lance-core.workspace = true
|
||||
lance-namespace.workspace = true
|
||||
lance-io.workspace = true
|
||||
env_logger.workspace = true
|
||||
pyo3 = { version = "0.25", features = ["extension-module", "abi3-py310"] }
|
||||
pyo3-async-runtimes = { version = "0.25", features = [
|
||||
pyo3 = { version = "0.26", features = ["extension-module", "abi3-py39"] }
|
||||
pyo3-async-runtimes = { version = "0.26", features = [
|
||||
"attributes",
|
||||
"tokio-runtime",
|
||||
] }
|
||||
@@ -32,9 +32,9 @@ snafu.workspace = true
|
||||
tokio = { version = "1.40", features = ["sync"] }
|
||||
|
||||
[build-dependencies]
|
||||
pyo3-build-config = { version = "0.25", features = [
|
||||
pyo3-build-config = { version = "0.26", features = [
|
||||
"extension-module",
|
||||
"abi3-py310",
|
||||
"abi3-py39",
|
||||
] }
|
||||
|
||||
[features]
|
||||
|
||||
206
python/PYTHON_THIRD_PARTY_LICENSES.md
Normal file
206
python/PYTHON_THIRD_PARTY_LICENSES.md
Normal file
@@ -0,0 +1,206 @@
|
||||
| Name | Version | License | URL |
|
||||
|--------------------------------|-----------------|--------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------|
|
||||
| InstructorEmbedding | 1.0.1 | Apache License 2.0 | https://github.com/HKUNLP/instructor-embedding |
|
||||
| Jinja2 | 3.1.6 | BSD License | https://github.com/pallets/jinja/ |
|
||||
| Markdown | 3.10.2 | BSD-3-Clause | https://Python-Markdown.github.io/ |
|
||||
| MarkupSafe | 3.0.3 | BSD-3-Clause | https://github.com/pallets/markupsafe/ |
|
||||
| PyJWT | 2.11.0 | MIT | https://github.com/jpadilla/pyjwt |
|
||||
| PyYAML | 6.0.3 | MIT License | https://pyyaml.org/ |
|
||||
| Pygments | 2.19.2 | BSD License | https://pygments.org |
|
||||
| accelerate | 1.12.0 | Apache Software License | https://github.com/huggingface/accelerate |
|
||||
| adlfs | 2026.2.0 | BSD License | UNKNOWN |
|
||||
| aiohappyeyeballs | 2.6.1 | Python Software Foundation License | https://github.com/aio-libs/aiohappyeyeballs |
|
||||
| aiohttp | 3.13.3 | Apache-2.0 AND MIT | https://github.com/aio-libs/aiohttp |
|
||||
| aiosignal | 1.4.0 | Apache Software License | https://github.com/aio-libs/aiosignal |
|
||||
| annotated-types | 0.7.0 | MIT License | https://github.com/annotated-types/annotated-types |
|
||||
| anyio | 4.12.1 | MIT | https://anyio.readthedocs.io/en/stable/versionhistory.html |
|
||||
| appnope | 0.1.4 | BSD License | http://github.com/minrk/appnope |
|
||||
| asttokens | 3.0.1 | Apache 2.0 | https://github.com/gristlabs/asttokens |
|
||||
| attrs | 25.4.0 | MIT | https://www.attrs.org/en/stable/changelog.html |
|
||||
| awscli | 1.44.35 | Apache Software License | http://aws.amazon.com/cli/ |
|
||||
| azure-core | 1.38.0 | MIT License | https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/core/azure-core |
|
||||
| azure-datalake-store | 0.0.53 | MIT License | https://github.com/Azure/azure-data-lake-store-python |
|
||||
| azure-identity | 1.25.1 | MIT | https://github.com/Azure/azure-sdk-for-python |
|
||||
| azure-storage-blob | 12.28.0 | MIT License | https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-blob |
|
||||
| babel | 2.18.0 | BSD License | https://babel.pocoo.org/ |
|
||||
| backrefs | 6.1 | MIT | https://github.com/facelessuser/backrefs |
|
||||
| beautifulsoup4 | 4.14.3 | MIT License | https://www.crummy.com/software/BeautifulSoup/bs4/ |
|
||||
| bleach | 6.3.0 | Apache Software License | https://github.com/mozilla/bleach |
|
||||
| boto3 | 1.42.45 | Apache-2.0 | https://github.com/boto/boto3 |
|
||||
| botocore | 1.42.45 | Apache-2.0 | https://github.com/boto/botocore |
|
||||
| cachetools | 7.0.0 | MIT | https://github.com/tkem/cachetools/ |
|
||||
| certifi | 2026.1.4 | Mozilla Public License 2.0 (MPL 2.0) | https://github.com/certifi/python-certifi |
|
||||
| cffi | 2.0.0 | MIT | https://cffi.readthedocs.io/en/latest/whatsnew.html |
|
||||
| cfgv | 3.5.0 | MIT | https://github.com/asottile/cfgv |
|
||||
| charset-normalizer | 3.4.4 | MIT | https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md |
|
||||
| click | 8.3.1 | BSD-3-Clause | https://github.com/pallets/click/ |
|
||||
| cohere | 5.20.4 | MIT License | https://github.com/cohere-ai/cohere-python |
|
||||
| colorama | 0.4.6 | BSD License | https://github.com/tartley/colorama |
|
||||
| colpali_engine | 0.3.13 | MIT License | https://github.com/illuin-tech/colpali |
|
||||
| comm | 0.2.3 | BSD License | https://github.com/ipython/comm |
|
||||
| cryptography | 46.0.4 | Apache-2.0 OR BSD-3-Clause | https://github.com/pyca/cryptography |
|
||||
| datafusion | 51.0.0 | Apache Software License | https://datafusion.apache.org/python |
|
||||
| debugpy | 1.8.20 | MIT License | https://aka.ms/debugpy |
|
||||
| decorator | 5.2.1 | BSD License | UNKNOWN |
|
||||
| defusedxml | 0.7.1 | Python Software Foundation License | https://github.com/tiran/defusedxml |
|
||||
| deprecation | 2.1.0 | Apache Software License | http://deprecation.readthedocs.io/ |
|
||||
| distlib | 0.4.0 | Python Software Foundation License | https://github.com/pypa/distlib |
|
||||
| distro | 1.9.0 | Apache Software License | https://github.com/python-distro/distro |
|
||||
| docutils | 0.19 | BSD License; GNU General Public License (GPL); Public Domain; Python Software Foundation License | https://docutils.sourceforge.io/ |
|
||||
| duckdb | 1.4.4 | MIT License | https://github.com/duckdb/duckdb-python |
|
||||
| executing | 2.2.1 | MIT License | https://github.com/alexmojaki/executing |
|
||||
| fastavro | 1.12.1 | MIT | https://github.com/fastavro/fastavro |
|
||||
| fastjsonschema | 2.21.2 | BSD License | https://github.com/horejsek/python-fastjsonschema |
|
||||
| filelock | 3.20.3 | Unlicense | https://github.com/tox-dev/py-filelock |
|
||||
| frozenlist | 1.8.0 | Apache-2.0 | https://github.com/aio-libs/frozenlist |
|
||||
| fsspec | 2026.2.0 | BSD-3-Clause | https://github.com/fsspec/filesystem_spec |
|
||||
| ftfy | 6.3.1 | Apache-2.0 | https://ftfy.readthedocs.io/en/latest/ |
|
||||
| ghp-import | 2.1.0 | Apache Software License | https://github.com/c-w/ghp-import |
|
||||
| google-ai-generativelanguage | 0.6.15 | Apache Software License | https://github.com/googleapis/google-cloud-python/tree/main/packages/google-ai-generativelanguage |
|
||||
| google-api-core | 2.25.2 | Apache Software License | https://github.com/googleapis/python-api-core |
|
||||
| google-api-python-client | 2.189.0 | Apache Software License | https://github.com/googleapis/google-api-python-client/ |
|
||||
| google-auth | 2.48.0 | Apache Software License | https://github.com/googleapis/google-auth-library-python |
|
||||
| google-auth-httplib2 | 0.3.0 | Apache Software License | https://github.com/GoogleCloudPlatform/google-auth-library-python-httplib2 |
|
||||
| google-generativeai | 0.8.6 | Apache Software License | https://github.com/google/generative-ai-python |
|
||||
| googleapis-common-protos | 1.72.0 | Apache Software License | https://github.com/googleapis/google-cloud-python/tree/main/packages/googleapis-common-protos |
|
||||
| griffe | 2.0.0 | ISC | https://mkdocstrings.github.io/griffe |
|
||||
| griffecli | 2.0.0 | ISC | UNKNOWN |
|
||||
| griffelib | 2.0.0 | ISC | UNKNOWN |
|
||||
| grpcio | 1.78.0 | Apache-2.0 | https://grpc.io |
|
||||
| grpcio-status | 1.71.2 | Apache Software License | https://grpc.io |
|
||||
| h11 | 0.16.0 | MIT License | https://github.com/python-hyper/h11 |
|
||||
| hf-xet | 1.2.0 | Apache-2.0 | https://github.com/huggingface/xet-core |
|
||||
| httpcore | 1.0.9 | BSD-3-Clause | https://www.encode.io/httpcore/ |
|
||||
| httplib2 | 0.31.2 | MIT License | https://github.com/httplib2/httplib2 |
|
||||
| httpx | 0.28.1 | BSD License | https://github.com/encode/httpx |
|
||||
| huggingface_hub | 0.36.2 | Apache Software License | https://github.com/huggingface/huggingface_hub |
|
||||
| ibm-cos-sdk | 2.14.3 | Apache Software License | https://github.com/ibm/ibm-cos-sdk-python |
|
||||
| ibm-cos-sdk-core | 2.14.3 | Apache Software License | https://github.com/ibm/ibm-cos-sdk-python-core |
|
||||
| ibm-cos-sdk-s3transfer | 2.14.3 | Apache Software License | https://github.com/IBM/ibm-cos-sdk-python-s3transfer |
|
||||
| ibm_watsonx_ai | 1.5.1 | BSD License | https://ibm.github.io/watsonx-ai-python-sdk/changelog.html |
|
||||
| identify | 2.6.16 | MIT | https://github.com/pre-commit/identify |
|
||||
| idna | 3.11 | BSD-3-Clause | https://github.com/kjd/idna |
|
||||
| iniconfig | 2.3.0 | MIT | https://github.com/pytest-dev/iniconfig |
|
||||
| ipykernel | 6.31.0 | BSD-3-Clause | https://ipython.org |
|
||||
| ipython | 9.10.0 | BSD-3-Clause | https://ipython.org |
|
||||
| ipython_pygments_lexers | 1.1.1 | BSD License | https://github.com/ipython/ipython-pygments-lexers |
|
||||
| isodate | 0.7.2 | BSD License | https://github.com/gweis/isodate/ |
|
||||
| jedi | 0.19.2 | MIT License | https://github.com/davidhalter/jedi |
|
||||
| jiter | 0.13.0 | MIT License | https://github.com/pydantic/jiter/ |
|
||||
| jmespath | 1.0.1 | MIT License | https://github.com/jmespath/jmespath.py |
|
||||
| joblib | 1.5.3 | BSD-3-Clause | https://joblib.readthedocs.io |
|
||||
| jsonschema | 4.26.0 | MIT | https://github.com/python-jsonschema/jsonschema |
|
||||
| jsonschema-specifications | 2025.9.1 | MIT | https://github.com/python-jsonschema/jsonschema-specifications |
|
||||
| jupyter_client | 8.8.0 | BSD License | https://jupyter.org |
|
||||
| jupyter_core | 5.9.1 | BSD-3-Clause | https://jupyter.org |
|
||||
| jupyterlab_pygments | 0.3.0 | BSD License | https://github.com/jupyterlab/jupyterlab_pygments |
|
||||
| jupytext | 1.19.1 | MIT License | https://github.com/mwouts/jupytext |
|
||||
| lance-namespace | 0.4.5 | Apache-2.0 | https://github.com/lance-format/lance-namespace |
|
||||
| lance-namespace-urllib3-client | 0.4.5 | Apache-2.0 | https://github.com/lance-format/lance-namespace |
|
||||
| lancedb | 0.29.2 | Apache Software License | https://github.com/lancedb/lancedb |
|
||||
| lomond | 0.3.3 | BSD License | https://github.com/wildfoundry/dataplicity-lomond |
|
||||
| markdown-it-py | 4.0.0 | MIT License | https://github.com/executablebooks/markdown-it-py |
|
||||
| matplotlib-inline | 0.2.1 | UNKNOWN | https://github.com/ipython/matplotlib-inline |
|
||||
| mdit-py-plugins | 0.5.0 | MIT License | https://github.com/executablebooks/mdit-py-plugins |
|
||||
| mdurl | 0.1.2 | MIT License | https://github.com/executablebooks/mdurl |
|
||||
| mergedeep | 1.3.4 | MIT License | https://github.com/clarketm/mergedeep |
|
||||
| mistune | 3.2.0 | BSD License | https://github.com/lepture/mistune |
|
||||
| mkdocs | 1.6.1 | BSD-2-Clause | https://github.com/mkdocs/mkdocs |
|
||||
| mkdocs-autorefs | 1.4.3 | ISC | https://mkdocstrings.github.io/autorefs |
|
||||
| mkdocs-get-deps | 0.2.0 | MIT | https://github.com/mkdocs/get-deps |
|
||||
| mkdocs-jupyter | 0.25.1 | Apache-2.0 | https://github.com/danielfrg/mkdocs-jupyter |
|
||||
| mkdocs-material | 9.7.1 | MIT | https://github.com/squidfunk/mkdocs-material |
|
||||
| mkdocs-material-extensions | 1.3.1 | MIT | https://github.com/facelessuser/mkdocs-material-extensions |
|
||||
| mkdocstrings | 1.0.3 | ISC | https://mkdocstrings.github.io |
|
||||
| mkdocstrings-python | 2.0.2 | ISC | https://mkdocstrings.github.io/python |
|
||||
| mpmath | 1.3.0 | BSD License | http://mpmath.org/ |
|
||||
| msal | 1.34.0 | MIT License | https://github.com/AzureAD/microsoft-authentication-library-for-python |
|
||||
| msal-extensions | 1.3.1 | MIT License | https://github.com/AzureAD/microsoft-authentication-extensions-for-python/releases |
|
||||
| multidict | 6.7.1 | Apache License 2.0 | https://github.com/aio-libs/multidict |
|
||||
| nbclient | 0.10.4 | BSD License | https://jupyter.org |
|
||||
| nbconvert | 7.17.0 | BSD License | https://jupyter.org |
|
||||
| nbformat | 5.10.4 | BSD License | https://jupyter.org |
|
||||
| nest-asyncio | 1.6.0 | BSD License | https://github.com/erdewit/nest_asyncio |
|
||||
| networkx | 3.6.1 | BSD-3-Clause | https://networkx.org/ |
|
||||
| nodeenv | 1.10.0 | BSD License | https://github.com/ekalinin/nodeenv |
|
||||
| numpy | 2.4.2 | BSD-3-Clause AND 0BSD AND MIT AND Zlib AND CC0-1.0 | https://numpy.org |
|
||||
| ollama | 0.6.1 | MIT | https://ollama.com |
|
||||
| open_clip_torch | 3.2.0 | MIT License | https://github.com/mlfoundations/open_clip |
|
||||
| openai | 2.18.0 | Apache Software License | https://github.com/openai/openai-python |
|
||||
| packaging | 26.0 | Apache-2.0 OR BSD-2-Clause | https://github.com/pypa/packaging |
|
||||
| paginate | 0.5.7 | MIT License | https://github.com/Signum/paginate |
|
||||
| pandas | 2.3.3 | BSD License | https://pandas.pydata.org |
|
||||
| pandocfilters | 1.5.1 | BSD License | http://github.com/jgm/pandocfilters |
|
||||
| parso | 0.8.6 | MIT License | https://github.com/davidhalter/parso |
|
||||
| pathspec | 1.0.4 | Mozilla Public License 2.0 (MPL 2.0) | UNKNOWN |
|
||||
| peft | 0.17.1 | Apache Software License | https://github.com/huggingface/peft |
|
||||
| pexpect | 4.9.0 | ISC License (ISCL) | https://pexpect.readthedocs.io/ |
|
||||
| pillow | 12.1.0 | MIT-CMU | https://python-pillow.github.io |
|
||||
| platformdirs | 4.5.1 | MIT | https://github.com/tox-dev/platformdirs |
|
||||
| pluggy | 1.6.0 | MIT License | UNKNOWN |
|
||||
| polars | 1.3.0 | MIT License | https://www.pola.rs/ |
|
||||
| pre_commit | 4.5.1 | MIT | https://github.com/pre-commit/pre-commit |
|
||||
| prompt_toolkit | 3.0.52 | BSD License | https://github.com/prompt-toolkit/python-prompt-toolkit |
|
||||
| propcache | 0.4.1 | Apache Software License | https://github.com/aio-libs/propcache |
|
||||
| proto-plus | 1.27.1 | Apache Software License | https://github.com/googleapis/proto-plus-python |
|
||||
| protobuf | 5.29.6 | 3-Clause BSD License | https://developers.google.com/protocol-buffers/ |
|
||||
| psutil | 7.2.2 | BSD-3-Clause | https://github.com/giampaolo/psutil |
|
||||
| ptyprocess | 0.7.0 | ISC License (ISCL) | https://github.com/pexpect/ptyprocess |
|
||||
| pure_eval | 0.2.3 | MIT License | http://github.com/alexmojaki/pure_eval |
|
||||
| pyarrow | 23.0.0 | Apache-2.0 | https://arrow.apache.org/ |
|
||||
| pyarrow-stubs | 20.0.0.20251215 | BSD-2-Clause | https://github.com/zen-xu/pyarrow-stubs |
|
||||
| pyasn1 | 0.6.2 | BSD-2-Clause | https://github.com/pyasn1/pyasn1 |
|
||||
| pyasn1_modules | 0.4.2 | BSD License | https://github.com/pyasn1/pyasn1-modules |
|
||||
| pycparser | 3.0 | BSD-3-Clause | https://github.com/eliben/pycparser |
|
||||
| pydantic | 2.12.5 | MIT | https://github.com/pydantic/pydantic |
|
||||
| pydantic_core | 2.41.5 | MIT | https://github.com/pydantic/pydantic-core |
|
||||
| pylance | 2.0.0 | Apache Software License | UNKNOWN |
|
||||
| pymdown-extensions | 10.20.1 | MIT | https://github.com/facelessuser/pymdown-extensions |
|
||||
| pyparsing | 3.3.2 | MIT | https://github.com/pyparsing/pyparsing/ |
|
||||
| pyright | 1.1.408 | MIT | https://github.com/RobertCraigie/pyright-python |
|
||||
| pytest | 9.0.2 | MIT | https://docs.pytest.org/en/latest/ |
|
||||
| pytest-asyncio | 1.3.0 | Apache-2.0 | https://github.com/pytest-dev/pytest-asyncio |
|
||||
| pytest-mock | 3.15.1 | MIT License | https://github.com/pytest-dev/pytest-mock/ |
|
||||
| python-dateutil | 2.9.0.post0 | Apache Software License; BSD License | https://github.com/dateutil/dateutil |
|
||||
| pytz | 2025.2 | MIT License | http://pythonhosted.org/pytz |
|
||||
| pyyaml_env_tag | 1.1 | MIT | https://github.com/waylan/pyyaml-env-tag |
|
||||
| pyzmq | 27.1.0 | BSD License | https://pyzmq.readthedocs.org |
|
||||
| referencing | 0.37.0 | MIT | https://github.com/python-jsonschema/referencing |
|
||||
| regex | 2026.1.15 | Apache-2.0 AND CNRI-Python | https://github.com/mrabarnett/mrab-regex |
|
||||
| requests | 2.32.5 | Apache Software License | https://requests.readthedocs.io |
|
||||
| rpds-py | 0.30.0 | MIT | https://github.com/crate-py/rpds |
|
||||
| rsa | 4.7.2 | Apache Software License | https://stuvel.eu/rsa |
|
||||
| ruff | 0.15.0 | MIT License | https://docs.astral.sh/ruff |
|
||||
| s3transfer | 0.16.0 | Apache Software License | https://github.com/boto/s3transfer |
|
||||
| safetensors | 0.7.0 | Apache Software License | https://github.com/huggingface/safetensors |
|
||||
| scikit-learn | 1.8.0 | BSD-3-Clause | https://scikit-learn.org |
|
||||
| scipy | 1.17.0 | BSD License | https://scipy.org/ |
|
||||
| sentence-transformers | 5.2.2 | Apache Software License | https://www.SBERT.net |
|
||||
| sentencepiece | 0.2.1 | UNKNOWN | https://github.com/google/sentencepiece |
|
||||
| six | 1.17.0 | MIT License | https://github.com/benjaminp/six |
|
||||
| sniffio | 1.3.1 | Apache Software License; MIT License | https://github.com/python-trio/sniffio |
|
||||
| soupsieve | 2.8.3 | MIT | https://github.com/facelessuser/soupsieve |
|
||||
| stack-data | 0.6.3 | MIT License | http://github.com/alexmojaki/stack_data |
|
||||
| sympy | 1.14.0 | BSD License | https://sympy.org |
|
||||
| tabulate | 0.9.0 | MIT License | https://github.com/astanin/python-tabulate |
|
||||
| tantivy | 0.25.1 | UNKNOWN | UNKNOWN |
|
||||
| threadpoolctl | 3.6.0 | BSD License | https://github.com/joblib/threadpoolctl |
|
||||
| timm | 1.0.24 | Apache Software License | https://github.com/huggingface/pytorch-image-models |
|
||||
| tinycss2 | 1.4.0 | BSD License | https://www.courtbouillon.org/tinycss2 |
|
||||
| tokenizers | 0.22.2 | Apache Software License | https://github.com/huggingface/tokenizers |
|
||||
| torch | 2.8.0 | BSD License | https://pytorch.org/ |
|
||||
| torchvision | 0.23.0 | BSD | https://github.com/pytorch/vision |
|
||||
| tornado | 6.5.4 | Apache Software License | http://www.tornadoweb.org/ |
|
||||
| tqdm | 4.67.3 | MPL-2.0 AND MIT | https://tqdm.github.io |
|
||||
| traitlets | 5.14.3 | BSD License | https://github.com/ipython/traitlets |
|
||||
| transformers | 4.57.6 | Apache Software License | https://github.com/huggingface/transformers |
|
||||
| types-requests | 2.32.4.20260107 | Apache-2.0 | https://github.com/python/typeshed |
|
||||
| typing-inspection | 0.4.2 | MIT | https://github.com/pydantic/typing-inspection |
|
||||
| typing_extensions | 4.15.0 | PSF-2.0 | https://github.com/python/typing_extensions |
|
||||
| tzdata | 2025.3 | Apache-2.0 | https://github.com/python/tzdata |
|
||||
| uritemplate | 4.2.0 | BSD 3-Clause OR Apache-2.0 | https://uritemplate.readthedocs.org |
|
||||
| urllib3 | 2.6.3 | MIT | https://github.com/urllib3/urllib3/blob/main/CHANGES.rst |
|
||||
| virtualenv | 20.36.1 | MIT | https://github.com/pypa/virtualenv |
|
||||
| watchdog | 6.0.0 | Apache Software License | https://github.com/gorakhargosh/watchdog |
|
||||
| webencodings | 0.5.1 | BSD License | https://github.com/SimonSapin/python-webencodings |
|
||||
| yarl | 1.22.0 | Apache Software License | https://github.com/aio-libs/yarl |
|
||||
14687
python/RUST_THIRD_PARTY_LICENSES.html
Normal file
14687
python/RUST_THIRD_PARTY_LICENSES.html
Normal file
File diff suppressed because it is too large
Load Diff
@@ -180,6 +180,8 @@ class Table:
|
||||
delete_unverified: Optional[bool] = None,
|
||||
) -> OptimizeStats: ...
|
||||
async def uri(self) -> str: ...
|
||||
async def initial_storage_options(self) -> Optional[Dict[str, str]]: ...
|
||||
async def latest_storage_options(self) -> Optional[Dict[str, str]]: ...
|
||||
@property
|
||||
def tags(self) -> Tags: ...
|
||||
def query(self) -> Query: ...
|
||||
|
||||
@@ -9,7 +9,7 @@ import json
|
||||
from ._lancedb import async_permutation_builder, PermutationReader
|
||||
from .table import LanceTable
|
||||
from .background_loop import LOOP
|
||||
from .util import batch_to_tensor
|
||||
from .util import batch_to_tensor, batch_to_tensor_rows
|
||||
from typing import Any, Callable, Iterator, Literal, Optional, TYPE_CHECKING, Union
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -333,7 +333,11 @@ class Transforms:
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def arrow2python(batch: pa.RecordBatch) -> dict[str, list[Any]]:
|
||||
def arrow2python(batch: pa.RecordBatch) -> list[dict[str, Any]]:
|
||||
return batch.to_pylist()
|
||||
|
||||
@staticmethod
|
||||
def arrow2pythoncol(batch: pa.RecordBatch) -> dict[str, list[Any]]:
|
||||
return batch.to_pydict()
|
||||
|
||||
@staticmethod
|
||||
@@ -687,7 +691,17 @@ class Permutation:
|
||||
return
|
||||
|
||||
def with_format(
|
||||
self, format: Literal["numpy", "python", "pandas", "arrow", "torch", "polars"]
|
||||
self,
|
||||
format: Literal[
|
||||
"numpy",
|
||||
"python",
|
||||
"python_col",
|
||||
"pandas",
|
||||
"arrow",
|
||||
"torch",
|
||||
"torch_col",
|
||||
"polars",
|
||||
],
|
||||
) -> "Permutation":
|
||||
"""
|
||||
Set the format for batches
|
||||
@@ -696,16 +710,18 @@ class Permutation:
|
||||
|
||||
The format can be one of:
|
||||
- "numpy" - the batch will be a dict of numpy arrays (one per column)
|
||||
- "python" - the batch will be a dict of lists (one per column)
|
||||
- "python" - the batch will be a list of dicts (one per row)
|
||||
- "python_col" - the batch will be a dict of lists (one entry per column)
|
||||
- "pandas" - the batch will be a pandas DataFrame
|
||||
- "arrow" - the batch will be a pyarrow RecordBatch
|
||||
- "torch" - the batch will be a two dimensional torch tensor
|
||||
- "torch" - the batch will be a list of tensors, one per row
|
||||
- "torch_col" - the batch will be a 2D torch tensor (first dim indexes columns)
|
||||
- "polars" - the batch will be a polars DataFrame
|
||||
|
||||
Conversion may or may not involve a data copy. Lance uses Arrow internally
|
||||
and so it is able to zero-copy to the arrow and polars.
|
||||
and so it is able to zero-copy to the arrow and polars formats.
|
||||
|
||||
Conversion to torch will be zero-copy but will only support a subset of data
|
||||
Conversion to torch_col will be zero-copy but will only support a subset of data
|
||||
types (numeric types).
|
||||
|
||||
Conversion to numpy and/or pandas will typically be zero-copy for numeric
|
||||
@@ -718,6 +734,8 @@ class Permutation:
|
||||
assert format is not None, "format is required"
|
||||
if format == "python":
|
||||
return self.with_transform(Transforms.arrow2python)
|
||||
if format == "python_col":
|
||||
return self.with_transform(Transforms.arrow2pythoncol)
|
||||
elif format == "numpy":
|
||||
return self.with_transform(Transforms.arrow2numpy)
|
||||
elif format == "pandas":
|
||||
@@ -725,6 +743,8 @@ class Permutation:
|
||||
elif format == "arrow":
|
||||
return self.with_transform(Transforms.arrow2arrow)
|
||||
elif format == "torch":
|
||||
return self.with_transform(batch_to_tensor_rows)
|
||||
elif format == "torch_col":
|
||||
return self.with_transform(batch_to_tensor)
|
||||
elif format == "polars":
|
||||
return self.with_transform(Transforms.arrow2polars())
|
||||
@@ -746,15 +766,20 @@ class Permutation:
|
||||
|
||||
def __getitem__(self, index: int) -> Any:
|
||||
"""
|
||||
Return a single row from the permutation
|
||||
|
||||
The output will always be a python dictionary regardless of the format.
|
||||
|
||||
This method is mostly useful for debugging and exploration. For actual
|
||||
processing use [iter](#iter) or a torch data loader to perform batched
|
||||
processing.
|
||||
Returns a single row from the permutation by offset
|
||||
"""
|
||||
pass
|
||||
return self.__getitems__([index])
|
||||
|
||||
def __getitems__(self, indices: list[int]) -> Any:
|
||||
"""
|
||||
Returns rows from the permutation by offset
|
||||
"""
|
||||
|
||||
async def do_getitems():
|
||||
return await self.reader.take_offsets(indices, selection=self.selection)
|
||||
|
||||
batch = LOOP.run(do_getitems())
|
||||
return self.transform_fn(batch)
|
||||
|
||||
@deprecated(details="Use with_skip instead")
|
||||
def skip(self, skip: int) -> "Permutation":
|
||||
|
||||
@@ -961,22 +961,27 @@ class LanceQueryBuilder(ABC):
|
||||
>>> query = [100, 100]
|
||||
>>> plan = table.search(query).analyze_plan()
|
||||
>>> print(plan) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
||||
AnalyzeExec verbose=true, metrics=[], cumulative_cpu=...
|
||||
TracedExec, metrics=[], cumulative_cpu=...
|
||||
ProjectionExec: expr=[...], metrics=[...], cumulative_cpu=...
|
||||
GlobalLimitExec: skip=0, fetch=10, metrics=[...], cumulative_cpu=...
|
||||
FilterExec: _distance@2 IS NOT NULL,
|
||||
metrics=[output_rows=..., elapsed_compute=...], cumulative_cpu=...
|
||||
SortExec: TopK(fetch=10), expr=[...],
|
||||
AnalyzeExec verbose=true, elapsed=..., metrics=...
|
||||
TracedExec, elapsed=..., metrics=...
|
||||
ProjectionExec: elapsed=..., expr=[...],
|
||||
metrics=[output_rows=..., elapsed_compute=..., output_bytes=...]
|
||||
GlobalLimitExec: elapsed=..., skip=0, fetch=10,
|
||||
metrics=[output_rows=..., elapsed_compute=..., output_bytes=...]
|
||||
FilterExec: elapsed=..., _distance@2 IS NOT NULL, metrics=[...]
|
||||
SortExec: elapsed=..., TopK(fetch=10), expr=[...],
|
||||
preserve_partitioning=[...],
|
||||
metrics=[output_rows=..., elapsed_compute=..., row_replacements=...],
|
||||
cumulative_cpu=...
|
||||
KNNVectorDistance: metric=l2,
|
||||
metrics=[output_rows=..., elapsed_compute=..., output_batches=...],
|
||||
cumulative_cpu=...
|
||||
LanceRead: uri=..., projection=[vector], ...
|
||||
metrics=[output_rows=..., elapsed_compute=...,
|
||||
bytes_read=..., iops=..., requests=...], cumulative_cpu=...
|
||||
metrics=[output_rows=..., elapsed_compute=...,
|
||||
output_bytes=..., row_replacements=...]
|
||||
KNNVectorDistance: elapsed=..., metric=l2,
|
||||
metrics=[output_rows=..., elapsed_compute=...,
|
||||
output_bytes=..., output_batches=...]
|
||||
LanceRead: elapsed=..., uri=..., projection=[vector],
|
||||
num_fragments=..., range_before=None, range_after=None,
|
||||
row_id=true, row_addr=false,
|
||||
full_filter=--, refine_filter=--,
|
||||
metrics=[output_rows=..., elapsed_compute=..., output_bytes=...,
|
||||
fragments_scanned=..., ranges_scanned=1, rows_scanned=1,
|
||||
bytes_read=..., iops=..., requests=..., task_wait_time=...]
|
||||
|
||||
Returns
|
||||
-------
|
||||
@@ -1428,6 +1433,19 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
|
||||
self._bypass_vector_index = True
|
||||
return self
|
||||
|
||||
def fast_search(self) -> LanceVectorQueryBuilder:
|
||||
"""
|
||||
Skip a flat search of unindexed data. This will improve
|
||||
search performance but search results will not include unindexed data.
|
||||
|
||||
Returns
|
||||
-------
|
||||
LanceVectorQueryBuilder
|
||||
The LanceVectorQueryBuilder object.
|
||||
"""
|
||||
self._fast_search = True
|
||||
return self
|
||||
|
||||
|
||||
class LanceFtsQueryBuilder(LanceQueryBuilder):
|
||||
"""A builder for full text search for LanceDB."""
|
||||
@@ -2100,19 +2118,17 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
|
||||
""" # noqa: E501
|
||||
self._create_query_builders()
|
||||
|
||||
results = ["Vector Search Plan:"]
|
||||
results.append(
|
||||
self._table._explain_plan(
|
||||
self._vector_query.to_query_object(), verbose=verbose
|
||||
)
|
||||
reranker_label = str(self._reranker) if self._reranker else "No reranker"
|
||||
vector_plan = self._table._explain_plan(
|
||||
self._vector_query.to_query_object(), verbose=verbose
|
||||
)
|
||||
results.append("FTS Search Plan:")
|
||||
results.append(
|
||||
self._table._explain_plan(
|
||||
self._fts_query.to_query_object(), verbose=verbose
|
||||
)
|
||||
fts_plan = self._table._explain_plan(
|
||||
self._fts_query.to_query_object(), verbose=verbose
|
||||
)
|
||||
return "\n".join(results)
|
||||
# Indent sub-plans under the reranker
|
||||
indented_vector = "\n".join(" " + line for line in vector_plan.splitlines())
|
||||
indented_fts = "\n".join(" " + line for line in fts_plan.splitlines())
|
||||
return f"{reranker_label}\n {indented_vector}\n {indented_fts}"
|
||||
|
||||
def analyze_plan(self):
|
||||
"""Execute the query and display with runtime metrics.
|
||||
@@ -3146,23 +3162,20 @@ class AsyncHybridQuery(AsyncStandardQuery, AsyncVectorQueryBase):
|
||||
... plan = await table.query().nearest_to([1.0, 2.0]).nearest_to_text("hello").explain_plan(True)
|
||||
... print(plan)
|
||||
>>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
|
||||
Vector Search Plan:
|
||||
ProjectionExec: expr=[vector@0 as vector, text@3 as text, _distance@2 as _distance]
|
||||
Take: columns="vector, _rowid, _distance, (text)"
|
||||
CoalesceBatchesExec: target_batch_size=1024
|
||||
GlobalLimitExec: skip=0, fetch=10
|
||||
FilterExec: _distance@2 IS NOT NULL
|
||||
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST, _rowid@1 ASC NULLS LAST], preserve_partitioning=[false]
|
||||
KNNVectorDistance: metric=l2
|
||||
LanceRead: uri=..., projection=[vector], ...
|
||||
<BLANKLINE>
|
||||
FTS Search Plan:
|
||||
ProjectionExec: expr=[vector@2 as vector, text@3 as text, _score@1 as _score]
|
||||
Take: columns="_rowid, _score, (vector), (text)"
|
||||
CoalesceBatchesExec: target_batch_size=1024
|
||||
GlobalLimitExec: skip=0, fetch=10
|
||||
MatchQuery: column=text, query=hello
|
||||
<BLANKLINE>
|
||||
RRFReranker(K=60)
|
||||
ProjectionExec: expr=[vector@0 as vector, text@3 as text, _distance@2 as _distance]
|
||||
Take: columns="vector, _rowid, _distance, (text)"
|
||||
CoalesceBatchesExec: target_batch_size=1024
|
||||
GlobalLimitExec: skip=0, fetch=10
|
||||
FilterExec: _distance@2 IS NOT NULL
|
||||
SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST, _rowid@1 ASC NULLS LAST], preserve_partitioning=[false]
|
||||
KNNVectorDistance: metric=l2
|
||||
LanceRead: uri=..., projection=[vector], ...
|
||||
ProjectionExec: expr=[vector@2 as vector, text@3 as text, _score@1 as _score]
|
||||
Take: columns="_rowid, _score, (vector), (text)"
|
||||
CoalesceBatchesExec: target_batch_size=1024
|
||||
GlobalLimitExec: skip=0, fetch=10
|
||||
MatchQuery: column=text, query=hello
|
||||
|
||||
Parameters
|
||||
----------
|
||||
@@ -3174,12 +3187,12 @@ class AsyncHybridQuery(AsyncStandardQuery, AsyncVectorQueryBase):
|
||||
plan : str
|
||||
""" # noqa: E501
|
||||
|
||||
results = ["Vector Search Plan:"]
|
||||
results.append(await self._inner.to_vector_query().explain_plan(verbose))
|
||||
results.append("FTS Search Plan:")
|
||||
results.append(await self._inner.to_fts_query().explain_plan(verbose))
|
||||
|
||||
return "\n".join(results)
|
||||
vector_plan = await self._inner.to_vector_query().explain_plan(verbose)
|
||||
fts_plan = await self._inner.to_fts_query().explain_plan(verbose)
|
||||
# Indent sub-plans under the reranker
|
||||
indented_vector = "\n".join(" " + line for line in vector_plan.splitlines())
|
||||
indented_fts = "\n".join(" " + line for line in fts_plan.splitlines())
|
||||
return f"{self._reranker}\n {indented_vector}\n {indented_fts}"
|
||||
|
||||
async def analyze_plan(self):
|
||||
"""
|
||||
|
||||
@@ -42,10 +42,18 @@ class AnswerdotaiRerankers(Reranker):
|
||||
rerankers = attempt_import_or_raise(
|
||||
"rerankers"
|
||||
) # import here for faster ops later
|
||||
self.model_name = model_name
|
||||
self.model_type = model_type
|
||||
self.reranker = rerankers.Reranker(
|
||||
model_name=model_name, model_type=model_type, **kwargs
|
||||
)
|
||||
|
||||
def __str__(self):
|
||||
return (
|
||||
f"AnswerdotaiRerankers(model_type={self.model_type}, "
|
||||
f"model_name={self.model_name})"
|
||||
)
|
||||
|
||||
def _rerank(self, result_set: pa.Table, query: str):
|
||||
result_set = self._handle_empty_results(result_set)
|
||||
if len(result_set) == 0:
|
||||
|
||||
@@ -40,6 +40,9 @@ class Reranker(ABC):
|
||||
if ARROW_VERSION.major <= 13:
|
||||
self._concat_tables_args = {"promote": True}
|
||||
|
||||
def __str__(self):
|
||||
return self.__class__.__name__
|
||||
|
||||
def rerank_vector(
|
||||
self,
|
||||
query: str,
|
||||
|
||||
@@ -44,6 +44,9 @@ class CohereReranker(Reranker):
|
||||
self.top_n = top_n
|
||||
self.api_key = api_key
|
||||
|
||||
def __str__(self):
|
||||
return f"CohereReranker(model_name={self.model_name})"
|
||||
|
||||
@cached_property
|
||||
def _client(self):
|
||||
cohere = attempt_import_or_raise("cohere")
|
||||
|
||||
@@ -50,6 +50,9 @@ class CrossEncoderReranker(Reranker):
|
||||
if self.device is None:
|
||||
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
|
||||
def __str__(self):
|
||||
return f"CrossEncoderReranker(model_name={self.model_name})"
|
||||
|
||||
@cached_property
|
||||
def model(self):
|
||||
sbert = attempt_import_or_raise("sentence_transformers")
|
||||
|
||||
@@ -45,6 +45,9 @@ class JinaReranker(Reranker):
|
||||
self.top_n = top_n
|
||||
self.api_key = api_key
|
||||
|
||||
def __str__(self):
|
||||
return f"JinaReranker(model_name={self.model_name})"
|
||||
|
||||
@cached_property
|
||||
def _client(self):
|
||||
import requests
|
||||
|
||||
@@ -38,6 +38,9 @@ class LinearCombinationReranker(Reranker):
|
||||
self.weight = weight
|
||||
self.fill = fill
|
||||
|
||||
def __str__(self):
|
||||
return f"LinearCombinationReranker(weight={self.weight}, fill={self.fill})"
|
||||
|
||||
def rerank_hybrid(
|
||||
self,
|
||||
query: str, # noqa: F821
|
||||
|
||||
@@ -54,6 +54,12 @@ class MRRReranker(Reranker):
|
||||
self.weight_vector = weight_vector
|
||||
self.weight_fts = weight_fts
|
||||
|
||||
def __str__(self):
|
||||
return (
|
||||
f"MRRReranker(weight_vector={self.weight_vector}, "
|
||||
f"weight_fts={self.weight_fts})"
|
||||
)
|
||||
|
||||
def rerank_hybrid(
|
||||
self,
|
||||
query: str, # noqa: F821
|
||||
|
||||
@@ -43,6 +43,9 @@ class OpenaiReranker(Reranker):
|
||||
self.column = column
|
||||
self.api_key = api_key
|
||||
|
||||
def __str__(self):
|
||||
return f"OpenaiReranker(model_name={self.model_name})"
|
||||
|
||||
def _rerank(self, result_set: pa.Table, query: str):
|
||||
result_set = self._handle_empty_results(result_set)
|
||||
if len(result_set) == 0:
|
||||
|
||||
@@ -36,6 +36,9 @@ class RRFReranker(Reranker):
|
||||
super().__init__(return_score)
|
||||
self.K = K
|
||||
|
||||
def __str__(self):
|
||||
return f"RRFReranker(K={self.K})"
|
||||
|
||||
def rerank_hybrid(
|
||||
self,
|
||||
query: str, # noqa: F821
|
||||
|
||||
@@ -52,6 +52,9 @@ class VoyageAIReranker(Reranker):
|
||||
self.api_key = api_key
|
||||
self.truncation = truncation
|
||||
|
||||
def __str__(self):
|
||||
return f"VoyageAIReranker(model_name={self.model_name})"
|
||||
|
||||
@cached_property
|
||||
def _client(self):
|
||||
voyageai = attempt_import_or_raise("voyageai")
|
||||
|
||||
@@ -904,7 +904,9 @@ class Table(ABC):
|
||||
----------
|
||||
field_names: str or list of str
|
||||
The name(s) of the field to index.
|
||||
can be only str if use_tantivy=True for now.
|
||||
If ``use_tantivy`` is False (default), only a single field name
|
||||
(str) is supported. To index multiple fields, create a separate
|
||||
FTS index for each field.
|
||||
replace: bool, default False
|
||||
If True, replace the existing index if it exists. Note that this is
|
||||
not yet an atomic operation; the index will be temporarily
|
||||
@@ -2222,6 +2224,37 @@ class LanceTable(Table):
|
||||
def uri(self) -> str:
|
||||
return LOOP.run(self._table.uri())
|
||||
|
||||
def initial_storage_options(self) -> Optional[Dict[str, str]]:
|
||||
"""Get the initial storage options that were passed in when opening this table.
|
||||
|
||||
For dynamically refreshed options (e.g., credential vending), use
|
||||
:meth:`latest_storage_options`.
|
||||
|
||||
Warning: This is an internal API and the return value is subject to change.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Optional[Dict[str, str]]
|
||||
The storage options, or None if no storage options were configured.
|
||||
"""
|
||||
return LOOP.run(self._table.initial_storage_options())
|
||||
|
||||
def latest_storage_options(self) -> Optional[Dict[str, str]]:
|
||||
"""Get the latest storage options, refreshing from provider if configured.
|
||||
|
||||
This method is useful for credential vending scenarios where storage options
|
||||
may be refreshed dynamically. If no dynamic provider is configured, this
|
||||
returns the initial static options.
|
||||
|
||||
Warning: This is an internal API and the return value is subject to change.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Optional[Dict[str, str]]
|
||||
The storage options, or None if no storage options were configured.
|
||||
"""
|
||||
return LOOP.run(self._table.latest_storage_options())
|
||||
|
||||
def create_scalar_index(
|
||||
self,
|
||||
column: str,
|
||||
@@ -2267,7 +2300,11 @@ class LanceTable(Table):
|
||||
):
|
||||
if not use_tantivy:
|
||||
if not isinstance(field_names, str):
|
||||
raise ValueError("field_names must be a string when use_tantivy=False")
|
||||
raise ValueError(
|
||||
"Native FTS indexes can only be created on a single field "
|
||||
"at a time. To search over multiple text fields, create a "
|
||||
"separate FTS index for each field."
|
||||
)
|
||||
|
||||
if tokenizer_name is None:
|
||||
tokenizer_configs = {
|
||||
@@ -3624,6 +3661,37 @@ class AsyncTable:
|
||||
"""
|
||||
return await self._inner.uri()
|
||||
|
||||
async def initial_storage_options(self) -> Optional[Dict[str, str]]:
|
||||
"""Get the initial storage options that were passed in when opening this table.
|
||||
|
||||
For dynamically refreshed options (e.g., credential vending), use
|
||||
:meth:`latest_storage_options`.
|
||||
|
||||
Warning: This is an internal API and the return value is subject to change.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Optional[Dict[str, str]]
|
||||
The storage options, or None if no storage options were configured.
|
||||
"""
|
||||
return await self._inner.initial_storage_options()
|
||||
|
||||
async def latest_storage_options(self) -> Optional[Dict[str, str]]:
|
||||
"""Get the latest storage options, refreshing from provider if configured.
|
||||
|
||||
This method is useful for credential vending scenarios where storage options
|
||||
may be refreshed dynamically. If no dynamic provider is configured, this
|
||||
returns the initial static options.
|
||||
|
||||
Warning: This is an internal API and the return value is subject to change.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Optional[Dict[str, str]]
|
||||
The storage options, or None if no storage options were configured.
|
||||
"""
|
||||
return await self._inner.latest_storage_options()
|
||||
|
||||
async def add(
|
||||
self,
|
||||
data: DATA,
|
||||
|
||||
@@ -419,3 +419,22 @@ def batch_to_tensor(batch: pa.RecordBatch):
|
||||
"""
|
||||
torch = attempt_import_or_raise("torch", "torch")
|
||||
return torch.stack([torch.from_dlpack(col) for col in batch.columns])
|
||||
|
||||
|
||||
def batch_to_tensor_rows(batch: pa.RecordBatch):
|
||||
"""
|
||||
Convert a PyArrow RecordBatch to a list of PyTorch Tensors, one per row
|
||||
|
||||
Each column is converted to a numpy array (which may copy the data)
|
||||
and the columns are then stacked into a single tensor. The 2D tensor
|
||||
is then converted to a list of tensors, one per row
|
||||
|
||||
Fails if torch or numpy is not installed.
|
||||
Fails if a column's data type is not supported by PyTorch.
|
||||
"""
|
||||
torch = attempt_import_or_raise("torch", "torch")
|
||||
numpy = attempt_import_or_raise("numpy", "numpy")
|
||||
columns = [col.to_numpy(zero_copy_only=False) for col in batch.columns]
|
||||
stacked = torch.tensor(numpy.column_stack(columns))
|
||||
rows = list(stacked.unbind(dim=0))
|
||||
return rows
|
||||
|
||||
@@ -163,9 +163,7 @@ async def test_explain_plan(table: AsyncTable):
|
||||
table.query().nearest_to_text("dog").nearest_to([0.1, 0.1]).explain_plan(True)
|
||||
)
|
||||
|
||||
assert "Vector Search Plan" in plan
|
||||
assert "KNNVectorDistance" in plan
|
||||
assert "FTS Search Plan" in plan
|
||||
assert "LanceRead" in plan
|
||||
|
||||
|
||||
|
||||
@@ -664,23 +664,20 @@ def test_iter_basic(some_permutation: Permutation):
|
||||
expected_batches = (950 + batch_size - 1) // batch_size # ceiling division
|
||||
assert len(batches) == expected_batches
|
||||
|
||||
# Check that all batches are dicts (default python format)
|
||||
assert all(isinstance(batch, dict) for batch in batches)
|
||||
# Check that all batches are lists of dicts (default python format)
|
||||
assert all(isinstance(batch, list) for batch in batches)
|
||||
|
||||
# Check that batches have the correct structure
|
||||
for batch in batches:
|
||||
assert "id" in batch
|
||||
assert "value" in batch
|
||||
assert isinstance(batch["id"], list)
|
||||
assert isinstance(batch["value"], list)
|
||||
assert "id" in batch[0]
|
||||
assert "value" in batch[0]
|
||||
|
||||
# Check that all batches except the last have the correct size
|
||||
for batch in batches[:-1]:
|
||||
assert len(batch["id"]) == batch_size
|
||||
assert len(batch["value"]) == batch_size
|
||||
assert len(batch) == batch_size
|
||||
|
||||
# Last batch might be smaller
|
||||
assert len(batches[-1]["id"]) <= batch_size
|
||||
assert len(batches[-1]) <= batch_size
|
||||
|
||||
|
||||
def test_iter_skip_last_batch(some_permutation: Permutation):
|
||||
@@ -699,11 +696,11 @@ def test_iter_skip_last_batch(some_permutation: Permutation):
|
||||
if 950 % batch_size != 0:
|
||||
assert len(batches_without_skip) == num_full_batches + 1
|
||||
# Last batch should be smaller
|
||||
assert len(batches_without_skip[-1]["id"]) == 950 % batch_size
|
||||
assert len(batches_without_skip[-1]) == 950 % batch_size
|
||||
|
||||
# All batches with skip_last_batch should be full size
|
||||
for batch in batches_with_skip:
|
||||
assert len(batch["id"]) == batch_size
|
||||
assert len(batch) == batch_size
|
||||
|
||||
|
||||
def test_iter_different_batch_sizes(some_permutation: Permutation):
|
||||
@@ -720,12 +717,12 @@ def test_iter_different_batch_sizes(some_permutation: Permutation):
|
||||
# Test with batch size equal to total rows
|
||||
single_batch = list(some_permutation.iter(950, skip_last_batch=False))
|
||||
assert len(single_batch) == 1
|
||||
assert len(single_batch[0]["id"]) == 950
|
||||
assert len(single_batch[0]) == 950
|
||||
|
||||
# Test with batch size larger than total rows
|
||||
oversized_batch = list(some_permutation.iter(10000, skip_last_batch=False))
|
||||
assert len(oversized_batch) == 1
|
||||
assert len(oversized_batch[0]["id"]) == 950
|
||||
assert len(oversized_batch[0]) == 950
|
||||
|
||||
|
||||
def test_dunder_iter(some_permutation: Permutation):
|
||||
@@ -738,15 +735,13 @@ def test_dunder_iter(some_permutation: Permutation):
|
||||
|
||||
# All batches should be full size
|
||||
for batch in batches:
|
||||
assert len(batch["id"]) == 100
|
||||
assert len(batch["value"]) == 100
|
||||
assert len(batch) == 100
|
||||
|
||||
some_permutation = some_permutation.with_batch_size(400)
|
||||
batches = list(some_permutation)
|
||||
assert len(batches) == 2 # floor(950 / 400) since skip_last_batch=True
|
||||
for batch in batches:
|
||||
assert len(batch["id"]) == 400
|
||||
assert len(batch["value"]) == 400
|
||||
assert len(batch) == 400
|
||||
|
||||
|
||||
def test_iter_with_different_formats(some_permutation: Permutation):
|
||||
@@ -761,7 +756,7 @@ def test_iter_with_different_formats(some_permutation: Permutation):
|
||||
# Test with python format (default)
|
||||
python_perm = some_permutation.with_format("python")
|
||||
python_batches = list(python_perm.iter(batch_size, skip_last_batch=False))
|
||||
assert all(isinstance(batch, dict) for batch in python_batches)
|
||||
assert all(isinstance(batch, list) for batch in python_batches)
|
||||
|
||||
# Test with pandas format
|
||||
pandas_perm = some_permutation.with_format("pandas")
|
||||
@@ -780,8 +775,8 @@ def test_iter_with_column_selection(some_permutation: Permutation):
|
||||
|
||||
# Check that batches only contain the id column
|
||||
for batch in batches:
|
||||
assert "id" in batch
|
||||
assert "value" not in batch
|
||||
assert "id" in batch[0]
|
||||
assert "value" not in batch[0]
|
||||
|
||||
|
||||
def test_iter_with_column_rename(some_permutation: Permutation):
|
||||
@@ -791,9 +786,9 @@ def test_iter_with_column_rename(some_permutation: Permutation):
|
||||
|
||||
# Check that batches have the renamed column
|
||||
for batch in batches:
|
||||
assert "id" in batch
|
||||
assert "data" in batch
|
||||
assert "value" not in batch
|
||||
assert "id" in batch[0]
|
||||
assert "data" in batch[0]
|
||||
assert "value" not in batch[0]
|
||||
|
||||
|
||||
def test_iter_with_limit_offset(some_permutation: Permutation):
|
||||
@@ -812,14 +807,14 @@ def test_iter_with_limit_offset(some_permutation: Permutation):
|
||||
assert len(limit_batches) == 5
|
||||
|
||||
no_skip = some_permutation.iter(101, skip_last_batch=False)
|
||||
row_100 = next(no_skip)["id"][100]
|
||||
row_100 = next(no_skip)[100]["id"]
|
||||
|
||||
# Test with both limit and offset
|
||||
limited_perm = some_permutation.with_skip(100).with_take(300)
|
||||
limited_batches = list(limited_perm.iter(100, skip_last_batch=False))
|
||||
# Should have 3 batches (300 / 100)
|
||||
assert len(limited_batches) == 3
|
||||
assert limited_batches[0]["id"][0] == row_100
|
||||
assert limited_batches[0][0]["id"] == row_100
|
||||
|
||||
|
||||
def test_iter_empty_permutation(mem_db):
|
||||
@@ -842,7 +837,7 @@ def test_iter_single_row(mem_db):
|
||||
# With skip_last_batch=False, should get one batch
|
||||
batches = list(perm.iter(10, skip_last_batch=False))
|
||||
assert len(batches) == 1
|
||||
assert len(batches[0]["id"]) == 1
|
||||
assert len(batches[0]) == 1
|
||||
|
||||
# With skip_last_batch=True, should skip the single row (since it's < batch_size)
|
||||
batches_skip = list(perm.iter(10, skip_last_batch=True))
|
||||
@@ -860,8 +855,7 @@ def test_identity_permutation(mem_db):
|
||||
|
||||
batches = list(permutation.iter(10, skip_last_batch=False))
|
||||
assert len(batches) == 1
|
||||
assert len(batches[0]["id"]) == 10
|
||||
assert len(batches[0]["value"]) == 10
|
||||
assert len(batches[0]) == 10
|
||||
|
||||
permutation = permutation.remove_columns(["value"])
|
||||
assert permutation.num_columns == 1
|
||||
@@ -904,10 +898,10 @@ def test_transform_fn(mem_db):
|
||||
py_result = list(permutation.with_format("python").iter(10, skip_last_batch=False))[
|
||||
0
|
||||
]
|
||||
assert len(py_result) == 2
|
||||
assert len(py_result["id"]) == 10
|
||||
assert len(py_result["value"]) == 10
|
||||
assert isinstance(py_result, dict)
|
||||
assert len(py_result) == 10
|
||||
assert "id" in py_result[0]
|
||||
assert "value" in py_result[0]
|
||||
assert isinstance(py_result, list)
|
||||
|
||||
try:
|
||||
import torch
|
||||
@@ -915,9 +909,11 @@ def test_transform_fn(mem_db):
|
||||
torch_result = list(
|
||||
permutation.with_format("torch").iter(10, skip_last_batch=False)
|
||||
)[0]
|
||||
assert torch_result.shape == (2, 10)
|
||||
assert torch_result.dtype == torch.int64
|
||||
assert isinstance(torch_result, torch.Tensor)
|
||||
assert isinstance(torch_result, list)
|
||||
assert len(torch_result) == 10
|
||||
assert isinstance(torch_result[0], torch.Tensor)
|
||||
assert torch_result[0].shape == (2,)
|
||||
assert torch_result[0].dtype == torch.int64
|
||||
except ImportError:
|
||||
# Skip check if torch is not installed
|
||||
pass
|
||||
@@ -945,3 +941,113 @@ def test_custom_transform(mem_db):
|
||||
batch = batches[0]
|
||||
|
||||
assert batch == pa.record_batch([range(10)], ["id"])
|
||||
|
||||
|
||||
def test_getitems_basic(some_permutation: Permutation):
|
||||
"""Test __getitems__ returns correct rows by offset."""
|
||||
result = some_permutation.__getitems__([0, 1, 2])
|
||||
assert isinstance(result, list)
|
||||
assert "id" in result[0]
|
||||
assert "value" in result[0]
|
||||
assert len(result) == 3
|
||||
|
||||
|
||||
def test_getitems_single_index(some_permutation: Permutation):
|
||||
"""Test __getitems__ with a single index."""
|
||||
result = some_permutation.__getitems__([0])
|
||||
assert len(result) == 1
|
||||
|
||||
|
||||
def test_getitems_preserves_order(some_permutation: Permutation):
|
||||
"""Test __getitems__ returns rows in the requested order."""
|
||||
# Get rows in forward order
|
||||
forward = some_permutation.__getitems__([0, 1, 2, 3, 4])
|
||||
# Get the same rows in reverse order
|
||||
reverse = some_permutation.__getitems__([4, 3, 2, 1, 0])
|
||||
|
||||
assert [r["id"] for r in forward] == list(reversed([r["id"] for r in reverse]))
|
||||
assert [r["value"] for r in forward] == list(
|
||||
reversed([r["value"] for r in reverse])
|
||||
)
|
||||
|
||||
|
||||
def test_getitems_non_contiguous(some_permutation: Permutation):
|
||||
"""Test __getitems__ with non-contiguous indices."""
|
||||
result = some_permutation.__getitems__([0, 10, 50, 100, 500])
|
||||
assert len(result) == 5
|
||||
|
||||
# Each id/value pair should match what we'd get individually
|
||||
for i, offset in enumerate([0, 10, 50, 100, 500]):
|
||||
single = some_permutation.__getitems__([offset])
|
||||
assert result[i]["id"] == single[0]["id"]
|
||||
assert result[i]["value"] == single[0]["value"]
|
||||
|
||||
|
||||
def test_getitems_with_column_selection(some_permutation: Permutation):
|
||||
"""Test __getitems__ respects column selection."""
|
||||
id_only = some_permutation.select_columns(["id"])
|
||||
result = id_only.__getitems__([0, 1, 2])
|
||||
assert "id" in result[0]
|
||||
assert "value" not in result[0]
|
||||
assert len(result) == 3
|
||||
|
||||
|
||||
def test_getitems_with_column_rename(some_permutation: Permutation):
|
||||
"""Test __getitems__ respects column renames."""
|
||||
renamed = some_permutation.rename_column("value", "data")
|
||||
result = renamed.__getitems__([0, 1])
|
||||
assert "data" in result[0]
|
||||
assert "value" not in result[0]
|
||||
assert len(result) == 2
|
||||
|
||||
|
||||
def test_getitems_with_format(some_permutation: Permutation):
|
||||
"""Test __getitems__ applies the transform function."""
|
||||
arrow_perm = some_permutation.with_format("arrow")
|
||||
result = arrow_perm.__getitems__([0, 1, 2])
|
||||
assert isinstance(result, pa.RecordBatch)
|
||||
assert result.num_rows == 3
|
||||
|
||||
|
||||
def test_getitems_with_custom_transform(some_permutation: Permutation):
|
||||
"""Test __getitems__ with a custom transform."""
|
||||
|
||||
def transform(batch: pa.RecordBatch) -> list:
|
||||
return batch.column("id").to_pylist()
|
||||
|
||||
custom = some_permutation.with_transform(transform)
|
||||
result = custom.__getitems__([0, 1, 2])
|
||||
assert isinstance(result, list)
|
||||
assert len(result) == 3
|
||||
|
||||
|
||||
def test_getitems_identity_permutation(mem_db):
|
||||
"""Test __getitems__ on an identity permutation."""
|
||||
tbl = mem_db.create_table(
|
||||
"test_table", pa.table({"id": range(10), "value": range(10)})
|
||||
)
|
||||
perm = Permutation.identity(tbl)
|
||||
|
||||
result = perm.__getitems__([0, 5, 9])
|
||||
assert [r["id"] for r in result] == [0, 5, 9]
|
||||
assert [r["value"] for r in result] == [0, 5, 9]
|
||||
|
||||
|
||||
def test_getitems_with_limit_offset(some_permutation: Permutation):
    """``__getitems__`` offsets are relative to the window left by skip/take."""
    window = some_permutation.with_skip(100).with_take(200)

    # Offsets 0, 1 and 199 are expected to be addressable inside the window.
    in_window = window.__getitems__([0, 1, 199])
    assert len(in_window) == 3

    # Offset 0 of the window should line up with offset 100 of the source.
    from_source = some_permutation.__getitems__([100])
    from_window = window.__getitems__([0])
    assert from_window[0]["id"] == from_source[0]["id"]
|
||||
|
||||
|
||||
def test_getitems_invalid_offset(some_permutation: Permutation):
    """An out-of-range offset must make ``__getitems__`` raise."""
    out_of_range = [999999]
    # NOTE(review): ``Exception`` is very broad; narrow it once the concrete
    # error type raised for a bad offset is confirmed.
    with pytest.raises(Exception):
        some_permutation.__getitems__(out_of_range)
|
||||
|
||||
@@ -1499,3 +1499,30 @@ def test_search_empty_table(mem_db):
|
||||
# Search on empty table should return empty results, not crash
|
||||
results = table.search([1.0, 2.0]).limit(5).to_list()
|
||||
assert results == []
|
||||
|
||||
|
||||
def test_fast_search(tmp_path):
    """``fast_search()`` should remove the ``LanceScan`` node from the plan."""
    dim = 32
    conn = lancedb.connect(tmp_path)

    # 256 random vectors of width ``dim``, taken from the tensor array's storage.
    embeddings = pa.FixedShapeTensorArray.from_numpy_ndarray(
        np.random.rand(256, dim)
    ).storage
    tbl = conn.create_table("test", pa.table({"vector": embeddings}))

    # Index options are given as keyword arguments, not through a config object.
    tbl.create_index(
        vector_column_name="vector", num_partitions=1, num_sub_vectors=1
    )

    # Append the same vectors again after the index was built.
    # NOTE(review): presumably these rows stay unindexed, which is what the
    # plain search has to scan -- confirm against the fast_search docs.
    tbl.add(pa.table({"vector": embeddings}))

    query = [1.0] * dim

    # Regular search: the plan is expected to contain a "LanceScan" node.
    regular_plan = tbl.search(query).explain_plan(True)
    assert "LanceScan" in regular_plan

    # Fast search: the plan is expected to contain no "LanceScan" node.
    fast_plan = tbl.search(query).fast_search().explain_plan(True)
    assert "LanceScan" not in fast_plan
|
||||
|
||||
@@ -601,7 +601,6 @@ def test_head():
|
||||
def test_query_sync_minimal():
|
||||
def handler(body):
|
||||
assert body == {
|
||||
"distance_type": "l2",
|
||||
"k": 10,
|
||||
"prefilter": True,
|
||||
"refine_factor": None,
|
||||
@@ -685,7 +684,6 @@ def test_query_sync_maximal():
|
||||
def test_query_sync_nprobes():
|
||||
def handler(body):
|
||||
assert body == {
|
||||
"distance_type": "l2",
|
||||
"k": 10,
|
||||
"prefilter": True,
|
||||
"fast_search": True,
|
||||
@@ -715,7 +713,6 @@ def test_query_sync_nprobes():
|
||||
def test_query_sync_no_max_nprobes():
|
||||
def handler(body):
|
||||
assert body == {
|
||||
"distance_type": "l2",
|
||||
"k": 10,
|
||||
"prefilter": True,
|
||||
"fast_search": True,
|
||||
@@ -838,7 +835,6 @@ def test_query_sync_hybrid():
|
||||
else:
|
||||
# Vector query
|
||||
assert body == {
|
||||
"distance_type": "l2",
|
||||
"k": 42,
|
||||
"prefilter": True,
|
||||
"refine_factor": None,
|
||||
|
||||
@@ -1880,8 +1880,13 @@ async def test_optimize_delete_unverified(tmp_db_async: AsyncConnection, tmp_pat
|
||||
],
|
||||
)
|
||||
version = await table.version()
|
||||
path = tmp_path / "test.lance" / "_versions" / f"{version - 1}.manifest"
|
||||
assert version == 2
|
||||
|
||||
# By removing a manifest file, we make the data files we just inserted unverified
|
||||
version_name = 18446744073709551615 - (version - 1)
|
||||
path = tmp_path / "test.lance" / "_versions" / f"{version_name:020}.manifest"
|
||||
os.remove(path)
|
||||
|
||||
stats = await table.optimize(delete_unverified=False)
|
||||
assert stats.prune.old_versions_removed == 0
|
||||
stats = await table.optimize(
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
import pyarrow as pa
|
||||
import pytest
|
||||
from lancedb.util import tbl_to_tensor
|
||||
from lancedb.permutation import Permutation
|
||||
|
||||
torch = pytest.importorskip("torch")
|
||||
|
||||
@@ -16,3 +17,26 @@ def test_table_dataloader(mem_db):
|
||||
for batch in dataloader:
|
||||
assert batch.size(0) == 1
|
||||
assert batch.size(1) == 10
|
||||
|
||||
|
||||
def test_permutation_dataloader(mem_db):
    """A ``Permutation`` can feed a torch ``DataLoader`` in several formats."""
    loader_cls = torch.utils.data.DataLoader
    tbl = mem_db.create_table("test_table", pa.table({"a": range(1000)}))

    # Default format: each batch is indexable by column name.
    default_perm = Permutation.identity(tbl)
    for batch in loader_cls(default_perm, batch_size=10, shuffle=True):
        assert batch["a"].size(0) == 10

    # "torch" format: batches are checked to be 10 x 1.
    torch_rows = default_perm.with_format("torch")
    for batch in loader_cls(torch_rows, batch_size=10, shuffle=True):
        assert batch.size(0) == 10
        assert batch.size(1) == 1

    # "torch_col" format with a pass-through collate_fn: batches are 1 x 10.
    torch_cols = torch_rows.with_format("torch_col")
    passthrough_loader = loader_cls(
        torch_cols, collate_fn=lambda x: x, batch_size=10, shuffle=True
    )
    for batch in passthrough_loader:
        assert batch.size(0) == 1
        assert batch.size(1) == 10
|
||||
|
||||
@@ -10,8 +10,7 @@ use arrow::{
|
||||
use futures::stream::StreamExt;
|
||||
use lancedb::arrow::SendableRecordBatchStream;
|
||||
use pyo3::{
|
||||
exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, PyAny, PyObject, PyRef, PyResult,
|
||||
Python,
|
||||
exceptions::PyStopAsyncIteration, pyclass, pymethods, Bound, Py, PyAny, PyRef, PyResult, Python,
|
||||
};
|
||||
use pyo3_async_runtimes::tokio::future_into_py;
|
||||
|
||||
@@ -36,8 +35,11 @@ impl RecordBatchStream {
|
||||
#[pymethods]
|
||||
impl RecordBatchStream {
|
||||
#[getter]
|
||||
pub fn schema(&self, py: Python) -> PyResult<PyObject> {
|
||||
(*self.schema).clone().into_pyarrow(py)
|
||||
pub fn schema(&self, py: Python) -> PyResult<Py<PyAny>> {
|
||||
(*self.schema)
|
||||
.clone()
|
||||
.into_pyarrow(py)
|
||||
.map(|obj| obj.unbind())
|
||||
}
|
||||
|
||||
pub fn __aiter__(self_: PyRef<'_, Self>) -> PyRef<'_, Self> {
|
||||
@@ -53,7 +55,12 @@ impl RecordBatchStream {
|
||||
.next()
|
||||
.await
|
||||
.ok_or_else(|| PyStopAsyncIteration::new_err(""))?;
|
||||
Python::with_gil(|py| inner_next.infer_error()?.to_pyarrow(py))
|
||||
Python::attach(|py| {
|
||||
inner_next
|
||||
.infer_error()?
|
||||
.to_pyarrow(py)
|
||||
.map(|obj| obj.unbind())
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ use pyo3::{
|
||||
exceptions::{PyRuntimeError, PyValueError},
|
||||
pyclass, pyfunction, pymethods,
|
||||
types::{PyDict, PyDictMethods},
|
||||
Bound, FromPyObject, Py, PyAny, PyObject, PyRef, PyResult, Python,
|
||||
Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
|
||||
};
|
||||
use pyo3_async_runtimes::tokio::future_into_py;
|
||||
|
||||
@@ -114,14 +114,15 @@ impl Connection {
|
||||
data: Bound<'_, PyAny>,
|
||||
namespace: Vec<String>,
|
||||
storage_options: Option<HashMap<String, String>>,
|
||||
storage_options_provider: Option<PyObject>,
|
||||
storage_options_provider: Option<Py<PyAny>>,
|
||||
location: Option<String>,
|
||||
) -> PyResult<Bound<'a, PyAny>> {
|
||||
let inner = self_.get_inner()?.clone();
|
||||
|
||||
let mode = Self::parse_create_mode_str(mode)?;
|
||||
|
||||
let batches = ArrowArrayStreamReader::from_pyarrow_bound(&data)?;
|
||||
let batches: Box<dyn arrow::array::RecordBatchReader + Send> =
|
||||
Box::new(ArrowArrayStreamReader::from_pyarrow_bound(&data)?);
|
||||
|
||||
let mut builder = inner.create_table(name, batches).mode(mode);
|
||||
|
||||
@@ -152,7 +153,7 @@ impl Connection {
|
||||
schema: Bound<'_, PyAny>,
|
||||
namespace: Vec<String>,
|
||||
storage_options: Option<HashMap<String, String>>,
|
||||
storage_options_provider: Option<PyObject>,
|
||||
storage_options_provider: Option<Py<PyAny>>,
|
||||
location: Option<String>,
|
||||
) -> PyResult<Bound<'a, PyAny>> {
|
||||
let inner = self_.get_inner()?.clone();
|
||||
@@ -187,7 +188,7 @@ impl Connection {
|
||||
name: String,
|
||||
namespace: Vec<String>,
|
||||
storage_options: Option<HashMap<String, String>>,
|
||||
storage_options_provider: Option<PyObject>,
|
||||
storage_options_provider: Option<Py<PyAny>>,
|
||||
index_cache_size: Option<u32>,
|
||||
location: Option<String>,
|
||||
) -> PyResult<Bound<'_, PyAny>> {
|
||||
@@ -307,7 +308,7 @@ impl Connection {
|
||||
..Default::default()
|
||||
};
|
||||
let response = inner.list_namespaces(request).await.infer_error()?;
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("namespaces", response.namespaces)?;
|
||||
dict.set_item("page_token", response.page_token)?;
|
||||
@@ -345,7 +346,7 @@ impl Connection {
|
||||
..Default::default()
|
||||
};
|
||||
let response = inner.create_namespace(request).await.infer_error()?;
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("properties", response.properties)?;
|
||||
Ok(dict.unbind())
|
||||
@@ -386,7 +387,7 @@ impl Connection {
|
||||
..Default::default()
|
||||
};
|
||||
let response = inner.drop_namespace(request).await.infer_error()?;
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("properties", response.properties)?;
|
||||
dict.set_item("transaction_id", response.transaction_id)?;
|
||||
@@ -413,7 +414,7 @@ impl Connection {
|
||||
..Default::default()
|
||||
};
|
||||
let response = inner.describe_namespace(request).await.infer_error()?;
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("properties", response.properties)?;
|
||||
Ok(dict.unbind())
|
||||
@@ -443,7 +444,7 @@ impl Connection {
|
||||
..Default::default()
|
||||
};
|
||||
let response = inner.list_tables(request).await.infer_error()?;
|
||||
Python::with_gil(|py| -> PyResult<Py<PyDict>> {
|
||||
Python::attach(|py| -> PyResult<Py<PyDict>> {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("tables", response.tables)?;
|
||||
dict.set_item("page_token", response.page_token)?;
|
||||
|
||||
@@ -40,7 +40,7 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
|
||||
request_id,
|
||||
source,
|
||||
status_code,
|
||||
} => Python::with_gil(|py| {
|
||||
} => Python::attach(|py| {
|
||||
let message = err.to_string();
|
||||
let http_err_cls = py
|
||||
.import(intern!(py, "lancedb.remote.errors"))?
|
||||
@@ -75,7 +75,7 @@ impl<T> PythonErrorExt<T> for std::result::Result<T, LanceError> {
|
||||
max_read_failures,
|
||||
source,
|
||||
status_code,
|
||||
} => Python::with_gil(|py| {
|
||||
} => Python::attach(|py| {
|
||||
let cause_err = http_from_rust_error(
|
||||
py,
|
||||
source.as_ref(),
|
||||
|
||||
@@ -12,7 +12,7 @@ pub struct PyHeaderProvider {
|
||||
|
||||
impl Clone for PyHeaderProvider {
|
||||
fn clone(&self) -> Self {
|
||||
Python::with_gil(|py| Self {
|
||||
Python::attach(|py| Self {
|
||||
provider: self.provider.clone_ref(py),
|
||||
})
|
||||
}
|
||||
@@ -25,7 +25,7 @@ impl PyHeaderProvider {
|
||||
|
||||
/// Get headers from the Python provider (internal implementation)
|
||||
fn get_headers_internal(&self) -> Result<HashMap<String, String>, String> {
|
||||
Python::with_gil(|py| {
|
||||
Python::attach(|py| {
|
||||
// Call the get_headers method
|
||||
let result = self.provider.call_method0(py, "get_headers");
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ use std::sync::{Arc, Mutex};
|
||||
use crate::{
|
||||
arrow::RecordBatchStream, connection::Connection, error::PythonErrorExt, table::Table,
|
||||
};
|
||||
use arrow::pyarrow::ToPyArrow;
|
||||
use arrow::pyarrow::{PyArrowType, ToPyArrow};
|
||||
use lancedb::{
|
||||
dataloader::permutation::{
|
||||
builder::{PermutationBuilder as LancePermutationBuilder, ShuffleStrategy},
|
||||
@@ -281,7 +281,7 @@ impl PyPermutationReader {
|
||||
let reader = slf.reader.clone();
|
||||
future_into_py(slf.py(), async move {
|
||||
let schema = reader.output_schema(selection).await.infer_error()?;
|
||||
Python::with_gil(|py| schema.to_pyarrow(py))
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -328,4 +328,21 @@ impl PyPermutationReader {
|
||||
Ok(RecordBatchStream::new(stream))
|
||||
})
|
||||
}
|
||||
|
||||
#[pyo3(signature = (indices, *, selection=None))]
|
||||
pub fn take_offsets<'py>(
|
||||
slf: PyRef<'py, Self>,
|
||||
indices: Vec<u64>,
|
||||
selection: Option<Bound<'py, PyAny>>,
|
||||
) -> PyResult<Bound<'py, PyAny>> {
|
||||
let selection = Self::parse_selection(selection)?;
|
||||
let reader = slf.reader.clone();
|
||||
future_into_py(slf.py(), async move {
|
||||
let batch = reader
|
||||
.take_offsets(&indices, selection)
|
||||
.await
|
||||
.infer_error()?;
|
||||
Ok(PyArrowType(batch))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -453,7 +453,7 @@ impl Query {
|
||||
let inner = self_.inner.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let schema = inner.output_schema().await.infer_error()?;
|
||||
Python::with_gil(|py| schema.to_pyarrow(py))
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -532,7 +532,7 @@ impl TakeQuery {
|
||||
let inner = self_.inner.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let schema = inner.output_schema().await.infer_error()?;
|
||||
Python::with_gil(|py| schema.to_pyarrow(py))
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -627,7 +627,7 @@ impl FTSQuery {
|
||||
let inner = self_.inner.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let schema = inner.output_schema().await.infer_error()?;
|
||||
Python::with_gil(|py| schema.to_pyarrow(py))
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -806,7 +806,7 @@ impl VectorQuery {
|
||||
let inner = self_.inner.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let schema = inner.output_schema().await.infer_error()?;
|
||||
Python::with_gil(|py| schema.to_pyarrow(py))
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -17,20 +17,20 @@ use pyo3::types::PyDict;
|
||||
/// Internal wrapper around a Python object implementing StorageOptionsProvider
|
||||
pub struct PyStorageOptionsProvider {
|
||||
/// The Python object implementing fetch_storage_options()
|
||||
inner: PyObject,
|
||||
inner: Py<PyAny>,
|
||||
}
|
||||
|
||||
impl Clone for PyStorageOptionsProvider {
|
||||
fn clone(&self) -> Self {
|
||||
Python::with_gil(|py| Self {
|
||||
Python::attach(|py| Self {
|
||||
inner: self.inner.clone_ref(py),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl PyStorageOptionsProvider {
|
||||
pub fn new(obj: PyObject) -> PyResult<Self> {
|
||||
Python::with_gil(|py| {
|
||||
pub fn new(obj: Py<PyAny>) -> PyResult<Self> {
|
||||
Python::attach(|py| {
|
||||
// Verify the object has a fetch_storage_options method
|
||||
if !obj.bind(py).hasattr("fetch_storage_options")? {
|
||||
return Err(pyo3::exceptions::PyTypeError::new_err(
|
||||
@@ -60,7 +60,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
|
||||
let py_provider = self.py_provider.clone();
|
||||
|
||||
tokio::task::spawn_blocking(move || {
|
||||
Python::with_gil(|py| {
|
||||
Python::attach(|py| {
|
||||
// Call the Python fetch_storage_options method
|
||||
let result = py_provider
|
||||
.inner
|
||||
@@ -119,7 +119,7 @@ impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
|
||||
}
|
||||
|
||||
fn provider_id(&self) -> String {
|
||||
Python::with_gil(|py| {
|
||||
Python::attach(|py| {
|
||||
// Call provider_id() method on the Python object
|
||||
let obj = self.py_provider.inner.bind(py);
|
||||
obj.call_method0("provider_id")
|
||||
@@ -143,7 +143,7 @@ impl std::fmt::Debug for PyStorageOptionsProviderWrapper {
|
||||
/// This is the main entry point for converting Python StorageOptionsProvider objects
|
||||
/// to Rust trait objects that can be used by the Lance ecosystem.
|
||||
pub fn py_object_to_storage_options_provider(
|
||||
py_obj: PyObject,
|
||||
py_obj: Py<PyAny>,
|
||||
) -> PyResult<Arc<dyn StorageOptionsProvider>> {
|
||||
let py_provider = PyStorageOptionsProvider::new(py_obj)?;
|
||||
Ok(Arc::new(PyStorageOptionsProviderWrapper::new(py_provider)))
|
||||
|
||||
@@ -287,7 +287,7 @@ impl Table {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let schema = inner.schema().await.infer_error()?;
|
||||
Python::with_gil(|py| schema.to_pyarrow(py))
|
||||
Python::attach(|py| schema.to_pyarrow(py).map(|obj| obj.unbind()))
|
||||
})
|
||||
}
|
||||
|
||||
@@ -296,7 +296,8 @@ impl Table {
|
||||
data: Bound<'_, PyAny>,
|
||||
mode: String,
|
||||
) -> PyResult<Bound<'a, PyAny>> {
|
||||
let batches = ArrowArrayStreamReader::from_pyarrow_bound(&data)?;
|
||||
let batches: Box<dyn arrow::array::RecordBatchReader + Send> =
|
||||
Box::new(ArrowArrayStreamReader::from_pyarrow_bound(&data)?);
|
||||
let mut op = self_.inner_ref()?.add(batches);
|
||||
if mode == "append" {
|
||||
op = op.mode(AddDataMode::Append);
|
||||
@@ -437,7 +438,7 @@ impl Table {
|
||||
future_into_py(self_.py(), async move {
|
||||
let stats = inner.index_stats(&index_name).await.infer_error()?;
|
||||
if let Some(stats) = stats {
|
||||
Python::with_gil(|py| {
|
||||
Python::attach(|py| {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("num_indexed_rows", stats.num_indexed_rows)?;
|
||||
dict.set_item("num_unindexed_rows", stats.num_unindexed_rows)?;
|
||||
@@ -467,7 +468,7 @@ impl Table {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let stats = inner.stats().await.infer_error()?;
|
||||
Python::with_gil(|py| {
|
||||
Python::attach(|py| {
|
||||
let dict = PyDict::new(py);
|
||||
dict.set_item("total_bytes", stats.total_bytes)?;
|
||||
dict.set_item("num_rows", stats.num_rows)?;
|
||||
@@ -502,6 +503,20 @@ impl Table {
|
||||
future_into_py(self_.py(), async move { inner.uri().await.infer_error() })
|
||||
}
|
||||
|
||||
pub fn initial_storage_options(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
Ok(inner.initial_storage_options().await)
|
||||
})
|
||||
}
|
||||
|
||||
pub fn latest_storage_options(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
inner.latest_storage_options().await.infer_error()
|
||||
})
|
||||
}
|
||||
|
||||
pub fn __repr__(&self) -> String {
|
||||
match &self.inner {
|
||||
None => format!("ClosedTable({})", self.name),
|
||||
@@ -521,7 +536,7 @@ impl Table {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let versions = inner.list_versions().await.infer_error()?;
|
||||
let versions_as_dict = Python::with_gil(|py| {
|
||||
let versions_as_dict = Python::attach(|py| {
|
||||
versions
|
||||
.iter()
|
||||
.map(|v| {
|
||||
@@ -872,7 +887,7 @@ impl Tags {
|
||||
let tags = inner.tags().await.infer_error()?;
|
||||
let res = tags.list().await.infer_error()?;
|
||||
|
||||
Python::with_gil(|py| {
|
||||
Python::attach(|py| {
|
||||
let py_dict = PyDict::new(py);
|
||||
for (key, contents) in res {
|
||||
let value_dict = PyDict::new(py);
|
||||
|
||||
5349
python/uv.lock
generated
Normal file
5349
python/uv.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "lancedb"
|
||||
version = "0.24.1"
|
||||
version = "0.26.2"
|
||||
edition.workspace = true
|
||||
description = "LanceDB: A serverless, low-latency vector database for AI applications"
|
||||
license.workspace = true
|
||||
@@ -25,6 +25,7 @@ datafusion-catalog.workspace = true
|
||||
datafusion-common.workspace = true
|
||||
datafusion-execution.workspace = true
|
||||
datafusion-expr.workspace = true
|
||||
datafusion-physical-expr.workspace = true
|
||||
datafusion-physical-plan.workspace = true
|
||||
datafusion.workspace = true
|
||||
object_store = { workspace = true }
|
||||
|
||||
@@ -3,13 +3,12 @@
|
||||
|
||||
use std::{iter::once, sync::Arc};
|
||||
|
||||
use arrow_array::{Float64Array, Int32Array, RecordBatch, RecordBatchIterator, StringArray};
|
||||
use arrow_array::{Float64Array, Int32Array, RecordBatch, StringArray};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use aws_config::Region;
|
||||
use aws_sdk_bedrockruntime::Client;
|
||||
use futures::StreamExt;
|
||||
use lancedb::{
|
||||
arrow::IntoArrow,
|
||||
connect,
|
||||
embeddings::{bedrock::BedrockEmbeddingFunction, EmbeddingDefinition, EmbeddingFunction},
|
||||
query::{ExecutableQuery, QueryBase},
|
||||
@@ -67,7 +66,7 @@ async fn main() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn make_data() -> impl IntoArrow {
|
||||
fn make_data() -> RecordBatch {
|
||||
let schema = Schema::new(vec![
|
||||
Field::new("id", DataType::Int32, true),
|
||||
Field::new("text", DataType::Utf8, false),
|
||||
@@ -83,10 +82,9 @@ fn make_data() -> impl IntoArrow {
|
||||
]);
|
||||
let price = Float64Array::from(vec![10.0, 50.0, 100.0, 30.0]);
|
||||
let schema = Arc::new(schema);
|
||||
let rb = RecordBatch::try_new(
|
||||
RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![Arc::new(id), Arc::new(text), Arc::new(price)],
|
||||
)
|
||||
.unwrap();
|
||||
Box::new(RecordBatchIterator::new(vec![Ok(rb)], schema))
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
@@ -3,12 +3,13 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, RecordBatchReader, StringArray};
|
||||
use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, StringArray};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
|
||||
use futures::TryStreamExt;
|
||||
use lance_index::scalar::FullTextSearchQuery;
|
||||
use lancedb::connection::Connection;
|
||||
|
||||
use lancedb::index::scalar::FtsIndexBuilder;
|
||||
use lancedb::index::Index;
|
||||
use lancedb::query::{ExecutableQuery, QueryBase};
|
||||
@@ -29,7 +30,7 @@ async fn main() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn create_some_records() -> Result<Box<dyn RecordBatchReader + Send>> {
|
||||
fn create_some_records() -> Result<Box<dyn arrow_array::RecordBatchReader + Send>> {
|
||||
const TOTAL: usize = 1000;
|
||||
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
@@ -66,7 +67,7 @@ fn create_some_records() -> Result<Box<dyn RecordBatchReader + Send>> {
|
||||
}
|
||||
|
||||
async fn create_table(db: &Connection) -> Result<Table> {
|
||||
let initial_data: Box<dyn RecordBatchReader + Send> = create_some_records()?;
|
||||
let initial_data = create_some_records()?;
|
||||
let tbl = db.create_table("my_table", initial_data).execute().await?;
|
||||
Ok(tbl)
|
||||
}
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use arrow_array::{RecordBatch, RecordBatchIterator, StringArray};
|
||||
use arrow_array::{RecordBatch, StringArray};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use futures::TryStreamExt;
|
||||
use lance_index::scalar::FullTextSearchQuery;
|
||||
use lancedb::index::scalar::FtsIndexBuilder;
|
||||
use lancedb::index::Index;
|
||||
use lancedb::{
|
||||
arrow::IntoArrow,
|
||||
connect,
|
||||
embeddings::{
|
||||
sentence_transformers::SentenceTransformersEmbeddings, EmbeddingDefinition,
|
||||
@@ -70,7 +69,7 @@ async fn main() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn make_data() -> impl IntoArrow {
|
||||
fn make_data() -> RecordBatch {
|
||||
let schema = Schema::new(vec![Field::new("facts", DataType::Utf8, false)]);
|
||||
|
||||
let facts = StringArray::from_iter_values(vec![
|
||||
@@ -101,8 +100,7 @@ fn make_data() -> impl IntoArrow {
|
||||
"The first chatbot was ELIZA, created in the 1960s.",
|
||||
]);
|
||||
let schema = Arc::new(schema);
|
||||
let rb = RecordBatch::try_new(schema.clone(), vec![Arc::new(facts)]).unwrap();
|
||||
Box::new(RecordBatchIterator::new(vec![Ok(rb)], schema))
|
||||
RecordBatch::try_new(schema.clone(), vec![Arc::new(facts)]).unwrap()
|
||||
}
|
||||
|
||||
async fn create_index(table: &Table) -> Result<()> {
|
||||
|
||||
@@ -8,13 +8,12 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_array::types::Float32Type;
|
||||
use arrow_array::{
|
||||
FixedSizeListArray, Int32Array, RecordBatch, RecordBatchIterator, RecordBatchReader,
|
||||
};
|
||||
use arrow_array::{FixedSizeListArray, Int32Array, RecordBatch, RecordBatchIterator};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
|
||||
use futures::TryStreamExt;
|
||||
use lancedb::connection::Connection;
|
||||
|
||||
use lancedb::index::vector::IvfPqIndexBuilder;
|
||||
use lancedb::index::Index;
|
||||
use lancedb::query::{ExecutableQuery, QueryBase};
|
||||
@@ -34,7 +33,7 @@ async fn main() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn create_some_records() -> Result<Box<dyn RecordBatchReader + Send>> {
|
||||
fn create_some_records() -> Result<Box<dyn arrow_array::RecordBatchReader + Send>> {
|
||||
const TOTAL: usize = 1000;
|
||||
const DIM: usize = 128;
|
||||
|
||||
@@ -73,9 +72,9 @@ fn create_some_records() -> Result<Box<dyn RecordBatchReader + Send>> {
|
||||
}
|
||||
|
||||
async fn create_table(db: &Connection) -> Result<Table> {
|
||||
let initial_data: Box<dyn RecordBatchReader + Send> = create_some_records()?;
|
||||
let initial_data = create_some_records()?;
|
||||
let tbl = db
|
||||
.create_table("my_table", Box::new(initial_data))
|
||||
.create_table("my_table", initial_data)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
@@ -5,11 +5,9 @@
|
||||
|
||||
use std::{iter::once, sync::Arc};
|
||||
|
||||
use arrow_array::{Float64Array, Int32Array, RecordBatch, RecordBatchIterator, StringArray};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use arrow_array::{RecordBatch, StringArray};
|
||||
use futures::StreamExt;
|
||||
use lancedb::{
|
||||
arrow::IntoArrow,
|
||||
connect,
|
||||
embeddings::{openai::OpenAIEmbeddingFunction, EmbeddingDefinition, EmbeddingFunction},
|
||||
query::{ExecutableQuery, QueryBase},
|
||||
@@ -64,26 +62,20 @@ async fn main() -> Result<()> {
|
||||
}
|
||||
// --8<-- [end:openai_embeddings]
|
||||
|
||||
fn make_data() -> impl IntoArrow {
|
||||
let schema = Schema::new(vec![
|
||||
Field::new("id", DataType::Int32, true),
|
||||
Field::new("text", DataType::Utf8, false),
|
||||
Field::new("price", DataType::Float64, false),
|
||||
]);
|
||||
|
||||
let id = Int32Array::from(vec![1, 2, 3, 4]);
|
||||
let text = StringArray::from_iter_values(vec![
|
||||
"Black T-Shirt",
|
||||
"Leather Jacket",
|
||||
"Winter Parka",
|
||||
"Hooded Sweatshirt",
|
||||
]);
|
||||
let price = Float64Array::from(vec![10.0, 50.0, 100.0, 30.0]);
|
||||
let schema = Arc::new(schema);
|
||||
let rb = RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![Arc::new(id), Arc::new(text), Arc::new(price)],
|
||||
fn make_data() -> RecordBatch {
|
||||
arrow_array::record_batch!(
|
||||
("id", Int32, [1, 2, 3, 4]),
|
||||
(
|
||||
"text",
|
||||
Utf8,
|
||||
[
|
||||
"Black T-Shirt",
|
||||
"Leather Jacket",
|
||||
"Winter Parka",
|
||||
"Hooded Sweatshirt"
|
||||
]
|
||||
),
|
||||
("price", Float64, [10.0, 50.0, 100.0, 30.0])
|
||||
)
|
||||
.unwrap();
|
||||
Box::new(RecordBatchIterator::new(vec![Ok(rb)], schema))
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
@@ -3,11 +3,10 @@
|
||||
|
||||
use std::{iter::once, sync::Arc};
|
||||
|
||||
use arrow_array::{RecordBatch, RecordBatchIterator, StringArray};
|
||||
use arrow_array::{RecordBatch, StringArray};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use futures::StreamExt;
|
||||
use lancedb::{
|
||||
arrow::IntoArrow,
|
||||
connect,
|
||||
embeddings::{
|
||||
sentence_transformers::SentenceTransformersEmbeddings, EmbeddingDefinition,
|
||||
@@ -59,7 +58,7 @@ async fn main() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn make_data() -> impl IntoArrow {
|
||||
fn make_data() -> RecordBatch {
|
||||
let schema = Schema::new(vec![Field::new("facts", DataType::Utf8, false)]);
|
||||
|
||||
let facts = StringArray::from_iter_values(vec![
|
||||
@@ -90,6 +89,5 @@ fn make_data() -> impl IntoArrow {
|
||||
"The first chatbot was ELIZA, created in the 1960s.",
|
||||
]);
|
||||
let schema = Arc::new(schema);
|
||||
let rb = RecordBatch::try_new(schema.clone(), vec![Arc::new(facts)]).unwrap();
|
||||
Box::new(RecordBatchIterator::new(vec![Ok(rb)], schema))
|
||||
RecordBatch::try_new(schema.clone(), vec![Arc::new(facts)]).unwrap()
|
||||
}
|
||||
|
||||
@@ -8,11 +8,9 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_array::types::Float32Type;
|
||||
use arrow_array::{FixedSizeListArray, Int32Array, RecordBatch, RecordBatchIterator};
|
||||
use arrow_array::{FixedSizeListArray, Int32Array, RecordBatch};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use futures::TryStreamExt;
|
||||
|
||||
use lancedb::arrow::IntoArrow;
|
||||
use lancedb::connection::Connection;
|
||||
use lancedb::index::Index;
|
||||
use lancedb::query::{ExecutableQuery, QueryBase};
|
||||
@@ -59,7 +57,7 @@ async fn open_with_existing_tbl() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn create_some_records() -> Result<impl IntoArrow> {
|
||||
fn create_some_records() -> Result<RecordBatch> {
|
||||
const TOTAL: usize = 1000;
|
||||
const DIM: usize = 128;
|
||||
|
||||
@@ -76,25 +74,18 @@ fn create_some_records() -> Result<impl IntoArrow> {
|
||||
]));
|
||||
|
||||
// Create a RecordBatch stream.
|
||||
let batches = RecordBatchIterator::new(
|
||||
vec![RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![
|
||||
Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
|
||||
Arc::new(
|
||||
FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
|
||||
(0..TOTAL).map(|_| Some(vec![Some(1.0); DIM])),
|
||||
DIM as i32,
|
||||
),
|
||||
),
|
||||
],
|
||||
)
|
||||
.unwrap()]
|
||||
.into_iter()
|
||||
.map(Ok),
|
||||
Ok(RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
);
|
||||
Ok(Box::new(batches))
|
||||
vec![
|
||||
Arc::new(Int32Array::from_iter_values(0..TOTAL as i32)),
|
||||
Arc::new(
|
||||
FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
|
||||
(0..TOTAL).map(|_| Some(vec![Some(1.0); DIM])),
|
||||
DIM as i32,
|
||||
),
|
||||
),
|
||||
],
|
||||
)?)
|
||||
}
|
||||
|
||||
async fn create_table(db: &Connection) -> Result<LanceDbTable> {
|
||||
|
||||
@@ -6,8 +6,8 @@
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_array::RecordBatchReader;
|
||||
use arrow_schema::{Field, SchemaRef};
|
||||
use arrow_array::RecordBatch;
|
||||
use arrow_schema::SchemaRef;
|
||||
use lance::dataset::ReadParams;
|
||||
use lance_namespace::models::{
|
||||
CreateNamespaceRequest, CreateNamespaceResponse, DescribeNamespaceRequest,
|
||||
@@ -17,24 +17,20 @@ use lance_namespace::models::{
|
||||
#[cfg(feature = "aws")]
|
||||
use object_store::aws::AwsCredential;
|
||||
|
||||
use crate::arrow::{IntoArrow, IntoArrowStream, SendableRecordBatchStream};
|
||||
use crate::database::listing::{
|
||||
ListingDatabase, OPT_NEW_TABLE_STORAGE_VERSION, OPT_NEW_TABLE_V2_MANIFEST_PATHS,
|
||||
};
|
||||
use crate::connection::create_table::CreateTableBuilder;
|
||||
use crate::data::scannable::Scannable;
|
||||
use crate::database::listing::ListingDatabase;
|
||||
use crate::database::{
|
||||
CloneTableRequest, CreateTableData, CreateTableMode, CreateTableRequest, Database,
|
||||
DatabaseOptions, OpenTableRequest, ReadConsistency, TableNamesRequest,
|
||||
};
|
||||
use crate::embeddings::{
|
||||
EmbeddingDefinition, EmbeddingFunction, EmbeddingRegistry, MemoryRegistry, WithEmbeddings,
|
||||
CloneTableRequest, Database, DatabaseOptions, OpenTableRequest, ReadConsistency,
|
||||
TableNamesRequest,
|
||||
};
|
||||
use crate::embeddings::{EmbeddingRegistry, MemoryRegistry};
|
||||
use crate::error::{Error, Result};
|
||||
#[cfg(feature = "remote")]
|
||||
use crate::remote::{
|
||||
client::ClientConfig,
|
||||
db::{OPT_REMOTE_API_KEY, OPT_REMOTE_HOST_OVERRIDE, OPT_REMOTE_REGION},
|
||||
};
|
||||
use crate::table::{TableDefinition, WriteOptions};
|
||||
use crate::Table;
|
||||
use lance::io::ObjectStoreParams;
|
||||
pub use lance_encoding::version::LanceFileVersion;
|
||||
@@ -42,6 +38,8 @@ pub use lance_encoding::version::LanceFileVersion;
|
||||
use lance_io::object_store::StorageOptions;
|
||||
use lance_io::object_store::{StorageOptionsAccessor, StorageOptionsProvider};
|
||||
|
||||
mod create_table;
|
||||
|
||||
fn merge_storage_options(
|
||||
store_params: &mut ObjectStoreParams,
|
||||
pairs: impl IntoIterator<Item = (String, String)>,
|
||||
@@ -116,337 +114,6 @@ impl TableNamesBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct NoData {}
|
||||
|
||||
impl IntoArrow for NoData {
|
||||
fn into_arrow(self) -> Result<Box<dyn arrow_array::RecordBatchReader + Send>> {
|
||||
unreachable!("NoData should never be converted to Arrow")
|
||||
}
|
||||
}
|
||||
|
||||
// Stores the value given from the initial CreateTableBuilder::new call
|
||||
// and defers errors until `execute` is called
|
||||
enum CreateTableBuilderInitialData {
|
||||
None,
|
||||
Iterator(Result<Box<dyn RecordBatchReader + Send>>),
|
||||
Stream(Result<SendableRecordBatchStream>),
|
||||
}
|
||||
|
||||
/// A builder for configuring a [`Connection::create_table`] operation
|
||||
pub struct CreateTableBuilder<const HAS_DATA: bool> {
|
||||
parent: Arc<dyn Database>,
|
||||
embeddings: Vec<(EmbeddingDefinition, Arc<dyn EmbeddingFunction>)>,
|
||||
embedding_registry: Arc<dyn EmbeddingRegistry>,
|
||||
request: CreateTableRequest,
|
||||
// This is a bit clumsy but we defer errors until `execute` is called
|
||||
// to maintain backwards compatibility
|
||||
data: CreateTableBuilderInitialData,
|
||||
}
|
||||
|
||||
// Builder methods that only apply when we have initial data
|
||||
impl CreateTableBuilder<true> {
|
||||
fn new<T: IntoArrow>(
|
||||
parent: Arc<dyn Database>,
|
||||
name: String,
|
||||
data: T,
|
||||
embedding_registry: Arc<dyn EmbeddingRegistry>,
|
||||
) -> Self {
|
||||
let dummy_schema = Arc::new(arrow_schema::Schema::new(Vec::<Field>::default()));
|
||||
Self {
|
||||
parent,
|
||||
request: CreateTableRequest::new(
|
||||
name,
|
||||
CreateTableData::Empty(TableDefinition::new_from_schema(dummy_schema)),
|
||||
),
|
||||
embeddings: Vec::new(),
|
||||
embedding_registry,
|
||||
data: CreateTableBuilderInitialData::Iterator(data.into_arrow()),
|
||||
}
|
||||
}
|
||||
|
||||
fn new_streaming<T: IntoArrowStream>(
|
||||
parent: Arc<dyn Database>,
|
||||
name: String,
|
||||
data: T,
|
||||
embedding_registry: Arc<dyn EmbeddingRegistry>,
|
||||
) -> Self {
|
||||
let dummy_schema = Arc::new(arrow_schema::Schema::new(Vec::<Field>::default()));
|
||||
Self {
|
||||
parent,
|
||||
request: CreateTableRequest::new(
|
||||
name,
|
||||
CreateTableData::Empty(TableDefinition::new_from_schema(dummy_schema)),
|
||||
),
|
||||
embeddings: Vec::new(),
|
||||
embedding_registry,
|
||||
data: CreateTableBuilderInitialData::Stream(data.into_arrow()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the create table operation
|
||||
pub async fn execute(self) -> Result<Table> {
|
||||
let embedding_registry = self.embedding_registry.clone();
|
||||
let parent = self.parent.clone();
|
||||
let request = self.into_request()?;
|
||||
Ok(Table::new_with_embedding_registry(
|
||||
parent.create_table(request).await?,
|
||||
parent,
|
||||
embedding_registry,
|
||||
))
|
||||
}
|
||||
|
||||
fn into_request(self) -> Result<CreateTableRequest> {
|
||||
if self.embeddings.is_empty() {
|
||||
match self.data {
|
||||
CreateTableBuilderInitialData::Iterator(maybe_iter) => {
|
||||
let data = maybe_iter?;
|
||||
Ok(CreateTableRequest {
|
||||
data: CreateTableData::Data(data),
|
||||
..self.request
|
||||
})
|
||||
}
|
||||
CreateTableBuilderInitialData::None => {
|
||||
unreachable!("No data provided for CreateTableBuilder<true>")
|
||||
}
|
||||
CreateTableBuilderInitialData::Stream(maybe_stream) => {
|
||||
let data = maybe_stream?;
|
||||
Ok(CreateTableRequest {
|
||||
data: CreateTableData::StreamingData(data),
|
||||
..self.request
|
||||
})
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let CreateTableBuilderInitialData::Iterator(maybe_iter) = self.data else {
|
||||
return Err(Error::NotSupported { message: "Creating a table with embeddings is currently not support when the input is streaming".to_string() });
|
||||
};
|
||||
let data = maybe_iter?;
|
||||
let data = Box::new(WithEmbeddings::new(data, self.embeddings));
|
||||
Ok(CreateTableRequest {
|
||||
data: CreateTableData::Data(data),
|
||||
..self.request
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Builder methods that only apply when we do not have initial data
|
||||
impl CreateTableBuilder<false> {
|
||||
fn new(
|
||||
parent: Arc<dyn Database>,
|
||||
name: String,
|
||||
schema: SchemaRef,
|
||||
embedding_registry: Arc<dyn EmbeddingRegistry>,
|
||||
) -> Self {
|
||||
let table_definition = TableDefinition::new_from_schema(schema);
|
||||
Self {
|
||||
parent,
|
||||
request: CreateTableRequest::new(name, CreateTableData::Empty(table_definition)),
|
||||
data: CreateTableBuilderInitialData::None,
|
||||
embeddings: Vec::default(),
|
||||
embedding_registry,
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the create table operation
|
||||
pub async fn execute(self) -> Result<Table> {
|
||||
let parent = self.parent.clone();
|
||||
let embedding_registry = self.embedding_registry.clone();
|
||||
let request = self.into_request()?;
|
||||
Ok(Table::new_with_embedding_registry(
|
||||
parent.create_table(request).await?,
|
||||
parent,
|
||||
embedding_registry,
|
||||
))
|
||||
}
|
||||
|
||||
fn into_request(self) -> Result<CreateTableRequest> {
|
||||
if self.embeddings.is_empty() {
|
||||
return Ok(self.request);
|
||||
}
|
||||
|
||||
let CreateTableData::Empty(table_def) = self.request.data else {
|
||||
unreachable!("CreateTableBuilder<false> should always have Empty data")
|
||||
};
|
||||
|
||||
let schema = table_def.schema.clone();
|
||||
let empty_batch = arrow_array::RecordBatch::new_empty(schema.clone());
|
||||
|
||||
let reader = Box::new(std::iter::once(Ok(empty_batch)).collect::<Vec<_>>());
|
||||
let reader = arrow_array::RecordBatchIterator::new(reader.into_iter(), schema);
|
||||
let with_embeddings = WithEmbeddings::new(reader, self.embeddings);
|
||||
let table_definition = with_embeddings.table_definition()?;
|
||||
|
||||
Ok(CreateTableRequest {
|
||||
data: CreateTableData::Empty(table_definition),
|
||||
..self.request
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<const HAS_DATA: bool> CreateTableBuilder<HAS_DATA> {
|
||||
/// Set the mode for creating the table
|
||||
///
|
||||
/// This controls what happens if a table with the given name already exists
|
||||
pub fn mode(mut self, mode: CreateTableMode) -> Self {
|
||||
self.request.mode = mode;
|
||||
self
|
||||
}
|
||||
|
||||
/// Apply the given write options when writing the initial data
|
||||
pub fn write_options(mut self, write_options: WriteOptions) -> Self {
|
||||
self.request.write_options = write_options;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set an option for the storage layer.
|
||||
///
|
||||
/// Options already set on the connection will be inherited by the table,
|
||||
/// but can be overridden here.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
let store_params = self
|
||||
.request
|
||||
.write_options
|
||||
.lance_write_params
|
||||
.get_or_insert(Default::default())
|
||||
.store_params
|
||||
.get_or_insert(Default::default());
|
||||
merge_storage_options(store_params, [(key.into(), value.into())]);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set multiple options for the storage layer.
|
||||
///
|
||||
/// Options already set on the connection will be inherited by the table,
|
||||
/// but can be overridden here.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_options(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
) -> Self {
|
||||
let store_params = self
|
||||
.request
|
||||
.write_options
|
||||
.lance_write_params
|
||||
.get_or_insert(Default::default())
|
||||
.store_params
|
||||
.get_or_insert(Default::default());
|
||||
let updates = pairs
|
||||
.into_iter()
|
||||
.map(|(key, value)| (key.into(), value.into()));
|
||||
merge_storage_options(store_params, updates);
|
||||
self
|
||||
}
|
||||
|
||||
/// Add an embedding definition to the table.
|
||||
///
|
||||
/// The `embedding_name` must match the name of an embedding function that
|
||||
/// was previously registered with the connection's [`EmbeddingRegistry`].
|
||||
pub fn add_embedding(mut self, definition: EmbeddingDefinition) -> Result<Self> {
|
||||
// Early verification of the embedding name
|
||||
let embedding_func = self
|
||||
.embedding_registry
|
||||
.get(&definition.embedding_name)
|
||||
.ok_or_else(|| Error::EmbeddingFunctionNotFound {
|
||||
name: definition.embedding_name.clone(),
|
||||
reason: "No embedding function found in the connection's embedding_registry"
|
||||
.to_string(),
|
||||
})?;
|
||||
|
||||
self.embeddings.push((definition, embedding_func));
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Set whether to use V2 manifest paths for the table. (default: false)
|
||||
///
|
||||
/// These paths provide more efficient opening of tables with many
|
||||
/// versions on object stores.
|
||||
///
|
||||
/// <div class="warning">Turning this on will make the dataset unreadable
|
||||
/// for older versions of LanceDB (prior to 0.10.0).</div>
|
||||
///
|
||||
/// To migrate an existing dataset, instead use the
|
||||
/// [[NativeTable::migrate_manifest_paths_v2]].
|
||||
///
|
||||
/// This has no effect in LanceDB Cloud.
|
||||
#[deprecated(since = "0.15.1", note = "Use `database_options` instead")]
|
||||
pub fn enable_v2_manifest_paths(mut self, use_v2_manifest_paths: bool) -> Self {
|
||||
let store_params = self
|
||||
.request
|
||||
.write_options
|
||||
.lance_write_params
|
||||
.get_or_insert_with(Default::default)
|
||||
.store_params
|
||||
.get_or_insert_with(Default::default);
|
||||
let value = if use_v2_manifest_paths {
|
||||
"true".to_string()
|
||||
} else {
|
||||
"false".to_string()
|
||||
};
|
||||
merge_storage_options(
|
||||
store_params,
|
||||
[(OPT_NEW_TABLE_V2_MANIFEST_PATHS.to_string(), value)],
|
||||
);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the data storage version.
|
||||
///
|
||||
/// The default is `LanceFileVersion::Stable`.
|
||||
#[deprecated(since = "0.15.1", note = "Use `database_options` instead")]
|
||||
pub fn data_storage_version(mut self, data_storage_version: LanceFileVersion) -> Self {
|
||||
let store_params = self
|
||||
.request
|
||||
.write_options
|
||||
.lance_write_params
|
||||
.get_or_insert_with(Default::default)
|
||||
.store_params
|
||||
.get_or_insert_with(Default::default);
|
||||
merge_storage_options(
|
||||
store_params,
|
||||
[(
|
||||
OPT_NEW_TABLE_STORAGE_VERSION.to_string(),
|
||||
data_storage_version.to_string(),
|
||||
)],
|
||||
);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the namespace for the table
|
||||
pub fn namespace(mut self, namespace: Vec<String>) -> Self {
|
||||
self.request.namespace = namespace;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set a custom location for the table.
|
||||
///
|
||||
/// If not set, the database will derive a location from its URI and the table name.
|
||||
/// This is useful when integrating with namespace systems that manage table locations.
|
||||
pub fn location(mut self, location: impl Into<String>) -> Self {
|
||||
self.request.location = Some(location.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set a storage options provider for automatic credential refresh.
|
||||
///
|
||||
/// This allows tables to automatically refresh cloud storage credentials
|
||||
/// when they expire, enabling long-running operations on remote storage.
|
||||
pub fn storage_options_provider(mut self, provider: Arc<dyn StorageOptionsProvider>) -> Self {
|
||||
let store_params = self
|
||||
.request
|
||||
.write_options
|
||||
.lance_write_params
|
||||
.get_or_insert(Default::default())
|
||||
.store_params
|
||||
.get_or_insert(Default::default());
|
||||
set_storage_options_provider(store_params, provider);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct OpenTableBuilder {
|
||||
parent: Arc<dyn Database>,
|
||||
@@ -684,35 +351,17 @@ impl Connection {
|
||||
///
|
||||
/// * `name` - The name of the table
|
||||
/// * `initial_data` - The initial data to write to the table
|
||||
pub fn create_table<T: IntoArrow>(
|
||||
pub fn create_table<T: Scannable + 'static>(
|
||||
&self,
|
||||
name: impl Into<String>,
|
||||
initial_data: T,
|
||||
) -> CreateTableBuilder<true> {
|
||||
CreateTableBuilder::<true>::new(
|
||||
) -> CreateTableBuilder {
|
||||
let initial_data = Box::new(initial_data);
|
||||
CreateTableBuilder::new(
|
||||
self.internal.clone(),
|
||||
self.embedding_registry.clone(),
|
||||
name.into(),
|
||||
initial_data,
|
||||
self.embedding_registry.clone(),
|
||||
)
|
||||
}
|
||||
|
||||
/// Create a new table from a stream of data
|
||||
///
|
||||
/// # Parameters
|
||||
///
|
||||
/// * `name` - The name of the table
|
||||
/// * `initial_data` - The initial data to write to the table
|
||||
pub fn create_table_streaming<T: IntoArrowStream>(
|
||||
&self,
|
||||
name: impl Into<String>,
|
||||
initial_data: T,
|
||||
) -> CreateTableBuilder<true> {
|
||||
CreateTableBuilder::<true>::new_streaming(
|
||||
self.internal.clone(),
|
||||
name.into(),
|
||||
initial_data,
|
||||
self.embedding_registry.clone(),
|
||||
)
|
||||
}
|
||||
|
||||
@@ -726,13 +375,9 @@ impl Connection {
|
||||
&self,
|
||||
name: impl Into<String>,
|
||||
schema: SchemaRef,
|
||||
) -> CreateTableBuilder<false> {
|
||||
CreateTableBuilder::<false>::new(
|
||||
self.internal.clone(),
|
||||
name.into(),
|
||||
schema,
|
||||
self.embedding_registry.clone(),
|
||||
)
|
||||
) -> CreateTableBuilder {
|
||||
let empty_batch = RecordBatch::new_empty(schema);
|
||||
self.create_table(name, empty_batch)
|
||||
}
|
||||
|
||||
/// Open an existing table in the database
|
||||
@@ -1349,20 +994,11 @@ mod test_utils {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::database::listing::{ListingDatabaseOptions, NewTableConfig};
|
||||
use crate::query::QueryBase;
|
||||
use crate::query::{ExecutableQuery, QueryExecutionOptions};
|
||||
use crate::test_utils::connection::new_test_connection;
|
||||
use arrow::compute::concat_batches;
|
||||
use arrow_array::RecordBatchReader;
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
|
||||
use futures::{stream, TryStreamExt};
|
||||
use lance_core::error::{ArrowResult, DataFusionResult};
|
||||
use lance_testing::datagen::{BatchGenerator, IncrementingInt32};
|
||||
use tempfile::tempdir;
|
||||
|
||||
use crate::arrow::SimpleRecordBatchStream;
|
||||
use crate::test_utils::connection::new_test_connection;
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -1478,139 +1114,6 @@ mod tests {
|
||||
assert_eq!(tables, vec!["table1".to_owned()]);
|
||||
}
|
||||
|
||||
fn make_data() -> Box<dyn RecordBatchReader + Send + 'static> {
|
||||
let id = Box::new(IncrementingInt32::new().named("id".to_string()));
|
||||
Box::new(BatchGenerator::new().col(id).batches(10, 2000))
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_table_v2() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
let db = connect(uri)
|
||||
.database_options(&ListingDatabaseOptions {
|
||||
new_table_config: NewTableConfig {
|
||||
data_storage_version: Some(LanceFileVersion::Legacy),
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
})
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let tbl = db
|
||||
.create_table("v1_test", make_data())
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// In v1 the row group size will trump max_batch_length
|
||||
let batches = tbl
|
||||
.query()
|
||||
.limit(20000)
|
||||
.execute_with_options(QueryExecutionOptions {
|
||||
max_batch_length: 50000,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap()
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(batches.len(), 20);
|
||||
|
||||
let db = connect(uri)
|
||||
.database_options(&ListingDatabaseOptions {
|
||||
new_table_config: NewTableConfig {
|
||||
data_storage_version: Some(LanceFileVersion::Stable),
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
})
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let tbl = db
|
||||
.create_table("v2_test", make_data())
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// In v2 the page size is much bigger than 50k so we should get a single batch
|
||||
let batches = tbl
|
||||
.query()
|
||||
.execute_with_options(QueryExecutionOptions {
|
||||
max_batch_length: 50000,
|
||||
..Default::default()
|
||||
})
|
||||
.await
|
||||
.unwrap()
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(batches.len(), 1);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_table_streaming() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
let db = connect(uri).execute().await.unwrap();
|
||||
|
||||
let batches = make_data().collect::<ArrowResult<Vec<_>>>().unwrap();
|
||||
|
||||
let schema = batches.first().unwrap().schema();
|
||||
let one_batch = concat_batches(&schema, batches.iter()).unwrap();
|
||||
|
||||
let ldb_stream = stream::iter(batches.clone().into_iter().map(Result::Ok));
|
||||
let ldb_stream: SendableRecordBatchStream =
|
||||
Box::pin(SimpleRecordBatchStream::new(ldb_stream, schema.clone()));
|
||||
|
||||
let tbl1 = db
|
||||
.create_table_streaming("one", ldb_stream)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let df_stream = stream::iter(batches.into_iter().map(DataFusionResult::Ok));
|
||||
let df_stream: datafusion_physical_plan::SendableRecordBatchStream =
|
||||
Box::pin(RecordBatchStreamAdapter::new(schema.clone(), df_stream));
|
||||
|
||||
let tbl2 = db
|
||||
.create_table_streaming("two", df_stream)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let tbl1_data = tbl1
|
||||
.query()
|
||||
.execute()
|
||||
.await
|
||||
.unwrap()
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let tbl1_data = concat_batches(&schema, tbl1_data.iter()).unwrap();
|
||||
assert_eq!(tbl1_data, one_batch);
|
||||
|
||||
let tbl2_data = tbl2
|
||||
.query()
|
||||
.execute()
|
||||
.await
|
||||
.unwrap()
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let tbl2_data = concat_batches(&schema, tbl2_data.iter()).unwrap();
|
||||
assert_eq!(tbl2_data, one_batch);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn drop_table() {
|
||||
let tc = new_test_connection().await.unwrap();
|
||||
@@ -1640,41 +1143,6 @@ mod tests {
|
||||
assert_eq!(tables.len(), 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_table_already_exists() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
let db = connect(uri).execute().await.unwrap();
|
||||
let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)]));
|
||||
db.create_empty_table("test", schema.clone())
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
// TODO: None of the open table options are "inspectable" right now but once one is we
|
||||
// should assert we are passing these options in correctly
|
||||
db.create_empty_table("test", schema)
|
||||
.mode(CreateTableMode::exist_ok(|mut req| {
|
||||
req.index_cache_size = Some(16);
|
||||
req
|
||||
}))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
let other_schema = Arc::new(Schema::new(vec![Field::new("y", DataType::Int32, false)]));
|
||||
assert!(db
|
||||
.create_empty_table("test", other_schema.clone())
|
||||
.execute()
|
||||
.await
|
||||
.is_err());
|
||||
let overwritten = db
|
||||
.create_empty_table("test", other_schema.clone())
|
||||
.mode(CreateTableMode::Overwrite)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(other_schema, overwritten.schema().await.unwrap());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_clone_table() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
@@ -1685,7 +1153,8 @@ mod tests {
|
||||
let mut batch_gen = BatchGenerator::new()
|
||||
.col(Box::new(IncrementingInt32::new().named("id")))
|
||||
.col(Box::new(IncrementingInt32::new().named("value")));
|
||||
let reader = batch_gen.batches(5, 100);
|
||||
let reader: Box<dyn arrow_array::RecordBatchReader + Send> =
|
||||
Box::new(batch_gen.batches(5, 100));
|
||||
|
||||
let source_table = db
|
||||
.create_table("source_table", reader)
|
||||
@@ -1720,128 +1189,4 @@ mod tests {
|
||||
let cloned_count = cloned_table.count_rows(None).await.unwrap();
|
||||
assert_eq!(source_count, cloned_count);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_empty_table_with_embeddings() {
|
||||
use crate::embeddings::{EmbeddingDefinition, EmbeddingFunction};
|
||||
use arrow_array::{
|
||||
Array, FixedSizeListArray, Float32Array, RecordBatch, RecordBatchIterator, StringArray,
|
||||
};
|
||||
use std::borrow::Cow;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct MockEmbedding {
|
||||
dim: usize,
|
||||
}
|
||||
|
||||
impl EmbeddingFunction for MockEmbedding {
|
||||
fn name(&self) -> &str {
|
||||
"test_embedding"
|
||||
}
|
||||
|
||||
fn source_type(&self) -> Result<Cow<'_, DataType>> {
|
||||
Ok(Cow::Owned(DataType::Utf8))
|
||||
}
|
||||
|
||||
fn dest_type(&self) -> Result<Cow<'_, DataType>> {
|
||||
Ok(Cow::Owned(DataType::new_fixed_size_list(
|
||||
DataType::Float32,
|
||||
self.dim as i32,
|
||||
true,
|
||||
)))
|
||||
}
|
||||
|
||||
fn compute_source_embeddings(&self, source: Arc<dyn Array>) -> Result<Arc<dyn Array>> {
|
||||
let len = source.len();
|
||||
let values = vec![1.0f32; len * self.dim];
|
||||
let values = Arc::new(Float32Array::from(values));
|
||||
let field = Arc::new(Field::new("item", DataType::Float32, true));
|
||||
Ok(Arc::new(FixedSizeListArray::new(
|
||||
field,
|
||||
self.dim as i32,
|
||||
values,
|
||||
None,
|
||||
)))
|
||||
}
|
||||
|
||||
fn compute_query_embeddings(&self, _input: Arc<dyn Array>) -> Result<Arc<dyn Array>> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
let db = connect(uri).execute().await.unwrap();
|
||||
|
||||
let embed_func = Arc::new(MockEmbedding { dim: 128 });
|
||||
db.embedding_registry()
|
||||
.register("test_embedding", embed_func.clone())
|
||||
.unwrap();
|
||||
|
||||
let schema = Arc::new(Schema::new(vec![Field::new("name", DataType::Utf8, true)]));
|
||||
let ed = EmbeddingDefinition {
|
||||
source_column: "name".to_owned(),
|
||||
dest_column: Some("name_embedding".to_owned()),
|
||||
embedding_name: "test_embedding".to_owned(),
|
||||
};
|
||||
|
||||
let table = db
|
||||
.create_empty_table("test", schema)
|
||||
.mode(CreateTableMode::Overwrite)
|
||||
.add_embedding(ed)
|
||||
.unwrap()
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let table_schema = table.schema().await.unwrap();
|
||||
assert!(table_schema.column_with_name("name").is_some());
|
||||
assert!(table_schema.column_with_name("name_embedding").is_some());
|
||||
|
||||
let embedding_field = table_schema.field_with_name("name_embedding").unwrap();
|
||||
assert_eq!(
|
||||
embedding_field.data_type(),
|
||||
&DataType::new_fixed_size_list(DataType::Float32, 128, true)
|
||||
);
|
||||
|
||||
let input_schema = Arc::new(Schema::new(vec![Field::new("name", DataType::Utf8, true)]));
|
||||
let input_batch = RecordBatch::try_new(
|
||||
input_schema.clone(),
|
||||
vec![Arc::new(StringArray::from(vec![
|
||||
Some("Alice"),
|
||||
Some("Bob"),
|
||||
Some("Charlie"),
|
||||
]))],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let input_reader = Box::new(RecordBatchIterator::new(
|
||||
vec![Ok(input_batch)].into_iter(),
|
||||
input_schema,
|
||||
));
|
||||
|
||||
table.add(input_reader).execute().await.unwrap();
|
||||
|
||||
let results = table
|
||||
.query()
|
||||
.execute()
|
||||
.await
|
||||
.unwrap()
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(results.len(), 1);
|
||||
let batch = &results[0];
|
||||
assert_eq!(batch.num_rows(), 3);
|
||||
assert!(batch.column_by_name("name_embedding").is_some());
|
||||
|
||||
let embedding_col = batch
|
||||
.column_by_name("name_embedding")
|
||||
.unwrap()
|
||||
.as_any()
|
||||
.downcast_ref::<FixedSizeListArray>()
|
||||
.unwrap();
|
||||
assert_eq!(embedding_col.len(), 3);
|
||||
}
|
||||
}
|
||||
|
||||
612
rust/lancedb/src/connection/create_table.rs
Normal file
612
rust/lancedb/src/connection/create_table.rs
Normal file
@@ -0,0 +1,612 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use lance_io::object_store::StorageOptionsProvider;
|
||||
|
||||
use crate::{
|
||||
connection::{merge_storage_options, set_storage_options_provider},
|
||||
data::scannable::{Scannable, WithEmbeddingsScannable},
|
||||
database::{CreateTableMode, CreateTableRequest, Database},
|
||||
embeddings::{EmbeddingDefinition, EmbeddingFunction, EmbeddingRegistry},
|
||||
table::WriteOptions,
|
||||
Error, Result, Table,
|
||||
};
|
||||
|
||||
pub struct CreateTableBuilder {
|
||||
parent: Arc<dyn Database>,
|
||||
embeddings: Vec<(EmbeddingDefinition, Arc<dyn EmbeddingFunction>)>,
|
||||
embedding_registry: Arc<dyn EmbeddingRegistry>,
|
||||
request: CreateTableRequest,
|
||||
}
|
||||
|
||||
impl CreateTableBuilder {
|
||||
pub(super) fn new(
|
||||
parent: Arc<dyn Database>,
|
||||
embedding_registry: Arc<dyn EmbeddingRegistry>,
|
||||
name: String,
|
||||
data: Box<dyn Scannable>,
|
||||
) -> Self {
|
||||
Self {
|
||||
parent,
|
||||
embeddings: Vec::new(),
|
||||
embedding_registry,
|
||||
request: CreateTableRequest::new(name, data),
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the mode for creating the table
|
||||
///
|
||||
/// This controls what happens if a table with the given name already exists
|
||||
pub fn mode(mut self, mode: CreateTableMode) -> Self {
|
||||
self.request.mode = mode;
|
||||
self
|
||||
}
|
||||
|
||||
/// Apply the given write options when writing the initial data
|
||||
pub fn write_options(mut self, write_options: WriteOptions) -> Self {
|
||||
self.request.write_options = write_options;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set an option for the storage layer.
|
||||
///
|
||||
/// Options already set on the connection will be inherited by the table,
|
||||
/// but can be overridden here.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_option(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
|
||||
let store_params = self
|
||||
.request
|
||||
.write_options
|
||||
.lance_write_params
|
||||
.get_or_insert(Default::default())
|
||||
.store_params
|
||||
.get_or_insert(Default::default());
|
||||
merge_storage_options(store_params, [(key.into(), value.into())]);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set multiple options for the storage layer.
|
||||
///
|
||||
/// Options already set on the connection will be inherited by the table,
|
||||
/// but can be overridden here.
|
||||
///
|
||||
/// See available options at <https://lancedb.com/docs/storage/>
|
||||
pub fn storage_options(
|
||||
mut self,
|
||||
pairs: impl IntoIterator<Item = (impl Into<String>, impl Into<String>)>,
|
||||
) -> Self {
|
||||
let store_params = self
|
||||
.request
|
||||
.write_options
|
||||
.lance_write_params
|
||||
.get_or_insert(Default::default())
|
||||
.store_params
|
||||
.get_or_insert(Default::default());
|
||||
let updates = pairs
|
||||
.into_iter()
|
||||
.map(|(key, value)| (key.into(), value.into()));
|
||||
merge_storage_options(store_params, updates);
|
||||
self
|
||||
}
|
||||
|
||||
/// Add an embedding definition to the table.
|
||||
///
|
||||
/// The `embedding_name` must match the name of an embedding function that
|
||||
/// was previously registered with the connection's [`EmbeddingRegistry`].
|
||||
pub fn add_embedding(mut self, definition: EmbeddingDefinition) -> Result<Self> {
|
||||
// Early verification of the embedding name
|
||||
let embedding_func = self
|
||||
.embedding_registry
|
||||
.get(&definition.embedding_name)
|
||||
.ok_or_else(|| Error::EmbeddingFunctionNotFound {
|
||||
name: definition.embedding_name.clone(),
|
||||
reason: "No embedding function found in the connection's embedding_registry"
|
||||
.to_string(),
|
||||
})?;
|
||||
|
||||
self.embeddings.push((definition, embedding_func));
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Set the namespace for the table
|
||||
pub fn namespace(mut self, namespace: Vec<String>) -> Self {
|
||||
self.request.namespace = namespace;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set a custom location for the table.
|
||||
///
|
||||
/// If not set, the database will derive a location from its URI and the table name.
|
||||
/// This is useful when integrating with namespace systems that manage table locations.
|
||||
pub fn location(mut self, location: impl Into<String>) -> Self {
|
||||
self.request.location = Some(location.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set a storage options provider for automatic credential refresh.
|
||||
///
|
||||
/// This allows tables to automatically refresh cloud storage credentials
|
||||
/// when they expire, enabling long-running operations on remote storage.
|
||||
pub fn storage_options_provider(mut self, provider: Arc<dyn StorageOptionsProvider>) -> Self {
|
||||
let store_params = self
|
||||
.request
|
||||
.write_options
|
||||
.lance_write_params
|
||||
.get_or_insert(Default::default())
|
||||
.store_params
|
||||
.get_or_insert(Default::default());
|
||||
set_storage_options_provider(store_params, provider);
|
||||
self
|
||||
}
|
||||
|
||||
/// Execute the create table operation
|
||||
pub async fn execute(mut self) -> Result<Table> {
|
||||
let embedding_registry = self.embedding_registry.clone();
|
||||
let parent = self.parent.clone();
|
||||
|
||||
// If embeddings were configured via add_embedding(), wrap the data
|
||||
if !self.embeddings.is_empty() {
|
||||
let wrapped_data: Box<dyn Scannable> = Box::new(WithEmbeddingsScannable::try_new(
|
||||
self.request.data,
|
||||
self.embeddings,
|
||||
)?);
|
||||
self.request.data = wrapped_data;
|
||||
}
|
||||
|
||||
Ok(Table::new_with_embedding_registry(
|
||||
parent.create_table(self.request).await?,
|
||||
parent,
|
||||
embedding_registry,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use arrow_array::{
|
||||
record_batch, Array, FixedSizeListArray, Float32Array, RecordBatch, RecordBatchIterator,
|
||||
};
|
||||
use arrow_schema::{ArrowError, DataType, Field, Schema};
|
||||
use futures::TryStreamExt;
|
||||
use lance_file::version::LanceFileVersion;
|
||||
use tempfile::tempdir;
|
||||
|
||||
use crate::{
|
||||
arrow::{SendableRecordBatchStream, SimpleRecordBatchStream},
|
||||
connect,
|
||||
database::listing::{ListingDatabaseOptions, NewTableConfig},
|
||||
embeddings::{EmbeddingDefinition, EmbeddingFunction, MemoryRegistry},
|
||||
query::{ExecutableQuery, QueryBase, Select},
|
||||
test_utils::embeddings::MockEmbed,
|
||||
};
|
||||
use std::borrow::Cow;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn create_empty_table() {
|
||||
let db = connect("memory://").execute().await.unwrap();
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Int64, false),
|
||||
Field::new("value", DataType::Float64, false),
|
||||
]));
|
||||
db.create_empty_table("name", schema.clone())
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
let table = db.open_table("name").execute().await.unwrap();
|
||||
assert_eq!(table.schema().await.unwrap(), schema);
|
||||
assert_eq!(table.count_rows(None).await.unwrap(), 0);
|
||||
}
|
||||
|
||||
async fn test_create_table_with_data<T>(data: T)
|
||||
where
|
||||
T: Scannable + 'static,
|
||||
{
|
||||
let db = connect("memory://").execute().await.unwrap();
|
||||
let schema = data.schema();
|
||||
db.create_table("data_table", data).execute().await.unwrap();
|
||||
let table = db.open_table("data_table").execute().await.unwrap();
|
||||
assert_eq!(table.count_rows(None).await.unwrap(), 3);
|
||||
assert_eq!(table.schema().await.unwrap(), schema);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn create_table_with_batch() {
|
||||
let batch = record_batch!(("id", Int64, [1, 2, 3])).unwrap();
|
||||
test_create_table_with_data(batch).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_table_with_vec_batch() {
|
||||
let data = vec![
|
||||
record_batch!(("id", Int64, [1, 2])).unwrap(),
|
||||
record_batch!(("id", Int64, [3])).unwrap(),
|
||||
];
|
||||
test_create_table_with_data(data).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_table_with_record_batch_reader() {
|
||||
let data = vec![
|
||||
record_batch!(("id", Int64, [1, 2])).unwrap(),
|
||||
record_batch!(("id", Int64, [3])).unwrap(),
|
||||
];
|
||||
let schema = data[0].schema();
|
||||
let reader: Box<dyn arrow_array::RecordBatchReader + Send> = Box::new(
|
||||
RecordBatchIterator::new(data.into_iter().map(Ok), schema.clone()),
|
||||
);
|
||||
test_create_table_with_data(reader).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_table_with_stream() {
|
||||
let data = vec![
|
||||
record_batch!(("id", Int64, [1, 2])).unwrap(),
|
||||
record_batch!(("id", Int64, [3])).unwrap(),
|
||||
];
|
||||
let schema = data[0].schema();
|
||||
let inner = futures::stream::iter(data.into_iter().map(Ok));
|
||||
let stream: SendableRecordBatchStream = Box::pin(SimpleRecordBatchStream {
|
||||
schema,
|
||||
stream: inner,
|
||||
});
|
||||
test_create_table_with_data(stream).await;
|
||||
}
|
||||
|
||||
/// Marker error injected into readers and streams by the error-propagation tests.
#[derive(Debug)]
struct MyError;

impl std::error::Error for MyError {}

impl std::fmt::Display for MyError {
    /// Writes the fixed message `MyError occurred`.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        formatter.write_str("MyError occurred")
    }
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_preserves_reader_error() {
|
||||
let first_batch = record_batch!(("id", Int64, [1, 2])).unwrap();
|
||||
let schema = first_batch.schema();
|
||||
let iterator = vec![
|
||||
Ok(first_batch),
|
||||
Err(ArrowError::ExternalError(Box::new(MyError))),
|
||||
];
|
||||
let reader: Box<dyn arrow_array::RecordBatchReader + Send> = Box::new(
|
||||
RecordBatchIterator::new(iterator.into_iter(), schema.clone()),
|
||||
);
|
||||
|
||||
let db = connect("memory://").execute().await.unwrap();
|
||||
let result = db.create_table("failing_table", reader).execute().await;
|
||||
|
||||
assert!(result.is_err());
|
||||
// TODO: when we upgrade to Lance 2.0.0, this should pass
|
||||
// assert!(matches!(result, Err(Error::External { source})
|
||||
// if source.downcast_ref::<MyError>().is_some()
|
||||
// ));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_preserves_stream_error() {
|
||||
let first_batch = record_batch!(("id", Int64, [1, 2])).unwrap();
|
||||
let schema = first_batch.schema();
|
||||
let iterator = vec![
|
||||
Ok(first_batch),
|
||||
Err(Error::External {
|
||||
source: Box::new(MyError),
|
||||
}),
|
||||
];
|
||||
let stream = futures::stream::iter(iterator);
|
||||
let stream: SendableRecordBatchStream = Box::pin(SimpleRecordBatchStream {
|
||||
schema: schema.clone(),
|
||||
stream,
|
||||
});
|
||||
|
||||
let db = connect("memory://").execute().await.unwrap();
|
||||
let result = db
|
||||
.create_table("failing_stream_table", stream)
|
||||
.execute()
|
||||
.await;
|
||||
|
||||
assert!(result.is_err());
|
||||
// TODO: when we upgrade to Lance 2.0.0, this should pass
|
||||
// assert!(matches!(result, Err(Error::External { source})
|
||||
// if source.downcast_ref::<MyError>().is_some()
|
||||
// ));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[allow(deprecated)]
|
||||
async fn test_create_table_with_storage_options() {
|
||||
let batch = record_batch!(("id", Int64, [1, 2, 3])).unwrap();
|
||||
let db = connect("memory://").execute().await.unwrap();
|
||||
|
||||
let table = db
|
||||
.create_table("options_table", batch)
|
||||
.storage_option("timeout", "30s")
|
||||
.storage_options([("retry_count", "3")])
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let final_options = table.storage_options().await.unwrap();
|
||||
assert_eq!(final_options.get("timeout"), Some(&"30s".to_string()));
|
||||
assert_eq!(final_options.get("retry_count"), Some(&"3".to_string()));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_table_unregistered_embedding() {
|
||||
let db = connect("memory://").execute().await.unwrap();
|
||||
let batch = record_batch!(("text", Utf8, ["hello", "world"])).unwrap();
|
||||
|
||||
// Try to add an embedding that doesn't exist in the registry
|
||||
let result = db
|
||||
.create_table("embed_table", batch)
|
||||
.add_embedding(EmbeddingDefinition::new(
|
||||
"text",
|
||||
"nonexistent_embedding_function",
|
||||
None::<&str>,
|
||||
));
|
||||
|
||||
match result {
|
||||
Err(Error::EmbeddingFunctionNotFound { name, .. }) => {
|
||||
assert_eq!(name, "nonexistent_embedding_function");
|
||||
}
|
||||
Err(other) => panic!("Expected EmbeddingFunctionNotFound error, got: {:?}", other),
|
||||
Ok(_) => panic!("Expected error, but got Ok"),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_table_already_exists() {
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
let db = connect(uri).execute().await.unwrap();
|
||||
let schema = Arc::new(Schema::new(vec![Field::new("x", DataType::Int32, false)]));
|
||||
db.create_empty_table("test", schema.clone())
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
db.create_empty_table("test", schema)
|
||||
.mode(CreateTableMode::exist_ok(|mut req| {
|
||||
req.index_cache_size = Some(16);
|
||||
req
|
||||
}))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
let other_schema = Arc::new(Schema::new(vec![Field::new("y", DataType::Int32, false)]));
|
||||
assert!(db
|
||||
.create_empty_table("test", other_schema.clone())
|
||||
.execute()
|
||||
.await
|
||||
.is_err()); // TODO: assert what this error is
|
||||
let overwritten = db
|
||||
.create_empty_table("test", other_schema.clone())
|
||||
.mode(CreateTableMode::Overwrite)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(other_schema, overwritten.schema().await.unwrap());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[rstest::rstest]
|
||||
#[case(LanceFileVersion::Legacy)]
|
||||
#[case(LanceFileVersion::Stable)]
|
||||
async fn test_create_table_with_storage_version(
|
||||
#[case] data_storage_version: LanceFileVersion,
|
||||
) {
|
||||
let db = connect("memory://")
|
||||
.database_options(&ListingDatabaseOptions {
|
||||
new_table_config: NewTableConfig {
|
||||
data_storage_version: Some(data_storage_version),
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
})
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let batch = record_batch!(("id", Int64, [1, 2, 3])).unwrap();
|
||||
let table = db
|
||||
.create_table("legacy_table", batch)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let native_table = table.as_native().unwrap();
|
||||
let storage_format = native_table
|
||||
.manifest()
|
||||
.await
|
||||
.unwrap()
|
||||
.data_storage_format
|
||||
.lance_file_version()
|
||||
.unwrap();
|
||||
// Compare resolved versions since Stable/Next are aliases that resolve at storage time
|
||||
assert_eq!(storage_format.resolve(), data_storage_version.resolve());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_table_with_embedding() {
|
||||
// Register the mock embedding function
|
||||
let registry = Arc::new(MemoryRegistry::new());
|
||||
let mock_embedding: Arc<dyn EmbeddingFunction> = Arc::new(MockEmbed::new("mock", 4));
|
||||
registry.register("mock", mock_embedding).unwrap();
|
||||
|
||||
// Connect with the custom registry
|
||||
let conn = connect("memory://")
|
||||
.embedding_registry(registry)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Create data without the embedding column
|
||||
let batch = record_batch!(("text", Utf8, ["hello", "world", "test"])).unwrap();
|
||||
|
||||
// Create table with add_embedding - embeddings should be computed automatically
|
||||
let table = conn
|
||||
.create_table("embed_test", batch)
|
||||
.add_embedding(EmbeddingDefinition::new(
|
||||
"text",
|
||||
"mock",
|
||||
Some("text_embedding"),
|
||||
))
|
||||
.unwrap()
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Verify row count
|
||||
assert_eq!(table.count_rows(None).await.unwrap(), 3);
|
||||
|
||||
// Verify the schema includes the embedding column
|
||||
let result_schema = table.schema().await.unwrap();
|
||||
assert_eq!(result_schema.fields().len(), 2);
|
||||
assert_eq!(result_schema.field(0).name(), "text");
|
||||
assert_eq!(result_schema.field(1).name(), "text_embedding");
|
||||
|
||||
// Verify the embedding column has the correct type
|
||||
assert!(matches!(
|
||||
result_schema.field(1).data_type(),
|
||||
DataType::FixedSizeList(_, 4)
|
||||
));
|
||||
|
||||
// Query to verify the embeddings were computed
|
||||
let results: Vec<RecordBatch> = table
|
||||
.query()
|
||||
.select(Select::columns(&["text", "text_embedding"]))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap()
|
||||
.try_collect()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let total_rows: usize = results.iter().map(|b| b.num_rows()).sum();
|
||||
assert_eq!(total_rows, 3);
|
||||
|
||||
// Check that all rows have embedding values (not null)
|
||||
for batch in &results {
|
||||
let embedding_col = batch.column(1);
|
||||
assert_eq!(embedding_col.null_count(), 0);
|
||||
assert_eq!(embedding_col.len(), batch.num_rows());
|
||||
}
|
||||
|
||||
// Verify the schema metadata contains the column definitions
|
||||
assert!(
|
||||
result_schema
|
||||
.metadata
|
||||
.contains_key("lancedb::column_definitions"),
|
||||
"Schema metadata should contain column definitions"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_empty_table_with_embeddings() {
|
||||
#[derive(Debug, Clone)]
|
||||
struct MockEmbedding {
|
||||
dim: usize,
|
||||
}
|
||||
|
||||
impl EmbeddingFunction for MockEmbedding {
|
||||
fn name(&self) -> &str {
|
||||
"test_embedding"
|
||||
}
|
||||
|
||||
fn source_type(&self) -> Result<Cow<'_, DataType>> {
|
||||
Ok(Cow::Owned(DataType::Utf8))
|
||||
}
|
||||
|
||||
fn dest_type(&self) -> Result<Cow<'_, DataType>> {
|
||||
Ok(Cow::Owned(DataType::new_fixed_size_list(
|
||||
DataType::Float32,
|
||||
self.dim as i32,
|
||||
true,
|
||||
)))
|
||||
}
|
||||
|
||||
fn compute_source_embeddings(&self, source: Arc<dyn Array>) -> Result<Arc<dyn Array>> {
|
||||
let len = source.len();
|
||||
let values = vec![1.0f32; len * self.dim];
|
||||
let values = Arc::new(Float32Array::from(values));
|
||||
let field = Arc::new(Field::new("item", DataType::Float32, true));
|
||||
Ok(Arc::new(FixedSizeListArray::new(
|
||||
field,
|
||||
self.dim as i32,
|
||||
values,
|
||||
None,
|
||||
)))
|
||||
}
|
||||
|
||||
fn compute_query_embeddings(&self, _input: Arc<dyn Array>) -> Result<Arc<dyn Array>> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
|
||||
let tmp_dir = tempdir().unwrap();
|
||||
let uri = tmp_dir.path().to_str().unwrap();
|
||||
let db = connect(uri).execute().await.unwrap();
|
||||
|
||||
let embed_func = Arc::new(MockEmbedding { dim: 128 });
|
||||
db.embedding_registry()
|
||||
.register("test_embedding", embed_func.clone())
|
||||
.unwrap();
|
||||
|
||||
let schema = Arc::new(Schema::new(vec![Field::new("name", DataType::Utf8, true)]));
|
||||
let ed = EmbeddingDefinition {
|
||||
source_column: "name".to_owned(),
|
||||
dest_column: Some("name_embedding".to_owned()),
|
||||
embedding_name: "test_embedding".to_owned(),
|
||||
};
|
||||
|
||||
let table = db
|
||||
.create_empty_table("test", schema)
|
||||
.mode(CreateTableMode::Overwrite)
|
||||
.add_embedding(ed)
|
||||
.unwrap()
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let table_schema = table.schema().await.unwrap();
|
||||
assert!(table_schema.column_with_name("name").is_some());
|
||||
assert!(table_schema.column_with_name("name_embedding").is_some());
|
||||
|
||||
let embedding_field = table_schema.field_with_name("name_embedding").unwrap();
|
||||
assert_eq!(
|
||||
embedding_field.data_type(),
|
||||
&DataType::new_fixed_size_list(DataType::Float32, 128, true)
|
||||
);
|
||||
|
||||
let input_batch = record_batch!(("name", Utf8, ["Alice", "Bob", "Charlie"])).unwrap();
|
||||
table.add(input_batch).execute().await.unwrap();
|
||||
|
||||
let results = table
|
||||
.query()
|
||||
.execute()
|
||||
.await
|
||||
.unwrap()
|
||||
.try_collect::<Vec<_>>()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(results.len(), 1);
|
||||
let batch = &results[0];
|
||||
assert_eq!(batch.num_rows(), 3);
|
||||
assert!(batch.column_by_name("name_embedding").is_some());
|
||||
|
||||
let embedding_col = batch
|
||||
.column_by_name("name_embedding")
|
||||
.unwrap()
|
||||
.as_any()
|
||||
.downcast_ref::<FixedSizeListArray>()
|
||||
.unwrap();
|
||||
assert_eq!(embedding_col.len(), 3);
|
||||
}
|
||||
}
|
||||
@@ -5,3 +5,4 @@
|
||||
|
||||
pub mod inspect;
|
||||
pub mod sanitize;
|
||||
pub mod scannable;
|
||||
|
||||
580
rust/lancedb/src/data/scannable.rs
Normal file
580
rust/lancedb/src/data/scannable.rs
Normal file
@@ -0,0 +1,580 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
//! Data source abstraction for LanceDB.
|
||||
//!
|
||||
//! This module provides a [`Scannable`] trait that allows input data sources to express
|
||||
//! capabilities (row count, rescannability) so the insert pipeline can make
|
||||
//! better decisions about write parallelism and retry strategies.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_array::{RecordBatch, RecordBatchIterator, RecordBatchReader};
|
||||
use arrow_schema::{ArrowError, SchemaRef};
|
||||
use async_trait::async_trait;
|
||||
use futures::stream::once;
|
||||
use futures::StreamExt;
|
||||
use lance_datafusion::utils::StreamingWriteSource;
|
||||
|
||||
use crate::arrow::{
|
||||
SendableRecordBatchStream, SendableRecordBatchStreamExt, SimpleRecordBatchStream,
|
||||
};
|
||||
use crate::embeddings::{
|
||||
compute_embeddings_for_batch, compute_output_schema, EmbeddingDefinition, EmbeddingFunction,
|
||||
EmbeddingRegistry,
|
||||
};
|
||||
use crate::table::{ColumnDefinition, ColumnKind, TableDefinition};
|
||||
use crate::{Error, Result};
|
||||
|
||||
pub trait Scannable: Send {
|
||||
/// Returns the schema of the data.
|
||||
fn schema(&self) -> SchemaRef;
|
||||
|
||||
/// Read data as a stream of record batches.
|
||||
///
|
||||
/// For rescannable sources (in-memory data like RecordBatch, Vec<RecordBatch>),
|
||||
/// this can be called multiple times and returns cloned data each time.
|
||||
///
|
||||
/// For non-rescannable sources (streams, readers), this can only be called once.
|
||||
/// Calling it a second time returns a stream whose first item is an error.
|
||||
fn scan_as_stream(&mut self) -> SendableRecordBatchStream;
|
||||
|
||||
/// Optional hint about the number of rows.
|
||||
///
|
||||
/// When available, this allows the pipeline to estimate total data size
|
||||
/// and choose appropriate partitioning.
|
||||
fn num_rows(&self) -> Option<usize> {
|
||||
None
|
||||
}
|
||||
|
||||
/// Whether the source can be re-read from the beginning.
|
||||
///
|
||||
/// `true` for in-memory data (Tables, DataFrames) and disk-based sources (Datasets).
|
||||
/// `false` for streaming sources (DuckDB results, network streams).
|
||||
///
|
||||
/// When true, the pipeline can retry failed writes by rescanning.
|
||||
fn rescannable(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for dyn Scannable {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("Scannable")
|
||||
.field("schema", &self.schema())
|
||||
.field("num_rows", &self.num_rows())
|
||||
.field("rescannable", &self.rescannable())
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl Scannable for RecordBatch {
|
||||
fn schema(&self) -> SchemaRef {
|
||||
Self::schema(self)
|
||||
}
|
||||
|
||||
fn scan_as_stream(&mut self) -> SendableRecordBatchStream {
|
||||
let batch = self.clone();
|
||||
let schema = batch.schema();
|
||||
Box::pin(SimpleRecordBatchStream {
|
||||
schema,
|
||||
stream: once(async move { Ok(batch) }),
|
||||
})
|
||||
}
|
||||
|
||||
fn num_rows(&self) -> Option<usize> {
|
||||
Some(Self::num_rows(self))
|
||||
}
|
||||
|
||||
fn rescannable(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
impl Scannable for Vec<RecordBatch> {
|
||||
fn schema(&self) -> SchemaRef {
|
||||
if self.is_empty() {
|
||||
Arc::new(arrow_schema::Schema::empty())
|
||||
} else {
|
||||
self[0].schema()
|
||||
}
|
||||
}
|
||||
|
||||
fn scan_as_stream(&mut self) -> SendableRecordBatchStream {
|
||||
if self.is_empty() {
|
||||
let schema = Scannable::schema(self);
|
||||
return Box::pin(SimpleRecordBatchStream {
|
||||
schema,
|
||||
stream: once(async {
|
||||
Err(Error::InvalidInput {
|
||||
message: "Cannot scan an empty Vec<RecordBatch>".to_string(),
|
||||
})
|
||||
}),
|
||||
});
|
||||
}
|
||||
let schema = Scannable::schema(self);
|
||||
let batches = self.clone();
|
||||
let stream = futures::stream::iter(batches.into_iter().map(Ok));
|
||||
Box::pin(SimpleRecordBatchStream { schema, stream })
|
||||
}
|
||||
|
||||
fn num_rows(&self) -> Option<usize> {
|
||||
Some(self.iter().map(|b| b.num_rows()).sum())
|
||||
}
|
||||
|
||||
fn rescannable(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
impl Scannable for Box<dyn RecordBatchReader + Send> {
|
||||
fn schema(&self) -> SchemaRef {
|
||||
RecordBatchReader::schema(self.as_ref())
|
||||
}
|
||||
|
||||
fn scan_as_stream(&mut self) -> SendableRecordBatchStream {
|
||||
let schema = Scannable::schema(self);
|
||||
|
||||
// Swap self with a reader that errors on iteration, so a second call
|
||||
// produces a clear error instead of silently returning empty data.
|
||||
let err_reader: Box<dyn RecordBatchReader + Send> = Box::new(RecordBatchIterator::new(
|
||||
vec![Err(ArrowError::InvalidArgumentError(
|
||||
"Reader has already been consumed".into(),
|
||||
))],
|
||||
schema.clone(),
|
||||
));
|
||||
let reader = std::mem::replace(self, err_reader);
|
||||
|
||||
// Bridge the blocking RecordBatchReader to an async stream via a channel.
|
||||
let (tx, rx) = tokio::sync::mpsc::channel::<crate::Result<RecordBatch>>(2);
|
||||
tokio::task::spawn_blocking(move || {
|
||||
for batch_result in reader {
|
||||
let result = batch_result.map_err(Into::into);
|
||||
if tx.blocking_send(result).is_err() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let stream = futures::stream::unfold(rx, |mut rx| async move {
|
||||
rx.recv().await.map(|batch| (batch, rx))
|
||||
})
|
||||
.fuse();
|
||||
|
||||
Box::pin(SimpleRecordBatchStream { schema, stream })
|
||||
}
|
||||
}
|
||||
|
||||
impl Scannable for SendableRecordBatchStream {
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.as_ref().schema()
|
||||
}
|
||||
|
||||
fn scan_as_stream(&mut self) -> SendableRecordBatchStream {
|
||||
let schema = Scannable::schema(self);
|
||||
|
||||
// Swap self with an error stream so a second call produces a clear error.
|
||||
let error_stream = Box::pin(SimpleRecordBatchStream {
|
||||
schema: schema.clone(),
|
||||
stream: once(async {
|
||||
Err(Error::InvalidInput {
|
||||
message: "Stream has already been consumed".to_string(),
|
||||
})
|
||||
}),
|
||||
});
|
||||
std::mem::replace(self, error_stream)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl StreamingWriteSource for Box<dyn Scannable> {
|
||||
fn arrow_schema(&self) -> SchemaRef {
|
||||
self.schema()
|
||||
}
|
||||
|
||||
fn into_stream(mut self) -> datafusion_physical_plan::SendableRecordBatchStream {
|
||||
self.scan_as_stream().into_df_stream()
|
||||
}
|
||||
}
|
||||
|
||||
/// A scannable that applies embeddings to the stream.
|
||||
pub struct WithEmbeddingsScannable {
|
||||
inner: Box<dyn Scannable>,
|
||||
embeddings: Vec<(EmbeddingDefinition, Arc<dyn EmbeddingFunction>)>,
|
||||
output_schema: SchemaRef,
|
||||
}
|
||||
|
||||
impl WithEmbeddingsScannable {
|
||||
/// Create a new WithEmbeddingsScannable.
|
||||
///
|
||||
/// The embeddings are applied to the inner scannable's data as new columns.
|
||||
pub fn try_new(
|
||||
inner: Box<dyn Scannable>,
|
||||
embeddings: Vec<(EmbeddingDefinition, Arc<dyn EmbeddingFunction>)>,
|
||||
) -> Result<Self> {
|
||||
let output_schema = compute_output_schema(&inner.schema(), &embeddings)?;
|
||||
|
||||
// Build column definitions: Physical for base columns, Embedding for new ones
|
||||
let base_col_count = inner.schema().fields().len();
|
||||
let column_definitions: Vec<ColumnDefinition> = (0..base_col_count)
|
||||
.map(|_| ColumnDefinition {
|
||||
kind: ColumnKind::Physical,
|
||||
})
|
||||
.chain(embeddings.iter().map(|(ed, _)| ColumnDefinition {
|
||||
kind: ColumnKind::Embedding(ed.clone()),
|
||||
}))
|
||||
.collect();
|
||||
|
||||
let table_definition = TableDefinition::new(output_schema, column_definitions);
|
||||
let output_schema = table_definition.into_rich_schema();
|
||||
|
||||
Ok(Self {
|
||||
inner,
|
||||
embeddings,
|
||||
output_schema,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Scannable for WithEmbeddingsScannable {
|
||||
fn schema(&self) -> SchemaRef {
|
||||
self.output_schema.clone()
|
||||
}
|
||||
|
||||
fn scan_as_stream(&mut self) -> SendableRecordBatchStream {
|
||||
let inner_stream = self.inner.scan_as_stream();
|
||||
let embeddings = self.embeddings.clone();
|
||||
let output_schema = self.output_schema.clone();
|
||||
|
||||
let mapped_stream = inner_stream.then(move |batch_result| {
|
||||
let embeddings = embeddings.clone();
|
||||
async move {
|
||||
let batch = batch_result?;
|
||||
let result = tokio::task::spawn_blocking(move || {
|
||||
compute_embeddings_for_batch(batch, &embeddings)
|
||||
})
|
||||
.await
|
||||
.map_err(|e| Error::Runtime {
|
||||
message: format!("Task panicked during embedding computation: {}", e),
|
||||
})??;
|
||||
Ok(result)
|
||||
}
|
||||
});
|
||||
|
||||
Box::pin(SimpleRecordBatchStream {
|
||||
schema: output_schema,
|
||||
stream: mapped_stream,
|
||||
})
|
||||
}
|
||||
|
||||
fn num_rows(&self) -> Option<usize> {
|
||||
self.inner.num_rows()
|
||||
}
|
||||
|
||||
fn rescannable(&self) -> bool {
|
||||
self.inner.rescannable()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn scannable_with_embeddings(
|
||||
inner: Box<dyn Scannable>,
|
||||
table_definition: &TableDefinition,
|
||||
registry: Option<&Arc<dyn EmbeddingRegistry>>,
|
||||
) -> Result<Box<dyn Scannable>> {
|
||||
if let Some(registry) = registry {
|
||||
let mut embeddings = Vec::with_capacity(table_definition.column_definitions.len());
|
||||
for cd in table_definition.column_definitions.iter() {
|
||||
if let ColumnKind::Embedding(embedding_def) = &cd.kind {
|
||||
match registry.get(&embedding_def.embedding_name) {
|
||||
Some(func) => {
|
||||
embeddings.push((embedding_def.clone(), func));
|
||||
}
|
||||
None => {
|
||||
return Err(Error::EmbeddingFunctionNotFound {
|
||||
name: embedding_def.embedding_name.clone(),
|
||||
reason: format!(
|
||||
"Table was defined with an embedding column `{}` but no embedding function was found with that name within the registry.",
|
||||
embedding_def.embedding_name
|
||||
),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !embeddings.is_empty() {
|
||||
return Ok(Box::new(WithEmbeddingsScannable::try_new(
|
||||
inner, embeddings,
|
||||
)?));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(inner)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use arrow_array::record_batch;
|
||||
use futures::TryStreamExt;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_record_batch_rescannable() {
|
||||
let mut batch = record_batch!(("id", Int64, [0, 1, 2])).unwrap();
|
||||
|
||||
let stream1 = batch.scan_as_stream();
|
||||
let batches1: Vec<RecordBatch> = stream1.try_collect().await.unwrap();
|
||||
assert_eq!(batches1.len(), 1);
|
||||
assert_eq!(batches1[0], batch);
|
||||
|
||||
assert!(batch.rescannable());
|
||||
let stream2 = batch.scan_as_stream();
|
||||
let batches2: Vec<RecordBatch> = stream2.try_collect().await.unwrap();
|
||||
assert_eq!(batches2.len(), 1);
|
||||
assert_eq!(batches2[0], batch);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_vec_batch_rescannable() {
|
||||
let mut batches = vec![
|
||||
record_batch!(("id", Int64, [0, 1])).unwrap(),
|
||||
record_batch!(("id", Int64, [2, 3, 4])).unwrap(),
|
||||
];
|
||||
|
||||
let stream1 = batches.scan_as_stream();
|
||||
let result1: Vec<RecordBatch> = stream1.try_collect().await.unwrap();
|
||||
assert_eq!(result1.len(), 2);
|
||||
assert_eq!(result1[0], batches[0]);
|
||||
assert_eq!(result1[1], batches[1]);
|
||||
|
||||
assert!(batches.rescannable());
|
||||
let stream2 = batches.scan_as_stream();
|
||||
let result2: Vec<RecordBatch> = stream2.try_collect().await.unwrap();
|
||||
assert_eq!(result2.len(), 2);
|
||||
assert_eq!(result2[0], batches[0]);
|
||||
assert_eq!(result2[1], batches[1]);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_vec_batch_empty_errors() {
|
||||
let mut empty: Vec<RecordBatch> = vec![];
|
||||
let mut stream = empty.scan_as_stream();
|
||||
let result = stream.next().await;
|
||||
assert!(result.is_some());
|
||||
assert!(result.unwrap().is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_reader_not_rescannable() {
|
||||
let batch = record_batch!(("id", Int64, [0, 1, 2])).unwrap();
|
||||
let schema = batch.schema();
|
||||
let mut reader: Box<dyn arrow_array::RecordBatchReader + Send> = Box::new(
|
||||
RecordBatchIterator::new(vec![Ok(batch.clone())], schema.clone()),
|
||||
);
|
||||
|
||||
let stream1 = reader.scan_as_stream();
|
||||
let result1: Vec<RecordBatch> = stream1.try_collect().await.unwrap();
|
||||
assert_eq!(result1.len(), 1);
|
||||
assert_eq!(result1[0], batch);
|
||||
|
||||
assert!(!reader.rescannable());
|
||||
// Second call returns a stream whose first item is an error
|
||||
let mut stream2 = reader.scan_as_stream();
|
||||
let result2 = stream2.next().await;
|
||||
assert!(result2.is_some());
|
||||
assert!(result2.unwrap().is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_stream_not_rescannable() {
|
||||
let batch = record_batch!(("id", Int64, [0, 1, 2])).unwrap();
|
||||
let schema = batch.schema();
|
||||
let inner_stream = futures::stream::iter(vec![Ok(batch.clone())]);
|
||||
let mut stream: SendableRecordBatchStream = Box::pin(SimpleRecordBatchStream {
|
||||
schema: schema.clone(),
|
||||
stream: inner_stream,
|
||||
});
|
||||
|
||||
let stream1 = stream.scan_as_stream();
|
||||
let result1: Vec<RecordBatch> = stream1.try_collect().await.unwrap();
|
||||
assert_eq!(result1.len(), 1);
|
||||
assert_eq!(result1[0], batch);
|
||||
|
||||
assert!(!stream.rescannable());
|
||||
// Second call returns a stream whose first item is an error
|
||||
let mut stream2 = stream.scan_as_stream();
|
||||
let result2 = stream2.next().await;
|
||||
assert!(result2.is_some());
|
||||
assert!(result2.unwrap().is_err());
|
||||
}
|
||||
|
||||
mod embedding_tests {
|
||||
use super::*;
|
||||
use crate::embeddings::MemoryRegistry;
|
||||
use crate::table::{ColumnDefinition, ColumnKind};
|
||||
use crate::test_utils::embeddings::MockEmbed;
|
||||
use arrow_array::Array as _;
|
||||
use arrow_array::{ArrayRef, StringArray};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_with_embeddings_scannable() {
|
||||
let schema = Arc::new(Schema::new(vec![Field::new("text", DataType::Utf8, false)]));
|
||||
let text_array = StringArray::from(vec!["hello", "world", "test"]);
|
||||
let batch =
|
||||
RecordBatch::try_new(schema.clone(), vec![Arc::new(text_array) as ArrayRef])
|
||||
.unwrap();
|
||||
|
||||
let mock_embedding: Arc<dyn EmbeddingFunction> = Arc::new(MockEmbed::new("mock", 4));
|
||||
let embedding_def = EmbeddingDefinition::new("text", "mock", Some("text_embedding"));
|
||||
|
||||
let mut scannable = WithEmbeddingsScannable::try_new(
|
||||
Box::new(batch.clone()),
|
||||
vec![(embedding_def, mock_embedding)],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// Check that schema has the embedding column
|
||||
let output_schema = scannable.schema();
|
||||
assert_eq!(output_schema.fields().len(), 2);
|
||||
assert_eq!(output_schema.field(0).name(), "text");
|
||||
assert_eq!(output_schema.field(1).name(), "text_embedding");
|
||||
|
||||
// Check num_rows and rescannable are preserved
|
||||
assert_eq!(scannable.num_rows(), Some(3));
|
||||
assert!(scannable.rescannable());
|
||||
|
||||
// Read the data
|
||||
let stream = scannable.scan_as_stream();
|
||||
let results: Vec<RecordBatch> = stream.try_collect().await.unwrap();
|
||||
assert_eq!(results.len(), 1);
|
||||
|
||||
let result_batch = &results[0];
|
||||
assert_eq!(result_batch.num_rows(), 3);
|
||||
assert_eq!(result_batch.num_columns(), 2);
|
||||
|
||||
// Verify the embedding column is present and has the right shape
|
||||
let embedding_col = result_batch.column(1);
|
||||
assert_eq!(embedding_col.len(), 3);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_maybe_embedded_scannable_no_embeddings() {
|
||||
let batch = record_batch!(("id", Int64, [1, 2, 3])).unwrap();
|
||||
|
||||
// Create a table definition with no embedding columns
|
||||
let table_def = TableDefinition::new_from_schema(batch.schema());
|
||||
|
||||
// Even with a registry, if there are no embedding columns, it's a passthrough
|
||||
let registry: Arc<dyn EmbeddingRegistry> = Arc::new(MemoryRegistry::new());
|
||||
let mut scannable =
|
||||
scannable_with_embeddings(Box::new(batch.clone()), &table_def, Some(®istry))
|
||||
.unwrap();
|
||||
|
||||
// Check that data passes through unchanged
|
||||
let stream = scannable.scan_as_stream();
|
||||
let results: Vec<RecordBatch> = stream.try_collect().await.unwrap();
|
||||
assert_eq!(results.len(), 1);
|
||||
assert_eq!(results[0], batch);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_maybe_embedded_scannable_with_embeddings() {
|
||||
let schema = Arc::new(Schema::new(vec![Field::new("text", DataType::Utf8, false)]));
|
||||
let text_array = StringArray::from(vec!["hello", "world"]);
|
||||
let batch =
|
||||
RecordBatch::try_new(schema.clone(), vec![Arc::new(text_array) as ArrayRef])
|
||||
.unwrap();
|
||||
|
||||
// Create a table definition with an embedding column
|
||||
let embedding_def = EmbeddingDefinition::new("text", "mock", Some("text_embedding"));
|
||||
let embedding_schema = Arc::new(Schema::new(vec![
|
||||
Field::new("text", DataType::Utf8, false),
|
||||
Field::new(
|
||||
"text_embedding",
|
||||
DataType::FixedSizeList(
|
||||
Arc::new(Field::new("item", DataType::Float32, true)),
|
||||
4,
|
||||
),
|
||||
false,
|
||||
),
|
||||
]));
|
||||
let table_def = TableDefinition::new(
|
||||
embedding_schema,
|
||||
vec![
|
||||
ColumnDefinition {
|
||||
kind: ColumnKind::Physical,
|
||||
},
|
||||
ColumnDefinition {
|
||||
kind: ColumnKind::Embedding(embedding_def.clone()),
|
||||
},
|
||||
],
|
||||
);
|
||||
|
||||
// Register the mock embedding function
|
||||
let registry: Arc<dyn EmbeddingRegistry> = Arc::new(MemoryRegistry::new());
|
||||
let mock_embedding: Arc<dyn EmbeddingFunction> = Arc::new(MockEmbed::new("mock", 4));
|
||||
registry.register("mock", mock_embedding).unwrap();
|
||||
|
||||
let mut scannable =
|
||||
scannable_with_embeddings(Box::new(batch), &table_def, Some(®istry)).unwrap();
|
||||
|
||||
// Read and verify the data has embeddings
|
||||
let stream = scannable.scan_as_stream();
|
||||
let results: Vec<RecordBatch> = stream.try_collect().await.unwrap();
|
||||
assert_eq!(results.len(), 1);
|
||||
|
||||
let result_batch = &results[0];
|
||||
assert_eq!(result_batch.num_columns(), 2);
|
||||
assert_eq!(result_batch.schema().field(1).name(), "text_embedding");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_maybe_embedded_scannable_missing_function() {
|
||||
let schema = Arc::new(Schema::new(vec![Field::new("text", DataType::Utf8, false)]));
|
||||
let text_array = StringArray::from(vec!["hello"]);
|
||||
let batch =
|
||||
RecordBatch::try_new(schema.clone(), vec![Arc::new(text_array) as ArrayRef])
|
||||
.unwrap();
|
||||
|
||||
// Create a table definition with an embedding column
|
||||
let embedding_def =
|
||||
EmbeddingDefinition::new("text", "nonexistent", Some("text_embedding"));
|
||||
let embedding_schema = Arc::new(Schema::new(vec![
|
||||
Field::new("text", DataType::Utf8, false),
|
||||
Field::new(
|
||||
"text_embedding",
|
||||
DataType::FixedSizeList(
|
||||
Arc::new(Field::new("item", DataType::Float32, true)),
|
||||
4,
|
||||
),
|
||||
false,
|
||||
),
|
||||
]));
|
||||
let table_def = TableDefinition::new(
|
||||
embedding_schema,
|
||||
vec![
|
||||
ColumnDefinition {
|
||||
kind: ColumnKind::Physical,
|
||||
},
|
||||
ColumnDefinition {
|
||||
kind: ColumnKind::Embedding(embedding_def),
|
||||
},
|
||||
],
|
||||
);
|
||||
|
||||
// Registry has no embedding functions registered
|
||||
let registry: Arc<dyn EmbeddingRegistry> = Arc::new(MemoryRegistry::new());
|
||||
|
||||
let result = scannable_with_embeddings(Box::new(batch), &table_def, Some(®istry));
|
||||
|
||||
// Should fail because the embedding function is not found
|
||||
assert!(result.is_err());
|
||||
let err = result.err().unwrap();
|
||||
assert!(
|
||||
matches!(err, Error::EmbeddingFunctionNotFound { .. }),
|
||||
"Expected EmbeddingFunctionNotFound"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -18,12 +18,7 @@ use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use arrow_array::RecordBatchReader;
|
||||
use async_trait::async_trait;
|
||||
use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
|
||||
use futures::stream;
|
||||
use lance::dataset::ReadParams;
|
||||
use lance_datafusion::utils::StreamingWriteSource;
|
||||
use lance_namespace::models::{
|
||||
CreateNamespaceRequest, CreateNamespaceResponse, DescribeNamespaceRequest,
|
||||
DescribeNamespaceResponse, DropNamespaceRequest, DropNamespaceResponse, ListNamespacesRequest,
|
||||
@@ -31,9 +26,9 @@ use lance_namespace::models::{
|
||||
};
|
||||
use lance_namespace::LanceNamespace;
|
||||
|
||||
use crate::arrow::{SendableRecordBatchStream, SendableRecordBatchStreamExt};
|
||||
use crate::data::scannable::Scannable;
|
||||
use crate::error::Result;
|
||||
use crate::table::{BaseTable, TableDefinition, WriteOptions};
|
||||
use crate::table::{BaseTable, WriteOptions};
|
||||
|
||||
pub mod listing;
|
||||
pub mod namespace;
|
||||
@@ -115,51 +110,14 @@ impl Default for CreateTableMode {
|
||||
}
|
||||
}
|
||||
|
||||
/// The data to start a table or a schema to create an empty table
|
||||
pub enum CreateTableData {
|
||||
/// Creates a table using an iterator of data, the schema will be obtained from the data
|
||||
Data(Box<dyn RecordBatchReader + Send>),
|
||||
/// Creates a table using a stream of data, the schema will be obtained from the data
|
||||
StreamingData(SendableRecordBatchStream),
|
||||
/// Creates an empty table, the definition / schema must be provided separately
|
||||
Empty(TableDefinition),
|
||||
}
|
||||
|
||||
impl CreateTableData {
|
||||
pub fn schema(&self) -> Arc<arrow_schema::Schema> {
|
||||
match self {
|
||||
Self::Data(reader) => reader.schema(),
|
||||
Self::StreamingData(stream) => stream.schema(),
|
||||
Self::Empty(definition) => definition.schema.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl StreamingWriteSource for CreateTableData {
|
||||
fn arrow_schema(&self) -> Arc<arrow_schema::Schema> {
|
||||
self.schema()
|
||||
}
|
||||
fn into_stream(self) -> datafusion_physical_plan::SendableRecordBatchStream {
|
||||
match self {
|
||||
Self::Data(reader) => reader.into_stream(),
|
||||
Self::StreamingData(stream) => stream.into_df_stream(),
|
||||
Self::Empty(table_definition) => {
|
||||
let schema = table_definition.schema.clone();
|
||||
Box::pin(RecordBatchStreamAdapter::new(schema, stream::empty()))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A request to create a table
|
||||
pub struct CreateTableRequest {
|
||||
/// The name of the new table
|
||||
pub name: String,
|
||||
/// The namespace to create the table in. Empty list represents root namespace.
|
||||
pub namespace: Vec<String>,
|
||||
/// Initial data to write to the table, can be None to create an empty table
|
||||
pub data: CreateTableData,
|
||||
/// Initial data to write to the table, can be empty.
|
||||
pub data: Box<dyn Scannable>,
|
||||
/// The mode to use when creating the table
|
||||
pub mode: CreateTableMode,
|
||||
/// Options to use when writing data (only used if `data` is not None)
|
||||
@@ -173,7 +131,7 @@ pub struct CreateTableRequest {
|
||||
}
|
||||
|
||||
impl CreateTableRequest {
|
||||
pub fn new(name: String, data: CreateTableData) -> Self {
|
||||
pub fn new(name: String, data: Box<dyn Scannable>) -> Self {
|
||||
Self {
|
||||
name,
|
||||
namespace: vec![],
|
||||
|
||||
@@ -922,7 +922,7 @@ impl Database for ListingDatabase {
|
||||
.with_read_params(read_params.clone())
|
||||
.load()
|
||||
.await
|
||||
.map_err(|e| Error::Lance { source: e })?;
|
||||
.map_err(|e| -> Error { e.into() })?;
|
||||
|
||||
let version_ref = match (request.source_version, request.source_tag) {
|
||||
(Some(v), None) => Ok(Ref::Version(None, Some(v))),
|
||||
@@ -937,7 +937,7 @@ impl Database for ListingDatabase {
|
||||
source_dataset
|
||||
.shallow_clone(&target_uri, version_ref, Some(storage_params))
|
||||
.await
|
||||
.map_err(|e| Error::Lance { source: e })?;
|
||||
.map_err(|e| -> Error { e.into() })?;
|
||||
|
||||
let cloned_table = NativeTable::open_with_params(
|
||||
&target_uri,
|
||||
@@ -1098,8 +1098,10 @@ impl Database for ListingDatabase {
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::connection::ConnectRequest;
|
||||
use crate::database::{CreateTableData, CreateTableMode, CreateTableRequest, WriteOptions};
|
||||
use crate::table::{Table, TableDefinition};
|
||||
use crate::data::scannable::Scannable;
|
||||
use crate::database::{CreateTableMode, CreateTableRequest};
|
||||
use crate::table::WriteOptions;
|
||||
use crate::Table;
|
||||
use arrow_array::{Int32Array, RecordBatch, StringArray};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use std::path::PathBuf;
|
||||
@@ -1139,7 +1141,7 @@ mod tests {
|
||||
.create_table(CreateTableRequest {
|
||||
name: "source_table".to_string(),
|
||||
namespace: vec![],
|
||||
data: CreateTableData::Empty(TableDefinition::new_from_schema(schema.clone())),
|
||||
data: Box::new(RecordBatch::new_empty(schema.clone())) as Box<dyn Scannable>,
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
@@ -1196,16 +1198,11 @@ mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let reader = Box::new(arrow_array::RecordBatchIterator::new(
|
||||
vec![Ok(batch)],
|
||||
schema.clone(),
|
||||
));
|
||||
|
||||
let source_table = db
|
||||
.create_table(CreateTableRequest {
|
||||
name: "source_with_data".to_string(),
|
||||
namespace: vec![],
|
||||
data: CreateTableData::Data(reader),
|
||||
data: Box::new(batch) as Box<dyn Scannable>,
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
@@ -1264,7 +1261,7 @@ mod tests {
|
||||
db.create_table(CreateTableRequest {
|
||||
name: "source".to_string(),
|
||||
namespace: vec![],
|
||||
data: CreateTableData::Empty(TableDefinition::new_from_schema(schema)),
|
||||
data: Box::new(RecordBatch::new_empty(schema)) as Box<dyn Scannable>,
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
@@ -1300,7 +1297,7 @@ mod tests {
|
||||
db.create_table(CreateTableRequest {
|
||||
name: "source".to_string(),
|
||||
namespace: vec![],
|
||||
data: CreateTableData::Empty(TableDefinition::new_from_schema(schema)),
|
||||
data: Box::new(RecordBatch::new_empty(schema)) as Box<dyn Scannable>,
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
@@ -1340,7 +1337,7 @@ mod tests {
|
||||
db.create_table(CreateTableRequest {
|
||||
name: "source".to_string(),
|
||||
namespace: vec![],
|
||||
data: CreateTableData::Empty(TableDefinition::new_from_schema(schema)),
|
||||
data: Box::new(RecordBatch::new_empty(schema)) as Box<dyn Scannable>,
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
@@ -1380,7 +1377,7 @@ mod tests {
|
||||
db.create_table(CreateTableRequest {
|
||||
name: "source".to_string(),
|
||||
namespace: vec![],
|
||||
data: CreateTableData::Empty(TableDefinition::new_from_schema(schema)),
|
||||
data: Box::new(RecordBatch::new_empty(schema)) as Box<dyn Scannable>,
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
@@ -1435,7 +1432,7 @@ mod tests {
|
||||
db.create_table(CreateTableRequest {
|
||||
name: "source".to_string(),
|
||||
namespace: vec![],
|
||||
data: CreateTableData::Empty(TableDefinition::new_from_schema(schema)),
|
||||
data: Box::new(RecordBatch::new_empty(schema)) as Box<dyn Scannable>,
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
@@ -1484,16 +1481,11 @@ mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let reader = Box::new(arrow_array::RecordBatchIterator::new(
|
||||
vec![Ok(batch1)],
|
||||
schema.clone(),
|
||||
));
|
||||
|
||||
let source_table = db
|
||||
.create_table(CreateTableRequest {
|
||||
name: "versioned_source".to_string(),
|
||||
namespace: vec![],
|
||||
data: CreateTableData::Data(reader),
|
||||
data: Box::new(batch1) as Box<dyn Scannable>,
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
@@ -1517,14 +1509,7 @@ mod tests {
|
||||
|
||||
let db = Arc::new(db);
|
||||
let source_table_obj = Table::new(source_table.clone(), db.clone());
|
||||
source_table_obj
|
||||
.add(Box::new(arrow_array::RecordBatchIterator::new(
|
||||
vec![Ok(batch2)],
|
||||
schema.clone(),
|
||||
)))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
source_table_obj.add(batch2).execute().await.unwrap();
|
||||
|
||||
// Verify source table now has 4 rows
|
||||
assert_eq!(source_table.count_rows(None).await.unwrap(), 4);
|
||||
@@ -1570,16 +1555,11 @@ mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let reader = Box::new(arrow_array::RecordBatchIterator::new(
|
||||
vec![Ok(batch1)],
|
||||
schema.clone(),
|
||||
));
|
||||
|
||||
let source_table = db
|
||||
.create_table(CreateTableRequest {
|
||||
name: "tagged_source".to_string(),
|
||||
namespace: vec![],
|
||||
data: CreateTableData::Data(reader),
|
||||
data: Box::new(batch1),
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
@@ -1607,14 +1587,7 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
let source_table_obj = Table::new(source_table.clone(), db.clone());
|
||||
source_table_obj
|
||||
.add(Box::new(arrow_array::RecordBatchIterator::new(
|
||||
vec![Ok(batch2)],
|
||||
schema.clone(),
|
||||
)))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
source_table_obj.add(batch2).execute().await.unwrap();
|
||||
|
||||
// Source table should have 4 rows
|
||||
assert_eq!(source_table.count_rows(None).await.unwrap(), 4);
|
||||
@@ -1657,16 +1630,11 @@ mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let reader = Box::new(arrow_array::RecordBatchIterator::new(
|
||||
vec![Ok(batch1)],
|
||||
schema.clone(),
|
||||
));
|
||||
|
||||
let source_table = db
|
||||
.create_table(CreateTableRequest {
|
||||
name: "independent_source".to_string(),
|
||||
namespace: vec![],
|
||||
data: CreateTableData::Data(reader),
|
||||
data: Box::new(batch1),
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
@@ -1706,14 +1674,7 @@ mod tests {
|
||||
|
||||
let db = Arc::new(db);
|
||||
let cloned_table_obj = Table::new(cloned_table.clone(), db.clone());
|
||||
cloned_table_obj
|
||||
.add(Box::new(arrow_array::RecordBatchIterator::new(
|
||||
vec![Ok(batch_clone)],
|
||||
schema.clone(),
|
||||
)))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
cloned_table_obj.add(batch_clone).execute().await.unwrap();
|
||||
|
||||
// Add different data to the source table
|
||||
let batch_source = RecordBatch::try_new(
|
||||
@@ -1726,14 +1687,7 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
let source_table_obj = Table::new(source_table.clone(), db);
|
||||
source_table_obj
|
||||
.add(Box::new(arrow_array::RecordBatchIterator::new(
|
||||
vec![Ok(batch_source)],
|
||||
schema.clone(),
|
||||
)))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
source_table_obj.add(batch_source).execute().await.unwrap();
|
||||
|
||||
// Verify they have evolved independently
|
||||
assert_eq!(source_table.count_rows(None).await.unwrap(), 4); // 2 + 2
|
||||
@@ -1751,16 +1705,11 @@ mod tests {
|
||||
RecordBatch::try_new(schema.clone(), vec![Arc::new(Int32Array::from(vec![1, 2]))])
|
||||
.unwrap();
|
||||
|
||||
let reader = Box::new(arrow_array::RecordBatchIterator::new(
|
||||
vec![Ok(batch1)],
|
||||
schema.clone(),
|
||||
));
|
||||
|
||||
let source_table = db
|
||||
.create_table(CreateTableRequest {
|
||||
name: "latest_version_source".to_string(),
|
||||
namespace: vec![],
|
||||
data: CreateTableData::Data(reader),
|
||||
data: Box::new(batch1),
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
@@ -1779,14 +1728,7 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
let source_table_obj = Table::new(source_table.clone(), db.clone());
|
||||
source_table_obj
|
||||
.add(Box::new(arrow_array::RecordBatchIterator::new(
|
||||
vec![Ok(batch)],
|
||||
schema.clone(),
|
||||
)))
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
source_table_obj.add(batch).execute().await.unwrap();
|
||||
}
|
||||
|
||||
// Source should have 8 rows total (2 + 2 + 2 + 2)
|
||||
@@ -1849,16 +1791,11 @@ mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let reader = Box::new(arrow_array::RecordBatchIterator::new(
|
||||
vec![Ok(batch)],
|
||||
schema.clone(),
|
||||
));
|
||||
|
||||
let table = db
|
||||
.create_table(CreateTableRequest {
|
||||
name: "test_stable".to_string(),
|
||||
namespace: vec![],
|
||||
data: CreateTableData::Data(reader),
|
||||
data: Box::new(batch),
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
@@ -1887,11 +1824,6 @@ mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let reader = Box::new(arrow_array::RecordBatchIterator::new(
|
||||
vec![Ok(batch)],
|
||||
schema.clone(),
|
||||
));
|
||||
|
||||
let mut storage_options = HashMap::new();
|
||||
storage_options.insert(
|
||||
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
|
||||
@@ -1914,7 +1846,7 @@ mod tests {
|
||||
.create_table(CreateTableRequest {
|
||||
name: "test_stable_table_level".to_string(),
|
||||
namespace: vec![],
|
||||
data: CreateTableData::Data(reader),
|
||||
data: Box::new(batch),
|
||||
mode: CreateTableMode::Create,
|
||||
write_options,
|
||||
location: None,
|
||||
@@ -1963,11 +1895,6 @@ mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let reader = Box::new(arrow_array::RecordBatchIterator::new(
|
||||
vec![Ok(batch)],
|
||||
schema.clone(),
|
||||
));
|
||||
|
||||
let mut storage_options = HashMap::new();
|
||||
storage_options.insert(
|
||||
OPT_NEW_TABLE_ENABLE_STABLE_ROW_IDS.to_string(),
|
||||
@@ -1990,7 +1917,7 @@ mod tests {
|
||||
.create_table(CreateTableRequest {
|
||||
name: "test_override".to_string(),
|
||||
namespace: vec![],
|
||||
data: CreateTableData::Data(reader),
|
||||
data: Box::new(batch),
|
||||
mode: CreateTableMode::Create,
|
||||
write_options,
|
||||
location: None,
|
||||
@@ -2108,7 +2035,7 @@ mod tests {
|
||||
db.create_table(CreateTableRequest {
|
||||
name: "table1".to_string(),
|
||||
namespace: vec![],
|
||||
data: CreateTableData::Empty(TableDefinition::new_from_schema(schema.clone())),
|
||||
data: Box::new(RecordBatch::new_empty(schema.clone())) as Box<dyn Scannable>,
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
@@ -2120,7 +2047,7 @@ mod tests {
|
||||
db.create_table(CreateTableRequest {
|
||||
name: "table2".to_string(),
|
||||
namespace: vec![],
|
||||
data: CreateTableData::Empty(TableDefinition::new_from_schema(schema)),
|
||||
data: Box::new(RecordBatch::new_empty(schema)) as Box<dyn Scannable>,
|
||||
mode: CreateTableMode::Create,
|
||||
write_options: Default::default(),
|
||||
location: None,
|
||||
|
||||
@@ -354,15 +354,13 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::connect_namespace;
|
||||
use crate::query::ExecutableQuery;
|
||||
use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator, StringArray};
|
||||
use arrow_array::{Int32Array, RecordBatch, StringArray};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
use futures::TryStreamExt;
|
||||
use tempfile::tempdir;
|
||||
|
||||
/// Helper function to create test data
|
||||
fn create_test_data() -> RecordBatchIterator<
|
||||
std::vec::IntoIter<std::result::Result<RecordBatch, arrow_schema::ArrowError>>,
|
||||
> {
|
||||
fn create_test_data() -> RecordBatch {
|
||||
let schema = Arc::new(Schema::new(vec![
|
||||
Field::new("id", DataType::Int32, false),
|
||||
Field::new("name", DataType::Utf8, false),
|
||||
@@ -371,12 +369,7 @@ mod tests {
|
||||
let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
|
||||
let name_array = StringArray::from(vec!["Alice", "Bob", "Charlie", "David", "Eve"]);
|
||||
|
||||
let batch = RecordBatch::try_new(
|
||||
schema.clone(),
|
||||
vec![Arc::new(id_array), Arc::new(name_array)],
|
||||
)
|
||||
.unwrap();
|
||||
RecordBatchIterator::new(vec![std::result::Result::Ok(batch)].into_iter(), schema)
|
||||
RecordBatch::try_new(schema, vec![Arc::new(id_array), Arc::new(name_array)]).unwrap()
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -618,13 +611,7 @@ mod tests {
|
||||
|
||||
// Test: Overwrite the table
|
||||
let table2 = conn
|
||||
.create_table(
|
||||
"overwrite_test",
|
||||
RecordBatchIterator::new(
|
||||
vec![std::result::Result::Ok(test_data2)].into_iter(),
|
||||
schema,
|
||||
),
|
||||
)
|
||||
.create_table("overwrite_test", test_data2)
|
||||
.namespace(vec!["test_ns".into()])
|
||||
.mode(CreateTableMode::Overwrite)
|
||||
.execute()
|
||||
|
||||
@@ -13,7 +13,7 @@ use lance_datafusion::exec::SessionContextExt;
|
||||
use crate::{
|
||||
arrow::{SendableRecordBatchStream, SendableRecordBatchStreamExt, SimpleRecordBatchStream},
|
||||
connect,
|
||||
database::{CreateTableData, CreateTableRequest, Database},
|
||||
database::{CreateTableRequest, Database},
|
||||
dataloader::permutation::{
|
||||
shuffle::{Shuffler, ShufflerConfig},
|
||||
split::{SplitStrategy, Splitter, SPLIT_ID_COLUMN},
|
||||
@@ -313,10 +313,8 @@ impl PermutationBuilder {
|
||||
}
|
||||
};
|
||||
|
||||
let create_table_request = CreateTableRequest::new(
|
||||
name.to_string(),
|
||||
CreateTableData::StreamingData(streaming_data),
|
||||
);
|
||||
let create_table_request =
|
||||
CreateTableRequest::new(name.to_string(), Box::new(streaming_data));
|
||||
|
||||
let table = database.create_table(create_table_request).await?;
|
||||
|
||||
@@ -347,7 +345,7 @@ mod tests {
|
||||
.col("col_b", lance_datagen::array::step::<Int32Type>())
|
||||
.into_ldb_stream(RowCount::from(100), BatchCount::from(10));
|
||||
let data_table = db
|
||||
.create_table_streaming("base_tbl", initial_data)
|
||||
.create_table("base_tbl", initial_data)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -387,7 +385,7 @@ mod tests {
|
||||
.col("some_value", lance_datagen::array::step::<Int32Type>())
|
||||
.into_ldb_stream(RowCount::from(100), BatchCount::from(10));
|
||||
let data_table = db
|
||||
.create_table_streaming("mytbl", initial_data)
|
||||
.create_table("mytbl", initial_data)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
@@ -39,6 +39,9 @@ pub struct PermutationReader {
|
||||
limit: Option<u64>,
|
||||
available_rows: u64,
|
||||
split: u64,
|
||||
// Cached map of offset to row id for the split
|
||||
#[allow(clippy::type_complexity)]
|
||||
offset_map: Arc<tokio::sync::Mutex<Option<Arc<HashMap<u64, u64>>>>>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for PermutationReader {
|
||||
@@ -72,6 +75,7 @@ impl PermutationReader {
|
||||
limit: None,
|
||||
available_rows: 0,
|
||||
split,
|
||||
offset_map: Arc::new(tokio::sync::Mutex::new(None)),
|
||||
};
|
||||
slf.validate().await?;
|
||||
// Calculate the number of available rows
|
||||
@@ -157,6 +161,7 @@ impl PermutationReader {
|
||||
let available_rows = self.verify_limit_offset(self.limit, Some(offset)).await?;
|
||||
self.offset = Some(offset);
|
||||
self.available_rows = available_rows;
|
||||
self.offset_map = Arc::new(tokio::sync::Mutex::new(None));
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
@@ -164,6 +169,7 @@ impl PermutationReader {
|
||||
let available_rows = self.verify_limit_offset(Some(limit), self.offset).await?;
|
||||
self.available_rows = available_rows;
|
||||
self.limit = Some(limit);
|
||||
self.offset_map = Arc::new(tokio::sync::Mutex::new(None));
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
@@ -180,8 +186,9 @@ impl PermutationReader {
|
||||
base_table: &Arc<dyn BaseTable>,
|
||||
row_ids: RecordBatch,
|
||||
selection: Select,
|
||||
has_row_id: bool,
|
||||
) -> Result<RecordBatch> {
|
||||
let has_row_id = Self::has_row_id(&selection)?;
|
||||
|
||||
let num_rows = row_ids.num_rows();
|
||||
let row_ids = row_ids
|
||||
.column(0)
|
||||
@@ -282,14 +289,13 @@ impl PermutationReader {
|
||||
row_ids: DatasetRecordBatchStream,
|
||||
selection: Select,
|
||||
) -> Result<SendableRecordBatchStream> {
|
||||
let has_row_id = Self::has_row_id(&selection)?;
|
||||
let mut stream = row_ids
|
||||
.map_err(Error::from)
|
||||
.try_filter_map(move |batch| {
|
||||
let selection = selection.clone();
|
||||
let base_table = base_table.clone();
|
||||
async move {
|
||||
Self::load_batch(&base_table, batch, selection, has_row_id)
|
||||
Self::load_batch(&base_table, batch, selection)
|
||||
.await
|
||||
.map(Some)
|
||||
}
|
||||
@@ -397,6 +403,82 @@ impl PermutationReader {
|
||||
Self::row_ids_to_batches(self.base_table.clone(), row_ids, selection).await
|
||||
}
|
||||
|
||||
/// If we are going to use `take` then we load the offset -> row id map once for the split and cache it
|
||||
///
|
||||
/// This method fetches the map with find-or-create semantics.
|
||||
async fn get_offset_map(
|
||||
&self,
|
||||
permutation_table: &Arc<dyn BaseTable>,
|
||||
) -> Result<Arc<HashMap<u64, u64>>> {
|
||||
let mut offset_map_ref = self.offset_map.lock().await;
|
||||
if let Some(offset_map) = &*offset_map_ref {
|
||||
return Ok(offset_map.clone());
|
||||
}
|
||||
let mut offset_map = HashMap::new();
|
||||
let mut row_ids_query = Table::from(permutation_table.clone())
|
||||
.query()
|
||||
.select(Select::Columns(vec![SRC_ROW_ID_COL.to_string()]))
|
||||
.only_if(format!("{} = {}", SPLIT_ID_COLUMN, self.split));
|
||||
if let Some(offset) = self.offset {
|
||||
row_ids_query = row_ids_query.offset(offset as usize);
|
||||
}
|
||||
if let Some(limit) = self.limit {
|
||||
row_ids_query = row_ids_query.limit(limit as usize);
|
||||
}
|
||||
let mut row_ids = row_ids_query.execute().await?;
|
||||
while let Some(batch) = row_ids.try_next().await? {
|
||||
let row_ids = batch
|
||||
.column(0)
|
||||
.as_primitive::<UInt64Type>()
|
||||
.values()
|
||||
.to_vec();
|
||||
for (i, row_id) in row_ids.iter().enumerate() {
|
||||
offset_map.insert(i as u64, *row_id);
|
||||
}
|
||||
}
|
||||
let offset_map = Arc::new(offset_map);
|
||||
*offset_map_ref = Some(offset_map.clone());
|
||||
Ok(offset_map)
|
||||
}
|
||||
|
||||
pub async fn take_offsets(&self, offsets: &[u64], selection: Select) -> Result<RecordBatch> {
|
||||
if let Some(permutation_table) = &self.permutation_table {
|
||||
let offset_map = self.get_offset_map(permutation_table).await?;
|
||||
let row_ids = offsets
|
||||
.iter()
|
||||
.map(|o| offset_map.get(o).copied().expect_ok().map_err(Error::from))
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
let row_ids = RecordBatch::try_new(
|
||||
Arc::new(arrow_schema::Schema::new(vec![arrow_schema::Field::new(
|
||||
"row_id",
|
||||
arrow_schema::DataType::UInt64,
|
||||
false,
|
||||
)])),
|
||||
vec![Arc::new(UInt64Array::from(row_ids))],
|
||||
)?;
|
||||
Self::load_batch(&self.base_table, row_ids, selection).await
|
||||
} else {
|
||||
let table = Table::from(self.base_table.clone());
|
||||
let batches = table
|
||||
.take_offsets(offsets.to_vec())
|
||||
.select(selection.clone())
|
||||
.execute()
|
||||
.await?
|
||||
.try_collect::<Vec<_>>()
|
||||
.await?;
|
||||
if let Some(first_batch) = batches.first() {
|
||||
let schema = first_batch.schema();
|
||||
let batch = arrow::compute::concat_batches(&schema, &batches)?;
|
||||
Ok(batch)
|
||||
} else {
|
||||
Ok(RecordBatch::try_new(
|
||||
self.output_schema(selection).await?,
|
||||
vec![],
|
||||
)?)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn output_schema(&self, selection: Select) -> Result<SchemaRef> {
|
||||
let table = Table::from(self.base_table.clone());
|
||||
table.query().select(selection).output_schema().await
|
||||
@@ -543,4 +625,224 @@ mod tests {
|
||||
check_batch(&mut stream, &row_ids[7..9]).await;
|
||||
assert!(stream.try_next().await.unwrap().is_none());
|
||||
}
|
||||
|
||||
/// Helper to create a base table and permutation table for take_offsets tests.
|
||||
/// Returns (base_table, row_ids_table, shuffled_row_ids).
|
||||
async fn setup_permutation_tables(num_rows: usize) -> (Table, Table, Vec<u64>) {
|
||||
let base_table = lance_datagen::gen_batch()
|
||||
.col("idx", lance_datagen::array::step::<Int32Type>())
|
||||
.col("other_col", lance_datagen::array::step::<UInt64Type>())
|
||||
.into_mem_table("tbl", RowCount::from(num_rows as u64), BatchCount::from(1))
|
||||
.await;
|
||||
|
||||
let mut row_ids = collect_column::<UInt64Type>(&base_table, "_rowid").await;
|
||||
row_ids.shuffle(&mut rand::rng());
|
||||
|
||||
let split_ids = UInt64Array::from_iter_values(std::iter::repeat_n(0u64, row_ids.len()));
|
||||
let permutation_batch = RecordBatch::try_new(
|
||||
Arc::new(Schema::new(vec![
|
||||
Field::new("row_id", DataType::UInt64, false),
|
||||
Field::new(SPLIT_ID_COLUMN, DataType::UInt64, false),
|
||||
])),
|
||||
vec![
|
||||
Arc::new(UInt64Array::from(row_ids.clone())),
|
||||
Arc::new(split_ids),
|
||||
],
|
||||
)
|
||||
.unwrap();
|
||||
let row_ids_table = virtual_table("row_ids", &permutation_batch).await;
|
||||
|
||||
(base_table, row_ids_table, row_ids)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_take_offsets_with_permutation_table() {
|
||||
let (base_table, row_ids_table, row_ids) = setup_permutation_tables(10).await;
|
||||
|
||||
let reader = PermutationReader::try_from_tables(
|
||||
base_table.base_table().clone(),
|
||||
row_ids_table.base_table().clone(),
|
||||
0,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Take specific offsets and verify the returned rows match the permutation order
|
||||
let offsets = vec![0, 2, 4];
|
||||
let batch = reader.take_offsets(&offsets, Select::All).await.unwrap();
|
||||
|
||||
assert_eq!(batch.num_rows(), 3);
|
||||
|
||||
let idx_values = batch
|
||||
.column(0)
|
||||
.as_primitive::<Int32Type>()
|
||||
.values()
|
||||
.to_vec();
|
||||
let expected: Vec<i32> = offsets
|
||||
.iter()
|
||||
.map(|&o| row_ids[o as usize] as i32)
|
||||
.collect();
|
||||
assert_eq!(idx_values, expected);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_take_offsets_preserves_order() {
|
||||
let (base_table, row_ids_table, row_ids) = setup_permutation_tables(10).await;
|
||||
|
||||
let reader = PermutationReader::try_from_tables(
|
||||
base_table.base_table().clone(),
|
||||
row_ids_table.base_table().clone(),
|
||||
0,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Take offsets in reverse order and verify returned rows match that order
|
||||
let offsets = vec![5, 3, 1, 0];
|
||||
let batch = reader.take_offsets(&offsets, Select::All).await.unwrap();
|
||||
|
||||
assert_eq!(batch.num_rows(), 4);
|
||||
|
||||
let idx_values = batch
|
||||
.column(0)
|
||||
.as_primitive::<Int32Type>()
|
||||
.values()
|
||||
.to_vec();
|
||||
let expected: Vec<i32> = offsets
|
||||
.iter()
|
||||
.map(|&o| row_ids[o as usize] as i32)
|
||||
.collect();
|
||||
assert_eq!(idx_values, expected);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_take_offsets_with_column_selection() {
|
||||
let (base_table, row_ids_table, row_ids) = setup_permutation_tables(10).await;
|
||||
|
||||
let reader = PermutationReader::try_from_tables(
|
||||
base_table.base_table().clone(),
|
||||
row_ids_table.base_table().clone(),
|
||||
0,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let offsets = vec![1, 3];
|
||||
let batch = reader
|
||||
.take_offsets(&offsets, Select::Columns(vec!["idx".to_string()]))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(batch.num_rows(), 2);
|
||||
assert_eq!(batch.num_columns(), 1);
|
||||
assert_eq!(batch.schema().field(0).name(), "idx");
|
||||
|
||||
let idx_values = batch
|
||||
.column(0)
|
||||
.as_primitive::<Int32Type>()
|
||||
.values()
|
||||
.to_vec();
|
||||
let expected: Vec<i32> = offsets
|
||||
.iter()
|
||||
.map(|&o| row_ids[o as usize] as i32)
|
||||
.collect();
|
||||
assert_eq!(idx_values, expected);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_take_offsets_invalid_offset() {
|
||||
let (base_table, row_ids_table, _) = setup_permutation_tables(5).await;
|
||||
|
||||
let reader = PermutationReader::try_from_tables(
|
||||
base_table.base_table().clone(),
|
||||
row_ids_table.base_table().clone(),
|
||||
0,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Offset 999 doesn't exist in the offset map
|
||||
let result = reader.take_offsets(&[0, 999], Select::All).await;
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_take_offsets_identity_reader() {
|
||||
let base_table = lance_datagen::gen_batch()
|
||||
.col("idx", lance_datagen::array::step::<Int32Type>())
|
||||
.into_mem_table("tbl", RowCount::from(10), BatchCount::from(1))
|
||||
.await;
|
||||
|
||||
let reader = PermutationReader::identity(base_table.base_table().clone()).await;
|
||||
|
||||
// With no permutation table, take_offsets uses the base table directly
|
||||
let offsets = vec![0, 2, 4, 6];
|
||||
let batch = reader.take_offsets(&offsets, Select::All).await.unwrap();
|
||||
|
||||
assert_eq!(batch.num_rows(), 4);
|
||||
|
||||
let idx_values = batch
|
||||
.column(0)
|
||||
.as_primitive::<Int32Type>()
|
||||
.values()
|
||||
.to_vec();
|
||||
assert_eq!(idx_values, vec![0, 2, 4, 6]);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_take_offsets_caches_offset_map() {
|
||||
let (base_table, row_ids_table, row_ids) = setup_permutation_tables(10).await;
|
||||
|
||||
let reader = PermutationReader::try_from_tables(
|
||||
base_table.base_table().clone(),
|
||||
row_ids_table.base_table().clone(),
|
||||
0,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// First call populates the cache
|
||||
let batch1 = reader.take_offsets(&[0, 1], Select::All).await.unwrap();
|
||||
|
||||
// Second call should use the cached offset map and produce consistent results
|
||||
let batch2 = reader.take_offsets(&[0, 1], Select::All).await.unwrap();
|
||||
|
||||
let values1 = batch1
|
||||
.column(0)
|
||||
.as_primitive::<Int32Type>()
|
||||
.values()
|
||||
.to_vec();
|
||||
let values2 = batch2
|
||||
.column(0)
|
||||
.as_primitive::<Int32Type>()
|
||||
.values()
|
||||
.to_vec();
|
||||
assert_eq!(values1, values2);
|
||||
|
||||
let expected: Vec<i32> = vec![row_ids[0] as i32, row_ids[1] as i32];
|
||||
assert_eq!(values1, expected);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_take_offsets_single_offset() {
|
||||
let (base_table, row_ids_table, row_ids) = setup_permutation_tables(5).await;
|
||||
|
||||
let reader = PermutationReader::try_from_tables(
|
||||
base_table.base_table().clone(),
|
||||
row_ids_table.base_table().clone(),
|
||||
0,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let batch = reader.take_offsets(&[2], Select::All).await.unwrap();
|
||||
|
||||
assert_eq!(batch.num_rows(), 1);
|
||||
let idx_values = batch
|
||||
.column(0)
|
||||
.as_primitive::<Int32Type>()
|
||||
.values()
|
||||
.to_vec();
|
||||
assert_eq!(idx_values, vec![row_ids[2] as i32]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@ use std::{
|
||||
};
|
||||
|
||||
use arrow_array::{Array, RecordBatch, RecordBatchReader};
|
||||
use arrow_schema::{DataType, Field, SchemaBuilder};
|
||||
use arrow_schema::{DataType, Field, SchemaBuilder, SchemaRef};
|
||||
// use async_trait::async_trait;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
@@ -190,6 +190,112 @@ impl<R: RecordBatchReader> WithEmbeddings<R> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute embedding arrays for a batch.
|
||||
///
|
||||
/// When multiple embedding functions are defined, they are computed in parallel using
|
||||
/// scoped threads. For a single embedding function, computation is done inline.
|
||||
fn compute_embedding_arrays(
|
||||
batch: &RecordBatch,
|
||||
embeddings: &[(EmbeddingDefinition, Arc<dyn EmbeddingFunction>)],
|
||||
) -> Result<Vec<Arc<dyn Array>>> {
|
||||
if embeddings.len() == 1 {
|
||||
let (fld, func) = &embeddings[0];
|
||||
let src_column =
|
||||
batch
|
||||
.column_by_name(&fld.source_column)
|
||||
.ok_or_else(|| Error::InvalidInput {
|
||||
message: format!("Source column '{}' not found", fld.source_column),
|
||||
})?;
|
||||
return Ok(vec![func.compute_source_embeddings(src_column.clone())?]);
|
||||
}
|
||||
|
||||
// Parallel path: multiple embeddings
|
||||
std::thread::scope(|s| {
|
||||
let handles: Vec<_> = embeddings
|
||||
.iter()
|
||||
.map(|(fld, func)| {
|
||||
let src_column = batch.column_by_name(&fld.source_column).ok_or_else(|| {
|
||||
Error::InvalidInput {
|
||||
message: format!("Source column '{}' not found", fld.source_column),
|
||||
}
|
||||
})?;
|
||||
|
||||
let handle = s.spawn(move || func.compute_source_embeddings(src_column.clone()));
|
||||
|
||||
Ok(handle)
|
||||
})
|
||||
.collect::<Result<_>>()?;
|
||||
|
||||
handles
|
||||
.into_iter()
|
||||
.map(|h| {
|
||||
h.join().map_err(|e| Error::Runtime {
|
||||
message: format!("Thread panicked during embedding computation: {:?}", e),
|
||||
})?
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
}
|
||||
|
||||
/// Compute the output schema when embeddings are applied to a base schema.
|
||||
///
|
||||
/// This returns the schema with embedding columns appended.
|
||||
pub fn compute_output_schema(
|
||||
base_schema: &SchemaRef,
|
||||
embeddings: &[(EmbeddingDefinition, Arc<dyn EmbeddingFunction>)],
|
||||
) -> Result<SchemaRef> {
|
||||
let mut sb: SchemaBuilder = base_schema.as_ref().into();
|
||||
|
||||
for (ed, func) in embeddings {
|
||||
let src_field = base_schema
|
||||
.field_with_name(&ed.source_column)
|
||||
.map_err(|_| Error::InvalidInput {
|
||||
message: format!("Source column '{}' not found in schema", ed.source_column),
|
||||
})?;
|
||||
|
||||
let field_name = ed
|
||||
.dest_column
|
||||
.clone()
|
||||
.unwrap_or_else(|| format!("{}_embedding", &ed.source_column));
|
||||
|
||||
sb.push(Field::new(
|
||||
field_name,
|
||||
func.dest_type()?.into_owned(),
|
||||
src_field.is_nullable(),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(Arc::new(sb.finish()))
|
||||
}
|
||||
|
||||
/// Compute embeddings for a batch and append as new columns.
|
||||
///
|
||||
/// This function computes embeddings using the provided embedding functions and
|
||||
/// appends them as new columns to the batch.
|
||||
pub fn compute_embeddings_for_batch(
|
||||
batch: RecordBatch,
|
||||
embeddings: &[(EmbeddingDefinition, Arc<dyn EmbeddingFunction>)],
|
||||
) -> Result<RecordBatch> {
|
||||
let embedding_arrays = compute_embedding_arrays(&batch, embeddings)?;
|
||||
|
||||
let mut result = batch;
|
||||
for ((fld, _), embedding) in embeddings.iter().zip(embedding_arrays.iter()) {
|
||||
let dst_field_name = fld
|
||||
.dest_column
|
||||
.clone()
|
||||
.unwrap_or_else(|| format!("{}_embedding", &fld.source_column));
|
||||
|
||||
let dst_field = Field::new(
|
||||
dst_field_name,
|
||||
embedding.data_type().clone(),
|
||||
embedding.nulls().is_some(),
|
||||
);
|
||||
|
||||
result = result.try_with_column(dst_field, embedding.clone())?;
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
impl<R: RecordBatchReader> WithEmbeddings<R> {
|
||||
fn dest_fields(&self) -> Result<Vec<Field>> {
|
||||
let schema = self.inner.schema();
|
||||
@@ -240,48 +346,6 @@ impl<R: RecordBatchReader> WithEmbeddings<R> {
|
||||
column_definitions,
|
||||
})
|
||||
}
|
||||
|
||||
fn compute_embeddings_parallel(&self, batch: &RecordBatch) -> Result<Vec<Arc<dyn Array>>> {
|
||||
if self.embeddings.len() == 1 {
|
||||
let (fld, func) = &self.embeddings[0];
|
||||
let src_column =
|
||||
batch
|
||||
.column_by_name(&fld.source_column)
|
||||
.ok_or_else(|| Error::InvalidInput {
|
||||
message: format!("Source column '{}' not found", fld.source_column),
|
||||
})?;
|
||||
return Ok(vec![func.compute_source_embeddings(src_column.clone())?]);
|
||||
}
|
||||
|
||||
// Parallel path: multiple embeddings
|
||||
std::thread::scope(|s| {
|
||||
let handles: Vec<_> = self
|
||||
.embeddings
|
||||
.iter()
|
||||
.map(|(fld, func)| {
|
||||
let src_column = batch.column_by_name(&fld.source_column).ok_or_else(|| {
|
||||
Error::InvalidInput {
|
||||
message: format!("Source column '{}' not found", fld.source_column),
|
||||
}
|
||||
})?;
|
||||
|
||||
let handle =
|
||||
s.spawn(move || func.compute_source_embeddings(src_column.clone()));
|
||||
|
||||
Ok(handle)
|
||||
})
|
||||
.collect::<Result<_>>()?;
|
||||
|
||||
handles
|
||||
.into_iter()
|
||||
.map(|h| {
|
||||
h.join().map_err(|e| Error::Runtime {
|
||||
message: format!("Thread panicked during embedding computation: {:?}", e),
|
||||
})?
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: RecordBatchReader> Iterator for MaybeEmbedded<R> {
|
||||
@@ -309,37 +373,13 @@ impl<R: RecordBatchReader> Iterator for WithEmbeddings<R> {
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let batch = self.inner.next()?;
|
||||
match batch {
|
||||
Ok(batch) => {
|
||||
let embeddings = match self.compute_embeddings_parallel(&batch) {
|
||||
Ok(emb) => emb,
|
||||
Err(e) => {
|
||||
return Some(Err(arrow_schema::ArrowError::ComputeError(format!(
|
||||
"Error computing embedding: {}",
|
||||
e
|
||||
))))
|
||||
}
|
||||
};
|
||||
|
||||
let mut batch = batch;
|
||||
for ((fld, _), embedding) in self.embeddings.iter().zip(embeddings.iter()) {
|
||||
let dst_field_name = fld
|
||||
.dest_column
|
||||
.clone()
|
||||
.unwrap_or_else(|| format!("{}_embedding", &fld.source_column));
|
||||
|
||||
let dst_field = Field::new(
|
||||
dst_field_name,
|
||||
embedding.data_type().clone(),
|
||||
embedding.nulls().is_some(),
|
||||
);
|
||||
|
||||
match batch.try_with_column(dst_field.clone(), embedding.clone()) {
|
||||
Ok(b) => batch = b,
|
||||
Err(e) => return Some(Err(e)),
|
||||
};
|
||||
}
|
||||
Some(Ok(batch))
|
||||
}
|
||||
Ok(batch) => match compute_embeddings_for_batch(batch, &self.embeddings) {
|
||||
Ok(batch_with_embeddings) => Some(Ok(batch_with_embeddings)),
|
||||
Err(e) => Some(Err(arrow_schema::ArrowError::ComputeError(format!(
|
||||
"Error computing embedding: {}",
|
||||
e
|
||||
)))),
|
||||
},
|
||||
Err(e) => Some(Err(e)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ use std::sync::PoisonError;
|
||||
use arrow_schema::ArrowError;
|
||||
use snafu::Snafu;
|
||||
|
||||
type BoxError = Box<dyn std::error::Error + Send + Sync>;
|
||||
pub(crate) type BoxError = Box<dyn std::error::Error + Send + Sync>;
|
||||
|
||||
#[derive(Debug, Snafu)]
|
||||
#[snafu(visibility(pub(crate)))]
|
||||
@@ -80,6 +80,9 @@ pub enum Error {
|
||||
Arrow { source: ArrowError },
|
||||
#[snafu(display("LanceDBError: not supported: {message}"))]
|
||||
NotSupported { message: String },
|
||||
/// External error pass through from user code.
|
||||
#[snafu(transparent)]
|
||||
External { source: BoxError },
|
||||
#[snafu(whatever, display("{message}"))]
|
||||
Other {
|
||||
message: String,
|
||||
@@ -92,15 +95,26 @@ pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
impl From<ArrowError> for Error {
|
||||
fn from(source: ArrowError) -> Self {
|
||||
Self::Arrow { source }
|
||||
match source {
|
||||
ArrowError::ExternalError(source) => match source.downcast::<Self>() {
|
||||
Ok(e) => *e,
|
||||
Err(source) => Self::External { source },
|
||||
},
|
||||
_ => Self::Arrow { source },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<lance::Error> for Error {
|
||||
fn from(source: lance::Error) -> Self {
|
||||
// TODO: Once Lance is changed to preserve ObjectStore, DataFusion, and Arrow errors, we can
|
||||
// pass those variants through here as well.
|
||||
Self::Lance { source }
|
||||
// Try to unwrap external errors that were wrapped by lance
|
||||
match source {
|
||||
lance::Error::Wrapped { error, .. } => match error.downcast::<Self>() {
|
||||
Ok(e) => *e,
|
||||
Err(source) => Self::External { source },
|
||||
},
|
||||
_ => Self::Lance { source },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -218,8 +218,9 @@ mod test {
|
||||
datagen = datagen.col(Box::<IncrementingInt32>::default());
|
||||
datagen = datagen.col(Box::new(RandomVector::default().named("vector".into())));
|
||||
|
||||
let data: Box<dyn arrow_array::RecordBatchReader + Send> = Box::new(datagen.batch(100));
|
||||
let res = db
|
||||
.create_table("test", Box::new(datagen.batch(100)))
|
||||
.create_table("test", data)
|
||||
.write_options(WriteOptions {
|
||||
lance_write_params: Some(param),
|
||||
})
|
||||
|
||||
@@ -12,10 +12,10 @@ use arrow_schema::Schema;
|
||||
use crate::{Error, Result};
|
||||
|
||||
/// Convert a Arrow IPC file to a batch reader
|
||||
pub fn ipc_file_to_batches(buf: Vec<u8>) -> Result<impl RecordBatchReader> {
|
||||
pub fn ipc_file_to_batches(buf: Vec<u8>) -> Result<Box<dyn RecordBatchReader + Send>> {
|
||||
let buf_reader = Cursor::new(buf);
|
||||
let reader = FileReader::try_new(buf_reader, None)?;
|
||||
Ok(reader)
|
||||
Ok(Box::new(reader))
|
||||
}
|
||||
|
||||
/// Convert record batches to Arrow IPC file
|
||||
|
||||
@@ -39,7 +39,6 @@
|
||||
//! #### Connect to a database.
|
||||
//!
|
||||
//! ```rust
|
||||
//! # use arrow_schema::{Field, Schema};
|
||||
//! # tokio::runtime::Runtime::new().unwrap().block_on(async {
|
||||
//! let db = lancedb::connect("data/sample-lancedb").execute().await.unwrap();
|
||||
//! # });
|
||||
@@ -74,7 +73,10 @@
|
||||
//!
|
||||
//! #### Create a table
|
||||
//!
|
||||
//! To create a Table, you need to provide a [`arrow_schema::Schema`] and a [`arrow_array::RecordBatch`] stream.
|
||||
//! To create a Table, you need to provide an [`arrow_array::RecordBatch`]. The
|
||||
//! schema of the `RecordBatch` determines the schema of the table.
|
||||
//!
|
||||
//! Vector columns should be represented as `FixedSizeList<Float16/Float32>` data type.
|
||||
//!
|
||||
//! ```rust
|
||||
//! # use std::sync::Arc;
|
||||
@@ -85,34 +87,29 @@
|
||||
//! # tokio::runtime::Runtime::new().unwrap().block_on(async {
|
||||
//! # let tmpdir = tempfile::tempdir().unwrap();
|
||||
//! # let db = lancedb::connect(tmpdir.path().to_str().unwrap()).execute().await.unwrap();
|
||||
//! let ndims = 128;
|
||||
//! let schema = Arc::new(Schema::new(vec![
|
||||
//! Field::new("id", DataType::Int32, false),
|
||||
//! Field::new(
|
||||
//! "vector",
|
||||
//! DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float32, true)), 128),
|
||||
//! DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Float32, true)), ndims),
|
||||
//! true,
|
||||
//! ),
|
||||
//! ]));
|
||||
//! // Create a RecordBatch stream.
|
||||
//! let batches = RecordBatchIterator::new(
|
||||
//! vec![RecordBatch::try_new(
|
||||
//! let data = RecordBatch::try_new(
|
||||
//! schema.clone(),
|
||||
//! vec![
|
||||
//! Arc::new(Int32Array::from_iter_values(0..256)),
|
||||
//! Arc::new(
|
||||
//! FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
|
||||
//! (0..256).map(|_| Some(vec![Some(1.0); 128])),
|
||||
//! 128,
|
||||
//! (0..256).map(|_| Some(vec![Some(1.0); ndims as usize])),
|
||||
//! ndims,
|
||||
//! ),
|
||||
//! ),
|
||||
//! ],
|
||||
//! )
|
||||
//! .unwrap()]
|
||||
//! .into_iter()
|
||||
//! .map(Ok),
|
||||
//! schema.clone(),
|
||||
//! );
|
||||
//! db.create_table("my_table", Box::new(batches))
|
||||
//! .unwrap();
|
||||
//! db.create_table("my_table", data)
|
||||
//! .execute()
|
||||
//! .await
|
||||
//! .unwrap();
|
||||
@@ -151,42 +148,18 @@
|
||||
//! #### Open table and search
|
||||
//!
|
||||
//! ```rust
|
||||
//! # use std::sync::Arc;
|
||||
//! # use futures::TryStreamExt;
|
||||
//! # use arrow_schema::{DataType, Schema, Field};
|
||||
//! # use arrow_array::{RecordBatch, RecordBatchIterator};
|
||||
//! # use arrow_array::{FixedSizeListArray, Float32Array, Int32Array, types::Float32Type};
|
||||
//! # use lancedb::query::{ExecutableQuery, QueryBase};
|
||||
//! # tokio::runtime::Runtime::new().unwrap().block_on(async {
|
||||
//! # let tmpdir = tempfile::tempdir().unwrap();
|
||||
//! # let db = lancedb::connect(tmpdir.path().to_str().unwrap()).execute().await.unwrap();
|
||||
//! # let schema = Arc::new(Schema::new(vec![
|
||||
//! # Field::new("id", DataType::Int32, false),
|
||||
//! # Field::new("vector", DataType::FixedSizeList(
|
||||
//! # Arc::new(Field::new("item", DataType::Float32, true)), 128), true),
|
||||
//! # ]));
|
||||
//! # let batches = RecordBatchIterator::new(vec![
|
||||
//! # RecordBatch::try_new(schema.clone(),
|
||||
//! # vec![
|
||||
//! # Arc::new(Int32Array::from_iter_values(0..10)),
|
||||
//! # Arc::new(FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(
|
||||
//! # (0..10).map(|_| Some(vec![Some(1.0); 128])), 128)),
|
||||
//! # ]).unwrap()
|
||||
//! # ].into_iter().map(Ok),
|
||||
//! # schema.clone());
|
||||
//! # db.create_table("my_table", Box::new(batches)).execute().await.unwrap();
|
||||
//! # let table = db.open_table("my_table").execute().await.unwrap();
|
||||
//! # async fn example(table: &lancedb::Table) -> lancedb::Result<()> {
|
||||
//! let results = table
|
||||
//! .query()
|
||||
//! .nearest_to(&[1.0; 128])
|
||||
//! .unwrap()
|
||||
//! .nearest_to(&[1.0; 128])?
|
||||
//! .execute()
|
||||
//! .await
|
||||
//! .unwrap()
|
||||
//! .await?
|
||||
//! .try_collect::<Vec<_>>()
|
||||
//! .await
|
||||
//! .unwrap();
|
||||
//! # });
|
||||
//! .await?;
|
||||
//! # Ok(())
|
||||
//! # }
|
||||
//! ```
|
||||
|
||||
pub mod arrow;
|
||||
|
||||
@@ -1381,7 +1381,7 @@ mod tests {
|
||||
use arrow::{array::downcast_array, compute::concat_batches, datatypes::Int32Type};
|
||||
use arrow_array::{
|
||||
cast::AsArray, types::Float32Type, FixedSizeListArray, Float32Array, Int32Array,
|
||||
RecordBatch, RecordBatchIterator, RecordBatchReader, StringArray,
|
||||
RecordBatch, StringArray,
|
||||
};
|
||||
use arrow_schema::{DataType, Field as ArrowField, Schema as ArrowSchema};
|
||||
use futures::{StreamExt, TryStreamExt};
|
||||
@@ -1402,7 +1402,7 @@ mod tests {
|
||||
let batches = make_test_batches();
|
||||
let conn = connect(uri).execute().await.unwrap();
|
||||
let table = conn
|
||||
.create_table("my_table", Box::new(batches))
|
||||
.create_table("my_table", batches)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1463,7 +1463,7 @@ mod tests {
|
||||
let batches = make_non_empty_batches();
|
||||
let conn = connect(uri).execute().await.unwrap();
|
||||
let table = conn
|
||||
.create_table("my_table", Box::new(batches))
|
||||
.create_table("my_table", batches)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1525,7 +1525,7 @@ mod tests {
|
||||
let batches = make_non_empty_batches();
|
||||
let conn = connect(uri).execute().await.unwrap();
|
||||
let table = conn
|
||||
.create_table("my_table", Box::new(batches))
|
||||
.create_table("my_table", batches)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1578,7 +1578,7 @@ mod tests {
|
||||
let batches = make_non_empty_batches();
|
||||
let conn = connect(uri).execute().await.unwrap();
|
||||
let table = conn
|
||||
.create_table("my_table", Box::new(batches))
|
||||
.create_table("my_table", batches)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1599,13 +1599,13 @@ mod tests {
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
fn make_non_empty_batches() -> impl RecordBatchReader + Send + 'static {
|
||||
fn make_non_empty_batches() -> Box<dyn arrow_array::RecordBatchReader + Send> {
|
||||
let vec = Box::new(RandomVector::new().named("vector".to_string()));
|
||||
let id = Box::new(IncrementingInt32::new().named("id".to_string()));
|
||||
BatchGenerator::new().col(vec).col(id).batch(512)
|
||||
Box::new(BatchGenerator::new().col(vec).col(id).batch(512))
|
||||
}
|
||||
|
||||
fn make_test_batches() -> impl RecordBatchReader + Send + 'static {
|
||||
fn make_test_batches() -> RecordBatch {
|
||||
let dim: usize = 128;
|
||||
let schema = Arc::new(ArrowSchema::new(vec![
|
||||
ArrowField::new("key", DataType::Int32, false),
|
||||
@@ -1619,12 +1619,7 @@ mod tests {
|
||||
),
|
||||
ArrowField::new("uri", DataType::Utf8, true),
|
||||
]));
|
||||
RecordBatchIterator::new(
|
||||
vec![RecordBatch::new_empty(schema.clone())]
|
||||
.into_iter()
|
||||
.map(Ok),
|
||||
schema,
|
||||
)
|
||||
RecordBatch::new_empty(schema)
|
||||
}
|
||||
|
||||
async fn make_test_table(tmp_dir: &tempfile::TempDir) -> Table {
|
||||
@@ -1633,7 +1628,7 @@ mod tests {
|
||||
|
||||
let batches = make_non_empty_batches();
|
||||
let conn = connect(uri).execute().await.unwrap();
|
||||
conn.create_table("my_table", Box::new(batches))
|
||||
conn.create_table("my_table", batches)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap()
|
||||
@@ -1862,10 +1857,8 @@ mod tests {
|
||||
|
||||
let record_batch =
|
||||
RecordBatch::try_new(schema.clone(), vec![Arc::new(text), Arc::new(vector)]).unwrap();
|
||||
let record_batch_iter =
|
||||
RecordBatchIterator::new(vec![record_batch].into_iter().map(Ok), schema.clone());
|
||||
let table = conn
|
||||
.create_table("my_table", record_batch_iter)
|
||||
.create_table("my_table", record_batch)
|
||||
.execute()
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -1949,10 +1942,8 @@ mod tests {
|
||||
],
|
||||
)
|
||||
.unwrap();
|
||||
let record_batch_iter =
|
||||
RecordBatchIterator::new(vec![record_batch].into_iter().map(Ok), schema.clone());
|
||||
let table = conn
|
||||
.create_table("my_table", record_batch_iter)
|
||||
.create_table("my_table", record_batch)
|
||||
.mode(CreateTableMode::Overwrite)
|
||||
.execute()
|
||||
.await
|
||||
@@ -2062,8 +2053,6 @@ mod tests {
|
||||
async fn test_pagination_with_fts() {
|
||||
let db = connect("memory://test").execute().await.unwrap();
|
||||
let data = fts_test_data(400);
|
||||
let schema = data.schema();
|
||||
let data = RecordBatchIterator::new(vec![Ok(data)], schema);
|
||||
let table = db.create_table("test_table", data).execute().await.unwrap();
|
||||
|
||||
table
|
||||
|
||||
@@ -491,7 +491,7 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
|
||||
}
|
||||
|
||||
/// Apply dynamic headers from the header provider if configured
|
||||
async fn apply_dynamic_headers(&self, mut request: Request) -> Result<Request> {
|
||||
pub(crate) async fn apply_dynamic_headers(&self, mut request: Request) -> Result<Request> {
|
||||
if let Some(ref provider) = self.header_provider {
|
||||
let headers = provider.get_headers().await?;
|
||||
let request_headers = request.headers_mut();
|
||||
@@ -555,7 +555,9 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
|
||||
message: "Attempted to retry a request that cannot be cloned".to_string(),
|
||||
})?;
|
||||
let (_, r) = tmp_req.build_split();
|
||||
let mut r = r.unwrap();
|
||||
let mut r = r.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to build request: {}", e),
|
||||
})?;
|
||||
let request_id = self.extract_request_id(&mut r);
|
||||
let mut retry_counter = RetryCounter::new(retry_config, request_id.clone());
|
||||
|
||||
@@ -571,7 +573,9 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
|
||||
}
|
||||
|
||||
let (c, request) = req_builder.build_split();
|
||||
let mut request = request.unwrap();
|
||||
let mut request = request.map_err(|e| Error::Runtime {
|
||||
message: format!("Failed to build request: {}", e),
|
||||
})?;
|
||||
self.set_request_id(&mut request, &request_id.clone());
|
||||
|
||||
// Apply dynamic headers before each retry attempt
|
||||
@@ -621,7 +625,7 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
|
||||
}
|
||||
}
|
||||
|
||||
fn log_request(&self, request: &Request, request_id: &String) {
|
||||
pub(crate) fn log_request(&self, request: &Request, request_id: &String) {
|
||||
if log::log_enabled!(log::Level::Debug) {
|
||||
let content_type = request
|
||||
.headers()
|
||||
|
||||
@@ -4,13 +4,11 @@
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use arrow_array::RecordBatchIterator;
|
||||
use async_trait::async_trait;
|
||||
use http::StatusCode;
|
||||
use lance_io::object_store::StorageOptions;
|
||||
use moka::future::Cache;
|
||||
use reqwest::header::CONTENT_TYPE;
|
||||
use tokio::task::spawn_blocking;
|
||||
|
||||
use lance_namespace::models::{
|
||||
CreateNamespaceRequest, CreateNamespaceResponse, DescribeNamespaceRequest,
|
||||
@@ -19,16 +17,17 @@ use lance_namespace::models::{
|
||||
};
|
||||
|
||||
use crate::database::{
|
||||
CloneTableRequest, CreateTableData, CreateTableMode, CreateTableRequest, Database,
|
||||
DatabaseOptions, OpenTableRequest, ReadConsistency, TableNamesRequest,
|
||||
CloneTableRequest, CreateTableMode, CreateTableRequest, Database, DatabaseOptions,
|
||||
OpenTableRequest, ReadConsistency, TableNamesRequest,
|
||||
};
|
||||
use crate::error::Result;
|
||||
use crate::remote::util::stream_as_body;
|
||||
use crate::table::BaseTable;
|
||||
use crate::Error;
|
||||
|
||||
use super::client::{ClientConfig, HttpSend, RequestResultExt, RestfulLanceDbClient, Sender};
|
||||
use super::table::RemoteTable;
|
||||
use super::util::{batches_to_ipc_bytes, parse_server_version};
|
||||
use super::util::parse_server_version;
|
||||
use super::ARROW_STREAM_CONTENT_TYPE;
|
||||
|
||||
// Request structure for the remote clone table API
|
||||
@@ -436,26 +435,8 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
async fn create_table(&self, request: CreateTableRequest) -> Result<Arc<dyn BaseTable>> {
|
||||
let data = match request.data {
|
||||
CreateTableData::Data(data) => data,
|
||||
CreateTableData::StreamingData(_) => {
|
||||
return Err(Error::NotSupported {
|
||||
message: "Creating a remote table from a streaming source".to_string(),
|
||||
})
|
||||
}
|
||||
CreateTableData::Empty(table_definition) => {
|
||||
let schema = table_definition.schema.clone();
|
||||
Box::new(RecordBatchIterator::new(vec![], schema))
|
||||
}
|
||||
};
|
||||
|
||||
// TODO: https://github.com/lancedb/lancedb/issues/1026
|
||||
// We should accept data from an async source. In the meantime, spawn this as blocking
|
||||
// to make sure we don't block the tokio runtime if the source is slow.
|
||||
let data_buffer = spawn_blocking(move || batches_to_ipc_bytes(data))
|
||||
.await
|
||||
.unwrap()?;
|
||||
async fn create_table(&self, mut request: CreateTableRequest) -> Result<Arc<dyn BaseTable>> {
|
||||
let body = stream_as_body(request.data.scan_as_stream())?;
|
||||
|
||||
let identifier =
|
||||
build_table_identifier(&request.name, &request.namespace, &self.client.id_delimiter);
|
||||
@@ -463,7 +444,7 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
|
||||
.client
|
||||
.post(&format!("/v1/table/{}/create/", identifier))
|
||||
.query(&[("mode", Into::<&str>::into(&request.mode))])
|
||||
.body(data_buffer)
|
||||
.body(body)
|
||||
.header(CONTENT_TYPE, ARROW_STREAM_CONTENT_TYPE);
|
||||
|
||||
let (request_id, rsp) = self.client.send(req).await?;
|
||||
@@ -813,7 +794,7 @@ mod tests {
|
||||
use std::collections::HashMap;
|
||||
use std::sync::{Arc, OnceLock};
|
||||
|
||||
use arrow_array::{Int32Array, RecordBatch, RecordBatchIterator};
|
||||
use arrow_array::{Int32Array, RecordBatch};
|
||||
use arrow_schema::{DataType, Field, Schema};
|
||||
|
||||
use crate::connection::ConnectBuilder;
|
||||
@@ -993,8 +974,7 @@ mod tests {
|
||||
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||
)
|
||||
.unwrap();
|
||||
let reader = RecordBatchIterator::new([Ok(data.clone())], data.schema());
|
||||
let table = conn.create_table("table1", reader).execute().await.unwrap();
|
||||
let table = conn.create_table("table1", data).execute().await.unwrap();
|
||||
assert_eq!(table.name(), "table1");
|
||||
}
|
||||
|
||||
@@ -1011,8 +991,7 @@ mod tests {
|
||||
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||
)
|
||||
.unwrap();
|
||||
let reader = RecordBatchIterator::new([Ok(data.clone())], data.schema());
|
||||
let result = conn.create_table("table1", reader).execute().await;
|
||||
let result = conn.create_table("table1", data).execute().await;
|
||||
assert!(result.is_err());
|
||||
assert!(
|
||||
matches!(result, Err(crate::Error::TableAlreadyExists { name }) if name == "table1")
|
||||
@@ -1045,8 +1024,7 @@ mod tests {
|
||||
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||
)
|
||||
.unwrap();
|
||||
let reader = RecordBatchIterator::new([Ok(data.clone())], data.schema());
|
||||
let mut builder = conn.create_table("table1", reader);
|
||||
let mut builder = conn.create_table("table1", data.clone());
|
||||
if let Some(mode) = mode {
|
||||
builder = builder.mode(mode);
|
||||
}
|
||||
@@ -1071,9 +1049,8 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
let called: Arc<OnceLock<bool>> = Arc::new(OnceLock::new());
|
||||
let reader = RecordBatchIterator::new([Ok(data.clone())], data.schema());
|
||||
let called_in_cb = called.clone();
|
||||
conn.create_table("table1", reader)
|
||||
conn.create_table("table1", data)
|
||||
.mode(CreateTableMode::ExistOk(Box::new(move |b| {
|
||||
called_in_cb.clone().set(true).unwrap();
|
||||
b
|
||||
@@ -1262,9 +1239,8 @@ mod tests {
|
||||
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||
)
|
||||
.unwrap();
|
||||
let reader = RecordBatchIterator::new([Ok(data.clone())], data.schema());
|
||||
let table = conn
|
||||
.create_table("table1", reader)
|
||||
.create_table("table1", data)
|
||||
.namespace(vec!["ns1".to_string()])
|
||||
.execute()
|
||||
.await
|
||||
@@ -1730,10 +1706,8 @@ mod tests {
|
||||
vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
|
||||
)
|
||||
.unwrap();
|
||||
let reader = RecordBatchIterator::new([Ok(data.clone())], schema.clone());
|
||||
|
||||
let table = conn
|
||||
.create_table("test_table", reader)
|
||||
.create_table("test_table", data)
|
||||
.namespace(namespace.clone())
|
||||
.execute()
|
||||
.await;
|
||||
@@ -1806,9 +1780,7 @@ mod tests {
|
||||
let data =
|
||||
RecordBatch::try_new(schema.clone(), vec![Arc::new(Int32Array::from(vec![i]))])
|
||||
.unwrap();
|
||||
let reader = RecordBatchIterator::new([Ok(data.clone())], schema.clone());
|
||||
|
||||
conn.create_table(format!("table{}", i), reader)
|
||||
conn.create_table(format!("table{}", i), data)
|
||||
.namespace(namespace.clone())
|
||||
.execute()
|
||||
.await
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
438
rust/lancedb/src/remote/table/insert.rs
Normal file
438
rust/lancedb/src/remote/table/insert.rs
Normal file
@@ -0,0 +1,438 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
//! DataFusion ExecutionPlan for inserting data into remote LanceDB tables.
|
||||
|
||||
use std::any::Any;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use arrow_array::{ArrayRef, RecordBatch, UInt64Array};
|
||||
use arrow_ipc::CompressionType;
|
||||
use arrow_schema::ArrowError;
|
||||
use datafusion_common::{DataFusionError, Result as DataFusionResult};
|
||||
use datafusion_execution::{SendableRecordBatchStream, TaskContext};
|
||||
use datafusion_physical_expr::EquivalenceProperties;
|
||||
use datafusion_physical_plan::stream::RecordBatchStreamAdapter;
|
||||
use datafusion_physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties};
|
||||
use futures::StreamExt;
|
||||
use http::header::CONTENT_TYPE;
|
||||
|
||||
use crate::remote::client::{HttpSend, RestfulLanceDbClient, Sender};
|
||||
use crate::remote::table::RemoteTable;
|
||||
use crate::remote::ARROW_STREAM_CONTENT_TYPE;
|
||||
use crate::table::datafusion::insert::COUNT_SCHEMA;
|
||||
use crate::table::AddResult;
|
||||
use crate::Error;
|
||||
|
||||
/// ExecutionPlan for inserting data into a remote LanceDB table.
|
||||
///
|
||||
/// This plan:
|
||||
/// 1. Requires single partition (no parallel remote inserts yet)
|
||||
/// 2. Streams data as Arrow IPC to `/v1/table/{id}/insert/` endpoint
|
||||
/// 3. Stores AddResult for retrieval after execution
|
||||
#[derive(Debug)]
|
||||
pub struct RemoteInsertExec<S: HttpSend = Sender> {
|
||||
// Table name as passed to `new`.
table_name: String,
|
||||
// NOTE(review): presumably the `{id}` segment of the insert endpoint mentioned
// above; confirm against the request-building code.
identifier: String,
|
||||
// REST client handle; `S` is the HTTP sender implementation (defaults to `Sender`).
client: RestfulLanceDbClient<S>,
|
||||
// Child plan; presumably the source of the record batches to insert (verify in `execute`).
input: Arc<dyn ExecutionPlan>,
|
||||
// NOTE(review): looks like it selects overwrite instead of append; confirm where it is read.
overwrite: bool,
|
||||
// Plan properties built once in `new` from `COUNT_SCHEMA`: one partition of
// unknown partitioning, final emission, bounded.
properties: PlanProperties,
|
||||
// Shared slot that starts as `None` in `new`; `add_result()` returns a clone of it.
add_result: Arc<Mutex<Option<AddResult>>>,
|
||||
}
|
||||
|
||||
impl<S: HttpSend + 'static> RemoteInsertExec<S> {
|
||||
/// Create a new RemoteInsertExec.
|
||||
pub fn new(
|
||||
table_name: String,
|
||||
identifier: String,
|
||||
client: RestfulLanceDbClient<S>,
|
||||
input: Arc<dyn ExecutionPlan>,
|
||||
overwrite: bool,
|
||||
) -> Self {
|
||||
let schema = COUNT_SCHEMA.clone();
|
||||
let properties = PlanProperties::new(
|
||||
EquivalenceProperties::new(schema),
|
||||
datafusion_physical_plan::Partitioning::UnknownPartitioning(1),
|
||||
datafusion_physical_plan::execution_plan::EmissionType::Final,
|
||||
datafusion_physical_plan::execution_plan::Boundedness::Bounded,
|
||||
);
|
||||
|
||||
Self {
|
||||
table_name,
|
||||
identifier,
|
||||
client,
|
||||
input,
|
||||
overwrite,
|
||||
properties,
|
||||
add_result: Arc::new(Mutex::new(None)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the add result after execution.
|
||||
// TODO: this will be used when we wire this up to Table::add().
|
||||
#[allow(dead_code)]
|
||||
pub fn add_result(&self) -> Option<AddResult> {
|
||||
self.add_result.lock().unwrap().clone()
|
||||
}
|
||||
|
||||
fn stream_as_body(data: SendableRecordBatchStream) -> DataFusionResult<reqwest::Body> {
|
||||
let options = arrow_ipc::writer::IpcWriteOptions::default()
|
||||
.try_with_compression(Some(CompressionType::LZ4_FRAME))?;
|
||||
let writer = arrow_ipc::writer::StreamWriter::try_new_with_options(
|
||||
Vec::new(),
|
||||
&data.schema(),
|
||||
options,
|
||||
)?;
|
||||
|
||||
let stream = futures::stream::try_unfold((data, writer), move |(mut data, mut writer)| {
|
||||
async move {
|
||||
match data.next().await {
|
||||
Some(Ok(batch)) => {
|
||||
writer.write(&batch)?;
|
||||
let buffer = std::mem::take(writer.get_mut());
|
||||
Ok(Some((buffer, (data, writer))))
|
||||
}
|
||||
Some(Err(e)) => Err(e),
|
||||
None => {
|
||||
if let Err(ArrowError::IpcError(_msg)) = writer.finish() {
|
||||
// Will error if already closed.
|
||||
return Ok(None);
|
||||
};
|
||||
let buffer = std::mem::take(writer.get_mut());
|
||||
Ok(Some((buffer, (data, writer))))
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Ok(reqwest::Body::wrap_stream(stream))
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: HttpSend + 'static> DisplayAs for RemoteInsertExec<S> {
|
||||
fn fmt_as(&self, t: DisplayFormatType, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match t {
|
||||
DisplayFormatType::Default | DisplayFormatType::Verbose => {
|
||||
write!(
|
||||
f,
|
||||
"RemoteInsertExec: table={}, overwrite={}",
|
||||
self.table_name, self.overwrite
|
||||
)
|
||||
}
|
||||
DisplayFormatType::TreeRender => {
|
||||
write!(f, "RemoteInsertExec")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: HttpSend + 'static> ExecutionPlan for RemoteInsertExec<S> {
|
||||
fn name(&self) -> &str {
|
||||
Self::static_name()
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn properties(&self) -> &PlanProperties {
|
||||
&self.properties
|
||||
}
|
||||
|
||||
fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
|
||||
vec![&self.input]
|
||||
}
|
||||
|
||||
fn maintains_input_order(&self) -> Vec<bool> {
|
||||
vec![false]
|
||||
}
|
||||
|
||||
fn required_input_distribution(&self) -> Vec<datafusion_physical_plan::Distribution> {
|
||||
// Until we have a separate commit endpoint, we need to do all inserts in a single partition
|
||||
vec![datafusion_physical_plan::Distribution::SinglePartition]
|
||||
}
|
||||
|
||||
fn benefits_from_input_partitioning(&self) -> Vec<bool> {
|
||||
vec![false]
|
||||
}
|
||||
|
||||
fn with_new_children(
|
||||
self: Arc<Self>,
|
||||
children: Vec<Arc<dyn ExecutionPlan>>,
|
||||
) -> DataFusionResult<Arc<dyn ExecutionPlan>> {
|
||||
if children.len() != 1 {
|
||||
return Err(DataFusionError::Internal(
|
||||
"RemoteInsertExec requires exactly one child".to_string(),
|
||||
));
|
||||
}
|
||||
Ok(Arc::new(Self::new(
|
||||
self.table_name.clone(),
|
||||
self.identifier.clone(),
|
||||
self.client.clone(),
|
||||
children[0].clone(),
|
||||
self.overwrite,
|
||||
)))
|
||||
}
|
||||
|
||||
fn execute(
|
||||
&self,
|
||||
partition: usize,
|
||||
context: Arc<TaskContext>,
|
||||
) -> DataFusionResult<SendableRecordBatchStream> {
|
||||
if partition != 0 {
|
||||
return Err(DataFusionError::Internal(
|
||||
"RemoteInsertExec only supports single partition execution".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let input_stream = self.input.execute(0, context)?;
|
||||
let client = self.client.clone();
|
||||
let identifier = self.identifier.clone();
|
||||
let overwrite = self.overwrite;
|
||||
let add_result = self.add_result.clone();
|
||||
let table_name = self.table_name.clone();
|
||||
|
||||
let stream = futures::stream::once(async move {
|
||||
let mut request = client
|
||||
.post(&format!("/v1/table/{}/insert/", identifier))
|
||||
.header(CONTENT_TYPE, ARROW_STREAM_CONTENT_TYPE);
|
||||
|
||||
if overwrite {
|
||||
request = request.query(&[("mode", "overwrite")]);
|
||||
}
|
||||
|
||||
let body = Self::stream_as_body(input_stream)?;
|
||||
let request = request.body(body);
|
||||
|
||||
let (request_id, response) = client
|
||||
.send(request)
|
||||
.await
|
||||
.map_err(|e| DataFusionError::External(Box::new(e)))?;
|
||||
|
||||
let response =
|
||||
RemoteTable::<Sender>::handle_table_not_found(&table_name, response, &request_id)
|
||||
.await
|
||||
.map_err(|e| DataFusionError::External(Box::new(e)))?;
|
||||
|
||||
let response = client
|
||||
.check_response(&request_id, response)
|
||||
.await
|
||||
.map_err(|e| DataFusionError::External(Box::new(e)))?;
|
||||
|
||||
let body_text = response.text().await.map_err(|e| {
|
||||
DataFusionError::External(Box::new(Error::Http {
|
||||
source: Box::new(e),
|
||||
request_id: request_id.clone(),
|
||||
status_code: None,
|
||||
}))
|
||||
})?;
|
||||
|
||||
let parsed_result = if body_text.trim().is_empty() {
|
||||
// Backward compatible with old servers
|
||||
AddResult { version: 0 }
|
||||
} else {
|
||||
serde_json::from_str(&body_text).map_err(|e| {
|
||||
DataFusionError::External(Box::new(Error::Http {
|
||||
source: format!("Failed to parse add response: {}", e).into(),
|
||||
request_id: request_id.clone(),
|
||||
status_code: None,
|
||||
}))
|
||||
})?
|
||||
};
|
||||
|
||||
{
|
||||
let mut res_lock = add_result.lock().map_err(|_| {
|
||||
DataFusionError::Execution("Failed to acquire lock for add_result".to_string())
|
||||
})?;
|
||||
*res_lock = Some(parsed_result);
|
||||
}
|
||||
|
||||
// Return a single batch with count 0 (actual count is tracked in add_result)
|
||||
let count_array: ArrayRef = Arc::new(UInt64Array::from(vec![0u64]));
|
||||
let batch = RecordBatch::try_new(COUNT_SCHEMA.clone(), vec![count_array])?;
|
||||
Ok::<_, DataFusionError>(batch)
|
||||
});
|
||||
|
||||
Ok(Box::pin(RecordBatchStreamAdapter::new(
|
||||
COUNT_SCHEMA.clone(),
|
||||
stream,
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use arrow_array::record_batch;
    use arrow_schema::{DataType, Field, Schema as ArrowSchema};
    use datafusion::prelude::SessionContext;
    use datafusion_catalog::MemTable;
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::Arc;

    use crate::remote::ARROW_STREAM_CONTENT_TYPE;
    use crate::table::datafusion::BaseTableAdapter;
    use crate::Table;

    /// JSON form of the single-column schema served by the mocked describe endpoint.
    fn schema_json() -> &'static str {
        r#"{"fields": [{"name": "id", "type": {"type": "int32"}, "nullable": true}]}"#
    }

    /// Arrow schema of the source tables: one nullable Int32 column named `id`.
    fn id_schema() -> Arc<ArrowSchema> {
        Arc::new(ArrowSchema::new(vec![Field::new(
            "id",
            DataType::Int32,
            true,
        )]))
    }

    /// Build a mocked remote table named `my_table`.
    ///
    /// The handler answers describe requests with the schema, validates insert
    /// requests and counts them in `insert_requests`, and panics on any other path.
    fn mock_remote_table(insert_requests: Arc<AtomicUsize>) -> Table {
        Table::new_with_handler("my_table", move |request| {
            match request.url().path() {
                // Return schema for BaseTableAdapter::try_new
                "/v1/table/my_table/describe/" => http::Response::builder()
                    .status(200)
                    .body(format!(r#"{{"version": 1, "schema": {}}}"#, schema_json()))
                    .unwrap(),
                "/v1/table/my_table/insert/" => {
                    assert_eq!(request.method(), "POST");
                    assert_eq!(
                        request.headers().get("Content-Type").unwrap(),
                        ARROW_STREAM_CONTENT_TYPE
                    );
                    insert_requests.fetch_add(1, Ordering::SeqCst);

                    http::Response::builder()
                        .status(200)
                        .body(r#"{"version": 2}"#.to_string())
                        .unwrap()
                }
                path => panic!("Unexpected request path: {}", path),
            }
        })
    }

    /// Register the mocked remote table in `ctx` as the insert target `my_table`.
    async fn register_insert_target(ctx: &SessionContext, table: &Table) {
        let provider = BaseTableAdapter::try_new(table.base_table().clone())
            .await
            .unwrap();
        ctx.register_table("my_table", Arc::new(provider)).unwrap();
    }

    #[tokio::test]
    async fn test_remote_insert_exec_execute_empty() {
        let request_count = Arc::new(AtomicUsize::new(0));
        let table = mock_remote_table(request_count.clone());

        // Create empty MemTable (no batches)
        let source_table = MemTable::try_new(id_schema(), vec![vec![]]).unwrap();

        let ctx = SessionContext::new();

        // Register the remote table as insert target
        register_insert_target(&ctx, &table).await;

        // Register empty source
        ctx.register_table("empty_source", Arc::new(source_table))
            .unwrap();

        // Execute the INSERT
        let df = ctx
            .sql("INSERT INTO my_table SELECT * FROM empty_source")
            .await
            .unwrap();
        df.collect().await.unwrap();

        // Verify: should have made exactly one HTTP request even with empty input
        assert_eq!(request_count.load(Ordering::SeqCst), 1);
    }

    #[tokio::test]
    async fn test_remote_insert_exec_multi_partition() {
        let request_count = Arc::new(AtomicUsize::new(0));
        let table = mock_remote_table(request_count.clone());

        // Create MemTable with multiple partitions and multiple batches
        let partition_0 = vec![
            record_batch!(("id", Int32, [1, 2])).unwrap(),
            record_batch!(("id", Int32, [3, 4])).unwrap(),
        ];
        let partition_1 = vec![record_batch!(("id", Int32, [5, 6, 7])).unwrap()];
        let partition_2 = vec![record_batch!(("id", Int32, [8])).unwrap()];
        let source_table =
            MemTable::try_new(id_schema(), vec![partition_0, partition_1, partition_2]).unwrap();

        let ctx = SessionContext::new();

        // Register the remote table as insert target
        register_insert_target(&ctx, &table).await;

        // Register multi-partition source
        ctx.register_table("multi_partition_source", Arc::new(source_table))
            .unwrap();

        // Get the physical plan and verify it includes a repartition to 1
        let df = ctx
            .sql("INSERT INTO my_table SELECT * FROM multi_partition_source")
            .await
            .unwrap();
        let plan = df.clone().create_physical_plan().await.unwrap();
        let plan_str = datafusion::physical_plan::displayable(plan.as_ref())
            .indent(true)
            .to_string();

        // The plan should include a CoalescePartitionsExec to merge partitions
        assert!(
            plan_str.contains("CoalescePartitionsExec"),
            "Expected CoalescePartitionsExec in plan:\n{}",
            plan_str
        );

        // Execute the INSERT
        df.collect().await.unwrap();

        // Verify: should have made exactly one HTTP request despite multiple input partitions
        assert_eq!(request_count.load(Ordering::SeqCst), 1);
    }
}
|
||||
@@ -1,29 +1,50 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use std::io::Cursor;
|
||||
|
||||
use arrow_array::RecordBatchReader;
|
||||
use arrow_ipc::CompressionType;
|
||||
use futures::{Stream, StreamExt};
|
||||
use reqwest::Response;
|
||||
|
||||
use crate::Result;
|
||||
use crate::{arrow::SendableRecordBatchStream, Result};
|
||||
|
||||
use super::db::ServerVersion;
|
||||
|
||||
pub fn batches_to_ipc_bytes(batches: impl RecordBatchReader) -> Result<Vec<u8>> {
|
||||
pub fn stream_as_ipc(
|
||||
data: SendableRecordBatchStream,
|
||||
) -> Result<impl Stream<Item = Result<bytes::Bytes>>> {
|
||||
let options = arrow_ipc::writer::IpcWriteOptions::default()
|
||||
.try_with_compression(Some(CompressionType::LZ4_FRAME))?;
|
||||
const WRITE_BUF_SIZE: usize = 4096;
|
||||
let buf = Vec::with_capacity(WRITE_BUF_SIZE);
|
||||
let mut buf = Cursor::new(buf);
|
||||
{
|
||||
let mut writer = arrow_ipc::writer::StreamWriter::try_new(&mut buf, &batches.schema())?;
|
||||
let writer =
|
||||
arrow_ipc::writer::StreamWriter::try_new_with_options(buf, &data.schema(), options)?;
|
||||
let stream = futures::stream::try_unfold(
|
||||
(data, writer, false),
|
||||
move |(mut data, mut writer, finished)| async move {
|
||||
if finished {
|
||||
return Ok(None);
|
||||
}
|
||||
match data.next().await {
|
||||
Some(Ok(batch)) => {
|
||||
writer.write(&batch)?;
|
||||
let buffer = std::mem::take(writer.get_mut());
|
||||
Ok(Some((bytes::Bytes::from(buffer), (data, writer, false))))
|
||||
}
|
||||
Some(Err(e)) => Err(e),
|
||||
None => {
|
||||
writer.finish()?;
|
||||
let buffer = std::mem::take(writer.get_mut());
|
||||
Ok(Some((bytes::Bytes::from(buffer), (data, writer, true))))
|
||||
}
|
||||
}
|
||||
},
|
||||
);
|
||||
Ok(stream)
|
||||
}
|
||||
|
||||
for batch in batches {
|
||||
let batch = batch?;
|
||||
writer.write(&batch)?;
|
||||
}
|
||||
writer.finish()?;
|
||||
}
|
||||
Ok(buf.into_inner())
|
||||
pub fn stream_as_body(data: SendableRecordBatchStream) -> Result<reqwest::Body> {
|
||||
let stream = stream_as_ipc(data)?;
|
||||
Ok(reqwest::Body::wrap_stream(stream))
|
||||
}
|
||||
|
||||
pub fn parse_server_version(req_id: &str, rsp: &Response) -> Result<ServerVersion> {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user