Compare commits

..

11 Commits

Author SHA1 Message Date
Xuanwo
523030aa2f ci: Auto fix CI if bump failed 2025-12-24 20:06:36 +08:00
Prashanth Rao
8ae4f42fbe fix: add to_lance() and to_polars() stub methods for type-checkers (#2876)
Adds `Table.to_lance()` and `Table.to_polars()` methods (non-abstract
methods, defaulting to `NotImplementedError`) so type checkers like
mypy, pyright and ty don’t flag them as unknown attributes on `Table`.
Not making these abstract methods should keep existing remote/other
`Table` implementations instantiable.

This is a non-breaking change to existing functionality; its sole purpose
is to satisfy static type-checkers like mypy, ty and pyright.

<img width="626" height="134" alt="image"
src="https://github.com/user-attachments/assets/f4619bca-a882-432b-bd23-ae8f189ff9e3"
/>
2025-12-18 12:55:07 -05:00
Lance Release
0667fa38d4 Bump version: 0.23.1-beta.0 → 0.23.1-beta.1 2025-12-17 06:59:29 +00:00
Lance Release
30108c0b1f Bump version: 0.26.1-beta.0 → 0.26.1-beta.1 2025-12-17 06:58:52 +00:00
Jack Ye
1628f7e3f3 fix: pass namespace storage options provider into native table (#2873)
Previously, the native table was created with static credentials and could
not auto-refresh them once they expired.
2025-12-16 22:58:04 -08:00
Lance Release
2fd712312f Bump version: 0.23.0 → 0.23.1-beta.0 2025-12-17 03:30:51 +00:00
Lance Release
ba94e69d5d Bump version: 0.26.0 → 0.26.1-beta.0 2025-12-17 03:30:18 +00:00
Jack Ye
9e60fda0ec fix: use post for describe_namespace and allow access to underlying client (#2871)
Issues found during integration tests:
1. describe_namespace should use POST
2. service needs to access the underlying namespace to be able to do
operations like create_empty_table directly, or get credentials in
isolated paths like a remote take
2025-12-16 19:29:27 -08:00
LanceDB Robot
3e0d451e9b chore: update lance dependency to v1.0.1-beta.1 (#2872)
bump Lance crates to v1.0.1-beta.1

Triggering tag:
https://github.com/lance-format/lance/releases/tag/v1.0.1-beta.1
2025-12-16 17:44:32 -08:00
Lance Release
94bdffe13c Bump version: 0.23.0-beta.2 → 0.23.0 2025-12-16 16:58:35 +00:00
Lance Release
b93ea3a388 Bump version: 0.23.0-beta.1 → 0.23.0-beta.2 2025-12-16 16:57:55 +00:00
29 changed files with 1176 additions and 267 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.23.0-beta.1"
current_version = "0.23.1-beta.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

297
.github/workflows/codex-autofix-ci.yml vendored Normal file
View File

@@ -0,0 +1,297 @@
name: Codex Autofix CI
on:
check_suite:
types:
- completed
permissions:
contents: write
pull-requests: write
actions: read
concurrency:
group: codex-autofix-${{ github.event.check_suite.head_branch }}
cancel-in-progress: false
jobs:
autofix:
runs-on: ubuntu-latest
steps:
- name: Resolve PR and failing required checks
id: ctx
env:
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
REPO: ${{ github.repository }}
SHA: ${{ github.event.check_suite.head_sha }}
HEAD_BRANCH: ${{ github.event.check_suite.head_branch }}
MAX_ATTEMPTS: "3"
run: |
set -euo pipefail
echo "Repository: $REPO"
echo "head_branch: $HEAD_BRANCH"
echo "head_sha: $SHA"
if [[ "$HEAD_BRANCH" != codex/update-lance-* ]]; then
echo "Skip: branch '$HEAD_BRANCH' does not match codex/update-lance-*"
echo "needs_fix=false" >> "$GITHUB_OUTPUT"
exit 0
fi
prs_json="$(gh api -H "Accept: application/vnd.github+json" "repos/$REPO/commits/$SHA/pulls")"
pr_json="$(echo "$prs_json" | jq -c '[.[] | select(.state=="open")] | .[0]')"
if [[ -z "$pr_json" || "$pr_json" == "null" ]]; then
echo "Skip: no open PR found for sha $SHA"
echo "needs_fix=false" >> "$GITHUB_OUTPUT"
exit 0
fi
pr_number="$(echo "$pr_json" | jq -r '.number')"
head_ref="$(echo "$pr_json" | jq -r '.head.ref')"
head_repo="$(echo "$pr_json" | jq -r '.head.repo.full_name')"
pr_head_sha="$(echo "$pr_json" | jq -r '.head.sha')"
if [[ "$head_repo" != "$REPO" ]]; then
echo "Skip: cross-repo PR ($head_repo != $REPO)"
echo "needs_fix=false" >> "$GITHUB_OUTPUT"
exit 0
fi
if [[ "$pr_head_sha" != "$SHA" ]]; then
echo "Skip: stale check_suite event (pr head sha $pr_head_sha != event sha $SHA)"
echo "needs_fix=false" >> "$GITHUB_OUTPUT"
exit 0
fi
set +e
checks_json="$(gh pr checks "$pr_number" --required --repo "$REPO" --json name,state,bucket,link,workflow)"
checks_rc=$?
set -e
if [[ "$checks_rc" -eq 8 ]]; then
echo "Skip: required checks still pending"
echo "needs_fix=false" >> "$GITHUB_OUTPUT"
exit 0
fi
if [[ "$checks_rc" -ne 0 ]]; then
echo "Skip: failed to query required checks (exit=$checks_rc)"
echo "needs_fix=false" >> "$GITHUB_OUTPUT"
exit 0
fi
fail_count="$(echo "$checks_json" | jq '[.[] | select(.bucket=="fail")] | length')"
if [[ "$fail_count" -eq 0 ]]; then
echo "Skip: no failing required checks"
echo "needs_fix=false" >> "$GITHUB_OUTPUT"
exit 0
fi
comments_json="$(gh api "repos/$REPO/issues/$pr_number/comments" --paginate)"
stopped_count="$(echo "$comments_json" | jq '[.[].body | select(test("<!-- codex-autofix stopped -->"))] | length')"
if [[ "$stopped_count" -gt 0 ]]; then
echo "Skip: codex-autofix already stopped for this PR"
echo "needs_fix=false" >> "$GITHUB_OUTPUT"
exit 0
fi
prior_attempts="$(echo "$comments_json" | jq '[.[].body | select(test("<!-- codex-autofix attempt:"))] | length')"
attempt="$((prior_attempts + 1))"
if [[ "$attempt" -gt "$MAX_ATTEMPTS" ]]; then
run_url="${GITHUB_SERVER_URL}/${REPO}/actions/runs/${GITHUB_RUN_ID}"
comment_file="$(mktemp /tmp/codex-autofix-comment.XXXXXX.md)"
{
printf '%s\n' '<!-- codex-autofix stopped -->'
printf '<!-- codex-autofix attempt: %s -->\n' "$attempt"
printf 'Codex autofix stopped: reached max attempts (%s).\n\n' "$MAX_ATTEMPTS"
printf -- '- Run: %s\n' "$run_url"
printf -- '- head_sha: `%s`\n' "$SHA"
printf -- '- head_ref: `%s`\n' "$head_ref"
} >"$comment_file"
gh pr comment "$pr_number" --repo "$REPO" --body-file "$comment_file"
echo "needs_fix=false" >> "$GITHUB_OUTPUT"
exit 0
fi
evidence_file="$(mktemp /tmp/codex-autofix-evidence.XXXXXX.txt)"
run_url="${GITHUB_SERVER_URL}/${REPO}/actions/runs/${GITHUB_RUN_ID}"
{
echo "PR: #$pr_number"
echo "head_ref: $head_ref"
echo "head_sha: $SHA"
echo "Run: $run_url"
echo ""
echo "Failing required checks:"
echo "$checks_json" | jq -r '.[] | select(.bucket=="fail") | "- \(.name) (\(.workflow // "unknown")): \(.link // "n/a")"'
echo ""
} > "$evidence_file"
# Append one evidence section per failing required check: a header with the
# check name/workflow/link, then (when the link points at an Actions run) the
# tail of that run's failed-job logs.
while IFS= read -r row; do
  name="$(echo "$row" | jq -r '.name')"
  link="$(echo "$row" | jq -r '.link // empty')"
  workflow="$(echo "$row" | jq -r '.workflow // "unknown"')"
  {
    echo "================================================================================"
    echo "CHECK: $name"
    echo "WORKFLOW: $workflow"
    echo "LINK: ${link:-n/a}"
  } >> "$evidence_file"
  run_id=""
  if [[ -n "$link" ]]; then
    # Extract the numeric run id from ".../actions/runs/<id>/...".
    # The sed program is single-quoted inside a YAML literal block, so no layer
    # strips backslashes: the BRE group must be written \( \) and the
    # back-reference \1. Doubling them (\\( ... \\1) makes sed look for a
    # literal backslash, which never matches and leaves run_id empty.
    run_id="$(echo "$link" | sed -n 's#.*actions/runs/\([0-9][0-9]*\).*#\1#p' | head -n 1 || true)"
  fi
  if [[ -z "$run_id" ]]; then
    echo "LOGS: unavailable (no run id found in link)" >> "$evidence_file"
    echo "" >> "$evidence_file"
    continue
  fi
  echo "LOGS: gh run view $run_id --log-failed (tail -c 20000)" >> "$evidence_file"
  # Log retrieval is best-effort: errexit is disabled so a failing `gh run view`
  # (combined with pipefail) does not abort the whole step.
  set +e
  gh run view "$run_id" --repo "$REPO" --log-failed 2>/dev/null | tail -c 20000 >> "$evidence_file"
  echo "" >> "$evidence_file"
  set -e
done < <(echo "$checks_json" | jq -c '.[] | select(.bucket=="fail")')
comment_file="$(mktemp /tmp/codex-autofix-comment.XXXXXX.md)"
{
printf '<!-- codex-autofix attempt: %s -->\n' "$attempt"
printf 'Starting Codex autofix attempt %s.\n\n' "$attempt"
printf -- '- Run: %s\n' "$run_url"
printf -- '- head_sha: `%s`\n' "$SHA"
printf -- '- head_ref: `%s`\n' "$head_ref"
printf -- '- Failing required checks: %s\n' "$fail_count"
} >"$comment_file"
gh pr comment "$pr_number" --repo "$REPO" --body-file "$comment_file"
{
echo "needs_fix=true"
echo "pr_number=$pr_number"
echo "head_ref=$head_ref"
echo "attempt=$attempt"
echo "evidence_file=$evidence_file"
} >> "$GITHUB_OUTPUT"
- name: Checkout PR branch
if: steps.ctx.outputs.needs_fix == 'true'
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ steps.ctx.outputs.head_ref }}
token: ${{ secrets.ROBOT_TOKEN }}
persist-credentials: true
- name: Configure git
if: steps.ctx.outputs.needs_fix == 'true'
run: |
git config user.name "lancedb automation"
git config user.email "robot@lancedb.com"
- name: Set up Node.js
if: steps.ctx.outputs.needs_fix == 'true'
uses: actions/setup-node@v4
with:
node-version: 20
- name: Install Codex CLI
if: steps.ctx.outputs.needs_fix == 'true'
run: npm install -g @openai/codex
- name: Install Rust toolchain
if: steps.ctx.outputs.needs_fix == 'true'
uses: dtolnay/rust-toolchain@stable
with:
toolchain: stable
components: clippy, rustfmt
- name: Install system dependencies
if: steps.ctx.outputs.needs_fix == 'true'
run: |
sudo apt-get update
sudo apt-get install -y protobuf-compiler libssl-dev
- name: Run Codex to fix failing CI
if: steps.ctx.outputs.needs_fix == 'true'
env:
PR_NUMBER: ${{ steps.ctx.outputs.pr_number }}
HEAD_REF: ${{ steps.ctx.outputs.head_ref }}
ATTEMPT: ${{ steps.ctx.outputs.attempt }}
EVIDENCE_FILE: ${{ steps.ctx.outputs.evidence_file }}
GITHUB_TOKEN: ${{ secrets.ROBOT_TOKEN }}
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
OPENAI_API_KEY: ${{ secrets.CODEX_TOKEN }}
run: |
set -euo pipefail
prompt_file="/tmp/codex-prompt.txt"
{
printf 'You are running inside the lancedb repository on a GitHub Actions runner.\n'
printf 'Your task is to fix failing required CI checks for pull request #%s on branch %s.\n\n' "$PR_NUMBER" "$HEAD_REF"
printf 'Goal:\n'
printf -- '- Make the smallest change necessary so that all required checks pass.\n\n'
printf 'Evidence (generated from GitHub checks and logs):\n'
printf '---\n'
cat "${EVIDENCE_FILE}"
printf '\n---\n\n'
printf 'Follow these steps exactly:\n'
printf '1. Identify the root cause from the evidence and repository state.\n'
printf '2. Make changes to fix the failures.\n'
printf '3. Run the relevant local commands that correspond to the failing checks until they succeed.\n'
printf ' If unsure, start with:\n'
printf ' - cargo fmt --all -- --check\n'
printf ' - cargo clippy --profile ci --workspace --tests --all-features -- -D warnings\n'
printf '4. Ensure the repository is clean except for intentional changes (git status --short, git diff).\n'
printf '5. Create a commit with message "fix: codex autofix (attempt %s)".\n' "$ATTEMPT"
printf '6. Push to origin branch "%s" (use --force-with-lease only if required).\n' "$HEAD_REF"
printf '7. Print the commands you ran and their results, plus git status --short and git log -1 --oneline.\n\n'
printf 'Constraints:\n'
printf -- '- Do not create a new pull request.\n'
printf -- '- Do not merge.\n'
printf -- '- Avoid modifying GitHub workflow files unless strictly required to fix CI for this PR.\n'
} >"$prompt_file"
printenv OPENAI_API_KEY | codex login --with-api-key
codex --config shell_environment_policy.ignore_default_excludes=true exec --dangerously-bypass-approvals-and-sandbox "$(cat "$prompt_file")"
- name: Ensure branch is pushed
if: steps.ctx.outputs.needs_fix == 'true'
env:
HEAD_REF: ${{ steps.ctx.outputs.head_ref }}
run: |
set -euo pipefail
# Commit any tracked changes that are still uncommitted, then push the local
# HEAD to the PR branch.
# NOTE(review): `git diff` / `git diff --cached` only look at tracked files, so
# brand-new untracked files alone do not reach the `git add -A` branch —
# confirm that is intended.
if git diff --quiet && git diff --cached --quiet; then
echo "Working tree clean."
else
git add -A
# `|| true`: a failing commit does not abort the step.
git commit -m "fix: codex autofix (post-run)" || true
fi
git push origin "HEAD:${HEAD_REF}" --force-with-lease
- name: Comment result
if: steps.ctx.outputs.needs_fix == 'true'
env:
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
REPO: ${{ github.repository }}
PR_NUMBER: ${{ steps.ctx.outputs.pr_number }}
ATTEMPT: ${{ steps.ctx.outputs.attempt }}
run: |
set -euo pipefail
run_url="${GITHUB_SERVER_URL}/${REPO}/actions/runs/${GITHUB_RUN_ID}"
sha="$(git rev-parse HEAD)"
summary="$(git log -1 --oneline || true)"
status="$(git status --short || true)"
comment_file="$(mktemp /tmp/codex-autofix-comment.XXXXXX.md)"
{
printf 'Codex autofix attempt %s finished.\n\n' "$ATTEMPT"
printf -- '- Run: %s\n' "$run_url"
printf -- '- head_sha: `%s`\n' "$sha"
printf -- '- Last commit: %s\n\n' "$summary"
printf '```\n%s\n```\n' "$status"
} >"$comment_file"
gh pr comment "$PR_NUMBER" --repo "$REPO" --body-file "$comment_file"

172
Cargo.lock generated
View File

@@ -1041,6 +1041,61 @@ dependencies = [
"tracing",
]
[[package]]
name = "axum"
version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
dependencies = [
"async-trait",
"axum-core",
"bytes",
"futures-util",
"http 1.3.1",
"http-body 1.0.1",
"http-body-util",
"hyper 1.7.0",
"hyper-util",
"itoa",
"matchit",
"memchr",
"mime",
"percent-encoding",
"pin-project-lite",
"rustversion",
"serde",
"serde_json",
"serde_path_to_error",
"serde_urlencoded",
"sync_wrapper",
"tokio",
"tower",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "axum-core"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
dependencies = [
"async-trait",
"bytes",
"futures-util",
"http 1.3.1",
"http-body 1.0.1",
"http-body-util",
"mime",
"pin-project-lite",
"rustversion",
"sync_wrapper",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "backoff"
version = "0.4.0"
@@ -3086,8 +3141,8 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "fsst"
version = "1.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0#a0979691926f57afd5d3ac90bf6e5bb11188c0c3"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow-array",
"rand 0.9.2",
@@ -3930,6 +3985,7 @@ dependencies = [
"http 1.3.1",
"http-body 1.0.1",
"httparse",
"httpdate",
"itoa",
"pin-project-lite",
"pin-utils",
@@ -4422,8 +4478,8 @@ dependencies = [
[[package]]
name = "lance"
version = "1.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0#a0979691926f57afd5d3ac90bf6e5bb11188c0c3"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow",
"arrow-arith",
@@ -4488,8 +4544,8 @@ dependencies = [
[[package]]
name = "lance-arrow"
version = "1.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0#a0979691926f57afd5d3ac90bf6e5bb11188c0c3"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4507,8 +4563,8 @@ dependencies = [
[[package]]
name = "lance-bitpacking"
version = "1.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0#a0979691926f57afd5d3ac90bf6e5bb11188c0c3"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrayref",
"paste",
@@ -4517,8 +4573,8 @@ dependencies = [
[[package]]
name = "lance-core"
version = "1.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0#a0979691926f57afd5d3ac90bf6e5bb11188c0c3"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4554,8 +4610,8 @@ dependencies = [
[[package]]
name = "lance-datafusion"
version = "1.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0#a0979691926f57afd5d3ac90bf6e5bb11188c0c3"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow",
"arrow-array",
@@ -4585,8 +4641,8 @@ dependencies = [
[[package]]
name = "lance-datagen"
version = "1.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0#a0979691926f57afd5d3ac90bf6e5bb11188c0c3"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow",
"arrow-array",
@@ -4603,8 +4659,8 @@ dependencies = [
[[package]]
name = "lance-encoding"
version = "1.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0#a0979691926f57afd5d3ac90bf6e5bb11188c0c3"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4641,8 +4697,8 @@ dependencies = [
[[package]]
name = "lance-file"
version = "1.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0#a0979691926f57afd5d3ac90bf6e5bb11188c0c3"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -4674,8 +4730,8 @@ dependencies = [
[[package]]
name = "lance-geo"
version = "1.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0#a0979691926f57afd5d3ac90bf6e5bb11188c0c3"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"datafusion",
"geo-types",
@@ -4686,8 +4742,8 @@ dependencies = [
[[package]]
name = "lance-index"
version = "1.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0#a0979691926f57afd5d3ac90bf6e5bb11188c0c3"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow",
"arrow-arith",
@@ -4748,8 +4804,8 @@ dependencies = [
[[package]]
name = "lance-io"
version = "1.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0#a0979691926f57afd5d3ac90bf6e5bb11188c0c3"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow",
"arrow-arith",
@@ -4789,8 +4845,8 @@ dependencies = [
[[package]]
name = "lance-linalg"
version = "1.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0#a0979691926f57afd5d3ac90bf6e5bb11188c0c3"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -4806,8 +4862,8 @@ dependencies = [
[[package]]
name = "lance-namespace"
version = "1.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0#a0979691926f57afd5d3ac90bf6e5bb11188c0c3"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow",
"async-trait",
@@ -4819,13 +4875,14 @@ dependencies = [
[[package]]
name = "lance-namespace-impls"
version = "1.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0#a0979691926f57afd5d3ac90bf6e5bb11188c0c3"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow",
"arrow-ipc",
"arrow-schema",
"async-trait",
"axum",
"bytes",
"futures",
"lance",
@@ -4837,9 +4894,12 @@ dependencies = [
"object_store",
"rand 0.9.2",
"reqwest",
"serde",
"serde_json",
"snafu",
"tokio",
"tower",
"tower-http 0.5.2",
"url",
]
@@ -4858,8 +4918,8 @@ dependencies = [
[[package]]
name = "lance-table"
version = "1.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0#a0979691926f57afd5d3ac90bf6e5bb11188c0c3"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow",
"arrow-array",
@@ -4898,8 +4958,8 @@ dependencies = [
[[package]]
name = "lance-testing"
version = "1.0.0"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.0#a0979691926f57afd5d3ac90bf6e5bb11188c0c3"
version = "1.0.1-beta.1"
source = "git+https://github.com/lance-format/lance.git?tag=v1.0.1-beta.1#9e65b2a9ca17b1c81a33183e5660f88d1b3b9ce0"
dependencies = [
"arrow-array",
"arrow-schema",
@@ -4910,7 +4970,7 @@ dependencies = [
[[package]]
name = "lancedb"
version = "0.23.0-beta.1"
version = "0.23.1-beta.1"
dependencies = [
"ahash",
"anyhow",
@@ -4989,7 +5049,7 @@ dependencies = [
[[package]]
name = "lancedb-nodejs"
version = "0.23.0-beta.1"
version = "0.23.1-beta.1"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -5009,7 +5069,7 @@ dependencies = [
[[package]]
name = "lancedb-python"
version = "0.26.0-beta.1"
version = "0.26.1-beta.1"
dependencies = [
"arrow",
"async-trait",
@@ -5277,6 +5337,12 @@ dependencies = [
"regex-automata",
]
[[package]]
name = "matchit"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
[[package]]
name = "matrixmultiply"
version = "0.3.10"
@@ -7265,7 +7331,7 @@ dependencies = [
"tokio-rustls 0.26.4",
"tokio-util",
"tower",
"tower-http",
"tower-http 0.6.6",
"tower-service",
"url",
"wasm-bindgen",
@@ -7784,6 +7850,17 @@ dependencies = [
"serde_core",
]
[[package]]
name = "serde_path_to_error"
version = "0.1.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457"
dependencies = [
"itoa",
"serde",
"serde_core",
]
[[package]]
name = "serde_plain"
version = "1.0.2"
@@ -8819,6 +8896,24 @@ dependencies = [
"tokio",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "tower-http"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5"
dependencies = [
"bitflags 2.9.4",
"bytes",
"http 1.3.1",
"http-body 1.0.1",
"http-body-util",
"pin-project-lite",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
@@ -8857,6 +8952,7 @@ version = "0.1.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0"
dependencies = [
"log",
"pin-project-lite",
"tracing-attributes",
"tracing-core",

View File

@@ -15,20 +15,20 @@ categories = ["database-implementations"]
rust-version = "1.78.0"
[workspace.dependencies]
lance = { "version" = "=1.0.0", default-features = false, "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=1.0.0", "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=1.0.0", "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=1.0.0", "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=1.0.0", default-features = false, "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=1.0.0", "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=1.0.0", "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=1.0.0", "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=1.0.0", default-features = false, "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=1.0.0", "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=1.0.0", "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=1.0.0", "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=1.0.0", "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=1.0.0", "tag" = "v1.0.0", "git" = "https://github.com/lance-format/lance.git" }
lance = { "version" = "=1.0.1-beta.1", default-features = false, "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-core = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-datagen = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-file = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-io = { "version" = "=1.0.1-beta.1", default-features = false, "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-index = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-linalg = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-namespace-impls = { "version" = "=1.0.1-beta.1", default-features = false, "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-table = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-testing = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-datafusion = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-encoding = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
lance-arrow = { "version" = "=1.0.1-beta.1", "tag" = "v1.0.1-beta.1", "git" = "https://github.com/lance-format/lance.git" }
ahash = "0.8"
# Note that this one does not include pyarrow
arrow = { version = "56.2", optional = false }

View File

@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
<dependency>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-core</artifactId>
<version>0.23.0-beta.1</version>
<version>0.23.1-beta.1</version>
</dependency>
```

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.23.0-beta.1</version>
<version>0.23.1-beta.1</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.23.0-beta.1</version>
<version>0.23.1-beta.1</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.23.0-beta.1"
version = "0.23.1-beta.1"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.23.0-beta.1",
"version": "0.23.1-beta.1",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.23.0-beta.1",
"version": "0.23.1-beta.1",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.23.0-beta.1",
"version": "0.23.1-beta.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.23.0-beta.1",
"version": "0.23.1-beta.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.23.0-beta.1",
"version": "0.23.1-beta.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.23.0-beta.1",
"version": "0.23.1-beta.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.23.0-beta.1",
"version": "0.23.1-beta.1",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.23.0-beta.1",
"version": "0.23.1-beta.1",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.23.0-beta.1",
"version": "0.23.1-beta.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.23.0-beta.1",
"version": "0.23.1-beta.1",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.23.0-beta.1",
"version": "0.23.1-beta.1",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.26.0"
current_version = "0.26.1-beta.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.26.0"
version = "0.26.1-beta.1"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -684,6 +684,24 @@ class Table(ABC):
"""
raise NotImplementedError
def to_lance(self, **kwargs) -> lance.LanceDataset:
"""Return the table as a lance.LanceDataset.
This base implementation is a non-abstract stub: it accepts arbitrary
keyword arguments, ignores them, and always raises. Concrete table
implementations are expected to override it where supported.
Returns
-------
lance.LanceDataset
Raises
------
NotImplementedError
Always, in this base implementation.
"""
raise NotImplementedError
def to_polars(self, **kwargs) -> "pl.DataFrame":
"""Return the table as a polars.DataFrame.
This base implementation is a non-abstract stub: it accepts arbitrary
keyword arguments, ignores them, and always raises. Concrete table
implementations are expected to override it where supported.
Returns
-------
polars.DataFrame
Raises
------
NotImplementedError
Always, in this base implementation.
"""
raise NotImplementedError
def create_index(
self,
metric="l2",

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.23.0-beta.1"
version = "0.23.1-beta.1"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true
@@ -110,7 +110,7 @@ oss = ["lance/oss", "lance-io/oss", "lance-namespace-impls/dir-oss"]
gcs = ["lance/gcp", "lance-io/gcp", "lance-namespace-impls/dir-gcp"]
azure = ["lance/azure", "lance-io/azure", "lance-namespace-impls/dir-azure"]
dynamodb = ["lance/dynamodb", "aws"]
remote = ["dep:reqwest", "dep:http", "lance-namespace-impls/rest"]
remote = ["dep:reqwest", "dep:http", "lance-namespace-impls/rest", "lance-namespace-impls/rest-adapter"]
fp16kernels = ["lance-linalg/fp16kernels"]
s3-test = []
bedrock = ["dep:aws-sdk-bedrockruntime"]

View File

@@ -804,6 +804,14 @@ impl Connection {
self.internal.describe_namespace(request).await
}
/// Get the namespace client equivalent to the database behind this connection.
///
/// The call is forwarded unchanged to the connection's internal database:
/// - For LanceNamespaceDatabase, it is the underlying LanceNamespace.
/// - For ListingDatabase, it is the equivalent DirectoryNamespace.
/// - For RemoteDatabase, it is the equivalent RestNamespace.
///
/// # Errors
///
/// Returns whatever error the internal database's `namespace_client` reports.
pub async fn namespace_client(&self) -> Result<Arc<dyn lance_namespace::LanceNamespace>> {
self.internal.namespace_client().await
}
/// List tables with pagination support
pub async fn list_tables(&self, request: ListTablesRequest) -> Result<ListTablesResponse> {
self.internal.list_tables(request).await

View File

@@ -296,4 +296,10 @@ pub trait Database:
/// Drop all tables in the database
async fn drop_all_tables(&self, namespace: &[String]) -> Result<()>;
fn as_any(&self) -> &dyn std::any::Any;
/// Get the namespace client equivalent to this database.
///
/// - For LanceNamespaceDatabase, it is the underlying LanceNamespace.
/// - For ListingDatabase, it is the equivalent DirectoryNamespace.
/// - For RemoteDatabase, it is the equivalent RestNamespace.
///
/// This method has no default body, so every `Database` implementation
/// must provide it.
async fn namespace_client(&self) -> Result<Arc<dyn LanceNamespace>>;
}

View File

@@ -1043,6 +1043,24 @@ impl Database for ListingDatabase {
fn as_any(&self) -> &dyn std::any::Any {
self
}
async fn namespace_client(&self) -> Result<Arc<dyn lance_namespace::LanceNamespace>> {
// Create a DirectoryNamespace pointing to the same root with the same storage options
let mut builder = lance_namespace_impls::DirectoryNamespaceBuilder::new(&self.uri);
// Add storage options
if !self.storage_options.is_empty() {
builder = builder.storage_options(self.storage_options.clone());
}
// Use the same session
builder = builder.session(self.session.clone());
let namespace = builder.build().await.map_err(|e| Error::Runtime {
message: format!("Failed to create namespace client: {}", e),
})?;
Ok(Arc::new(namespace) as Arc<dyn lance_namespace::LanceNamespace>)
}
}
#[cfg(test)]
@@ -2027,4 +2045,63 @@ mod tests {
let db_options = ListingDatabaseOptions::parse_from_map(&options).unwrap();
assert_eq!(db_options.new_table_config.enable_stable_row_ids, None);
}
/// The namespace client of a listing database must see the tables created through it.
#[tokio::test]
async fn test_namespace_client() {
    let (_tempdir, db) = setup_database().await;

    // Seed the database with two empty tables that share one schema
    let schema = Arc::new(Schema::new(vec![
        Field::new("id", DataType::Int32, false),
        Field::new("name", DataType::Utf8, false),
    ]));
    for table_name in ["table1", "table2"] {
        let request = CreateTableRequest {
            name: table_name.to_string(),
            namespace: vec![],
            data: CreateTableData::Empty(TableDefinition::new_from_schema(schema.clone())),
            mode: CreateTableMode::Create,
            write_options: Default::default(),
            location: None,
            namespace_client: None,
        };
        db.create_table(request).await.unwrap();
    }

    // Obtain the namespace client
    let client_result = db.namespace_client().await;
    assert!(client_result.is_ok());
    let client = client_result.unwrap();

    // List the root namespace (empty id) through the namespace client
    let root_request = lance_namespace::models::ListTablesRequest {
        id: Some(vec![]),
        ..Default::default()
    };
    let listing = client.list_tables(root_request).await;
    assert!(
        listing.is_ok(),
        "list_tables failed: {:?}",
        listing.err()
    );

    let tables = listing.unwrap().tables;
    assert_eq!(tables.len(), 2);
    for expected in ["table1", "table2"] {
        assert!(tables.contains(&expected.to_string()));
    }
}
}

View File

@@ -7,7 +7,6 @@ use std::collections::HashMap;
use std::sync::Arc;
use async_trait::async_trait;
use lance_io::object_store::{LanceNamespaceStorageOptionsProvider, StorageOptionsProvider};
use lance_namespace::{
models::{
CreateEmptyTableRequest, CreateNamespaceRequest, CreateNamespaceResponse,
@@ -19,13 +18,13 @@ use lance_namespace::{
};
use lance_namespace_impls::ConnectBuilder;
use crate::connection::ConnectRequest;
use crate::database::ReadConsistency;
use crate::error::{Error, Result};
use crate::table::NativeTable;
use super::{
listing::ListingDatabase, BaseTable, CloneTableRequest, CreateTableMode,
CreateTableRequest as DbCreateTableRequest, Database, OpenTableRequest, TableNamesRequest,
BaseTable, CloneTableRequest, CreateTableMode, CreateTableRequest as DbCreateTableRequest,
Database, OpenTableRequest, TableNamesRequest,
};
/// A database implementation that uses lance-namespace for table management
@@ -90,51 +89,6 @@ impl std::fmt::Display for LanceNamespaceDatabase {
}
}
impl LanceNamespaceDatabase {
    /// Connect a temporary [`ListingDatabase`] at `location`.
    ///
    /// Storage options are layered so that later layers override earlier ones:
    /// connection options, then user-provided options, then the options returned by
    /// the namespace. A namespace-backed storage options provider is attached only
    /// when the namespace returned storage options (user options alone do not count).
    async fn create_listing_database(
        &self,
        location: &str,
        table_id: Vec<String>,
        user_storage_options: Option<&HashMap<String, String>>,
        response_storage_options: Option<&HashMap<String, String>>,
    ) -> Result<ListingDatabase> {
        // Layer the options: connection < user < namespace
        let mut options = self.storage_options.clone();
        for layer in [user_storage_options, response_storage_options]
            .iter()
            .flatten()
        {
            options.extend(layer.iter().map(|(k, v)| (k.clone(), v.clone())));
        }

        let connect_request = ConnectRequest {
            uri: location.to_string(),
            #[cfg(feature = "remote")]
            client_config: Default::default(),
            options,
            read_consistency_interval: self.read_consistency_interval,
            session: self.session.clone(),
        };
        let mut listing_db = ListingDatabase::connect_with_options(&connect_request).await?;

        // A provider is only useful when the namespace vends storage options itself
        if response_storage_options.is_some() {
            let provider: Arc<dyn StorageOptionsProvider> = Arc::new(
                LanceNamespaceStorageOptionsProvider::new(self.namespace.clone(), table_id),
            );
            listing_db.storage_options_provider = Some(provider);
        }

        Ok(listing_db)
    }
}
#[async_trait]
impl Database for LanceNamespaceDatabase {
fn uri(&self) -> &str {
@@ -195,14 +149,6 @@ impl Database for LanceNamespaceDatabase {
}
async fn create_table(&self, request: DbCreateTableRequest) -> Result<Arc<dyn BaseTable>> {
// Extract user-provided storage options from request
let user_storage_options = request
.write_options
.lance_write_params
.as_ref()
.and_then(|lwp| lwp.store_params.as_ref())
.and_then(|sp| sp.storage_options.as_ref());
let mut table_id = request.namespace.clone();
table_id.push(request.name.clone());
let describe_request = DescribeTableRequest {
@@ -235,34 +181,20 @@ impl Database for LanceNamespaceDatabase {
}
}
CreateTableMode::ExistOk(_) => {
if let Ok(response) = describe_result {
let location = response.location.ok_or_else(|| Error::Runtime {
message: "Table location is missing from namespace response".to_string(),
})?;
if describe_result.is_ok() {
let native_table = NativeTable::open_from_namespace(
self.namespace.clone(),
&request.name,
request.namespace.clone(),
None,
None,
self.read_consistency_interval,
self.server_side_query_enabled,
self.session.clone(),
)
.await?;
let listing_db = self
.create_listing_database(
&location,
table_id.clone(),
user_storage_options,
response.storage_options.as_ref(),
)
.await?;
let namespace_client = self
.server_side_query_enabled
.then(|| self.namespace.clone());
return listing_db
.open_table(OpenTableRequest {
name: request.name.clone(),
namespace: request.namespace.clone(),
index_cache_size: None,
lance_read_params: None,
location: Some(location),
namespace_client,
})
.await;
return Ok(Arc::new(native_table));
}
}
}
@@ -294,82 +226,37 @@ impl Database for LanceNamespaceDatabase {
message: "Table location is missing from create_empty_table response".to_string(),
})?;
let listing_db = self
.create_listing_database(
&location,
table_id.clone(),
user_storage_options,
create_empty_response.storage_options.as_ref(),
)
.await?;
let native_table = NativeTable::create_from_namespace(
self.namespace.clone(),
&location,
&request.name,
request.namespace.clone(),
request.data,
None, // write_store_wrapper not used for namespace connections
request.write_options.lance_write_params,
self.read_consistency_interval,
self.server_side_query_enabled,
self.session.clone(),
)
.await?;
let namespace_client = self
.server_side_query_enabled
.then(|| self.namespace.clone());
let create_request = DbCreateTableRequest {
name: request.name,
namespace: request.namespace,
data: request.data,
mode: request.mode,
write_options: request.write_options,
location: Some(location),
namespace_client,
};
listing_db.create_table(create_request).await
Ok(Arc::new(native_table))
}
async fn open_table(&self, request: OpenTableRequest) -> Result<Arc<dyn BaseTable>> {
// Extract user-provided storage options from request
let user_storage_options = request
.lance_read_params
.as_ref()
.and_then(|lrp| lrp.store_options.as_ref())
.and_then(|so| so.storage_options.as_ref());
let native_table = NativeTable::open_from_namespace(
self.namespace.clone(),
&request.name,
request.namespace.clone(),
None, // write_store_wrapper not used for namespace connections
request.lance_read_params,
self.read_consistency_interval,
self.server_side_query_enabled,
self.session.clone(),
)
.await?;
let mut table_id = request.namespace.clone();
table_id.push(request.name.clone());
let describe_request = DescribeTableRequest {
id: Some(table_id.clone()),
version: None,
};
let response = self
.namespace
.describe_table(describe_request)
.await
.map_err(|e| Error::Runtime {
message: format!("Failed to describe table: {}", e),
})?;
let location = response.location.ok_or_else(|| Error::Runtime {
message: "Table location is missing from namespace response".to_string(),
})?;
let listing_db = self
.create_listing_database(
&location,
table_id.clone(),
user_storage_options,
response.storage_options.as_ref(),
)
.await?;
let namespace_client = self
.server_side_query_enabled
.then(|| self.namespace.clone());
let open_request = OpenTableRequest {
name: request.name.clone(),
namespace: request.namespace.clone(),
index_cache_size: request.index_cache_size,
lance_read_params: request.lance_read_params,
location: Some(location),
namespace_client,
};
listing_db.open_table(open_request).await
Ok(Arc::new(native_table))
}
async fn clone_table(&self, _request: CloneTableRequest) -> Result<Arc<dyn BaseTable>> {
@@ -425,6 +312,10 @@ impl Database for LanceNamespaceDatabase {
/// Return `self` as a `&dyn std::any::Any` reference.
fn as_any(&self) -> &dyn std::any::Any {
self
}
/// Hand out another handle to the namespace this database is built on.
async fn namespace_client(&self) -> Result<Arc<dyn LanceNamespace>> {
    let client = Arc::clone(&self.namespace);
    Ok(client)
}
}
#[cfg(test)]

View File

@@ -232,6 +232,38 @@ impl HttpSend for Sender {
}
}
/// Parsed components from a database URL (db://...)
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so values can be logged, copied and
/// compared in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedDbUrl {
    /// Database name, taken from the host component of the URL.
    pub db_name: String,
    /// Optional prefix, taken from the URL path with leading slashes removed.
    pub db_prefix: Option<String>,
}
/// Parse a database URL and extract the database name and optional prefix.
///
/// Expected format: `db://db_name` or `db://db_name/prefix`
pub fn parse_db_url(db_url: &str) -> Result<ParsedDbUrl> {
let parsed_url = url::Url::parse(db_url).map_err(|err| Error::InvalidInput {
message: format!("db_url is not a valid URL. '{db_url}'. Error: {err}"),
})?;
debug_assert_eq!(parsed_url.scheme(), "db");
if !parsed_url.has_host() {
return Err(Error::InvalidInput {
message: format!("Invalid database URL (missing host) '{}'", db_url),
});
}
let db_name = parsed_url.host_str().unwrap().to_string();
let db_prefix = {
let prefix = parsed_url.path().trim_start_matches('/');
if prefix.is_empty() {
None
} else {
Some(prefix.to_string())
}
};
Ok(ParsedDbUrl { db_name, db_prefix })
}
impl RestfulLanceDbClient<Sender> {
fn get_timeout(passed: Option<Duration>, env_var: &str) -> Result<Option<Duration>> {
if let Some(passed) = passed {
@@ -250,32 +282,12 @@ impl RestfulLanceDbClient<Sender> {
}
pub fn try_new(
db_url: &str,
api_key: &str,
parsed_url: &ParsedDbUrl,
region: &str,
host_override: Option<String>,
default_headers: HeaderMap,
client_config: ClientConfig,
options: &RemoteOptions,
) -> Result<Self> {
let parsed_url = url::Url::parse(db_url).map_err(|err| Error::InvalidInput {
message: format!("db_url is not a valid URL. '{db_url}'. Error: {err}"),
})?;
debug_assert_eq!(parsed_url.scheme(), "db");
if !parsed_url.has_host() {
return Err(Error::InvalidInput {
message: format!("Invalid database URL (missing host) '{}'", db_url),
});
}
let db_name = parsed_url.host_str().unwrap();
let db_prefix = {
let prefix = parsed_url.path().trim_start_matches('/');
if prefix.is_empty() {
None
} else {
Some(prefix)
}
};
// Get the timeouts
let timeout =
Self::get_timeout(client_config.timeout_config.timeout, "LANCE_CLIENT_TIMEOUT")?;
@@ -348,15 +360,7 @@ impl RestfulLanceDbClient<Sender> {
}
let client = client_builder
.default_headers(Self::default_headers(
api_key,
region,
db_name,
host_override.is_some(),
options,
db_prefix,
&client_config,
)?)
.default_headers(default_headers)
.user_agent(client_config.user_agent)
.build()
.map_err(|err| Error::Other {
@@ -366,7 +370,7 @@ impl RestfulLanceDbClient<Sender> {
let host = match host_override {
Some(host_override) => host_override,
None => format!("https://{}.{}.api.lancedb.com", db_name, region),
None => format!("https://{}.{}.api.lancedb.com", parsed_url.db_name, region),
};
debug!("Created client for host: {}", host);
let retry_config = client_config.retry_config.clone().try_into()?;
@@ -389,7 +393,7 @@ impl<S: HttpSend> RestfulLanceDbClient<S> {
&self.host
}
fn default_headers(
pub fn default_headers(
api_key: &str,
region: &str,
db_name: &str,

View File

@@ -189,6 +189,10 @@ pub struct RemoteDatabase<S: HttpSend = Sender> {
client: RestfulLanceDbClient<S>,
table_cache: Cache<String, Arc<RemoteTable<S>>>,
uri: String,
/// Headers to pass to the namespace client for authentication
namespace_headers: HashMap<String, String>,
/// TLS configuration for mTLS support
tls_config: Option<super::client::TlsConfig>,
}
impl RemoteDatabase {
@@ -200,13 +204,32 @@ impl RemoteDatabase {
client_config: ClientConfig,
options: RemoteOptions,
) -> Result<Self> {
let client = RestfulLanceDbClient::try_new(
uri,
let parsed = super::client::parse_db_url(uri)?;
let header_map = RestfulLanceDbClient::<Sender>::default_headers(
api_key,
region,
host_override,
client_config,
&parsed.db_name,
host_override.is_some(),
&options,
parsed.db_prefix.as_deref(),
&client_config,
)?;
let namespace_headers: HashMap<String, String> = header_map
.iter()
.filter_map(|(k, v)| {
v.to_str()
.ok()
.map(|val| (k.as_str().to_string(), val.to_string()))
})
.collect();
let client = RestfulLanceDbClient::try_new(
&parsed,
region,
host_override,
header_map,
client_config.clone(),
)?;
let table_cache = Cache::builder()
@@ -218,6 +241,8 @@ impl RemoteDatabase {
client,
table_cache,
uri: uri.to_owned(),
namespace_headers,
tls_config: client_config.tls_config,
})
}
}
@@ -240,6 +265,8 @@ mod test_utils {
client,
table_cache: Cache::new(0),
uri: "http://localhost".to_string(),
namespace_headers: HashMap::new(),
tls_config: None,
}
}
@@ -248,11 +275,13 @@ mod test_utils {
F: Fn(reqwest::Request) -> http::Response<T> + Send + Sync + 'static,
T: Into<reqwest::Body>,
{
let client = client_with_handler_and_config(handler, config);
let client = client_with_handler_and_config(handler, config.clone());
Self {
client,
table_cache: Cache::new(0),
uri: "http://localhost".to_string(),
namespace_headers: config.extra_headers.clone(),
tls_config: config.tls_config.clone(),
}
}
}
@@ -716,7 +745,8 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
let namespace_id = build_namespace_identifier(namespace_parts, &self.client.id_delimiter);
let req = self
.client
.get(&format!("/v1/namespace/{}/describe", namespace_id));
.post(&format!("/v1/namespace/{}/describe", namespace_id))
.json(&DescribeNamespaceRequest::default());
let (request_id, resp) = self.client.send(req).await?;
let resp = self.client.check_response(&request_id, resp).await?;
@@ -727,6 +757,31 @@ impl<S: HttpSend> Database for RemoteDatabase<S> {
/// Return `self` as a `&dyn std::any::Any` reference.
fn as_any(&self) -> &dyn std::any::Any {
self
}
/// Build a `RestNamespace` client that targets the same host as this database.
///
/// The builder reuses the client's host and id delimiter and the stored namespace
/// headers. When a TLS configuration is present, its certificate, key and CA paths
/// (each only if set) and its hostname-assertion flag are applied as well.
async fn namespace_client(&self) -> Result<Arc<dyn lance_namespace::LanceNamespace>> {
    // TODO: support header provider
    let mut rest_builder = lance_namespace_impls::RestNamespaceBuilder::new(self.client.host())
        .delimiter(&self.client.id_delimiter)
        .headers(self.namespace_headers.clone());

    // Carry over the mTLS settings, if any
    if let Some(tls) = self.tls_config.as_ref() {
        if let Some(path) = tls.cert_file.as_ref() {
            rest_builder = rest_builder.cert_file(path);
        }
        if let Some(path) = tls.key_file.as_ref() {
            rest_builder = rest_builder.key_file(path);
        }
        if let Some(path) = tls.ssl_ca_cert.as_ref() {
            rest_builder = rest_builder.ssl_ca_cert(path);
        }
        rest_builder = rest_builder.assert_hostname(tls.assert_hostname);
    }

    let client: Arc<dyn lance_namespace::LanceNamespace> = Arc::new(rest_builder.build());
    Ok(client)
}
}
/// RemoteOptions contains a subset of StorageOptions that are compatible with Remote LanceDB connections
@@ -1518,4 +1573,265 @@ mod tests {
panic!("Expected HTTP error");
}
}
/// A namespace client can be obtained from a connection backed by a mock handler.
#[tokio::test]
async fn test_namespace_client() {
    // Every request to the mock server is answered with an empty table listing
    let conn = Connection::new_with_handler(|_| {
        http::Response::builder()
            .status(200)
            .body(r#"{"tables": []}"#)
            .unwrap()
    });

    // Ask the connection's internal database for its namespace client
    assert!(conn.namespace_client().await.is_ok());
}
/// A namespace client can be obtained when the client config carries TLS settings.
#[tokio::test]
async fn test_namespace_client_with_tls_config() {
    use crate::remote::client::TlsConfig;

    let client_config = ClientConfig {
        tls_config: Some(TlsConfig {
            cert_file: Some("/path/to/cert.pem".to_string()),
            key_file: Some("/path/to/key.pem".to_string()),
            ssl_ca_cert: Some("/path/to/ca.pem".to_string()),
            assert_hostname: true,
        }),
        ..Default::default()
    };

    let conn = Connection::new_with_handler_and_config(
        |_| {
            http::Response::builder()
                .status(200)
                .body(r#"{"tables": []}"#)
                .unwrap()
        },
        client_config,
    );

    // Creating the namespace client must succeed with the TLS config in place
    assert!(conn.namespace_client().await.is_ok());
}
/// A namespace client can be obtained when the client config carries extra headers.
#[tokio::test]
async fn test_namespace_client_with_headers() {
    let client_config = ClientConfig {
        extra_headers: HashMap::from([(
            "X-Custom-Header".to_string(),
            "custom-value".to_string(),
        )]),
        ..Default::default()
    };

    let conn = Connection::new_with_handler_and_config(
        |_| {
            http::Response::builder()
                .status(200)
                .body(r#"{"tables": []}"#)
                .unwrap()
        },
        client_config,
    );

    // Creating the namespace client must succeed with the extra headers in place
    assert!(conn.namespace_client().await.is_ok());
}
/// Integration tests that run RemoteDatabase against a real namespace server started
/// in-process through RestAdapter.
mod rest_adapter_integration {
    use super::*;
    use lance_namespace::models::ListTablesRequest;
    use lance_namespace_impls::{DirectoryNamespaceBuilder, RestAdapter, RestAdapterConfig};
    use std::sync::Arc;
    use tempfile::TempDir;

    /// Owns a REST server backed by a DirectoryNamespace living in a temporary directory.
    struct RestServerFixture {
        _temp_dir: TempDir,
        server_handle: lance_namespace_impls::RestAdapterHandle,
        server_url: String,
    }

    impl RestServerFixture {
        /// Start a server on a port chosen by the OS and record its URL.
        async fn new() -> Self {
            let temp_dir = TempDir::new().unwrap();
            let root = temp_dir.path().to_str().unwrap().to_string();

            // DirectoryNamespace acts as the storage backend of the server
            let backend = Arc::new(
                DirectoryNamespaceBuilder::new(&root)
                    .build()
                    .await
                    .unwrap(),
            );

            // Port 0 lets the OS assign an available port
            let adapter = RestAdapter::new(
                backend,
                RestAdapterConfig {
                    port: 0,
                    ..Default::default()
                },
            );
            let server_handle = adapter.start().await.unwrap();

            // Build the URL from the port that was actually assigned
            let server_url = format!("http://127.0.0.1:{}", server_handle.port());

            Self {
                _temp_dir: temp_dir,
                server_handle,
                server_url,
            }
        }
    }

    impl Drop for RestServerFixture {
        fn drop(&mut self) {
            self.server_handle.shutdown();
        }
    }

    /// Build an unpaginated list-tables request for `namespace`.
    fn list_request(namespace: &[String]) -> ListTablesRequest {
        ListTablesRequest {
            id: Some(namespace.to_vec()),
            page_token: None,
            limit: None,
        }
    }

    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_remote_database_with_rest_adapter() {
        use lance_namespace::models::CreateNamespaceRequest;

        let fixture = RestServerFixture::new().await;

        // `db://dummy` is only a placeholder URI; the real server URL is supplied
        // through host_override
        let conn = ConnectBuilder::new("db://dummy")
            .api_key("test-api-key")
            .region("us-east-1")
            .host_override(&fixture.server_url)
            .execute()
            .await
            .unwrap();

        // A child namespace has to exist before tables are created in it
        let child_ns = vec!["test_ns".to_string()];
        conn.create_namespace(CreateNamespaceRequest {
            id: Some(child_ns.clone()),
            mode: None,
            properties: None,
        })
        .await
        .expect("Failed to create namespace");

        // Create a table in the child namespace
        let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
        let batch = RecordBatch::try_new(
            schema.clone(),
            vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
        )
        .unwrap();
        let reader = RecordBatchIterator::new([Ok(batch.clone())], schema.clone());
        let created = conn
            .create_table("test_table", reader)
            .namespace(child_ns.clone())
            .execute()
            .await;
        assert!(created.is_ok(), "Failed to create table: {:?}", created.err());

        // The connection lists the new table
        let via_connection = conn
            .list_tables(list_request(&child_ns))
            .await
            .expect("Failed to list tables");
        assert_eq!(via_connection.tables, vec!["test_table"]);

        // The namespace client lists it as well
        let namespace_client = conn.namespace_client().await.unwrap();
        let via_namespace = namespace_client
            .list_tables(list_request(&child_ns))
            .await
            .unwrap();
        assert_eq!(via_namespace.tables, vec!["test_table"]);

        // The table can be opened from the child namespace
        let opened = conn
            .open_table("test_table")
            .namespace(child_ns.clone())
            .execute()
            .await;
        assert!(
            opened.is_ok(),
            "Failed to open table: {:?}",
            opened.err()
        );
        assert_eq!(opened.unwrap().name(), "test_table");
    }

    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn test_remote_database_with_multiple_tables() {
        use lance_namespace::models::CreateNamespaceRequest;

        let fixture = RestServerFixture::new().await;

        // `db://dummy` is only a placeholder URI; the real server URL is supplied
        // through host_override
        let conn = ConnectBuilder::new("db://dummy")
            .api_key("test-api-key")
            .region("us-east-1")
            .host_override(&fixture.server_url)
            .execute()
            .await
            .unwrap();

        // A child namespace has to exist before tables are created in it
        let child_ns = vec!["multi_table_ns".to_string()];
        conn.create_namespace(CreateNamespaceRequest {
            id: Some(child_ns.clone()),
            mode: None,
            properties: None,
        })
        .await
        .expect("Failed to create namespace");

        // Create three single-row tables in the child namespace
        let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
        for i in 1..=3 {
            let batch =
                RecordBatch::try_new(schema.clone(), vec![Arc::new(Int32Array::from(vec![i]))])
                    .unwrap();
            let reader = RecordBatchIterator::new([Ok(batch.clone())], schema.clone());
            conn.create_table(format!("table{}", i), reader)
                .namespace(child_ns.clone())
                .execute()
                .await
                .unwrap_or_else(|e| panic!("Failed to create table{}: {:?}", i, e));
        }

        // All three tables show up in the listing
        let listing = conn.list_tables(list_request(&child_ns)).await.unwrap();
        assert_eq!(listing.tables.len(), 3);
        for expected in ["table1", "table2", "table3"] {
            assert!(listing.tables.contains(&expected.to_string()));
        }
    }
}
}

View File

@@ -29,7 +29,7 @@ use lance::dataset::{
use lance::dataset::{MergeInsertBuilder as LanceMergeInsertBuilder, WhenNotMatchedBySource};
use lance::index::vector::utils::infer_vector_dim;
use lance::index::vector::VectorIndexParams;
use lance::io::WrappingObjectStore;
use lance::io::{ObjectStoreParams, WrappingObjectStore};
use lance_datafusion::exec::{analyze_plan as lance_analyze_plan, execute_plan};
use lance_datafusion::utils::StreamingWriteSource;
use lance_index::scalar::{BuiltinIndexType, ScalarIndexParams};
@@ -40,6 +40,7 @@ use lance_index::vector::pq::PQBuildParams;
use lance_index::vector::sq::builder::SQBuildParams;
use lance_index::DatasetIndexExt;
use lance_index::IndexType;
use lance_io::object_store::LanceNamespaceStorageOptionsProvider;
use lance_namespace::models::{
QueryTableRequest as NsQueryTableRequest, QueryTableRequestFullTextQuery,
QueryTableRequestVector, StringFtsQuery,
@@ -1611,6 +1612,105 @@ impl NativeTable {
self
}
/// Opens an existing Table using a namespace client.
///
/// This method uses `DatasetBuilder::from_namespace` to open the table, which
/// automatically fetches the table location and storage options from the namespace.
/// This eliminates the need to pre-fetch and merge storage options before opening.
///
/// # Arguments
///
/// * `namespace_client` - The namespace client to use for fetching table metadata
/// * `name` - The table name
/// * `namespace` - The namespace path (e.g., vec!["parent", "child"])
/// * `write_store_wrapper` - Optional wrapper for the object store on write path
/// * `params` - Optional read parameters
/// * `read_consistency_interval` - Optional interval for read consistency
/// * `server_side_query_enabled` - Whether to enable server-side query execution.
/// When true, the namespace_client will be stored and queries will be executed
/// on the namespace server. When false, the namespace is only used for opening
/// the table, and queries are executed locally.
/// * `session` - Optional session for object stores and caching
///
/// # Returns
///
/// * A [NativeTable] object.
#[allow(clippy::too_many_arguments)]
pub async fn open_from_namespace(
namespace_client: Arc<dyn LanceNamespace>,
name: &str,
namespace: Vec<String>,
write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
params: Option<ReadParams>,
read_consistency_interval: Option<std::time::Duration>,
server_side_query_enabled: bool,
session: Option<Arc<lance::session::Session>>,
) -> Result<Self> {
let mut params = params.unwrap_or_default();
// Set the session in read params
if let Some(sess) = session {
params.session(sess);
}
// patch the params if we have a write store wrapper
let params = match write_store_wrapper.clone() {
Some(wrapper) => params.patch_with_store_wrapper(wrapper)?,
None => params,
};
// Build table_id from namespace + name
let mut table_id = namespace.clone();
table_id.push(name.to_string());
// Use DatasetBuilder::from_namespace which automatically fetches location
// and storage options from the namespace
let builder = DatasetBuilder::from_namespace(
namespace_client.clone(),
table_id,
false, // Don't ignore namespace storage options
)
.await
.map_err(|e| match e {
lance::Error::Namespace { source, .. } => Error::Runtime {
message: format!("Failed to get table info from namespace: {:?}", source),
},
source => Error::Lance { source },
})?;
let dataset = builder
.with_read_params(params)
.load()
.await
.map_err(|e| match e {
lance::Error::DatasetNotFound { .. } => Error::TableNotFound {
name: name.to_string(),
source: Box::new(e),
},
source => Error::Lance { source },
})?;
let uri = dataset.uri().to_string();
let dataset = DatasetConsistencyWrapper::new_latest(dataset, read_consistency_interval);
let id = Self::build_id(&namespace, name);
let stored_namespace_client = if server_side_query_enabled {
Some(namespace_client)
} else {
None
};
Ok(Self {
name: name.to_string(),
namespace,
id,
uri,
dataset,
read_consistency_interval,
namespace_client: stored_namespace_client,
})
}
fn get_table_name(uri: &str) -> Result<String> {
let path = Path::new(uri);
let name = path
@@ -1722,6 +1822,102 @@ impl NativeTable {
.await
}
/// Creates a new Table using a namespace client for storage options.
///
/// This method sets up a `StorageOptionsProvider` from the namespace client,
/// enabling automatic credential refresh for cloud storage. The namespace
/// is used for:
/// 1. Setting up storage options provider for credential vending
/// 2. Optionally enabling server-side query execution
///
/// # Arguments
///
/// * `namespace_client` - The namespace client to use for storage options
/// * `uri` - The URI to the table (obtained from create_empty_table response)
/// * `name` - The table name
/// * `namespace` - The namespace path (e.g., vec!["parent", "child"])
/// * `batches` - RecordBatch to be saved in the database
/// * `write_store_wrapper` - Optional wrapper for the object store on write path
/// * `params` - Optional write parameters
/// * `read_consistency_interval` - Optional interval for read consistency
/// * `server_side_query_enabled` - Whether to enable server-side query execution
///
/// # Returns
///
/// * A [NativeTable] object.
#[allow(clippy::too_many_arguments)]
pub async fn create_from_namespace(
namespace_client: Arc<dyn LanceNamespace>,
uri: &str,
name: &str,
namespace: Vec<String>,
batches: impl StreamingWriteSource,
write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
params: Option<WriteParams>,
read_consistency_interval: Option<std::time::Duration>,
server_side_query_enabled: bool,
session: Option<Arc<lance::session::Session>>,
) -> Result<Self> {
// Build table_id from namespace + name for the storage options provider
let mut table_id = namespace.clone();
table_id.push(name.to_string());
// Set up storage options provider from namespace
let storage_options_provider = Arc::new(LanceNamespaceStorageOptionsProvider::new(
namespace_client.clone(),
table_id,
));
// Start with provided params or defaults
let mut params = params.unwrap_or_default();
// Set the session in write params
if let Some(sess) = session {
params.session = Some(sess);
}
// Ensure store_params exists and set the storage options provider
let store_params = params
.store_params
.get_or_insert_with(ObjectStoreParams::default);
store_params.storage_options_provider = Some(storage_options_provider);
// Patch the params if we have a write store wrapper
let params = match write_store_wrapper.clone() {
Some(wrapper) => params.patch_with_store_wrapper(wrapper)?,
None => params,
};
let insert_builder = InsertBuilder::new(uri).with_params(&params);
let dataset = insert_builder
.execute_stream(batches)
.await
.map_err(|e| match e {
lance::Error::DatasetAlreadyExists { .. } => Error::TableAlreadyExists {
name: name.to_string(),
},
source => Error::Lance { source },
})?;
let id = Self::build_id(&namespace, name);
let stored_namespace_client = if server_side_query_enabled {
Some(namespace_client)
} else {
None
};
Ok(Self {
name: name.to_string(),
namespace,
id,
uri: uri.to_string(),
dataset: DatasetConsistencyWrapper::new_latest(dataset, read_consistency_interval),
read_consistency_interval,
namespace_client: stored_namespace_client,
})
}
async fn optimize_indices(&self, options: &OptimizeOptions) -> Result<()> {
info!("LanceDB: optimizing indices: {:?}", options);
self.dataset