Compare commits

..

6 Commits

Author SHA1 Message Date
Xuanwo
523030aa2f ci: Auto fix CI if bump failed 2025-12-24 20:06:36 +08:00
Prashanth Rao
8ae4f42fbe fix: add to_lance() and to_polars() stub methods for type-checkers (#2876)
Adds `Table.to_lance()` and `Table.to_polars()` methods (non-abstract
methods, defaulting to `NotImplementedError`) so type checkers like
mypy, pyright and ty don’t flag them as unknown attributes on `Table`.
Not making these abstract methods should keep existing remote/other
`Table` implementations instantiable.

This is a non-breaking change to existing functionality and is purely for
the purpose of pleasing static type-checkers like mypy, ty and pyright.

<img width="626" height="134" alt="image"
src="https://github.com/user-attachments/assets/f4619bca-a882-432b-bd23-ae8f189ff9e3"
/>
2025-12-18 12:55:07 -05:00
Lance Release
0667fa38d4 Bump version: 0.23.1-beta.0 → 0.23.1-beta.1 2025-12-17 06:59:29 +00:00
Lance Release
30108c0b1f Bump version: 0.26.1-beta.0 → 0.26.1-beta.1 2025-12-17 06:58:52 +00:00
Jack Ye
1628f7e3f3 fix: pass namespace storage options provider into native table (#2873)
Previously the native table was created with static credentials and could
not auto-refresh credentials when expired.
2025-12-16 22:58:04 -08:00
Lance Release
2fd712312f Bump version: 0.23.0 → 0.23.1-beta.0 2025-12-17 03:30:51 +00:00
23 changed files with 576 additions and 178 deletions

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.23.0"
current_version = "0.23.1-beta.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

297
.github/workflows/codex-autofix-ci.yml vendored Normal file
View File

@@ -0,0 +1,297 @@
name: Codex Autofix CI
on:
check_suite:
types:
- completed
permissions:
contents: write
pull-requests: write
actions: read
concurrency:
group: codex-autofix-${{ github.event.check_suite.head_branch }}
cancel-in-progress: false
jobs:
autofix:
runs-on: ubuntu-latest
steps:
- name: Resolve PR and failing required checks
id: ctx
env:
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
REPO: ${{ github.repository }}
SHA: ${{ github.event.check_suite.head_sha }}
HEAD_BRANCH: ${{ github.event.check_suite.head_branch }}
MAX_ATTEMPTS: "3"
run: |
set -euo pipefail
echo "Repository: $REPO"
echo "head_branch: $HEAD_BRANCH"
echo "head_sha: $SHA"
if [[ "$HEAD_BRANCH" != codex/update-lance-* ]]; then
echo "Skip: branch '$HEAD_BRANCH' does not match codex/update-lance-*"
echo "needs_fix=false" >> "$GITHUB_OUTPUT"
exit 0
fi
prs_json="$(gh api -H "Accept: application/vnd.github+json" "repos/$REPO/commits/$SHA/pulls")"
pr_json="$(echo "$prs_json" | jq -c '[.[] | select(.state=="open")] | .[0]')"
if [[ -z "$pr_json" || "$pr_json" == "null" ]]; then
echo "Skip: no open PR found for sha $SHA"
echo "needs_fix=false" >> "$GITHUB_OUTPUT"
exit 0
fi
pr_number="$(echo "$pr_json" | jq -r '.number')"
head_ref="$(echo "$pr_json" | jq -r '.head.ref')"
head_repo="$(echo "$pr_json" | jq -r '.head.repo.full_name')"
pr_head_sha="$(echo "$pr_json" | jq -r '.head.sha')"
if [[ "$head_repo" != "$REPO" ]]; then
echo "Skip: cross-repo PR ($head_repo != $REPO)"
echo "needs_fix=false" >> "$GITHUB_OUTPUT"
exit 0
fi
if [[ "$pr_head_sha" != "$SHA" ]]; then
echo "Skip: stale check_suite event (pr head sha $pr_head_sha != event sha $SHA)"
echo "needs_fix=false" >> "$GITHUB_OUTPUT"
exit 0
fi
set +e
checks_json="$(gh pr checks "$pr_number" --required --repo "$REPO" --json name,state,bucket,link,workflow)"
checks_rc=$?
set -e
if [[ "$checks_rc" -eq 8 ]]; then
echo "Skip: required checks still pending"
echo "needs_fix=false" >> "$GITHUB_OUTPUT"
exit 0
fi
if [[ "$checks_rc" -ne 0 ]]; then
echo "Skip: failed to query required checks (exit=$checks_rc)"
echo "needs_fix=false" >> "$GITHUB_OUTPUT"
exit 0
fi
fail_count="$(echo "$checks_json" | jq '[.[] | select(.bucket=="fail")] | length')"
if [[ "$fail_count" -eq 0 ]]; then
echo "Skip: no failing required checks"
echo "needs_fix=false" >> "$GITHUB_OUTPUT"
exit 0
fi
comments_json="$(gh api "repos/$REPO/issues/$pr_number/comments" --paginate)"
stopped_count="$(echo "$comments_json" | jq '[.[].body | select(test("<!-- codex-autofix stopped -->"))] | length')"
if [[ "$stopped_count" -gt 0 ]]; then
echo "Skip: codex-autofix already stopped for this PR"
echo "needs_fix=false" >> "$GITHUB_OUTPUT"
exit 0
fi
prior_attempts="$(echo "$comments_json" | jq '[.[].body | select(test("<!-- codex-autofix attempt:"))] | length')"
attempt="$((prior_attempts + 1))"
if [[ "$attempt" -gt "$MAX_ATTEMPTS" ]]; then
run_url="${GITHUB_SERVER_URL}/${REPO}/actions/runs/${GITHUB_RUN_ID}"
comment_file="$(mktemp /tmp/codex-autofix-comment.XXXXXX.md)"
{
printf '%s\n' '<!-- codex-autofix stopped -->'
printf '<!-- codex-autofix attempt: %s -->\n' "$attempt"
printf 'Codex autofix stopped: reached max attempts (%s).\n\n' "$MAX_ATTEMPTS"
printf -- '- Run: %s\n' "$run_url"
printf -- '- head_sha: `%s`\n' "$SHA"
printf -- '- head_ref: `%s`\n' "$head_ref"
} >"$comment_file"
gh pr comment "$pr_number" --repo "$REPO" --body-file "$comment_file"
echo "needs_fix=false" >> "$GITHUB_OUTPUT"
exit 0
fi
evidence_file="$(mktemp /tmp/codex-autofix-evidence.XXXXXX.txt)"
run_url="${GITHUB_SERVER_URL}/${REPO}/actions/runs/${GITHUB_RUN_ID}"
{
echo "PR: #$pr_number"
echo "head_ref: $head_ref"
echo "head_sha: $SHA"
echo "Run: $run_url"
echo ""
echo "Failing required checks:"
echo "$checks_json" | jq -r '.[] | select(.bucket=="fail") | "- \(.name) (\(.workflow // "unknown")): \(.link // "n/a")"'
echo ""
} > "$evidence_file"
while IFS= read -r row; do
name="$(echo "$row" | jq -r '.name')"
link="$(echo "$row" | jq -r '.link // empty')"
workflow="$(echo "$row" | jq -r '.workflow // "unknown"')"
{
echo "================================================================================"
echo "CHECK: $name"
echo "WORKFLOW: $workflow"
echo "LINK: ${link:-n/a}"
} >> "$evidence_file"
run_id=""
if [[ -n "$link" ]]; then
run_id="$(echo "$link" | sed -n 's#.*actions/runs/\\([0-9][0-9]*\\).*#\\1#p' | head -n 1 || true)"
fi
if [[ -z "$run_id" ]]; then
echo "LOGS: unavailable (no run id found in link)" >> "$evidence_file"
echo "" >> "$evidence_file"
continue
fi
echo "LOGS: gh run view $run_id --log-failed (tail -c 20000)" >> "$evidence_file"
set +e
gh run view "$run_id" --repo "$REPO" --log-failed 2>/dev/null | tail -c 20000 >> "$evidence_file"
echo "" >> "$evidence_file"
set -e
done < <(echo "$checks_json" | jq -c '.[] | select(.bucket=="fail")')
comment_file="$(mktemp /tmp/codex-autofix-comment.XXXXXX.md)"
{
printf '<!-- codex-autofix attempt: %s -->\n' "$attempt"
printf 'Starting Codex autofix attempt %s.\n\n' "$attempt"
printf -- '- Run: %s\n' "$run_url"
printf -- '- head_sha: `%s`\n' "$SHA"
printf -- '- head_ref: `%s`\n' "$head_ref"
printf -- '- Failing required checks: %s\n' "$fail_count"
} >"$comment_file"
gh pr comment "$pr_number" --repo "$REPO" --body-file "$comment_file"
{
echo "needs_fix=true"
echo "pr_number=$pr_number"
echo "head_ref=$head_ref"
echo "attempt=$attempt"
echo "evidence_file=$evidence_file"
} >> "$GITHUB_OUTPUT"
- name: Checkout PR branch
if: steps.ctx.outputs.needs_fix == 'true'
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ steps.ctx.outputs.head_ref }}
token: ${{ secrets.ROBOT_TOKEN }}
persist-credentials: true
- name: Configure git
if: steps.ctx.outputs.needs_fix == 'true'
run: |
git config user.name "lancedb automation"
git config user.email "robot@lancedb.com"
- name: Set up Node.js
if: steps.ctx.outputs.needs_fix == 'true'
uses: actions/setup-node@v4
with:
node-version: 20
- name: Install Codex CLI
if: steps.ctx.outputs.needs_fix == 'true'
run: npm install -g @openai/codex
- name: Install Rust toolchain
if: steps.ctx.outputs.needs_fix == 'true'
uses: dtolnay/rust-toolchain@stable
with:
toolchain: stable
components: clippy, rustfmt
- name: Install system dependencies
if: steps.ctx.outputs.needs_fix == 'true'
run: |
sudo apt-get update
sudo apt-get install -y protobuf-compiler libssl-dev
- name: Run Codex to fix failing CI
if: steps.ctx.outputs.needs_fix == 'true'
env:
PR_NUMBER: ${{ steps.ctx.outputs.pr_number }}
HEAD_REF: ${{ steps.ctx.outputs.head_ref }}
ATTEMPT: ${{ steps.ctx.outputs.attempt }}
EVIDENCE_FILE: ${{ steps.ctx.outputs.evidence_file }}
GITHUB_TOKEN: ${{ secrets.ROBOT_TOKEN }}
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
OPENAI_API_KEY: ${{ secrets.CODEX_TOKEN }}
run: |
set -euo pipefail
prompt_file="/tmp/codex-prompt.txt"
{
printf 'You are running inside the lancedb repository on a GitHub Actions runner.\n'
printf 'Your task is to fix failing required CI checks for pull request #%s on branch %s.\n\n' "$PR_NUMBER" "$HEAD_REF"
printf 'Goal:\n'
printf -- '- Make the smallest change necessary so that all required checks pass.\n\n'
printf 'Evidence (generated from GitHub checks and logs):\n'
printf '---\n'
cat "${EVIDENCE_FILE}"
printf '\n---\n\n'
printf 'Follow these steps exactly:\n'
printf '1. Identify the root cause from the evidence and repository state.\n'
printf '2. Make changes to fix the failures.\n'
printf '3. Run the relevant local commands that correspond to the failing checks until they succeed.\n'
printf ' If unsure, start with:\n'
printf ' - cargo fmt --all -- --check\n'
printf ' - cargo clippy --profile ci --workspace --tests --all-features -- -D warnings\n'
printf '4. Ensure the repository is clean except for intentional changes (git status --short, git diff).\n'
printf '5. Create a commit with message "fix: codex autofix (attempt %s)".\n' "$ATTEMPT"
printf '6. Push to origin branch "%s" (use --force-with-lease only if required).\n' "$HEAD_REF"
printf '7. Print the commands you ran and their results, plus git status --short and git log -1 --oneline.\n\n'
printf 'Constraints:\n'
printf -- '- Do not create a new pull request.\n'
printf -- '- Do not merge.\n'
printf -- '- Avoid modifying GitHub workflow files unless strictly required to fix CI for this PR.\n'
} >"$prompt_file"
printenv OPENAI_API_KEY | codex login --with-api-key
codex --config shell_environment_policy.ignore_default_excludes=true exec --dangerously-bypass-approvals-and-sandbox "$(cat "$prompt_file")"
- name: Ensure branch is pushed
if: steps.ctx.outputs.needs_fix == 'true'
env:
HEAD_REF: ${{ steps.ctx.outputs.head_ref }}
run: |
set -euo pipefail
if git diff --quiet && git diff --cached --quiet; then
echo "Working tree clean."
else
git add -A
git commit -m "fix: codex autofix (post-run)" || true
fi
git push origin "HEAD:${HEAD_REF}" --force-with-lease
- name: Comment result
if: steps.ctx.outputs.needs_fix == 'true'
env:
GH_TOKEN: ${{ secrets.ROBOT_TOKEN }}
REPO: ${{ github.repository }}
PR_NUMBER: ${{ steps.ctx.outputs.pr_number }}
ATTEMPT: ${{ steps.ctx.outputs.attempt }}
run: |
set -euo pipefail
run_url="${GITHUB_SERVER_URL}/${REPO}/actions/runs/${GITHUB_RUN_ID}"
sha="$(git rev-parse HEAD)"
summary="$(git log -1 --oneline || true)"
status="$(git status --short || true)"
comment_file="$(mktemp /tmp/codex-autofix-comment.XXXXXX.md)"
{
printf 'Codex autofix attempt %s finished.\n\n' "$ATTEMPT"
printf -- '- Run: %s\n' "$run_url"
printf -- '- head_sha: `%s`\n' "$sha"
printf -- '- Last commit: %s\n\n' "$summary"
printf '```\n%s\n```\n' "$status"
} >"$comment_file"
gh pr comment "$PR_NUMBER" --repo "$REPO" --body-file "$comment_file"

6
Cargo.lock generated
View File

@@ -4970,7 +4970,7 @@ dependencies = [
[[package]]
name = "lancedb"
version = "0.23.0"
version = "0.23.1-beta.1"
dependencies = [
"ahash",
"anyhow",
@@ -5049,7 +5049,7 @@ dependencies = [
[[package]]
name = "lancedb-nodejs"
version = "0.23.0"
version = "0.23.1-beta.1"
dependencies = [
"arrow-array",
"arrow-ipc",
@@ -5069,7 +5069,7 @@ dependencies = [
[[package]]
name = "lancedb-python"
version = "0.26.0"
version = "0.26.1-beta.1"
dependencies = [
"arrow",
"async-trait",

View File

@@ -14,7 +14,7 @@ Add the following dependency to your `pom.xml`:
<dependency>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-core</artifactId>
<version>0.23.0</version>
<version>0.23.1-beta.1</version>
</dependency>
```

View File

@@ -8,7 +8,7 @@
<parent>
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.23.0-final.0</version>
<version>0.23.1-beta.1</version>
<relativePath>../pom.xml</relativePath>
</parent>

View File

@@ -6,7 +6,7 @@
<groupId>com.lancedb</groupId>
<artifactId>lancedb-parent</artifactId>
<version>0.23.0-final.0</version>
<version>0.23.1-beta.1</version>
<packaging>pom</packaging>
<name>${project.artifactId}</name>
<description>LanceDB Java SDK Parent POM</description>

View File

@@ -1,7 +1,7 @@
[package]
name = "lancedb-nodejs"
edition.workspace = true
version = "0.23.0"
version = "0.23.1-beta.1"
license.workspace = true
description.workspace = true
repository.workspace = true

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-arm64",
"version": "0.23.0",
"version": "0.23.1-beta.1",
"os": ["darwin"],
"cpu": ["arm64"],
"main": "lancedb.darwin-arm64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-darwin-x64",
"version": "0.23.0",
"version": "0.23.1-beta.1",
"os": ["darwin"],
"cpu": ["x64"],
"main": "lancedb.darwin-x64.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-gnu",
"version": "0.23.0",
"version": "0.23.1-beta.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-arm64-musl",
"version": "0.23.0",
"version": "0.23.1-beta.1",
"os": ["linux"],
"cpu": ["arm64"],
"main": "lancedb.linux-arm64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-gnu",
"version": "0.23.0",
"version": "0.23.1-beta.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-gnu.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-linux-x64-musl",
"version": "0.23.0",
"version": "0.23.1-beta.1",
"os": ["linux"],
"cpu": ["x64"],
"main": "lancedb.linux-x64-musl.node",

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-arm64-msvc",
"version": "0.23.0",
"version": "0.23.1-beta.1",
"os": [
"win32"
],

View File

@@ -1,6 +1,6 @@
{
"name": "@lancedb/lancedb-win32-x64-msvc",
"version": "0.23.0",
"version": "0.23.1-beta.1",
"os": ["win32"],
"cpu": ["x64"],
"main": "lancedb.win32-x64-msvc.node",

View File

@@ -1,12 +1,12 @@
{
"name": "@lancedb/lancedb",
"version": "0.23.0",
"version": "0.23.1-beta.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@lancedb/lancedb",
"version": "0.23.0",
"version": "0.23.1-beta.1",
"cpu": [
"x64",
"arm64"

View File

@@ -11,7 +11,7 @@
"ann"
],
"private": false,
"version": "0.23.0",
"version": "0.23.1-beta.1",
"main": "dist/index.js",
"exports": {
".": "./dist/index.js",

View File

@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.26.1-beta.0"
current_version = "0.26.1-beta.1"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb-python"
version = "0.26.1-beta.0"
version = "0.26.1-beta.1"
edition.workspace = true
description = "Python bindings for LanceDB"
license.workspace = true

View File

@@ -684,6 +684,24 @@ class Table(ABC):
"""
raise NotImplementedError
def to_lance(self, **kwargs) -> lance.LanceDataset:
"""Return the table as a lance.LanceDataset.
Returns
-------
lance.LanceDataset
"""
raise NotImplementedError
def to_polars(self, **kwargs) -> "pl.DataFrame":
"""Return the table as a polars.DataFrame.
Returns
-------
polars.DataFrame
"""
raise NotImplementedError
def create_index(
self,
metric="l2",

View File

@@ -1,6 +1,6 @@
[package]
name = "lancedb"
version = "0.23.0"
version = "0.23.1-beta.1"
edition.workspace = true
description = "LanceDB: A serverless, low-latency vector database for AI applications"
license.workspace = true

View File

@@ -7,7 +7,6 @@ use std::collections::HashMap;
use std::sync::Arc;
use async_trait::async_trait;
use lance_io::object_store::{LanceNamespaceStorageOptionsProvider, StorageOptionsProvider};
use lance_namespace::{
models::{
CreateEmptyTableRequest, CreateNamespaceRequest, CreateNamespaceResponse,
@@ -19,13 +18,13 @@ use lance_namespace::{
};
use lance_namespace_impls::ConnectBuilder;
use crate::connection::ConnectRequest;
use crate::database::ReadConsistency;
use crate::error::{Error, Result};
use crate::table::NativeTable;
use super::{
listing::ListingDatabase, BaseTable, CloneTableRequest, CreateTableMode,
CreateTableRequest as DbCreateTableRequest, Database, OpenTableRequest, TableNamesRequest,
BaseTable, CloneTableRequest, CreateTableMode, CreateTableRequest as DbCreateTableRequest,
Database, OpenTableRequest, TableNamesRequest,
};
/// A database implementation that uses lance-namespace for table management
@@ -90,51 +89,6 @@ impl std::fmt::Display for LanceNamespaceDatabase {
}
}
impl LanceNamespaceDatabase {
/// Create a temporary listing database for the given location
///
/// Merges storage options with priority: connection < user < namespace
async fn create_listing_database(
&self,
location: &str,
table_id: Vec<String>,
user_storage_options: Option<&HashMap<String, String>>,
response_storage_options: Option<&HashMap<String, String>>,
) -> Result<ListingDatabase> {
// Merge storage options: connection < user < namespace
let mut merged_storage_options = self.storage_options.clone();
if let Some(opts) = user_storage_options {
merged_storage_options.extend(opts.clone());
}
if let Some(opts) = response_storage_options {
merged_storage_options.extend(opts.clone());
}
let request = ConnectRequest {
uri: location.to_string(),
#[cfg(feature = "remote")]
client_config: Default::default(),
options: merged_storage_options,
read_consistency_interval: self.read_consistency_interval,
session: self.session.clone(),
};
let mut listing_db = ListingDatabase::connect_with_options(&request).await?;
// Create storage options provider only if namespace returned storage options
// (not just user-provided options)
if response_storage_options.is_some() {
let provider = Arc::new(LanceNamespaceStorageOptionsProvider::new(
self.namespace.clone(),
table_id,
)) as Arc<dyn StorageOptionsProvider>;
listing_db.storage_options_provider = Some(provider);
}
Ok(listing_db)
}
}
#[async_trait]
impl Database for LanceNamespaceDatabase {
fn uri(&self) -> &str {
@@ -195,14 +149,6 @@ impl Database for LanceNamespaceDatabase {
}
async fn create_table(&self, request: DbCreateTableRequest) -> Result<Arc<dyn BaseTable>> {
// Extract user-provided storage options from request
let user_storage_options = request
.write_options
.lance_write_params
.as_ref()
.and_then(|lwp| lwp.store_params.as_ref())
.and_then(|sp| sp.storage_options.as_ref());
let mut table_id = request.namespace.clone();
table_id.push(request.name.clone());
let describe_request = DescribeTableRequest {
@@ -235,34 +181,20 @@ impl Database for LanceNamespaceDatabase {
}
}
CreateTableMode::ExistOk(_) => {
if let Ok(response) = describe_result {
let location = response.location.ok_or_else(|| Error::Runtime {
message: "Table location is missing from namespace response".to_string(),
})?;
if describe_result.is_ok() {
let native_table = NativeTable::open_from_namespace(
self.namespace.clone(),
&request.name,
request.namespace.clone(),
None,
None,
self.read_consistency_interval,
self.server_side_query_enabled,
self.session.clone(),
)
.await?;
let listing_db = self
.create_listing_database(
&location,
table_id.clone(),
user_storage_options,
response.storage_options.as_ref(),
)
.await?;
let namespace_client = self
.server_side_query_enabled
.then(|| self.namespace.clone());
return listing_db
.open_table(OpenTableRequest {
name: request.name.clone(),
namespace: request.namespace.clone(),
index_cache_size: None,
lance_read_params: None,
location: Some(location),
namespace_client,
})
.await;
return Ok(Arc::new(native_table));
}
}
}
@@ -294,82 +226,37 @@ impl Database for LanceNamespaceDatabase {
message: "Table location is missing from create_empty_table response".to_string(),
})?;
let listing_db = self
.create_listing_database(
&location,
table_id.clone(),
user_storage_options,
create_empty_response.storage_options.as_ref(),
)
.await?;
let native_table = NativeTable::create_from_namespace(
self.namespace.clone(),
&location,
&request.name,
request.namespace.clone(),
request.data,
None, // write_store_wrapper not used for namespace connections
request.write_options.lance_write_params,
self.read_consistency_interval,
self.server_side_query_enabled,
self.session.clone(),
)
.await?;
let namespace_client = self
.server_side_query_enabled
.then(|| self.namespace.clone());
let create_request = DbCreateTableRequest {
name: request.name,
namespace: request.namespace,
data: request.data,
mode: request.mode,
write_options: request.write_options,
location: Some(location),
namespace_client,
};
listing_db.create_table(create_request).await
Ok(Arc::new(native_table))
}
async fn open_table(&self, request: OpenTableRequest) -> Result<Arc<dyn BaseTable>> {
// Extract user-provided storage options from request
let user_storage_options = request
.lance_read_params
.as_ref()
.and_then(|lrp| lrp.store_options.as_ref())
.and_then(|so| so.storage_options.as_ref());
let native_table = NativeTable::open_from_namespace(
self.namespace.clone(),
&request.name,
request.namespace.clone(),
None, // write_store_wrapper not used for namespace connections
request.lance_read_params,
self.read_consistency_interval,
self.server_side_query_enabled,
self.session.clone(),
)
.await?;
let mut table_id = request.namespace.clone();
table_id.push(request.name.clone());
let describe_request = DescribeTableRequest {
id: Some(table_id.clone()),
version: None,
};
let response = self
.namespace
.describe_table(describe_request)
.await
.map_err(|e| Error::Runtime {
message: format!("Failed to describe table: {}", e),
})?;
let location = response.location.ok_or_else(|| Error::Runtime {
message: "Table location is missing from namespace response".to_string(),
})?;
let listing_db = self
.create_listing_database(
&location,
table_id.clone(),
user_storage_options,
response.storage_options.as_ref(),
)
.await?;
let namespace_client = self
.server_side_query_enabled
.then(|| self.namespace.clone());
let open_request = OpenTableRequest {
name: request.name.clone(),
namespace: request.namespace.clone(),
index_cache_size: request.index_cache_size,
lance_read_params: request.lance_read_params,
location: Some(location),
namespace_client,
};
listing_db.open_table(open_request).await
Ok(Arc::new(native_table))
}
async fn clone_table(&self, _request: CloneTableRequest) -> Result<Arc<dyn BaseTable>> {

View File

@@ -29,7 +29,7 @@ use lance::dataset::{
use lance::dataset::{MergeInsertBuilder as LanceMergeInsertBuilder, WhenNotMatchedBySource};
use lance::index::vector::utils::infer_vector_dim;
use lance::index::vector::VectorIndexParams;
use lance::io::WrappingObjectStore;
use lance::io::{ObjectStoreParams, WrappingObjectStore};
use lance_datafusion::exec::{analyze_plan as lance_analyze_plan, execute_plan};
use lance_datafusion::utils::StreamingWriteSource;
use lance_index::scalar::{BuiltinIndexType, ScalarIndexParams};
@@ -40,6 +40,7 @@ use lance_index::vector::pq::PQBuildParams;
use lance_index::vector::sq::builder::SQBuildParams;
use lance_index::DatasetIndexExt;
use lance_index::IndexType;
use lance_io::object_store::LanceNamespaceStorageOptionsProvider;
use lance_namespace::models::{
QueryTableRequest as NsQueryTableRequest, QueryTableRequestFullTextQuery,
QueryTableRequestVector, StringFtsQuery,
@@ -1611,6 +1612,105 @@ impl NativeTable {
self
}
/// Opens an existing Table using a namespace client.
///
/// This method uses `DatasetBuilder::from_namespace` to open the table, which
/// automatically fetches the table location and storage options from the namespace.
/// This eliminates the need to pre-fetch and merge storage options before opening.
///
/// # Arguments
///
/// * `namespace_client` - The namespace client to use for fetching table metadata
/// * `name` - The table name
/// * `namespace` - The namespace path (e.g., vec!["parent", "child"])
/// * `write_store_wrapper` - Optional wrapper for the object store on write path
/// * `params` - Optional read parameters
/// * `read_consistency_interval` - Optional interval for read consistency
/// * `server_side_query_enabled` - Whether to enable server-side query execution.
/// When true, the namespace_client will be stored and queries will be executed
/// on the namespace server. When false, the namespace is only used for opening
/// the table, and queries are executed locally.
/// * `session` - Optional session for object stores and caching
///
/// # Returns
///
/// * A [NativeTable] object.
#[allow(clippy::too_many_arguments)]
pub async fn open_from_namespace(
namespace_client: Arc<dyn LanceNamespace>,
name: &str,
namespace: Vec<String>,
write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
params: Option<ReadParams>,
read_consistency_interval: Option<std::time::Duration>,
server_side_query_enabled: bool,
session: Option<Arc<lance::session::Session>>,
) -> Result<Self> {
let mut params = params.unwrap_or_default();
// Set the session in read params
if let Some(sess) = session {
params.session(sess);
}
// patch the params if we have a write store wrapper
let params = match write_store_wrapper.clone() {
Some(wrapper) => params.patch_with_store_wrapper(wrapper)?,
None => params,
};
// Build table_id from namespace + name
let mut table_id = namespace.clone();
table_id.push(name.to_string());
// Use DatasetBuilder::from_namespace which automatically fetches location
// and storage options from the namespace
let builder = DatasetBuilder::from_namespace(
namespace_client.clone(),
table_id,
false, // Don't ignore namespace storage options
)
.await
.map_err(|e| match e {
lance::Error::Namespace { source, .. } => Error::Runtime {
message: format!("Failed to get table info from namespace: {:?}", source),
},
source => Error::Lance { source },
})?;
let dataset = builder
.with_read_params(params)
.load()
.await
.map_err(|e| match e {
lance::Error::DatasetNotFound { .. } => Error::TableNotFound {
name: name.to_string(),
source: Box::new(e),
},
source => Error::Lance { source },
})?;
let uri = dataset.uri().to_string();
let dataset = DatasetConsistencyWrapper::new_latest(dataset, read_consistency_interval);
let id = Self::build_id(&namespace, name);
let stored_namespace_client = if server_side_query_enabled {
Some(namespace_client)
} else {
None
};
Ok(Self {
name: name.to_string(),
namespace,
id,
uri,
dataset,
read_consistency_interval,
namespace_client: stored_namespace_client,
})
}
fn get_table_name(uri: &str) -> Result<String> {
let path = Path::new(uri);
let name = path
@@ -1722,6 +1822,102 @@ impl NativeTable {
.await
}
/// Creates a new Table using a namespace client for storage options.
///
/// This method sets up a `StorageOptionsProvider` from the namespace client,
/// enabling automatic credential refresh for cloud storage. The namespace
/// is used for:
/// 1. Setting up storage options provider for credential vending
/// 2. Optionally enabling server-side query execution
///
/// # Arguments
///
/// * `namespace_client` - The namespace client to use for storage options
/// * `uri` - The URI to the table (obtained from create_empty_table response)
/// * `name` - The table name
/// * `namespace` - The namespace path (e.g., vec!["parent", "child"])
/// * `batches` - RecordBatch to be saved in the database
/// * `write_store_wrapper` - Optional wrapper for the object store on write path
/// * `params` - Optional write parameters
/// * `read_consistency_interval` - Optional interval for read consistency
/// * `server_side_query_enabled` - Whether to enable server-side query execution
///
/// # Returns
///
/// * A [NativeTable] object.
#[allow(clippy::too_many_arguments)]
pub async fn create_from_namespace(
namespace_client: Arc<dyn LanceNamespace>,
uri: &str,
name: &str,
namespace: Vec<String>,
batches: impl StreamingWriteSource,
write_store_wrapper: Option<Arc<dyn WrappingObjectStore>>,
params: Option<WriteParams>,
read_consistency_interval: Option<std::time::Duration>,
server_side_query_enabled: bool,
session: Option<Arc<lance::session::Session>>,
) -> Result<Self> {
// Build table_id from namespace + name for the storage options provider
let mut table_id = namespace.clone();
table_id.push(name.to_string());
// Set up storage options provider from namespace
let storage_options_provider = Arc::new(LanceNamespaceStorageOptionsProvider::new(
namespace_client.clone(),
table_id,
));
// Start with provided params or defaults
let mut params = params.unwrap_or_default();
// Set the session in write params
if let Some(sess) = session {
params.session = Some(sess);
}
// Ensure store_params exists and set the storage options provider
let store_params = params
.store_params
.get_or_insert_with(ObjectStoreParams::default);
store_params.storage_options_provider = Some(storage_options_provider);
// Patch the params if we have a write store wrapper
let params = match write_store_wrapper.clone() {
Some(wrapper) => params.patch_with_store_wrapper(wrapper)?,
None => params,
};
let insert_builder = InsertBuilder::new(uri).with_params(&params);
let dataset = insert_builder
.execute_stream(batches)
.await
.map_err(|e| match e {
lance::Error::DatasetAlreadyExists { .. } => Error::TableAlreadyExists {
name: name.to_string(),
},
source => Error::Lance { source },
})?;
let id = Self::build_id(&namespace, name);
let stored_namespace_client = if server_side_query_enabled {
Some(namespace_client)
} else {
None
};
Ok(Self {
name: name.to_string(),
namespace,
id,
uri: uri.to_string(),
dataset: DatasetConsistencyWrapper::new_latest(dataset, read_consistency_interval),
read_consistency_interval,
namespace_client: stored_namespace_client,
})
}
async fn optimize_indices(&self, options: &OptimizeOptions) -> Result<()> {
info!("LanceDB: optimizing indices: {:?}", options);
self.dataset