refactor!: consolidate namespace related naming and enterprise integration (#3205)

1. Refactored every client (Rust core, Python, Node/TypeScript) so
“namespace” usage is explicit: code now keeps namespace paths
(namespace_path) separate from namespace clients (namespace_client).
Connections propagate the client, table creation routes through it, and
managed versioning defaults are resolved from namespace metadata. Python
gained LanceNamespaceDBConnection/async counterparts, and the
namespace-focused tests were rewritten to match the clarified API
surface.
2. Synchronized the workspace with Lance 5.0.0-beta.3 (see
https://github.com/lance-format/lance/pull/6186 for the upstream
namespace refactor), updating Cargo/uv lockfiles and ensuring all
bindings align with the new namespace semantics.
3. Added a namespace-backed code path to lancedb.connect() via new
keyword arguments (namespace_client_impl, namespace_client_properties,
plus the existing pushdown-ops flag). When those kwargs are supplied,
connect() delegates to connect_namespace, so users can opt into
namespace clients without changing APIs. (The async helper will gain
parity in a later change.)
This commit is contained in:
Jack Ye
2026-04-03 00:09:03 -07:00
committed by GitHub
parent 3ba46135a5
commit e26b22bcca
33 changed files with 2022 additions and 1609 deletions

View File

@@ -17,8 +17,9 @@ use pyo3::{
use pyo3_async_runtimes::tokio::future_into_py;
use crate::{
error::PythonErrorExt, namespace::extract_namespace_arc,
storage_options::py_object_to_storage_options_provider, table::Table,
error::PythonErrorExt,
namespace::{create_namespace_storage_options_provider, extract_namespace_arc},
table::Table,
};
#[pyclass]
@@ -87,16 +88,16 @@ impl Connection {
})
}
#[pyo3(signature = (namespace=vec![], start_after=None, limit=None))]
#[pyo3(signature = (namespace_path=None, start_after=None, limit=None))]
pub fn table_names(
self_: PyRef<'_, Self>,
namespace: Vec<String>,
namespace_path: Option<Vec<String>>,
start_after: Option<String>,
limit: Option<u32>,
) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.get_inner()?.clone();
let mut op = inner.table_names();
op = op.namespace(namespace);
op = op.namespace(namespace_path.unwrap_or_default());
if let Some(start_after) = start_after {
op = op.start_after(start_after);
}
@@ -107,34 +108,43 @@ impl Connection {
}
#[allow(clippy::too_many_arguments)]
#[pyo3(signature = (name, mode, data, namespace=vec![], storage_options=None, storage_options_provider=None, location=None))]
#[pyo3(signature = (name, mode, data, namespace_path=None, storage_options=None, location=None, namespace_client=None))]
pub fn create_table<'a>(
self_: PyRef<'a, Self>,
name: String,
mode: &str,
data: Bound<'_, PyAny>,
namespace: Vec<String>,
namespace_path: Option<Vec<String>>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<Py<PyAny>>,
location: Option<String>,
namespace_client: Option<Py<PyAny>>,
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
let py = self_.py();
let mode = Self::parse_create_mode_str(mode)?;
let batches: Box<dyn arrow::array::RecordBatchReader + Send> =
Box::new(ArrowArrayStreamReader::from_pyarrow_bound(&data)?);
let mut builder = inner.create_table(name, batches).mode(mode);
let ns_path = namespace_path.clone().unwrap_or_default();
let mut builder = inner.create_table(name.clone(), batches).mode(mode);
builder = builder.namespace(namespace);
builder = builder.namespace(ns_path.clone());
if let Some(storage_options) = storage_options {
builder = builder.storage_options(storage_options);
}
if let Some(provider_obj) = storage_options_provider {
let provider = py_object_to_storage_options_provider(provider_obj)?;
// Auto-create storage options provider from namespace_client
if let Some(ns_obj) = namespace_client {
let ns_client = extract_namespace_arc(py, ns_obj)?;
// Create table_id by combining namespace_path with table name
let mut table_id = ns_path;
table_id.push(name);
let provider = create_namespace_storage_options_provider(ns_client, table_id);
builder = builder.storage_options_provider(provider);
}
if let Some(location) = location {
builder = builder.location(location);
}
@@ -146,33 +156,44 @@ impl Connection {
}
#[allow(clippy::too_many_arguments)]
#[pyo3(signature = (name, mode, schema, namespace=vec![], storage_options=None, storage_options_provider=None, location=None))]
#[pyo3(signature = (name, mode, schema, namespace_path=None, storage_options=None, location=None, namespace_client=None))]
pub fn create_empty_table<'a>(
self_: PyRef<'a, Self>,
name: String,
mode: &str,
schema: Bound<'_, PyAny>,
namespace: Vec<String>,
namespace_path: Option<Vec<String>>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<Py<PyAny>>,
location: Option<String>,
namespace_client: Option<Py<PyAny>>,
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
let py = self_.py();
let mode = Self::parse_create_mode_str(mode)?;
let schema = Schema::from_pyarrow_bound(&schema)?;
let mut builder = inner.create_empty_table(name, Arc::new(schema)).mode(mode);
let ns_path = namespace_path.clone().unwrap_or_default();
let mut builder = inner
.create_empty_table(name.clone(), Arc::new(schema))
.mode(mode);
builder = builder.namespace(namespace);
builder = builder.namespace(ns_path.clone());
if let Some(storage_options) = storage_options {
builder = builder.storage_options(storage_options);
}
if let Some(provider_obj) = storage_options_provider {
let provider = py_object_to_storage_options_provider(provider_obj)?;
// Auto-create storage options provider from namespace_client
if let Some(ns_obj) = namespace_client {
let ns_client = extract_namespace_arc(py, ns_obj)?;
// Create table_id by combining namespace_path with table name
let mut table_id = ns_path;
table_id.push(name);
let provider = create_namespace_storage_options_provider(ns_client, table_id);
builder = builder.storage_options_provider(provider);
}
if let Some(location) = location {
builder = builder.location(location);
}
@@ -184,45 +205,44 @@ impl Connection {
}
#[allow(clippy::too_many_arguments)]
#[pyo3(signature = (name, namespace=vec![], storage_options = None, storage_options_provider=None, index_cache_size = None, location=None, namespace_client=None, managed_versioning=None))]
#[pyo3(signature = (name, namespace_path=None, storage_options=None, index_cache_size=None, location=None, namespace_client=None, managed_versioning=None))]
pub fn open_table(
self_: PyRef<'_, Self>,
name: String,
namespace: Vec<String>,
namespace_path: Option<Vec<String>>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<Py<PyAny>>,
index_cache_size: Option<u32>,
location: Option<String>,
namespace_client: Option<Py<PyAny>>,
managed_versioning: Option<bool>,
) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.get_inner()?.clone();
let py = self_.py();
let mut builder = inner.open_table(name);
builder = builder.namespace(namespace.clone());
let ns_path = namespace_path.clone().unwrap_or_default();
let mut builder = inner.open_table(name.clone());
builder = builder.namespace(ns_path.clone());
if let Some(storage_options) = storage_options {
builder = builder.storage_options(storage_options);
}
if let Some(provider_obj) = storage_options_provider {
let provider = py_object_to_storage_options_provider(provider_obj)?;
// Auto-create storage options provider from namespace_client
if let Some(ns_obj) = namespace_client {
let ns_client = extract_namespace_arc(py, ns_obj)?;
// Create table_id by combining namespace_path with table name
let mut table_id = ns_path;
table_id.push(name);
let provider = create_namespace_storage_options_provider(ns_client.clone(), table_id);
builder = builder.storage_options_provider(provider);
builder = builder.namespace_client(ns_client);
}
if let Some(index_cache_size) = index_cache_size {
builder = builder.index_cache_size(index_cache_size);
}
if let Some(location) = location {
builder = builder.location(location);
}
// Extract namespace client from Python object if provided
let ns_client = if let Some(ns_obj) = namespace_client {
let py = self_.py();
Some(extract_namespace_arc(py, ns_obj)?)
} else {
None
};
if let Some(ns_client) = ns_client {
builder = builder.namespace_client(ns_client);
}
// Pass managed_versioning if provided to avoid redundant describe_table call
if let Some(enabled) = managed_versioning {
builder = builder.managed_versioning(enabled);
@@ -234,12 +254,12 @@ impl Connection {
})
}
#[pyo3(signature = (target_table_name, source_uri, target_namespace=vec![], source_version=None, source_tag=None, is_shallow=true))]
#[pyo3(signature = (target_table_name, source_uri, target_namespace_path=None, source_version=None, source_tag=None, is_shallow=true))]
pub fn clone_table(
self_: PyRef<'_, Self>,
target_table_name: String,
source_uri: String,
target_namespace: Vec<String>,
target_namespace_path: Option<Vec<String>>,
source_version: Option<u64>,
source_tag: Option<String>,
is_shallow: bool,
@@ -247,7 +267,7 @@ impl Connection {
let inner = self_.get_inner()?.clone();
let mut builder = inner.clone_table(target_table_name, source_uri);
builder = builder.target_namespace(target_namespace);
builder = builder.target_namespace(target_namespace_path.unwrap_or_default());
if let Some(version) = source_version {
builder = builder.source_version(version);
}
@@ -262,52 +282,56 @@ impl Connection {
})
}
// Rendered diff for `rename_table`: removed and added lines appear back to back.
// The `cur_namespace` / `new_namespace` lines are the pre-change versions; the
// `cur_namespace_path` / `new_namespace_path` lines are their replacements.
// In the new version both paths are optional on the Python side (default `None`);
// a missing path is turned into an empty `Vec` before the async rename borrows it.
#[pyo3(signature = (cur_name, new_name, cur_namespace=vec![], new_namespace=vec![]))]
#[pyo3(signature = (cur_name, new_name, cur_namespace_path=None, new_namespace_path=None))]
pub fn rename_table(
self_: PyRef<'_, Self>,
cur_name: String,
new_name: String,
cur_namespace: Vec<String>,
new_namespace: Vec<String>,
cur_namespace_path: Option<Vec<String>>,
new_namespace_path: Option<Vec<String>>,
) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.get_inner()?.clone();
// `None` collapses to an empty path here, outside the future, so the
// `async move` block owns plain `Vec<String>` values.
let cur_ns_path = cur_namespace_path.unwrap_or_default();
let new_ns_path = new_namespace_path.unwrap_or_default();
future_into_py(self_.py(), async move {
inner
.rename_table(cur_name, new_name, &cur_namespace, &new_namespace)
.rename_table(cur_name, new_name, &cur_ns_path, &new_ns_path)
.await
.infer_error()
})
}
// Rendered diff for `drop_table`: the `namespace` lines are the removed versions and
// the `namespace_path` / `ns_path` lines are the ones that replace them.
// In the new version the path is optional (default `None`) and an omitted path is
// treated as an empty `Vec`.
#[pyo3(signature = (name, namespace=vec![]))]
#[pyo3(signature = (name, namespace_path=None))]
pub fn drop_table(
self_: PyRef<'_, Self>,
name: String,
namespace: Vec<String>,
namespace_path: Option<Vec<String>>,
) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.get_inner()?.clone();
// Resolve the optional path before building the future so the future owns it.
let ns_path = namespace_path.unwrap_or_default();
future_into_py(self_.py(), async move {
inner.drop_table(name, &namespace).await.infer_error()
inner.drop_table(name, &ns_path).await.infer_error()
})
}
// Rendered diff for `drop_all_tables`: the `namespace` lines are the removed versions
// and the `namespace_path` / `ns_path` lines are the ones that replace them.
// In the new version the path is optional (default `None`) and an omitted path is
// treated as an empty `Vec`.
#[pyo3(signature = (namespace=vec![],))]
#[pyo3(signature = (namespace_path=None,))]
pub fn drop_all_tables(
self_: PyRef<'_, Self>,
namespace: Vec<String>,
namespace_path: Option<Vec<String>>,
) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.get_inner()?.clone();
// Resolve the optional path before building the future so the future owns it.
let ns_path = namespace_path.unwrap_or_default();
future_into_py(self_.py(), async move {
inner.drop_all_tables(&namespace).await.infer_error()
inner.drop_all_tables(&ns_path).await.infer_error()
})
}
// Namespace management methods
#[pyo3(signature = (namespace=vec![], page_token=None, limit=None))]
#[pyo3(signature = (namespace_path=None, page_token=None, limit=None))]
pub fn list_namespaces(
self_: PyRef<'_, Self>,
namespace: Vec<String>,
namespace_path: Option<Vec<String>>,
page_token: Option<String>,
limit: Option<u32>,
) -> PyResult<Bound<'_, PyAny>> {
@@ -316,11 +340,7 @@ impl Connection {
future_into_py(py, async move {
use lance_namespace::models::ListNamespacesRequest;
let request = ListNamespacesRequest {
id: if namespace.is_empty() {
None
} else {
Some(namespace)
},
id: namespace_path,
page_token,
limit: limit.map(|l| l as i32),
..Default::default()
@@ -335,10 +355,10 @@ impl Connection {
})
}
#[pyo3(signature = (namespace, mode=None, properties=None))]
#[pyo3(signature = (namespace_path, mode=None, properties=None))]
pub fn create_namespace(
self_: PyRef<'_, Self>,
namespace: Vec<String>,
namespace_path: Vec<String>,
mode: Option<String>,
properties: Option<std::collections::HashMap<String, String>>,
) -> PyResult<Bound<'_, PyAny>> {
@@ -354,11 +374,7 @@ impl Connection {
_ => None,
});
let request = CreateNamespaceRequest {
id: if namespace.is_empty() {
None
} else {
Some(namespace)
},
id: Some(namespace_path),
mode: mode_str,
properties,
..Default::default()
@@ -372,10 +388,10 @@ impl Connection {
})
}
#[pyo3(signature = (namespace, mode=None, behavior=None))]
#[pyo3(signature = (namespace_path, mode=None, behavior=None))]
pub fn drop_namespace(
self_: PyRef<'_, Self>,
namespace: Vec<String>,
namespace_path: Vec<String>,
mode: Option<String>,
behavior: Option<String>,
) -> PyResult<Bound<'_, PyAny>> {
@@ -395,11 +411,7 @@ impl Connection {
_ => None,
});
let request = DropNamespaceRequest {
id: if namespace.is_empty() {
None
} else {
Some(namespace)
},
id: Some(namespace_path),
mode: mode_str,
behavior: behavior_str,
..Default::default()
@@ -414,21 +426,17 @@ impl Connection {
})
}
#[pyo3(signature = (namespace,))]
#[pyo3(signature = (namespace_path,))]
pub fn describe_namespace(
self_: PyRef<'_, Self>,
namespace: Vec<String>,
namespace_path: Vec<String>,
) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.get_inner()?.clone();
let py = self_.py();
future_into_py(py, async move {
use lance_namespace::models::DescribeNamespaceRequest;
let request = DescribeNamespaceRequest {
id: if namespace.is_empty() {
None
} else {
Some(namespace)
},
id: Some(namespace_path),
..Default::default()
};
let response = inner.describe_namespace(request).await.infer_error()?;
@@ -440,10 +448,10 @@ impl Connection {
})
}
#[pyo3(signature = (namespace=vec![], page_token=None, limit=None))]
#[pyo3(signature = (namespace_path=None, page_token=None, limit=None))]
pub fn list_tables(
self_: PyRef<'_, Self>,
namespace: Vec<String>,
namespace_path: Option<Vec<String>>,
page_token: Option<String>,
limit: Option<u32>,
) -> PyResult<Bound<'_, PyAny>> {
@@ -452,11 +460,7 @@ impl Connection {
future_into_py(py, async move {
use lance_namespace::models::ListTablesRequest;
let request = ListTablesRequest {
id: if namespace.is_empty() {
None
} else {
Some(namespace)
},
id: namespace_path,
page_token,
limit: limit.map(|l| l as i32),
..Default::default()

View File

@@ -29,7 +29,6 @@ pub mod namespace;
pub mod permutation;
pub mod query;
pub mod session;
pub mod storage_options;
pub mod table;
pub mod util;

View File

@@ -8,6 +8,7 @@ use std::sync::Arc;
use async_trait::async_trait;
use bytes::Bytes;
use lance_io::object_store::{LanceNamespaceStorageOptionsProvider, StorageOptionsProvider};
use lance_namespace::LanceNamespace as LanceNamespaceTrait;
use lance_namespace::models::*;
use pyo3::prelude::*;
@@ -694,3 +695,21 @@ pub fn extract_namespace_arc(
let ns_ref = ns.bind(py);
PyLanceNamespace::create_arc(py, ns_ref)
}
/// Build a storage options provider backed by a namespace client.
///
/// The returned provider is a [`LanceNamespaceStorageOptionsProvider`] constructed
/// from the given client and table identifier, handed back as a shared
/// `dyn StorageOptionsProvider` trait object.
///
/// # Arguments
/// * `namespace_client` - The namespace client the provider will consult
/// * `table_id` - Full table identifier (namespace path segments followed by the table name)
pub fn create_namespace_storage_options_provider(
    namespace_client: Arc<dyn LanceNamespaceTrait>,
    table_id: Vec<String>,
) -> Arc<dyn StorageOptionsProvider> {
    let provider = LanceNamespaceStorageOptionsProvider::new(namespace_client, table_id);
    Arc::new(provider)
}

View File

@@ -1,137 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
//! PyO3 bindings for StorageOptionsProvider
//!
//! This module provides the bridge between Python StorageOptionsProvider objects
//! and Rust's StorageOptionsProvider trait, enabling automatic credential refresh.
use std::collections::HashMap;
use std::sync::Arc;
use async_trait::async_trait;
use lance_io::object_store::StorageOptionsProvider;
use pyo3::prelude::*;
use pyo3::types::PyDict;
/// Holds a Python object that is expected to expose `fetch_storage_options()`.
pub struct PyStorageOptionsProvider {
    /// Handle to the wrapped Python object.
    inner: Py<PyAny>,
}

impl Clone for PyStorageOptionsProvider {
    /// Duplicates the Python handle; the interpreter is attached for the
    /// duration of the reference-count bump.
    fn clone(&self) -> Self {
        let inner = Python::attach(|py| self.inner.clone_ref(py));
        Self { inner }
    }
}

impl PyStorageOptionsProvider {
    /// Wraps `obj` after checking that it has a `fetch_storage_options` attribute.
    ///
    /// # Errors
    /// Returns a `PyTypeError` when the attribute is missing, or propagates the
    /// error raised by the attribute lookup itself.
    pub fn new(obj: Py<PyAny>) -> PyResult<Self> {
        Python::attach(|py| {
            let has_fetch = obj.bind(py).hasattr("fetch_storage_options")?;
            if has_fetch {
                Ok(Self { inner: obj })
            } else {
                Err(pyo3::exceptions::PyTypeError::new_err(
                    "StorageOptionsProvider must implement fetch_storage_options() method",
                ))
            }
        })
    }
}
/// Adapter type that carries a [`PyStorageOptionsProvider`] so the Rust
/// `StorageOptionsProvider` trait can be implemented on top of it.
pub struct PyStorageOptionsProviderWrapper {
    /// The wrapped Python-side provider.
    py_provider: PyStorageOptionsProvider,
}

impl PyStorageOptionsProviderWrapper {
    /// Takes ownership of `py_provider` and wraps it.
    pub fn new(py_provider: PyStorageOptionsProvider) -> Self {
        PyStorageOptionsProviderWrapper { py_provider }
    }
}
#[async_trait]
impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
async fn fetch_storage_options(&self) -> lance_core::Result<Option<HashMap<String, String>>> {
// Call Python method from async context using spawn_blocking
let py_provider = self.py_provider.clone();
tokio::task::spawn_blocking(move || {
Python::attach(|py| {
// Call the Python fetch_storage_options method
let result = py_provider
.inner
.bind(py)
.call_method0("fetch_storage_options")
.map_err(|e| lance_core::Error::io_source(Box::new(std::io::Error::other(format!(
"Failed to call fetch_storage_options: {}",
e
)))))?;
// If result is None, return None
if result.is_none() {
return Ok(None);
}
// Extract the result dict - should be a flat Map<String, String>
let result_dict = result.downcast::<PyDict>().map_err(|_| {
lance_core::Error::invalid_input(
"fetch_storage_options() must return a dict of string key-value pairs or None",
)
})?;
// Convert all entries to HashMap<String, String>
let mut storage_options = HashMap::new();
for (key, value) in result_dict.iter() {
let key_str: String = key.extract().map_err(|e| {
lance_core::Error::invalid_input(format!("Storage option key must be a string: {}", e))
})?;
let value_str: String = value.extract().map_err(|e| {
lance_core::Error::invalid_input(format!("Storage option value must be a string: {}", e))
})?;
storage_options.insert(key_str, value_str);
}
Ok(Some(storage_options))
})
})
.await
.map_err(|e| lance_core::Error::io_source(Box::new(std::io::Error::other(format!(
"Task join error: {}",
e
)))))?
}
fn provider_id(&self) -> String {
Python::attach(|py| {
// Call provider_id() method on the Python object
let obj = self.py_provider.inner.bind(py);
obj.call_method0("provider_id")
.and_then(|result| result.extract::<String>())
.unwrap_or_else(|e| {
// If provider_id() fails, construct a fallback ID
format!("PyStorageOptionsProvider(error: {})", e)
})
})
}
}
impl std::fmt::Debug for PyStorageOptionsProviderWrapper {
    /// Formats as `PyStorageOptionsProviderWrapper(<provider_id>)`; the identifier
    /// comes from `provider_id()`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let id = self.provider_id();
        write!(f, "PyStorageOptionsProviderWrapper({})", id)
    }
}
/// Turn a Python object into a shared `dyn StorageOptionsProvider`.
///
/// The object is validated by [`PyStorageOptionsProvider::new`] and then wrapped
/// in a [`PyStorageOptionsProviderWrapper`].
///
/// # Errors
/// Propagates whatever error `PyStorageOptionsProvider::new` returns.
pub fn py_object_to_storage_options_provider(
    py_obj: Py<PyAny>,
) -> PyResult<Arc<dyn StorageOptionsProvider>> {
    PyStorageOptionsProvider::new(py_obj).map(|validated| {
        Arc::new(PyStorageOptionsProviderWrapper::new(validated)) as Arc<dyn StorageOptionsProvider>
    })
}