feat: support namespace credentials vending (#2778)

Based on https://github.com/lancedb/lance/pull/4984

1. Bump to 1.0.0-beta.2
2. Use DirectoryNamespace in lance to perform all testing in python and
rust for much better coverage
3. Refactor `ListingDatabase` to be able to accept a location and a
namespace. We have to leverage the listing database (the local
LanceDB connection) when using a namespace: the namespace only resolves
the location and storage options, and we don't want to bind it all the
way to Rust, since users will plug in the namespace from the Python side.
Thus `ListingDatabase` needs to be able to accept a location and a
namespace that are created from the namespace connection.
4. For credentials vending, we also pass the storage options provider all
the way to the Rust layer, and the Rust layer calls back into the Python
function to fetch the next set of storage options. This is exactly what
we did in pylance.
This commit is contained in:
Jack Ye
2025-11-17 00:42:24 -08:00
committed by GitHub
parent c0cc58c156
commit e47f552a86
27 changed files with 1660 additions and 636 deletions

View File

@@ -10,11 +10,14 @@ use lancedb::{
};
use pyo3::{
exceptions::{PyRuntimeError, PyValueError},
pyclass, pyfunction, pymethods, Bound, FromPyObject, Py, PyAny, PyRef, PyResult, Python,
pyclass, pyfunction, pymethods, Bound, FromPyObject, Py, PyAny, PyObject, PyRef, PyResult,
Python,
};
use pyo3_async_runtimes::tokio::future_into_py;
use crate::{error::PythonErrorExt, table::Table};
use crate::{
error::PythonErrorExt, storage_options::py_object_to_storage_options_provider, table::Table,
};
#[pyclass]
pub struct Connection {
@@ -101,7 +104,8 @@ impl Connection {
future_into_py(self_.py(), async move { op.execute().await.infer_error() })
}
#[pyo3(signature = (name, mode, data, namespace=vec![], storage_options=None))]
#[allow(clippy::too_many_arguments)]
#[pyo3(signature = (name, mode, data, namespace=vec![], storage_options=None, storage_options_provider=None, location=None))]
pub fn create_table<'a>(
self_: PyRef<'a, Self>,
name: String,
@@ -109,6 +113,8 @@ impl Connection {
data: Bound<'_, PyAny>,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<PyObject>,
location: Option<String>,
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
@@ -122,6 +128,13 @@ impl Connection {
if let Some(storage_options) = storage_options {
builder = builder.storage_options(storage_options);
}
if let Some(provider_obj) = storage_options_provider {
let provider = py_object_to_storage_options_provider(provider_obj)?;
builder = builder.storage_options_provider(provider);
}
if let Some(location) = location {
builder = builder.location(location);
}
future_into_py(self_.py(), async move {
let table = builder.execute().await.infer_error()?;
@@ -129,7 +142,8 @@ impl Connection {
})
}
#[pyo3(signature = (name, mode, schema, namespace=vec![], storage_options=None))]
#[allow(clippy::too_many_arguments)]
#[pyo3(signature = (name, mode, schema, namespace=vec![], storage_options=None, storage_options_provider=None, location=None))]
pub fn create_empty_table<'a>(
self_: PyRef<'a, Self>,
name: String,
@@ -137,6 +151,8 @@ impl Connection {
schema: Bound<'_, PyAny>,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<PyObject>,
location: Option<String>,
) -> PyResult<Bound<'a, PyAny>> {
let inner = self_.get_inner()?.clone();
@@ -150,6 +166,13 @@ impl Connection {
if let Some(storage_options) = storage_options {
builder = builder.storage_options(storage_options);
}
if let Some(provider_obj) = storage_options_provider {
let provider = py_object_to_storage_options_provider(provider_obj)?;
builder = builder.storage_options_provider(provider);
}
if let Some(location) = location {
builder = builder.location(location);
}
future_into_py(self_.py(), async move {
let table = builder.execute().await.infer_error()?;
@@ -157,13 +180,15 @@ impl Connection {
})
}
#[pyo3(signature = (name, namespace=vec![], storage_options = None, index_cache_size = None))]
#[pyo3(signature = (name, namespace=vec![], storage_options = None, storage_options_provider=None, index_cache_size = None, location=None))]
pub fn open_table(
self_: PyRef<'_, Self>,
name: String,
namespace: Vec<String>,
storage_options: Option<HashMap<String, String>>,
storage_options_provider: Option<PyObject>,
index_cache_size: Option<u32>,
location: Option<String>,
) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.get_inner()?.clone();
@@ -172,9 +197,16 @@ impl Connection {
if let Some(storage_options) = storage_options {
builder = builder.storage_options(storage_options);
}
if let Some(provider_obj) = storage_options_provider {
let provider = py_object_to_storage_options_provider(provider_obj)?;
builder = builder.storage_options_provider(provider);
}
if let Some(index_cache_size) = index_cache_size {
builder = builder.index_cache_size(index_cache_size);
}
if let Some(location) = location {
builder = builder.location(location);
}
future_into_py(self_.py(), async move {
let table = builder.execute().await.infer_error()?;

View File

@@ -26,6 +26,7 @@ pub mod index;
pub mod permutation;
pub mod query;
pub mod session;
pub mod storage_options;
pub mod table;
pub mod util;

View File

@@ -0,0 +1,150 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
//! PyO3 bindings for StorageOptionsProvider
//!
//! This module provides the bridge between Python StorageOptionsProvider objects
//! and Rust's StorageOptionsProvider trait, enabling automatic credential refresh.
use std::collections::HashMap;
use std::sync::Arc;
use async_trait::async_trait;
use lance_io::object_store::StorageOptionsProvider;
use pyo3::prelude::*;
use pyo3::types::PyDict;
/// Internal wrapper around a Python object implementing StorageOptionsProvider.
///
/// The wrapper owns a reference to the Python object. Instances are built
/// through `PyStorageOptionsProvider::new`, which rejects objects that do not
/// expose a `fetch_storage_options` attribute.
pub struct PyStorageOptionsProvider {
/// The Python object implementing fetch_storage_options()
inner: PyObject,
}
impl Clone for PyStorageOptionsProvider {
    /// Produce a second handle to the same Python object.
    ///
    /// The GIL is acquired for the duration of the reference duplication,
    /// because `clone_ref` needs a `Python` token.
    fn clone(&self) -> Self {
        let inner = Python::with_gil(|py| self.inner.clone_ref(py));
        Self { inner }
    }
}
impl PyStorageOptionsProvider {
    /// Wrap `obj` after checking that it looks like a storage options provider.
    ///
    /// # Errors
    ///
    /// Returns a Python `TypeError` when `obj` has no `fetch_storage_options`
    /// attribute, and propagates any exception raised by the attribute lookup
    /// itself.
    pub fn new(obj: PyObject) -> PyResult<Self> {
        // Only the attribute lookup needs the GIL; the decision is made afterwards.
        let has_fetch = Python::with_gil(|py| obj.bind(py).hasattr("fetch_storage_options"))?;
        if has_fetch {
            Ok(Self { inner: obj })
        } else {
            Err(pyo3::exceptions::PyTypeError::new_err(
                "StorageOptionsProvider must implement fetch_storage_options() method",
            ))
        }
    }
}
/// Wrapper that implements the Rust StorageOptionsProvider trait.
///
/// It holds a `PyStorageOptionsProvider` and forwards the trait's
/// `fetch_storage_options` and `provider_id` calls to the Python object
/// stored inside it.
pub struct PyStorageOptionsProviderWrapper {
// Handle to the Python-side provider that the trait methods call into.
py_provider: PyStorageOptionsProvider,
}
impl PyStorageOptionsProviderWrapper {
pub fn new(py_provider: PyStorageOptionsProvider) -> Self {
Self { py_provider }
}
}
#[async_trait]
impl StorageOptionsProvider for PyStorageOptionsProviderWrapper {
async fn fetch_storage_options(&self) -> lance_core::Result<Option<HashMap<String, String>>> {
// Call Python method from async context using spawn_blocking
let py_provider = self.py_provider.clone();
tokio::task::spawn_blocking(move || {
Python::with_gil(|py| {
// Call the Python fetch_storage_options method
let result = py_provider
.inner
.bind(py)
.call_method0("fetch_storage_options")
.map_err(|e| lance_core::Error::IO {
source: Box::new(std::io::Error::other(format!(
"Failed to call fetch_storage_options: {}",
e
))),
location: snafu::location!(),
})?;
// If result is None, return None
if result.is_none() {
return Ok(None);
}
// Extract the result dict - should be a flat Map<String, String>
let result_dict = result.downcast::<PyDict>().map_err(|_| {
lance_core::Error::InvalidInput {
source: "fetch_storage_options() must return None or a dict of string key-value pairs".into(),
location: snafu::location!(),
}
})?;
// Convert all entries to HashMap<String, String>
let mut storage_options = HashMap::new();
for (key, value) in result_dict.iter() {
let key_str: String = key.extract().map_err(|e| {
lance_core::Error::InvalidInput {
source: format!("Storage option key must be a string: {}", e).into(),
location: snafu::location!(),
}
})?;
let value_str: String = value.extract().map_err(|e| {
lance_core::Error::InvalidInput {
source: format!("Storage option value must be a string: {}", e).into(),
location: snafu::location!(),
}
})?;
storage_options.insert(key_str, value_str);
}
Ok(Some(storage_options))
})
})
.await
.map_err(|e| lance_core::Error::IO {
source: Box::new(std::io::Error::other(format!(
"Task join error: {}",
e
))),
location: snafu::location!(),
})?
}
fn provider_id(&self) -> String {
Python::with_gil(|py| {
// Call provider_id() method on the Python object
let obj = self.py_provider.inner.bind(py);
obj.call_method0("provider_id")
.and_then(|result| result.extract::<String>())
.unwrap_or_else(|e| {
// If provider_id() fails, construct a fallback ID
format!("PyStorageOptionsProvider(error: {})", e)
})
})
}
}
impl std::fmt::Debug for PyStorageOptionsProviderWrapper {
    /// Format as `PyStorageOptionsProviderWrapper(<provider id>)`.
    ///
    /// The id comes from `provider_id()`, so formatting calls into Python.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let id = self.provider_id();
        f.write_fmt(format_args!("PyStorageOptionsProviderWrapper({})", id))
    }
}
/// Turn a Python provider object into an `Arc<dyn StorageOptionsProvider>`.
///
/// The object is validated by `PyStorageOptionsProvider::new` and then wrapped
/// in a `PyStorageOptionsProviderWrapper`, which implements the Rust trait.
///
/// # Errors
///
/// Returns whatever error `PyStorageOptionsProvider::new` reports for `py_obj`.
pub fn py_object_to_storage_options_provider(
    py_obj: PyObject,
) -> PyResult<Arc<dyn StorageOptionsProvider>> {
    PyStorageOptionsProvider::new(py_obj)
        .map(PyStorageOptionsProviderWrapper::new)
        .map(|wrapper| Arc::new(wrapper) as Arc<dyn StorageOptionsProvider>)
}