mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-13 18:10:41 +00:00
feat: add manifest-enabled directory namespace mode (#3332)
Adds manifest_enabled for local/native connections so directory namespace manifests can be the source of truth, including migration from directory listing and Azure credential vending feature wiring. Also exposes the option through Rust, Python, and Node bindings with focused validation.
This commit is contained in:
@@ -73,6 +73,7 @@ def connect(
|
||||
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
session: Optional[Session] = None,
|
||||
manifest_enabled: bool = False,
|
||||
namespace_client_impl: Optional[str] = None,
|
||||
namespace_client_properties: Optional[Dict[str, str]] = None,
|
||||
namespace_client_pushdown_operations: Optional[List[str]] = None,
|
||||
@@ -111,6 +112,10 @@ def connect(
|
||||
storage_options: dict, optional
|
||||
Additional options for the storage backend. See available options at
|
||||
<https://docs.lancedb.com/storage/>
|
||||
manifest_enabled : bool, default False
|
||||
When true for local/native connections, use directory namespace
|
||||
manifests as the source of truth for table metadata. Existing
|
||||
directory-listed root tables are migrated into the manifest on access.
|
||||
session: Session, optional
|
||||
(For LanceDB OSS only)
|
||||
A session to use for this connection. Sessions allow you to configure
|
||||
@@ -158,11 +163,11 @@ def connect(
|
||||
conn : DBConnection
|
||||
A connection to a LanceDB database.
|
||||
"""
|
||||
if namespace_client_impl is not None or namespace_client_properties is not None:
|
||||
if namespace_client_impl is None or namespace_client_properties is None:
|
||||
if namespace_client_impl is not None:
|
||||
if namespace_client_properties is None:
|
||||
raise ValueError(
|
||||
"Both namespace_client_impl and "
|
||||
"namespace_client_properties must be provided"
|
||||
"namespace_client_properties must be provided when "
|
||||
"namespace_client_impl is set"
|
||||
)
|
||||
if kwargs:
|
||||
raise ValueError(f"Unknown keyword arguments: {kwargs}")
|
||||
@@ -175,6 +180,12 @@ def connect(
|
||||
namespace_client_pushdown_operations=namespace_client_pushdown_operations,
|
||||
)
|
||||
|
||||
if namespace_client_properties is not None and not manifest_enabled:
|
||||
raise ValueError(
|
||||
"namespace_client_impl must be provided when using "
|
||||
"namespace_client_properties unless manifest_enabled=True"
|
||||
)
|
||||
|
||||
if namespace_client_pushdown_operations is not None:
|
||||
raise ValueError(
|
||||
"namespace_client_pushdown_operations is only valid when "
|
||||
@@ -212,6 +223,8 @@ def connect(
|
||||
read_consistency_interval=read_consistency_interval,
|
||||
storage_options=storage_options,
|
||||
session=session,
|
||||
manifest_enabled=manifest_enabled,
|
||||
namespace_client_properties=namespace_client_properties,
|
||||
)
|
||||
|
||||
|
||||
@@ -289,6 +302,8 @@ def deserialize_conn(
|
||||
parsed["uri"],
|
||||
read_consistency_interval=rci,
|
||||
storage_options=storage_options,
|
||||
manifest_enabled=parsed.get("manifest_enabled", False),
|
||||
namespace_client_properties=parsed.get("namespace_client_properties"),
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unknown connection_type: {connection_type}")
|
||||
@@ -304,6 +319,8 @@ async def connect_async(
|
||||
client_config: Optional[Union[ClientConfig, Dict[str, Any]]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
session: Optional[Session] = None,
|
||||
manifest_enabled: bool = False,
|
||||
namespace_client_properties: Optional[Dict[str, str]] = None,
|
||||
) -> AsyncConnection:
|
||||
"""Connect to a LanceDB database.
|
||||
|
||||
@@ -343,6 +360,13 @@ async def connect_async(
|
||||
cache sizes for index and metadata caches, which can significantly
|
||||
impact memory use and performance. They can also be re-used across
|
||||
multiple connections to share the same cache state.
|
||||
manifest_enabled : bool, default False
|
||||
When true for local/native connections, use directory namespace
|
||||
manifests as the source of truth for table metadata. Existing
|
||||
directory-listed root tables are migrated into the manifest on access.
|
||||
namespace_client_properties : dict, optional
|
||||
Additional directory namespace client properties to use with
|
||||
``manifest_enabled=True``.
|
||||
|
||||
Examples
|
||||
--------
|
||||
@@ -385,6 +409,8 @@ async def connect_async(
|
||||
client_config,
|
||||
storage_options,
|
||||
session,
|
||||
manifest_enabled,
|
||||
namespace_client_properties,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@@ -242,6 +242,8 @@ async def connect(
|
||||
client_config: Optional[Union[ClientConfig, Dict[str, Any]]],
|
||||
storage_options: Optional[Dict[str, str]],
|
||||
session: Optional[Session],
|
||||
manifest_enabled: bool = False,
|
||||
namespace_client_properties: Optional[Dict[str, str]] = None,
|
||||
) -> Connection: ...
|
||||
|
||||
class RecordBatchStream:
|
||||
|
||||
@@ -590,8 +590,13 @@ class LanceDBConnection(DBConnection):
|
||||
read_consistency_interval: Optional[timedelta] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
session: Optional[Session] = None,
|
||||
manifest_enabled: bool = False,
|
||||
namespace_client_properties: Optional[Dict[str, str]] = None,
|
||||
_inner: Optional[LanceDbConnection] = None,
|
||||
):
|
||||
self.storage_options = storage_options
|
||||
self._manifest_enabled = manifest_enabled
|
||||
self._namespace_client_properties = namespace_client_properties
|
||||
if _inner is not None:
|
||||
self._conn = _inner
|
||||
self._cached_namespace_client = None
|
||||
@@ -633,6 +638,8 @@ class LanceDBConnection(DBConnection):
|
||||
None,
|
||||
storage_options,
|
||||
session,
|
||||
manifest_enabled,
|
||||
namespace_client_properties,
|
||||
)
|
||||
|
||||
# TODO: It would be nice if we didn't store self.storage_options but it is
|
||||
@@ -640,7 +647,6 @@ class LanceDBConnection(DBConnection):
|
||||
# work because some paths like LanceDBConnection.from_inner will lose the
|
||||
# storage_options. Also, this class really shouldn't be holding any state
|
||||
# beyond _conn.
|
||||
self.storage_options = storage_options
|
||||
self._conn = AsyncConnection(LOOP.run(do_connect()))
|
||||
self._cached_namespace_client: Optional[LanceNamespace] = None
|
||||
|
||||
@@ -677,6 +683,8 @@ class LanceDBConnection(DBConnection):
|
||||
"connection_type": "local",
|
||||
"uri": self.uri,
|
||||
"storage_options": self.storage_options,
|
||||
"manifest_enabled": self._manifest_enabled,
|
||||
"namespace_client_properties": self._namespace_client_properties,
|
||||
"read_consistency_interval_seconds": (
|
||||
rci.total_seconds() if rci else None
|
||||
),
|
||||
|
||||
@@ -525,7 +525,7 @@ impl Connection {
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None))]
|
||||
#[pyo3(signature = (uri, api_key=None, region=None, host_override=None, read_consistency_interval=None, client_config=None, storage_options=None, session=None, manifest_enabled=false, namespace_client_properties=None))]
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn connect(
|
||||
py: Python<'_>,
|
||||
@@ -537,6 +537,8 @@ pub fn connect(
|
||||
client_config: Option<PyClientConfig>,
|
||||
storage_options: Option<HashMap<String, String>>,
|
||||
session: Option<crate::session::Session>,
|
||||
manifest_enabled: bool,
|
||||
namespace_client_properties: Option<HashMap<String, String>>,
|
||||
) -> PyResult<Bound<'_, PyAny>> {
|
||||
future_into_py(py, async move {
|
||||
let mut builder = lancedb::connect(&uri);
|
||||
@@ -556,6 +558,12 @@ pub fn connect(
|
||||
if let Some(storage_options) = storage_options {
|
||||
builder = builder.storage_options(storage_options);
|
||||
}
|
||||
if manifest_enabled {
|
||||
builder = builder.manifest_enabled(true);
|
||||
}
|
||||
if let Some(namespace_client_properties) = namespace_client_properties {
|
||||
builder = builder.namespace_client_properties(namespace_client_properties);
|
||||
}
|
||||
#[cfg(feature = "remote")]
|
||||
if let Some(client_config) = client_config {
|
||||
builder = builder.client_config(client_config.into());
|
||||
|
||||
Reference in New Issue
Block a user