mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-14 10:30:40 +00:00
refactor!: consolidate namespace related naming and enterprise integration (#3205)
1. Refactored every client (Rust core, Python, Node/TypeScript) so “namespace” usage is explicit: code now keeps namespace paths (namespace_path) separate from namespace clients (namespace_client). Connections propagate the client, table creation routes through it, and managed versioning defaults are resolved from namespace metadata. Python gained LanceNamespaceDBConnection/async counterparts, and the namespace-focused tests were rewritten to match the clarified API surface. 2. Synchronized the workspace with Lance 5.0.0-beta.3 (see https://github.com/lance-format/lance/pull/6186 for the upstream namespace refactor), updating Cargo/uv lockfiles and ensuring all bindings align with the new namespace semantics. 3. Added a namespace-backed code path to lancedb.connect() via new keyword arguments (namespace_client_impl, namespace_client_properties, plus the existing pushdown-ops flag). When those kwargs are supplied, connect() delegates to connect_namespace, so users can opt into namespace clients without changing APIs. (The async helper will gain parity in a later change)
This commit is contained in:
@@ -6,7 +6,7 @@ import importlib.metadata
|
||||
import os
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from datetime import timedelta
|
||||
from typing import Dict, Optional, Union, Any
|
||||
from typing import Dict, Optional, Union, Any, List
|
||||
import warnings
|
||||
|
||||
__version__ = importlib.metadata.version("lancedb")
|
||||
@@ -15,7 +15,6 @@ from ._lancedb import connect as lancedb_connect
|
||||
from .common import URI, sanitize_uri
|
||||
from urllib.parse import urlparse
|
||||
from .db import AsyncConnection, DBConnection, LanceDBConnection
|
||||
from .io import StorageOptionsProvider
|
||||
from .remote import ClientConfig
|
||||
from .remote.db import RemoteDBConnection
|
||||
from .expr import Expr, col, lit, func
|
||||
@@ -64,7 +63,7 @@ def _check_s3_bucket_with_dots(
|
||||
|
||||
|
||||
def connect(
|
||||
uri: URI,
|
||||
uri: Optional[URI] = None,
|
||||
*,
|
||||
api_key: Optional[str] = None,
|
||||
region: str = "us-east-1",
|
||||
@@ -74,14 +73,18 @@ def connect(
|
||||
client_config: Union[ClientConfig, Dict[str, Any], None] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
session: Optional[Session] = None,
|
||||
namespace_client_impl: Optional[str] = None,
|
||||
namespace_client_properties: Optional[Dict[str, str]] = None,
|
||||
namespace_client_pushdown_operations: Optional[List[str]] = None,
|
||||
**kwargs: Any,
|
||||
) -> DBConnection:
|
||||
"""Connect to a LanceDB database.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
uri: str or Path
|
||||
The uri of the database.
|
||||
uri: str or Path, optional
|
||||
The uri of the database. When ``namespace_client_impl`` is provided you may
|
||||
omit ``uri`` and connect through a namespace client instead.
|
||||
api_key: str, optional
|
||||
If presented, connect to LanceDB cloud.
|
||||
Otherwise, connect to a database on file system or cloud storage.
|
||||
@@ -114,6 +117,18 @@ def connect(
|
||||
cache sizes for index and metadata caches, which can significantly
|
||||
impact memory use and performance. They can also be re-used across
|
||||
multiple connections to share the same cache state.
|
||||
namespace_client_impl : str, optional
|
||||
When provided along with ``namespace_client_properties``, ``connect``
|
||||
returns a namespace-backed connection by delegating to
|
||||
:func:`connect_namespace`. The value identifies which namespace
|
||||
implementation to load (e.g., ``"dir"`` or ``"rest"``).
|
||||
namespace_client_properties : dict, optional
|
||||
Configuration to pass to the namespace client implementation. Required
|
||||
when ``namespace_client_impl`` is set.
|
||||
namespace_client_pushdown_operations : list[str], optional
|
||||
Only used when ``namespace_client_properties`` is provided. Forwards to
|
||||
:func:`connect_namespace` to control which operations are executed on the
|
||||
namespace service (e.g., ``["QueryTable", "CreateTable"]``).
|
||||
|
||||
Examples
|
||||
--------
|
||||
@@ -133,11 +148,42 @@ def connect(
|
||||
>>> db = lancedb.connect("db://my_database", api_key="ldb_...",
|
||||
... client_config={"retry_config": {"retries": 5}})
|
||||
|
||||
Connect to a namespace-backed database:
|
||||
|
||||
>>> db = lancedb.connect(namespace_client_impl="dir",
|
||||
... namespace_client_properties={"root": "/tmp/ns"})
|
||||
|
||||
Returns
|
||||
-------
|
||||
conn : DBConnection
|
||||
A connection to a LanceDB database.
|
||||
"""
|
||||
if namespace_client_impl is not None or namespace_client_properties is not None:
|
||||
if namespace_client_impl is None or namespace_client_properties is None:
|
||||
raise ValueError(
|
||||
"Both namespace_client_impl and "
|
||||
"namespace_client_properties must be provided"
|
||||
)
|
||||
if kwargs:
|
||||
raise ValueError(f"Unknown keyword arguments: {kwargs}")
|
||||
return connect_namespace(
|
||||
namespace_client_impl,
|
||||
namespace_client_properties,
|
||||
read_consistency_interval=read_consistency_interval,
|
||||
storage_options=storage_options,
|
||||
session=session,
|
||||
namespace_client_pushdown_operations=namespace_client_pushdown_operations,
|
||||
)
|
||||
|
||||
if namespace_client_pushdown_operations is not None:
|
||||
raise ValueError(
|
||||
"namespace_client_pushdown_operations is only valid when "
|
||||
"connecting through a namespace"
|
||||
)
|
||||
if uri is None:
|
||||
raise ValueError(
|
||||
"uri is required when not connecting through a namespace client"
|
||||
)
|
||||
if isinstance(uri, str) and uri.startswith("db://"):
|
||||
if api_key is None:
|
||||
api_key = os.environ.get("LANCEDB_API_KEY")
|
||||
@@ -284,7 +330,6 @@ __all__ = [
|
||||
"LanceNamespaceDBConnection",
|
||||
"RemoteDBConnection",
|
||||
"Session",
|
||||
"StorageOptionsProvider",
|
||||
"Table",
|
||||
"__version__",
|
||||
]
|
||||
|
||||
@@ -14,7 +14,6 @@ from .index import (
|
||||
HnswSq,
|
||||
FTS,
|
||||
)
|
||||
from .io import StorageOptionsProvider
|
||||
from lance_namespace import (
|
||||
ListNamespacesResponse,
|
||||
CreateNamespaceResponse,
|
||||
@@ -72,35 +71,35 @@ class Connection(object):
|
||||
async def close(self): ...
|
||||
async def list_namespaces(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListNamespacesResponse: ...
|
||||
async def create_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
namespace_path: List[str],
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse: ...
|
||||
async def drop_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
namespace_path: List[str],
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse: ...
|
||||
async def describe_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
namespace_path: List[str],
|
||||
) -> DescribeNamespaceResponse: ...
|
||||
async def list_tables(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListTablesResponse: ...
|
||||
async def table_names(
|
||||
self,
|
||||
namespace: Optional[List[str]],
|
||||
namespace_path: Optional[List[str]],
|
||||
start_after: Optional[str],
|
||||
limit: Optional[int],
|
||||
) -> list[str]: ... # Deprecated: Use list_tables instead
|
||||
@@ -109,9 +108,8 @@ class Connection(object):
|
||||
name: str,
|
||||
mode: str,
|
||||
data: pa.RecordBatchReader,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional[StorageOptionsProvider] = None,
|
||||
location: Optional[str] = None,
|
||||
) -> Table: ...
|
||||
async def create_empty_table(
|
||||
@@ -119,17 +117,15 @@ class Connection(object):
|
||||
name: str,
|
||||
mode: str,
|
||||
schema: pa.Schema,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional[StorageOptionsProvider] = None,
|
||||
location: Optional[str] = None,
|
||||
) -> Table: ...
|
||||
async def open_table(
|
||||
self,
|
||||
name: str,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional[StorageOptionsProvider] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
location: Optional[str] = None,
|
||||
) -> Table: ...
|
||||
@@ -137,7 +133,7 @@ class Connection(object):
|
||||
self,
|
||||
target_table_name: str,
|
||||
source_uri: str,
|
||||
target_namespace: Optional[List[str]] = None,
|
||||
target_namespace_path: Optional[List[str]] = None,
|
||||
source_version: Optional[int] = None,
|
||||
source_tag: Optional[str] = None,
|
||||
is_shallow: bool = True,
|
||||
@@ -146,13 +142,15 @@ class Connection(object):
|
||||
self,
|
||||
cur_name: str,
|
||||
new_name: str,
|
||||
cur_namespace: Optional[List[str]] = None,
|
||||
new_namespace: Optional[List[str]] = None,
|
||||
cur_namespace_path: Optional[List[str]] = None,
|
||||
new_namespace_path: Optional[List[str]] = None,
|
||||
) -> None: ...
|
||||
async def drop_table(
|
||||
self, name: str, namespace: Optional[List[str]] = None
|
||||
self, name: str, namespace_path: Optional[List[str]] = None
|
||||
) -> None: ...
|
||||
async def drop_all_tables(
|
||||
self, namespace_path: Optional[List[str]] = None
|
||||
) -> None: ...
|
||||
async def drop_all_tables(self, namespace: Optional[List[str]] = None) -> None: ...
|
||||
|
||||
class Table:
|
||||
def name(self) -> str: ...
|
||||
|
||||
@@ -52,7 +52,6 @@ if TYPE_CHECKING:
|
||||
from ._lancedb import Connection as LanceDbConnection
|
||||
from .common import DATA, URI
|
||||
from .embeddings import EmbeddingFunctionConfig
|
||||
from .io import StorageOptionsProvider
|
||||
from ._lancedb import Session
|
||||
|
||||
from .namespace_utils import (
|
||||
@@ -67,7 +66,7 @@ class DBConnection(EnforceOverrides):
|
||||
|
||||
def list_namespaces(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListNamespacesResponse:
|
||||
@@ -75,7 +74,7 @@ class DBConnection(EnforceOverrides):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], default []
|
||||
namespace_path: List[str], default []
|
||||
The parent namespace to list namespaces in.
|
||||
Empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
@@ -89,13 +88,13 @@ class DBConnection(EnforceOverrides):
|
||||
ListNamespacesResponse
|
||||
Response containing namespace names and optional page_token for pagination.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
return ListNamespacesResponse(namespaces=[], page_token=None)
|
||||
|
||||
def create_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
namespace_path: List[str],
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse:
|
||||
@@ -103,7 +102,7 @@ class DBConnection(EnforceOverrides):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
namespace_path: List[str]
|
||||
The namespace identifier to create.
|
||||
mode: str, optional
|
||||
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
|
||||
@@ -122,7 +121,7 @@ class DBConnection(EnforceOverrides):
|
||||
|
||||
def drop_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
namespace_path: List[str],
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse:
|
||||
@@ -130,7 +129,7 @@ class DBConnection(EnforceOverrides):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
namespace_path: List[str]
|
||||
The namespace identifier to drop.
|
||||
mode: str, optional
|
||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||
@@ -147,12 +146,14 @@ class DBConnection(EnforceOverrides):
|
||||
"Namespace operations are not supported for this connection type"
|
||||
)
|
||||
|
||||
def describe_namespace(self, namespace: List[str]) -> DescribeNamespaceResponse:
|
||||
def describe_namespace(
|
||||
self, namespace_path: List[str]
|
||||
) -> DescribeNamespaceResponse:
|
||||
"""Describe a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
namespace_path: List[str]
|
||||
The namespace identifier to describe.
|
||||
|
||||
Returns
|
||||
@@ -166,7 +167,7 @@ class DBConnection(EnforceOverrides):
|
||||
|
||||
def list_tables(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListTablesResponse:
|
||||
@@ -174,7 +175,7 @@ class DBConnection(EnforceOverrides):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
namespace_path: List[str], optional
|
||||
The namespace to list tables in.
|
||||
None or empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
@@ -198,13 +199,13 @@ class DBConnection(EnforceOverrides):
|
||||
page_token: Optional[str] = None,
|
||||
limit: int = 10,
|
||||
*,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
) -> Iterable[str]:
|
||||
"""List all tables in this database, in sorted order
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], default []
|
||||
namespace_path: List[str], default []
|
||||
The namespace to list tables in.
|
||||
Empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
@@ -231,9 +232,8 @@ class DBConnection(EnforceOverrides):
|
||||
fill_value: float = 0.0,
|
||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||
*,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
||||
data_storage_version: Optional[str] = None,
|
||||
enable_v2_manifest_paths: Optional[bool] = None,
|
||||
) -> Table:
|
||||
@@ -243,7 +243,7 @@ class DBConnection(EnforceOverrides):
|
||||
----------
|
||||
name: str
|
||||
The name of the table.
|
||||
namespace: List[str], default []
|
||||
namespace_path: List[str], default []
|
||||
The namespace to create the table in.
|
||||
Empty list represents root namespace.
|
||||
data: The data to initialize the table, *optional*
|
||||
@@ -401,9 +401,8 @@ class DBConnection(EnforceOverrides):
|
||||
self,
|
||||
name: str,
|
||||
*,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
) -> Table:
|
||||
"""Open a Lance Table in the database.
|
||||
@@ -412,7 +411,7 @@ class DBConnection(EnforceOverrides):
|
||||
----------
|
||||
name: str
|
||||
The name of the table.
|
||||
namespace: List[str], optional
|
||||
namespace_path: List[str], optional
|
||||
The namespace to open the table from.
|
||||
None or empty list represents root namespace.
|
||||
index_cache_size: int, default 256
|
||||
@@ -440,27 +439,27 @@ class DBConnection(EnforceOverrides):
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def drop_table(self, name: str, namespace: Optional[List[str]] = None):
|
||||
def drop_table(self, name: str, namespace_path: Optional[List[str]] = None):
|
||||
"""Drop a table from the database.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str
|
||||
The name of the table.
|
||||
namespace: List[str], default []
|
||||
namespace_path: List[str], default []
|
||||
The namespace to drop the table from.
|
||||
Empty list represents root namespace.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
raise NotImplementedError
|
||||
|
||||
def rename_table(
|
||||
self,
|
||||
cur_name: str,
|
||||
new_name: str,
|
||||
cur_namespace: Optional[List[str]] = None,
|
||||
new_namespace: Optional[List[str]] = None,
|
||||
cur_namespace_path: Optional[List[str]] = None,
|
||||
new_namespace_path: Optional[List[str]] = None,
|
||||
):
|
||||
"""Rename a table in the database.
|
||||
|
||||
@@ -470,17 +469,17 @@ class DBConnection(EnforceOverrides):
|
||||
The current name of the table.
|
||||
new_name: str
|
||||
The new name of the table.
|
||||
cur_namespace: List[str], optional
|
||||
cur_namespace_path: List[str], optional
|
||||
The namespace of the current table.
|
||||
None or empty list represents root namespace.
|
||||
new_namespace: List[str], optional
|
||||
new_namespace_path: List[str], optional
|
||||
The namespace to move the table to.
|
||||
If not specified, defaults to the same as cur_namespace.
|
||||
"""
|
||||
if cur_namespace is None:
|
||||
cur_namespace = []
|
||||
if new_namespace is None:
|
||||
new_namespace = []
|
||||
if cur_namespace_path is None:
|
||||
cur_namespace_path = []
|
||||
if new_namespace_path is None:
|
||||
new_namespace_path = []
|
||||
raise NotImplementedError
|
||||
|
||||
def drop_database(self):
|
||||
@@ -490,18 +489,18 @@ class DBConnection(EnforceOverrides):
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def drop_all_tables(self, namespace: Optional[List[str]] = None):
|
||||
def drop_all_tables(self, namespace_path: Optional[List[str]] = None):
|
||||
"""
|
||||
Drop all tables from the database
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
namespace_path: List[str], optional
|
||||
The namespace to drop all tables from.
|
||||
None or empty list represents root namespace.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
raise NotImplementedError
|
||||
|
||||
@property
|
||||
@@ -642,7 +641,7 @@ class LanceDBConnection(DBConnection):
|
||||
@override
|
||||
def list_namespaces(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListNamespacesResponse:
|
||||
@@ -650,7 +649,7 @@ class LanceDBConnection(DBConnection):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
namespace_path: List[str], optional
|
||||
The parent namespace to list namespaces in.
|
||||
None or empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
@@ -664,18 +663,18 @@ class LanceDBConnection(DBConnection):
|
||||
ListNamespacesResponse
|
||||
Response containing namespace names and optional page_token for pagination.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
return LOOP.run(
|
||||
self._conn.list_namespaces(
|
||||
namespace=namespace, page_token=page_token, limit=limit
|
||||
namespace_path=namespace_path, page_token=page_token, limit=limit
|
||||
)
|
||||
)
|
||||
|
||||
@override
|
||||
def create_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
namespace_path: List[str],
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse:
|
||||
@@ -683,7 +682,7 @@ class LanceDBConnection(DBConnection):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
namespace_path: List[str]
|
||||
The namespace identifier to create.
|
||||
mode: str, optional
|
||||
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
|
||||
@@ -698,14 +697,14 @@ class LanceDBConnection(DBConnection):
|
||||
"""
|
||||
return LOOP.run(
|
||||
self._conn.create_namespace(
|
||||
namespace=namespace, mode=mode, properties=properties
|
||||
namespace_path=namespace_path, mode=mode, properties=properties
|
||||
)
|
||||
)
|
||||
|
||||
@override
|
||||
def drop_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
namespace_path: List[str],
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse:
|
||||
@@ -713,7 +712,7 @@ class LanceDBConnection(DBConnection):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
namespace_path: List[str]
|
||||
The namespace identifier to drop.
|
||||
mode: str, optional
|
||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||
@@ -727,16 +726,20 @@ class LanceDBConnection(DBConnection):
|
||||
Response containing properties and transaction_id if applicable.
|
||||
"""
|
||||
return LOOP.run(
|
||||
self._conn.drop_namespace(namespace=namespace, mode=mode, behavior=behavior)
|
||||
self._conn.drop_namespace(
|
||||
namespace_path=namespace_path, mode=mode, behavior=behavior
|
||||
)
|
||||
)
|
||||
|
||||
@override
|
||||
def describe_namespace(self, namespace: List[str]) -> DescribeNamespaceResponse:
|
||||
def describe_namespace(
|
||||
self, namespace_path: List[str]
|
||||
) -> DescribeNamespaceResponse:
|
||||
"""Describe a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
namespace_path: List[str]
|
||||
The namespace identifier to describe.
|
||||
|
||||
Returns
|
||||
@@ -744,12 +747,12 @@ class LanceDBConnection(DBConnection):
|
||||
DescribeNamespaceResponse
|
||||
Response containing the namespace properties.
|
||||
"""
|
||||
return LOOP.run(self._conn.describe_namespace(namespace=namespace))
|
||||
return LOOP.run(self._conn.describe_namespace(namespace_path=namespace_path))
|
||||
|
||||
@override
|
||||
def list_tables(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListTablesResponse:
|
||||
@@ -757,7 +760,7 @@ class LanceDBConnection(DBConnection):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
namespace_path: List[str], optional
|
||||
The namespace to list tables in.
|
||||
None or empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
@@ -771,11 +774,11 @@ class LanceDBConnection(DBConnection):
|
||||
ListTablesResponse
|
||||
Response containing table names and optional page_token for pagination.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
return LOOP.run(
|
||||
self._conn.list_tables(
|
||||
namespace=namespace, page_token=page_token, limit=limit
|
||||
namespace_path=namespace_path, page_token=page_token, limit=limit
|
||||
)
|
||||
)
|
||||
|
||||
@@ -785,7 +788,7 @@ class LanceDBConnection(DBConnection):
|
||||
page_token: Optional[str] = None,
|
||||
limit: int = 10,
|
||||
*,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
) -> Iterable[str]:
|
||||
"""Get the names of all tables in the database. The names are sorted.
|
||||
|
||||
@@ -794,7 +797,7 @@ class LanceDBConnection(DBConnection):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
namespace_path: List[str], optional
|
||||
The namespace to list tables in.
|
||||
page_token: str, optional
|
||||
The token to use for pagination.
|
||||
@@ -813,11 +816,11 @@ class LanceDBConnection(DBConnection):
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
return LOOP.run(
|
||||
self._conn.table_names(
|
||||
namespace=namespace, start_after=page_token, limit=limit
|
||||
namespace_path=namespace_path, start_after=page_token, limit=limit
|
||||
)
|
||||
)
|
||||
|
||||
@@ -839,9 +842,8 @@ class LanceDBConnection(DBConnection):
|
||||
fill_value: float = 0.0,
|
||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||
*,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
||||
data_storage_version: Optional[str] = None,
|
||||
enable_v2_manifest_paths: Optional[bool] = None,
|
||||
) -> LanceTable:
|
||||
@@ -849,15 +851,15 @@ class LanceDBConnection(DBConnection):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
namespace_path: List[str], optional
|
||||
The namespace to create the table in.
|
||||
|
||||
See
|
||||
---
|
||||
DBConnection.create_table
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
if mode.lower() not in ["create", "overwrite"]:
|
||||
raise ValueError("mode must be either 'create' or 'overwrite'")
|
||||
validate_table_name(name)
|
||||
@@ -872,9 +874,8 @@ class LanceDBConnection(DBConnection):
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
embedding_functions=embedding_functions,
|
||||
namespace=namespace,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
storage_options_provider=storage_options_provider,
|
||||
)
|
||||
return tbl
|
||||
|
||||
@@ -883,9 +884,8 @@ class LanceDBConnection(DBConnection):
|
||||
self,
|
||||
name: str,
|
||||
*,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
) -> LanceTable:
|
||||
"""Open a table in the database.
|
||||
@@ -894,15 +894,15 @@ class LanceDBConnection(DBConnection):
|
||||
----------
|
||||
name: str
|
||||
The name of the table.
|
||||
namespace: List[str], optional
|
||||
namespace_path: List[str], optional
|
||||
The namespace to open the table from.
|
||||
|
||||
Returns
|
||||
-------
|
||||
A LanceTable object representing the table.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
if index_cache_size is not None:
|
||||
import warnings
|
||||
|
||||
@@ -917,9 +917,8 @@ class LanceDBConnection(DBConnection):
|
||||
return LanceTable.open(
|
||||
self,
|
||||
name,
|
||||
namespace=namespace,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
storage_options_provider=storage_options_provider,
|
||||
index_cache_size=index_cache_size,
|
||||
)
|
||||
|
||||
@@ -928,7 +927,7 @@ class LanceDBConnection(DBConnection):
|
||||
target_table_name: str,
|
||||
source_uri: str,
|
||||
*,
|
||||
target_namespace: Optional[List[str]] = None,
|
||||
target_namespace_path: Optional[List[str]] = None,
|
||||
source_version: Optional[int] = None,
|
||||
source_tag: Optional[str] = None,
|
||||
is_shallow: bool = True,
|
||||
@@ -946,7 +945,7 @@ class LanceDBConnection(DBConnection):
|
||||
The name of the target table to create.
|
||||
source_uri: str
|
||||
The URI of the source table to clone from.
|
||||
target_namespace: List[str], optional
|
||||
target_namespace_path: List[str], optional
|
||||
The namespace for the target table.
|
||||
None or empty list represents root namespace.
|
||||
source_version: int, optional
|
||||
@@ -961,13 +960,13 @@ class LanceDBConnection(DBConnection):
|
||||
-------
|
||||
A LanceTable object representing the cloned table.
|
||||
"""
|
||||
if target_namespace is None:
|
||||
target_namespace = []
|
||||
if target_namespace_path is None:
|
||||
target_namespace_path = []
|
||||
LOOP.run(
|
||||
self._conn.clone_table(
|
||||
target_table_name,
|
||||
source_uri,
|
||||
target_namespace=target_namespace,
|
||||
target_namespace_path=target_namespace_path,
|
||||
source_version=source_version,
|
||||
source_tag=source_tag,
|
||||
is_shallow=is_shallow,
|
||||
@@ -976,14 +975,14 @@ class LanceDBConnection(DBConnection):
|
||||
return LanceTable.open(
|
||||
self,
|
||||
target_table_name,
|
||||
namespace=target_namespace,
|
||||
namespace_path=target_namespace_path,
|
||||
)
|
||||
|
||||
@override
|
||||
def drop_table(
|
||||
self,
|
||||
name: str,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
ignore_missing: bool = False,
|
||||
):
|
||||
"""Drop a table from the database.
|
||||
@@ -992,32 +991,32 @@ class LanceDBConnection(DBConnection):
|
||||
----------
|
||||
name: str
|
||||
The name of the table.
|
||||
namespace: List[str], optional
|
||||
namespace_path: List[str], optional
|
||||
The namespace to drop the table from.
|
||||
ignore_missing: bool, default False
|
||||
If True, ignore if the table does not exist.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
LOOP.run(
|
||||
self._conn.drop_table(
|
||||
name, namespace=namespace, ignore_missing=ignore_missing
|
||||
name, namespace_path=namespace_path, ignore_missing=ignore_missing
|
||||
)
|
||||
)
|
||||
|
||||
@override
|
||||
def drop_all_tables(self, namespace: Optional[List[str]] = None):
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
LOOP.run(self._conn.drop_all_tables(namespace=namespace))
|
||||
def drop_all_tables(self, namespace_path: Optional[List[str]] = None):
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
LOOP.run(self._conn.drop_all_tables(namespace_path=namespace_path))
|
||||
|
||||
@override
|
||||
def rename_table(
|
||||
self,
|
||||
cur_name: str,
|
||||
new_name: str,
|
||||
cur_namespace: Optional[List[str]] = None,
|
||||
new_namespace: Optional[List[str]] = None,
|
||||
cur_namespace_path: Optional[List[str]] = None,
|
||||
new_namespace_path: Optional[List[str]] = None,
|
||||
):
|
||||
"""Rename a table in the database.
|
||||
|
||||
@@ -1027,21 +1026,21 @@ class LanceDBConnection(DBConnection):
|
||||
The current name of the table.
|
||||
new_name: str
|
||||
The new name of the table.
|
||||
cur_namespace: List[str], optional
|
||||
cur_namespace_path: List[str], optional
|
||||
The namespace of the current table.
|
||||
new_namespace: List[str], optional
|
||||
new_namespace_path: List[str], optional
|
||||
The namespace to move the table to.
|
||||
"""
|
||||
if cur_namespace is None:
|
||||
cur_namespace = []
|
||||
if new_namespace is None:
|
||||
new_namespace = []
|
||||
if cur_namespace_path is None:
|
||||
cur_namespace_path = []
|
||||
if new_namespace_path is None:
|
||||
new_namespace_path = []
|
||||
LOOP.run(
|
||||
self._conn.rename_table(
|
||||
cur_name,
|
||||
new_name,
|
||||
cur_namespace=cur_namespace,
|
||||
new_namespace=new_namespace,
|
||||
cur_namespace_path=cur_namespace_path,
|
||||
new_namespace_path=new_namespace_path,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -1125,7 +1124,7 @@ class AsyncConnection(object):
|
||||
|
||||
async def list_namespaces(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListNamespacesResponse:
|
||||
@@ -1133,7 +1132,7 @@ class AsyncConnection(object):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
namespace_path: List[str], optional
|
||||
The parent namespace to list namespaces in.
|
||||
None or empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
@@ -1146,16 +1145,16 @@ class AsyncConnection(object):
|
||||
ListNamespacesResponse
|
||||
Response containing namespace names and optional pagination token
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
result = await self._inner.list_namespaces(
|
||||
namespace=namespace, page_token=page_token, limit=limit
|
||||
namespace_path=namespace_path, page_token=page_token, limit=limit
|
||||
)
|
||||
return ListNamespacesResponse(**result)
|
||||
|
||||
async def create_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
namespace_path: List[str],
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse:
|
||||
@@ -1163,7 +1162,7 @@ class AsyncConnection(object):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
namespace_path: List[str]
|
||||
The namespace identifier to create.
|
||||
mode: str, optional
|
||||
Creation mode - "create", "exist_ok", or "overwrite". Case insensitive.
|
||||
@@ -1176,7 +1175,7 @@ class AsyncConnection(object):
|
||||
Response containing namespace properties
|
||||
"""
|
||||
result = await self._inner.create_namespace(
|
||||
namespace,
|
||||
namespace_path,
|
||||
mode=_normalize_create_namespace_mode(mode),
|
||||
properties=properties,
|
||||
)
|
||||
@@ -1184,7 +1183,7 @@ class AsyncConnection(object):
|
||||
|
||||
async def drop_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
namespace_path: List[str],
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse:
|
||||
@@ -1192,7 +1191,7 @@ class AsyncConnection(object):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
namespace_path: List[str]
|
||||
The namespace identifier to drop.
|
||||
mode: str, optional
|
||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||
@@ -1206,20 +1205,20 @@ class AsyncConnection(object):
|
||||
Response containing properties and transaction_id if applicable.
|
||||
"""
|
||||
result = await self._inner.drop_namespace(
|
||||
namespace,
|
||||
namespace_path,
|
||||
mode=_normalize_drop_namespace_mode(mode),
|
||||
behavior=_normalize_drop_namespace_behavior(behavior),
|
||||
)
|
||||
return DropNamespaceResponse(**result)
|
||||
|
||||
async def describe_namespace(
|
||||
self, namespace: List[str]
|
||||
self, namespace_path: List[str]
|
||||
) -> DescribeNamespaceResponse:
|
||||
"""Describe a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
namespace_path: List[str]
|
||||
The namespace identifier to describe.
|
||||
|
||||
Returns
|
||||
@@ -1227,12 +1226,12 @@ class AsyncConnection(object):
|
||||
DescribeNamespaceResponse
|
||||
Response containing the namespace properties.
|
||||
"""
|
||||
result = await self._inner.describe_namespace(namespace)
|
||||
result = await self._inner.describe_namespace(namespace_path)
|
||||
return DescribeNamespaceResponse(**result)
|
||||
|
||||
async def list_tables(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListTablesResponse:
|
||||
@@ -1240,7 +1239,7 @@ class AsyncConnection(object):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
namespace_path: List[str], optional
|
||||
The namespace to list tables in.
|
||||
None or empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
@@ -1254,17 +1253,17 @@ class AsyncConnection(object):
|
||||
ListTablesResponse
|
||||
Response containing table names and optional page_token for pagination.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
result = await self._inner.list_tables(
|
||||
namespace=namespace, page_token=page_token, limit=limit
|
||||
namespace_path=namespace_path, page_token=page_token, limit=limit
|
||||
)
|
||||
return ListTablesResponse(**result)
|
||||
|
||||
async def table_names(
|
||||
self,
|
||||
*,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
start_after: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> Iterable[str]:
|
||||
@@ -1275,7 +1274,7 @@ class AsyncConnection(object):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
namespace_path: List[str], optional
|
||||
The namespace to list tables in.
|
||||
None or empty list represents root namespace.
|
||||
start_after: str, optional
|
||||
@@ -1298,10 +1297,10 @@ class AsyncConnection(object):
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
return await self._inner.table_names(
|
||||
namespace=namespace, start_after=start_after, limit=limit
|
||||
namespace_path=namespace_path, start_after=start_after, limit=limit
|
||||
)
|
||||
|
||||
async def create_table(
|
||||
@@ -1314,9 +1313,8 @@ class AsyncConnection(object):
|
||||
on_bad_vectors: Optional[str] = None,
|
||||
fill_value: Optional[float] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
||||
*,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||
location: Optional[str] = None,
|
||||
) -> AsyncTable:
|
||||
@@ -1326,7 +1324,7 @@ class AsyncConnection(object):
|
||||
----------
|
||||
name: str
|
||||
The name of the table.
|
||||
namespace: List[str], default []
|
||||
namespace_path: List[str], default []
|
||||
The namespace to create the table in.
|
||||
Empty list represents root namespace.
|
||||
data: The data to initialize the table, *optional*
|
||||
@@ -1477,8 +1475,8 @@ class AsyncConnection(object):
|
||||
... await db.create_table("table4", make_batches(), schema=schema)
|
||||
>>> asyncio.run(iterable_example())
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
metadata = None
|
||||
|
||||
if embedding_functions is not None:
|
||||
@@ -1513,9 +1511,8 @@ class AsyncConnection(object):
|
||||
name,
|
||||
mode,
|
||||
schema,
|
||||
namespace=namespace,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
storage_options_provider=storage_options_provider,
|
||||
location=location,
|
||||
)
|
||||
else:
|
||||
@@ -1524,9 +1521,8 @@ class AsyncConnection(object):
|
||||
name,
|
||||
mode,
|
||||
data,
|
||||
namespace=namespace,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
storage_options_provider=storage_options_provider,
|
||||
location=location,
|
||||
)
|
||||
|
||||
@@ -1536,9 +1532,8 @@ class AsyncConnection(object):
|
||||
self,
|
||||
name: str,
|
||||
*,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
location: Optional[str] = None,
|
||||
namespace_client: Optional[Any] = None,
|
||||
@@ -1550,7 +1545,7 @@ class AsyncConnection(object):
|
||||
----------
|
||||
name: str
|
||||
The name of the table.
|
||||
namespace: List[str], optional
|
||||
namespace_path: List[str], optional
|
||||
The namespace to open the table from.
|
||||
None or empty list represents root namespace.
|
||||
storage_options: dict, optional
|
||||
@@ -1583,13 +1578,12 @@ class AsyncConnection(object):
|
||||
-------
|
||||
A LanceTable object representing the table.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
table = await self._inner.open_table(
|
||||
name,
|
||||
namespace=namespace,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
storage_options_provider=storage_options_provider,
|
||||
index_cache_size=index_cache_size,
|
||||
location=location,
|
||||
namespace_client=namespace_client,
|
||||
@@ -1602,7 +1596,7 @@ class AsyncConnection(object):
|
||||
target_table_name: str,
|
||||
source_uri: str,
|
||||
*,
|
||||
target_namespace: Optional[List[str]] = None,
|
||||
target_namespace_path: Optional[List[str]] = None,
|
||||
source_version: Optional[int] = None,
|
||||
source_tag: Optional[str] = None,
|
||||
is_shallow: bool = True,
|
||||
@@ -1620,7 +1614,7 @@ class AsyncConnection(object):
|
||||
The name of the target table to create.
|
||||
source_uri: str
|
||||
The URI of the source table to clone from.
|
||||
target_namespace: List[str], optional
|
||||
target_namespace_path: List[str], optional
|
||||
The namespace for the target table.
|
||||
None or empty list represents root namespace.
|
||||
source_version: int, optional
|
||||
@@ -1635,12 +1629,12 @@ class AsyncConnection(object):
|
||||
-------
|
||||
An AsyncTable object representing the cloned table.
|
||||
"""
|
||||
if target_namespace is None:
|
||||
target_namespace = []
|
||||
if target_namespace_path is None:
|
||||
target_namespace_path = []
|
||||
table = await self._inner.clone_table(
|
||||
target_table_name,
|
||||
source_uri,
|
||||
target_namespace=target_namespace,
|
||||
target_namespace_path=target_namespace_path,
|
||||
source_version=source_version,
|
||||
source_tag=source_tag,
|
||||
is_shallow=is_shallow,
|
||||
@@ -1651,8 +1645,8 @@ class AsyncConnection(object):
|
||||
self,
|
||||
cur_name: str,
|
||||
new_name: str,
|
||||
cur_namespace: Optional[List[str]] = None,
|
||||
new_namespace: Optional[List[str]] = None,
|
||||
cur_namespace_path: Optional[List[str]] = None,
|
||||
new_namespace_path: Optional[List[str]] = None,
|
||||
):
|
||||
"""Rename a table in the database.
|
||||
|
||||
@@ -1662,26 +1656,29 @@ class AsyncConnection(object):
|
||||
The current name of the table.
|
||||
new_name: str
|
||||
The new name of the table.
|
||||
cur_namespace: List[str], optional
|
||||
cur_namespace_path: List[str], optional
|
||||
The namespace of the current table.
|
||||
None or empty list represents root namespace.
|
||||
new_namespace: List[str], optional
|
||||
new_namespace_path: List[str], optional
|
||||
The namespace to move the table to.
|
||||
If not specified, defaults to the same as cur_namespace.
|
||||
"""
|
||||
if cur_namespace is None:
|
||||
cur_namespace = []
|
||||
if new_namespace is None:
|
||||
new_namespace = []
|
||||
if cur_namespace_path is None:
|
||||
cur_namespace_path = []
|
||||
if new_namespace_path is None:
|
||||
new_namespace_path = []
|
||||
await self._inner.rename_table(
|
||||
cur_name, new_name, cur_namespace=cur_namespace, new_namespace=new_namespace
|
||||
cur_name,
|
||||
new_name,
|
||||
cur_namespace_path=cur_namespace_path,
|
||||
new_namespace_path=new_namespace_path,
|
||||
)
|
||||
|
||||
async def drop_table(
|
||||
self,
|
||||
name: str,
|
||||
*,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
ignore_missing: bool = False,
|
||||
):
|
||||
"""Drop a table from the database.
|
||||
@@ -1690,34 +1687,34 @@ class AsyncConnection(object):
|
||||
----------
|
||||
name: str
|
||||
The name of the table.
|
||||
namespace: List[str], default []
|
||||
namespace_path: List[str], default []
|
||||
The namespace to drop the table from.
|
||||
Empty list represents root namespace.
|
||||
ignore_missing: bool, default False
|
||||
If True, ignore if the table does not exist.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
try:
|
||||
await self._inner.drop_table(name, namespace=namespace)
|
||||
await self._inner.drop_table(name, namespace_path=namespace_path)
|
||||
except ValueError as e:
|
||||
if not ignore_missing:
|
||||
raise e
|
||||
if f"Table '{name}' was not found" not in str(e):
|
||||
raise e
|
||||
|
||||
async def drop_all_tables(self, namespace: Optional[List[str]] = None):
|
||||
async def drop_all_tables(self, namespace_path: Optional[List[str]] = None):
|
||||
"""Drop all tables from the database.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
namespace_path: List[str], optional
|
||||
The namespace to drop all tables from.
|
||||
None or empty list represents root namespace.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
await self._inner.drop_all_tables(namespace=namespace)
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
await self._inner.drop_all_tables(namespace_path=namespace_path)
|
||||
|
||||
@deprecation.deprecated(
|
||||
deprecated_in="0.15.1",
|
||||
|
||||
@@ -2,70 +2,3 @@
|
||||
# SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
"""I/O utilities and interfaces for LanceDB."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict
|
||||
|
||||
|
||||
class StorageOptionsProvider(ABC):
|
||||
"""Abstract base class for providing storage options to LanceDB tables.
|
||||
|
||||
Storage options providers enable automatic credential refresh for cloud
|
||||
storage backends (e.g., AWS S3, Azure Blob Storage, GCS). When credentials
|
||||
have an expiration time, the provider's fetch_storage_options() method will
|
||||
be called periodically to get fresh credentials before they expire.
|
||||
|
||||
Example
|
||||
-------
|
||||
>>> class MyProvider(StorageOptionsProvider):
|
||||
... def fetch_storage_options(self) -> Dict[str, str]:
|
||||
... # Fetch fresh credentials from your credential manager
|
||||
... return {
|
||||
... "aws_access_key_id": "...",
|
||||
... "aws_secret_access_key": "...",
|
||||
... "expires_at_millis": "1234567890000" # Optional
|
||||
... }
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def fetch_storage_options(self) -> Dict[str, str]:
|
||||
"""Fetch fresh storage credentials.
|
||||
|
||||
This method is called by LanceDB when credentials need to be refreshed.
|
||||
If the returned dictionary contains an "expires_at_millis" key with a
|
||||
Unix timestamp in milliseconds, LanceDB will automatically refresh the
|
||||
credentials before that time. If the key is not present, credentials
|
||||
are assumed to not expire.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Dict[str, str]
|
||||
Dictionary containing cloud storage credentials and optionally an
|
||||
expiration time:
|
||||
- "expires_at_millis" (optional): Unix timestamp in milliseconds when
|
||||
credentials expire
|
||||
- Provider-specific credential keys (e.g., aws_access_key_id,
|
||||
aws_secret_access_key, etc.)
|
||||
|
||||
Raises
|
||||
------
|
||||
RuntimeError
|
||||
If credentials cannot be fetched or are invalid
|
||||
"""
|
||||
pass
|
||||
|
||||
def provider_id(self) -> str:
|
||||
"""Return a human-readable unique identifier for this provider instance.
|
||||
|
||||
This identifier is used for caching and equality comparison. Two providers
|
||||
with the same ID will share the same cached object store connection.
|
||||
|
||||
The default implementation uses the class name and string representation.
|
||||
Override this method if you need custom identification logic.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
A unique identifier for this provider instance
|
||||
"""
|
||||
return f"{self.__class__.__name__} {{ repr: {str(self)!r} }}"
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -111,7 +111,7 @@ class RemoteDBConnection(DBConnection):
|
||||
@override
|
||||
def list_namespaces(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListNamespacesResponse:
|
||||
@@ -119,7 +119,7 @@ class RemoteDBConnection(DBConnection):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
namespace_path: List[str], optional
|
||||
The parent namespace to list namespaces in.
|
||||
None or empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
@@ -133,18 +133,18 @@ class RemoteDBConnection(DBConnection):
|
||||
ListNamespacesResponse
|
||||
Response containing namespace names and optional page_token for pagination.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
return LOOP.run(
|
||||
self._conn.list_namespaces(
|
||||
namespace=namespace, page_token=page_token, limit=limit
|
||||
namespace_path=namespace_path, page_token=page_token, limit=limit
|
||||
)
|
||||
)
|
||||
|
||||
@override
|
||||
def create_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
namespace_path: List[str],
|
||||
mode: Optional[str] = None,
|
||||
properties: Optional[Dict[str, str]] = None,
|
||||
) -> CreateNamespaceResponse:
|
||||
@@ -152,7 +152,7 @@ class RemoteDBConnection(DBConnection):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
namespace_path: List[str]
|
||||
The namespace identifier to create.
|
||||
mode: str, optional
|
||||
Creation mode - "create" (fail if exists), "exist_ok" (skip if exists),
|
||||
@@ -167,14 +167,14 @@ class RemoteDBConnection(DBConnection):
|
||||
"""
|
||||
return LOOP.run(
|
||||
self._conn.create_namespace(
|
||||
namespace=namespace, mode=mode, properties=properties
|
||||
namespace_path=namespace_path, mode=mode, properties=properties
|
||||
)
|
||||
)
|
||||
|
||||
@override
|
||||
def drop_namespace(
|
||||
self,
|
||||
namespace: List[str],
|
||||
namespace_path: List[str],
|
||||
mode: Optional[str] = None,
|
||||
behavior: Optional[str] = None,
|
||||
) -> DropNamespaceResponse:
|
||||
@@ -182,7 +182,7 @@ class RemoteDBConnection(DBConnection):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
namespace_path: List[str]
|
||||
The namespace identifier to drop.
|
||||
mode: str, optional
|
||||
Whether to skip if not exists ("SKIP") or fail ("FAIL"). Case insensitive.
|
||||
@@ -196,16 +196,20 @@ class RemoteDBConnection(DBConnection):
|
||||
Response containing properties and transaction_id if applicable.
|
||||
"""
|
||||
return LOOP.run(
|
||||
self._conn.drop_namespace(namespace=namespace, mode=mode, behavior=behavior)
|
||||
self._conn.drop_namespace(
|
||||
namespace_path=namespace_path, mode=mode, behavior=behavior
|
||||
)
|
||||
)
|
||||
|
||||
@override
|
||||
def describe_namespace(self, namespace: List[str]) -> DescribeNamespaceResponse:
|
||||
def describe_namespace(
|
||||
self, namespace_path: List[str]
|
||||
) -> DescribeNamespaceResponse:
|
||||
"""Describe a namespace.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str]
|
||||
namespace_path: List[str]
|
||||
The namespace identifier to describe.
|
||||
|
||||
Returns
|
||||
@@ -213,12 +217,12 @@ class RemoteDBConnection(DBConnection):
|
||||
DescribeNamespaceResponse
|
||||
Response containing the namespace properties.
|
||||
"""
|
||||
return LOOP.run(self._conn.describe_namespace(namespace=namespace))
|
||||
return LOOP.run(self._conn.describe_namespace(namespace_path=namespace_path))
|
||||
|
||||
@override
|
||||
def list_tables(
|
||||
self,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
page_token: Optional[str] = None,
|
||||
limit: Optional[int] = None,
|
||||
) -> ListTablesResponse:
|
||||
@@ -226,7 +230,7 @@ class RemoteDBConnection(DBConnection):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], optional
|
||||
namespace_path: List[str], optional
|
||||
The namespace to list tables in.
|
||||
None or empty list represents root namespace.
|
||||
page_token: str, optional
|
||||
@@ -240,11 +244,11 @@ class RemoteDBConnection(DBConnection):
|
||||
ListTablesResponse
|
||||
Response containing table names and optional page_token for pagination.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
return LOOP.run(
|
||||
self._conn.list_tables(
|
||||
namespace=namespace, page_token=page_token, limit=limit
|
||||
namespace_path=namespace_path, page_token=page_token, limit=limit
|
||||
)
|
||||
)
|
||||
|
||||
@@ -254,7 +258,7 @@ class RemoteDBConnection(DBConnection):
|
||||
page_token: Optional[str] = None,
|
||||
limit: int = 10,
|
||||
*,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
) -> Iterable[str]:
|
||||
"""List the names of all tables in the database.
|
||||
|
||||
@@ -263,7 +267,7 @@ class RemoteDBConnection(DBConnection):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace: List[str], default []
|
||||
namespace_path: List[str], default []
|
||||
The namespace to list tables in.
|
||||
Empty list represents root namespace.
|
||||
page_token: str
|
||||
@@ -282,11 +286,11 @@ class RemoteDBConnection(DBConnection):
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
return LOOP.run(
|
||||
self._conn.table_names(
|
||||
namespace=namespace, start_after=page_token, limit=limit
|
||||
namespace_path=namespace_path, start_after=page_token, limit=limit
|
||||
)
|
||||
)
|
||||
|
||||
@@ -295,7 +299,7 @@ class RemoteDBConnection(DBConnection):
|
||||
self,
|
||||
name: str,
|
||||
*,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
) -> Table:
|
||||
@@ -305,7 +309,7 @@ class RemoteDBConnection(DBConnection):
|
||||
----------
|
||||
name: str
|
||||
The name of the table.
|
||||
namespace: List[str], optional
|
||||
namespace_path: List[str], optional
|
||||
The namespace to open the table from.
|
||||
None or empty list represents root namespace.
|
||||
|
||||
@@ -315,15 +319,15 @@ class RemoteDBConnection(DBConnection):
|
||||
"""
|
||||
from .table import RemoteTable
|
||||
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
if index_cache_size is not None:
|
||||
logging.info(
|
||||
"index_cache_size is ignored in LanceDb Cloud"
|
||||
" (there is no local cache to configure)"
|
||||
)
|
||||
|
||||
table = LOOP.run(self._conn.open_table(name, namespace=namespace))
|
||||
table = LOOP.run(self._conn.open_table(name, namespace_path=namespace_path))
|
||||
return RemoteTable(table, self.db_name)
|
||||
|
||||
def clone_table(
|
||||
@@ -331,7 +335,7 @@ class RemoteDBConnection(DBConnection):
|
||||
target_table_name: str,
|
||||
source_uri: str,
|
||||
*,
|
||||
target_namespace: Optional[List[str]] = None,
|
||||
target_namespace_path: Optional[List[str]] = None,
|
||||
source_version: Optional[int] = None,
|
||||
source_tag: Optional[str] = None,
|
||||
is_shallow: bool = True,
|
||||
@@ -344,7 +348,7 @@ class RemoteDBConnection(DBConnection):
|
||||
The name of the target table to create.
|
||||
source_uri: str
|
||||
The URI of the source table to clone from.
|
||||
target_namespace: List[str], optional
|
||||
target_namespace_path: List[str], optional
|
||||
The namespace for the target table.
|
||||
None or empty list represents root namespace.
|
||||
source_version: int, optional
|
||||
@@ -361,13 +365,13 @@ class RemoteDBConnection(DBConnection):
|
||||
"""
|
||||
from .table import RemoteTable
|
||||
|
||||
if target_namespace is None:
|
||||
target_namespace = []
|
||||
if target_namespace_path is None:
|
||||
target_namespace_path = []
|
||||
table = LOOP.run(
|
||||
self._conn.clone_table(
|
||||
target_table_name,
|
||||
source_uri,
|
||||
target_namespace=target_namespace,
|
||||
target_namespace_path=target_namespace_path,
|
||||
source_version=source_version,
|
||||
source_tag=source_tag,
|
||||
is_shallow=is_shallow,
|
||||
@@ -387,7 +391,7 @@ class RemoteDBConnection(DBConnection):
|
||||
exist_ok: bool = False,
|
||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||
*,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
) -> Table:
|
||||
"""Create a [Table][lancedb.table.Table] in the database.
|
||||
|
||||
@@ -395,7 +399,7 @@ class RemoteDBConnection(DBConnection):
|
||||
----------
|
||||
name: str
|
||||
The name of the table.
|
||||
namespace: List[str], optional
|
||||
namespace_path: List[str], optional
|
||||
The namespace to create the table in.
|
||||
None or empty list represents root namespace.
|
||||
data: The data to initialize the table, *optional*
|
||||
@@ -495,8 +499,8 @@ class RemoteDBConnection(DBConnection):
|
||||
mode = "exist_ok"
|
||||
elif not mode:
|
||||
mode = "exist_ok"
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
validate_table_name(name)
|
||||
if embedding_functions is not None:
|
||||
logging.warning(
|
||||
@@ -511,7 +515,7 @@ class RemoteDBConnection(DBConnection):
|
||||
self._conn.create_table(
|
||||
name,
|
||||
data,
|
||||
namespace=namespace,
|
||||
namespace_path=namespace_path,
|
||||
mode=mode,
|
||||
schema=schema,
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
@@ -521,28 +525,28 @@ class RemoteDBConnection(DBConnection):
|
||||
return RemoteTable(table, self.db_name)
|
||||
|
||||
@override
|
||||
def drop_table(self, name: str, namespace: Optional[List[str]] = None):
|
||||
def drop_table(self, name: str, namespace_path: Optional[List[str]] = None):
|
||||
"""Drop a table from the database.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
name: str
|
||||
The name of the table.
|
||||
namespace: List[str], optional
|
||||
namespace_path: List[str], optional
|
||||
The namespace to drop the table from.
|
||||
None or empty list represents root namespace.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
LOOP.run(self._conn.drop_table(name, namespace=namespace))
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
LOOP.run(self._conn.drop_table(name, namespace_path=namespace_path))
|
||||
|
||||
@override
|
||||
def rename_table(
|
||||
self,
|
||||
cur_name: str,
|
||||
new_name: str,
|
||||
cur_namespace: Optional[List[str]] = None,
|
||||
new_namespace: Optional[List[str]] = None,
|
||||
cur_namespace_path: Optional[List[str]] = None,
|
||||
new_namespace_path: Optional[List[str]] = None,
|
||||
):
|
||||
"""Rename a table in the database.
|
||||
|
||||
@@ -553,16 +557,16 @@ class RemoteDBConnection(DBConnection):
|
||||
new_name: str
|
||||
The new name of the table.
|
||||
"""
|
||||
if cur_namespace is None:
|
||||
cur_namespace = []
|
||||
if new_namespace is None:
|
||||
new_namespace = []
|
||||
if cur_namespace_path is None:
|
||||
cur_namespace_path = []
|
||||
if new_namespace_path is None:
|
||||
new_namespace_path = []
|
||||
LOOP.run(
|
||||
self._conn.rename_table(
|
||||
cur_name,
|
||||
new_name,
|
||||
cur_namespace=cur_namespace,
|
||||
new_namespace=new_namespace,
|
||||
cur_namespace_path=cur_namespace_path,
|
||||
new_namespace_path=new_namespace_path,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@@ -89,7 +89,6 @@ from .index import lang_mapping
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .db import LanceDBConnection
|
||||
from .io import StorageOptionsProvider
|
||||
from ._lancedb import (
|
||||
Table as LanceDBTable,
|
||||
OptimizeStats,
|
||||
@@ -1776,30 +1775,30 @@ class LanceTable(Table):
|
||||
connection: "LanceDBConnection",
|
||||
name: str,
|
||||
*,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
location: Optional[str] = None,
|
||||
namespace_client: Optional[Any] = None,
|
||||
managed_versioning: Optional[bool] = None,
|
||||
pushdown_operations: Optional[set] = None,
|
||||
_async: AsyncTable = None,
|
||||
):
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
self._conn = connection
|
||||
self._namespace = namespace
|
||||
self._namespace_path = namespace_path
|
||||
self._location = location # Store location for use in _dataset_path
|
||||
self._namespace_client = namespace_client
|
||||
self._pushdown_operations = pushdown_operations or set()
|
||||
if _async is not None:
|
||||
self._table = _async
|
||||
else:
|
||||
self._table = LOOP.run(
|
||||
connection._conn.open_table(
|
||||
name,
|
||||
namespace=namespace,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
storage_options_provider=storage_options_provider,
|
||||
index_cache_size=index_cache_size,
|
||||
location=location,
|
||||
namespace_client=namespace_client,
|
||||
@@ -1814,13 +1813,13 @@ class LanceTable(Table):
|
||||
@property
|
||||
def namespace(self) -> List[str]:
|
||||
"""Return the namespace path of the table."""
|
||||
return self._namespace
|
||||
return self._namespace_path
|
||||
|
||||
@property
|
||||
def id(self) -> str:
|
||||
"""Return the full identifier of the table (namespace$name)."""
|
||||
if self._namespace:
|
||||
return "$".join(self._namespace + [self.name])
|
||||
if self._namespace_path:
|
||||
return "$".join(self._namespace_path + [self.name])
|
||||
return self.name
|
||||
|
||||
@classmethod
|
||||
@@ -1841,26 +1840,26 @@ class LanceTable(Table):
|
||||
db,
|
||||
name,
|
||||
*,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
||||
index_cache_size: Optional[int] = None,
|
||||
location: Optional[str] = None,
|
||||
namespace_client: Optional[Any] = None,
|
||||
managed_versioning: Optional[bool] = None,
|
||||
pushdown_operations: Optional[set] = None,
|
||||
):
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
tbl = cls(
|
||||
db,
|
||||
name,
|
||||
namespace=namespace,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
storage_options_provider=storage_options_provider,
|
||||
index_cache_size=index_cache_size,
|
||||
location=location,
|
||||
namespace_client=namespace_client,
|
||||
managed_versioning=managed_versioning,
|
||||
pushdown_operations=pushdown_operations,
|
||||
)
|
||||
|
||||
# check the dataset exists
|
||||
@@ -1893,11 +1892,11 @@ class LanceTable(Table):
|
||||
)
|
||||
|
||||
if self._namespace_client is not None:
|
||||
table_id = self._namespace + [self.name]
|
||||
table_id = self._namespace_path + [self.name]
|
||||
return lance.dataset(
|
||||
version=self.version,
|
||||
storage_options=self._conn.storage_options,
|
||||
namespace=self._namespace_client,
|
||||
namespace_client=self._namespace_client,
|
||||
table_id=table_id,
|
||||
**kwargs,
|
||||
)
|
||||
@@ -2803,13 +2802,13 @@ class LanceTable(Table):
|
||||
fill_value: float = 0.0,
|
||||
embedding_functions: Optional[List[EmbeddingFunctionConfig]] = None,
|
||||
*,
|
||||
namespace: Optional[List[str]] = None,
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
storage_options: Optional[Dict[str, str | bool]] = None,
|
||||
storage_options_provider: Optional["StorageOptionsProvider"] = None,
|
||||
data_storage_version: Optional[str] = None,
|
||||
enable_v2_manifest_paths: Optional[bool] = None,
|
||||
location: Optional[str] = None,
|
||||
namespace_client: Optional[Any] = None,
|
||||
pushdown_operations: Optional[set] = None,
|
||||
):
|
||||
"""
|
||||
Create a new table.
|
||||
@@ -2864,13 +2863,14 @@ class LanceTable(Table):
|
||||
Deprecated. Set `storage_options` when connecting to the database and set
|
||||
`new_table_enable_v2_manifest_paths` in the options.
|
||||
"""
|
||||
if namespace is None:
|
||||
namespace = []
|
||||
if namespace_path is None:
|
||||
namespace_path = []
|
||||
self = cls.__new__(cls)
|
||||
self._conn = db
|
||||
self._namespace = namespace
|
||||
self._namespace_path = namespace_path
|
||||
self._location = location
|
||||
self._namespace_client = namespace_client
|
||||
self._pushdown_operations = pushdown_operations or set()
|
||||
|
||||
if data_storage_version is not None:
|
||||
warnings.warn(
|
||||
@@ -2903,9 +2903,8 @@ class LanceTable(Table):
|
||||
on_bad_vectors=on_bad_vectors,
|
||||
fill_value=fill_value,
|
||||
embedding_functions=embedding_functions,
|
||||
namespace=namespace,
|
||||
namespace_path=namespace_path,
|
||||
storage_options=storage_options,
|
||||
storage_options_provider=storage_options_provider,
|
||||
location=location,
|
||||
)
|
||||
)
|
||||
@@ -2974,6 +2973,15 @@ class LanceTable(Table):
|
||||
batch_size: Optional[int] = None,
|
||||
timeout: Optional[timedelta] = None,
|
||||
) -> pa.RecordBatchReader:
|
||||
if (
|
||||
"QueryTable" in self._pushdown_operations
|
||||
and self._namespace_client is not None
|
||||
):
|
||||
from lancedb.namespace import _execute_server_side_query
|
||||
|
||||
table_id = self._namespace_path + [self.name]
|
||||
return _execute_server_side_query(self._namespace_client, table_id, query)
|
||||
|
||||
async_iter = LOOP.run(
|
||||
self._table._execute_query(query, batch_size=batch_size, timeout=timeout)
|
||||
)
|
||||
|
||||
@@ -183,8 +183,8 @@ def test_table_names(tmp_db: lancedb.DBConnection):
|
||||
result = list(tmp_db.table_names("test2", limit=2))
|
||||
assert result == ["test3"], f"Expected ['test3'], got {result}"
|
||||
|
||||
# Test that namespace parameter can be passed as keyword
|
||||
result = list(tmp_db.table_names(namespace=[]))
|
||||
# Test that namespace_path parameter can be passed as keyword
|
||||
result = list(tmp_db.table_names(namespace_path=[]))
|
||||
assert len(result) == 3
|
||||
|
||||
|
||||
@@ -909,7 +909,7 @@ def test_local_namespace_operations(tmp_path):
|
||||
NotImplementedError,
|
||||
match="Namespace operations are not supported for listing database",
|
||||
):
|
||||
db.list_namespaces(namespace=["test"])
|
||||
db.list_namespaces(namespace_path=["test"])
|
||||
|
||||
|
||||
def test_local_create_namespace_not_supported(tmp_path):
|
||||
|
||||
@@ -33,6 +33,16 @@ class TestNamespaceConnection:
|
||||
# Initially no tables in root
|
||||
assert len(list(db.table_names())) == 0
|
||||
|
||||
def test_connect_via_connect_helper(self):
|
||||
"""Connecting via lancedb.connect should delegate to namespace connection."""
|
||||
db = lancedb.connect(
|
||||
namespace_client_impl="dir",
|
||||
namespace_client_properties={"root": self.temp_dir},
|
||||
)
|
||||
|
||||
assert isinstance(db, lancedb.LanceNamespaceDBConnection)
|
||||
assert len(list(db.table_names())) == 0
|
||||
|
||||
def test_create_table_through_namespace(self):
|
||||
"""Test creating a table through namespace."""
|
||||
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
|
||||
@@ -50,14 +60,14 @@ class TestNamespaceConnection:
|
||||
)
|
||||
|
||||
# Create empty table in child namespace
|
||||
table = db.create_table("test_table", schema=schema, namespace=["test_ns"])
|
||||
table = db.create_table("test_table", schema=schema, namespace_path=["test_ns"])
|
||||
assert table is not None
|
||||
assert table.name == "test_table"
|
||||
assert table.namespace == ["test_ns"]
|
||||
assert table.id == "test_ns$test_table"
|
||||
|
||||
# Table should appear in child namespace
|
||||
table_names = list(db.table_names(namespace=["test_ns"]))
|
||||
table_names = list(db.table_names(namespace_path=["test_ns"]))
|
||||
assert "test_table" in table_names
|
||||
assert len(table_names) == 1
|
||||
|
||||
@@ -80,10 +90,10 @@ class TestNamespaceConnection:
|
||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||
]
|
||||
)
|
||||
db.create_table("test_table", schema=schema, namespace=["test_ns"])
|
||||
db.create_table("test_table", schema=schema, namespace_path=["test_ns"])
|
||||
|
||||
# Open the table
|
||||
table = db.open_table("test_table", namespace=["test_ns"])
|
||||
table = db.open_table("test_table", namespace_path=["test_ns"])
|
||||
assert table is not None
|
||||
assert table.name == "test_table"
|
||||
assert table.namespace == ["test_ns"]
|
||||
@@ -108,31 +118,31 @@ class TestNamespaceConnection:
|
||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||
]
|
||||
)
|
||||
db.create_table("table1", schema=schema, namespace=["test_ns"])
|
||||
db.create_table("table2", schema=schema, namespace=["test_ns"])
|
||||
db.create_table("table1", schema=schema, namespace_path=["test_ns"])
|
||||
db.create_table("table2", schema=schema, namespace_path=["test_ns"])
|
||||
|
||||
# Verify both tables exist in child namespace
|
||||
table_names = list(db.table_names(namespace=["test_ns"]))
|
||||
table_names = list(db.table_names(namespace_path=["test_ns"]))
|
||||
assert "table1" in table_names
|
||||
assert "table2" in table_names
|
||||
assert len(table_names) == 2
|
||||
|
||||
# Drop one table
|
||||
db.drop_table("table1", namespace=["test_ns"])
|
||||
db.drop_table("table1", namespace_path=["test_ns"])
|
||||
|
||||
# Verify only table2 remains
|
||||
table_names = list(db.table_names(namespace=["test_ns"]))
|
||||
table_names = list(db.table_names(namespace_path=["test_ns"]))
|
||||
assert "table1" not in table_names
|
||||
assert "table2" in table_names
|
||||
assert len(table_names) == 1
|
||||
|
||||
# Drop the second table
|
||||
db.drop_table("table2", namespace=["test_ns"])
|
||||
assert len(list(db.table_names(namespace=["test_ns"]))) == 0
|
||||
db.drop_table("table2", namespace_path=["test_ns"])
|
||||
assert len(list(db.table_names(namespace_path=["test_ns"]))) == 0
|
||||
|
||||
# Should not be able to open dropped table
|
||||
with pytest.raises(TableNotFoundError):
|
||||
db.open_table("table1", namespace=["test_ns"])
|
||||
db.open_table("table1", namespace_path=["test_ns"])
|
||||
|
||||
def test_create_table_with_schema(self):
|
||||
"""Test creating a table with explicit schema through namespace."""
|
||||
@@ -151,7 +161,7 @@ class TestNamespaceConnection:
|
||||
)
|
||||
|
||||
# Create table with schema in child namespace
|
||||
table = db.create_table("test_table", schema=schema, namespace=["test_ns"])
|
||||
table = db.create_table("test_table", schema=schema, namespace_path=["test_ns"])
|
||||
assert table is not None
|
||||
assert table.namespace == ["test_ns"]
|
||||
|
||||
@@ -175,7 +185,7 @@ class TestNamespaceConnection:
|
||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||
]
|
||||
)
|
||||
db.create_table("old_name", schema=schema, namespace=["test_ns"])
|
||||
db.create_table("old_name", schema=schema, namespace_path=["test_ns"])
|
||||
|
||||
# Rename should raise NotImplementedError
|
||||
with pytest.raises(NotImplementedError, match="rename_table is not supported"):
|
||||
@@ -196,20 +206,20 @@ class TestNamespaceConnection:
|
||||
]
|
||||
)
|
||||
for i in range(3):
|
||||
db.create_table(f"table{i}", schema=schema, namespace=["test_ns"])
|
||||
db.create_table(f"table{i}", schema=schema, namespace_path=["test_ns"])
|
||||
|
||||
# Verify tables exist in child namespace
|
||||
assert len(list(db.table_names(namespace=["test_ns"]))) == 3
|
||||
assert len(list(db.table_names(namespace_path=["test_ns"]))) == 3
|
||||
|
||||
# Drop all tables in child namespace
|
||||
db.drop_all_tables(namespace=["test_ns"])
|
||||
db.drop_all_tables(namespace_path=["test_ns"])
|
||||
|
||||
# Verify all tables are gone from child namespace
|
||||
assert len(list(db.table_names(namespace=["test_ns"]))) == 0
|
||||
assert len(list(db.table_names(namespace_path=["test_ns"]))) == 0
|
||||
|
||||
# Test that table_names works with keyword-only namespace parameter
|
||||
db.create_table("test_table", schema=schema, namespace=["test_ns"])
|
||||
result = list(db.table_names(namespace=["test_ns"]))
|
||||
db.create_table("test_table", schema=schema, namespace_path=["test_ns"])
|
||||
result = list(db.table_names(namespace_path=["test_ns"]))
|
||||
assert "test_table" in result
|
||||
|
||||
def test_table_operations(self):
|
||||
@@ -227,7 +237,7 @@ class TestNamespaceConnection:
|
||||
pa.field("text", pa.string()),
|
||||
]
|
||||
)
|
||||
table = db.create_table("test_table", schema=schema, namespace=["test_ns"])
|
||||
table = db.create_table("test_table", schema=schema, namespace_path=["test_ns"])
|
||||
|
||||
# Verify empty table was created
|
||||
result = table.to_pandas()
|
||||
@@ -298,25 +308,25 @@ class TestNamespaceConnection:
|
||||
]
|
||||
)
|
||||
table = db.create_table(
|
||||
"test_table", schema=schema, namespace=["test_namespace"]
|
||||
"test_table", schema=schema, namespace_path=["test_namespace"]
|
||||
)
|
||||
assert table is not None
|
||||
|
||||
# Verify table exists in namespace
|
||||
tables_in_namespace = list(db.table_names(namespace=["test_namespace"]))
|
||||
tables_in_namespace = list(db.table_names(namespace_path=["test_namespace"]))
|
||||
assert "test_table" in tables_in_namespace
|
||||
assert len(tables_in_namespace) == 1
|
||||
|
||||
# Open table from namespace
|
||||
table = db.open_table("test_table", namespace=["test_namespace"])
|
||||
table = db.open_table("test_table", namespace_path=["test_namespace"])
|
||||
assert table is not None
|
||||
assert table.name == "test_table"
|
||||
|
||||
# Drop table from namespace
|
||||
db.drop_table("test_table", namespace=["test_namespace"])
|
||||
db.drop_table("test_table", namespace_path=["test_namespace"])
|
||||
|
||||
# Verify table no longer exists in namespace
|
||||
tables_in_namespace = list(db.table_names(namespace=["test_namespace"]))
|
||||
tables_in_namespace = list(db.table_names(namespace_path=["test_namespace"]))
|
||||
assert len(tables_in_namespace) == 0
|
||||
|
||||
# Drop namespace
|
||||
@@ -338,14 +348,14 @@ class TestNamespaceConnection:
|
||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||
]
|
||||
)
|
||||
db.create_table("test_table", schema=schema, namespace=["test_namespace"])
|
||||
db.create_table("test_table", schema=schema, namespace_path=["test_namespace"])
|
||||
|
||||
# Try to drop namespace with tables - should fail
|
||||
with pytest.raises(NamespaceNotEmptyError):
|
||||
db.drop_namespace(["test_namespace"])
|
||||
|
||||
# Drop table first
|
||||
db.drop_table("test_table", namespace=["test_namespace"])
|
||||
db.drop_table("test_table", namespace_path=["test_namespace"])
|
||||
|
||||
# Now dropping namespace should work
|
||||
db.drop_namespace(["test_namespace"])
|
||||
@@ -368,10 +378,10 @@ class TestNamespaceConnection:
|
||||
|
||||
# Create table with same name in both namespaces
|
||||
table_a = db.create_table(
|
||||
"same_name_table", schema=schema, namespace=["namespace_a"]
|
||||
"same_name_table", schema=schema, namespace_path=["namespace_a"]
|
||||
)
|
||||
table_b = db.create_table(
|
||||
"same_name_table", schema=schema, namespace=["namespace_b"]
|
||||
"same_name_table", schema=schema, namespace_path=["namespace_b"]
|
||||
)
|
||||
|
||||
# Add different data to each table
|
||||
@@ -389,7 +399,9 @@ class TestNamespaceConnection:
|
||||
table_b.add(data_b)
|
||||
|
||||
# Verify data in namespace_a table
|
||||
opened_table_a = db.open_table("same_name_table", namespace=["namespace_a"])
|
||||
opened_table_a = db.open_table(
|
||||
"same_name_table", namespace_path=["namespace_a"]
|
||||
)
|
||||
result_a = opened_table_a.to_pandas().sort_values("id").reset_index(drop=True)
|
||||
assert len(result_a) == 2
|
||||
assert result_a["id"].tolist() == [1, 2]
|
||||
@@ -400,7 +412,9 @@ class TestNamespaceConnection:
|
||||
assert [v.tolist() for v in result_a["vector"]] == [[1.0, 2.0], [3.0, 4.0]]
|
||||
|
||||
# Verify data in namespace_b table
|
||||
opened_table_b = db.open_table("same_name_table", namespace=["namespace_b"])
|
||||
opened_table_b = db.open_table(
|
||||
"same_name_table", namespace_path=["namespace_b"]
|
||||
)
|
||||
result_b = opened_table_b.to_pandas().sort_values("id").reset_index(drop=True)
|
||||
assert len(result_b) == 3
|
||||
assert result_b["id"].tolist() == [10, 20, 30]
|
||||
@@ -420,8 +434,8 @@ class TestNamespaceConnection:
|
||||
assert "same_name_table" not in root_tables
|
||||
|
||||
# Clean up
|
||||
db.drop_table("same_name_table", namespace=["namespace_a"])
|
||||
db.drop_table("same_name_table", namespace=["namespace_b"])
|
||||
db.drop_table("same_name_table", namespace_path=["namespace_a"])
|
||||
db.drop_table("same_name_table", namespace_path=["namespace_b"])
|
||||
db.drop_namespace(["namespace_a"])
|
||||
db.drop_namespace(["namespace_b"])
|
||||
|
||||
@@ -449,6 +463,8 @@ class TestAsyncNamespaceConnection:
|
||||
table_names = await db.table_names()
|
||||
assert len(list(table_names)) == 0
|
||||
|
||||
# Async connect via namespace helper is not enabled yet.
|
||||
|
||||
async def test_create_table_async(self):
|
||||
"""Test creating a table asynchronously through namespace."""
|
||||
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
|
||||
@@ -467,13 +483,13 @@ class TestAsyncNamespaceConnection:
|
||||
|
||||
# Create empty table in child namespace
|
||||
table = await db.create_table(
|
||||
"test_table", schema=schema, namespace=["test_ns"]
|
||||
"test_table", schema=schema, namespace_path=["test_ns"]
|
||||
)
|
||||
assert table is not None
|
||||
assert isinstance(table, lancedb.AsyncTable)
|
||||
|
||||
# Table should appear in child namespace
|
||||
table_names = await db.table_names(namespace=["test_ns"])
|
||||
table_names = await db.table_names(namespace_path=["test_ns"])
|
||||
assert "test_table" in list(table_names)
|
||||
|
||||
async def test_open_table_async(self):
|
||||
@@ -490,10 +506,10 @@ class TestAsyncNamespaceConnection:
|
||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||
]
|
||||
)
|
||||
await db.create_table("test_table", schema=schema, namespace=["test_ns"])
|
||||
await db.create_table("test_table", schema=schema, namespace_path=["test_ns"])
|
||||
|
||||
# Open the table
|
||||
table = await db.open_table("test_table", namespace=["test_ns"])
|
||||
table = await db.open_table("test_table", namespace_path=["test_ns"])
|
||||
assert table is not None
|
||||
assert isinstance(table, lancedb.AsyncTable)
|
||||
|
||||
@@ -547,20 +563,20 @@ class TestAsyncNamespaceConnection:
|
||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||
]
|
||||
)
|
||||
await db.create_table("table1", schema=schema, namespace=["test_ns"])
|
||||
await db.create_table("table2", schema=schema, namespace=["test_ns"])
|
||||
await db.create_table("table1", schema=schema, namespace_path=["test_ns"])
|
||||
await db.create_table("table2", schema=schema, namespace_path=["test_ns"])
|
||||
|
||||
# Verify both tables exist in child namespace
|
||||
table_names = list(await db.table_names(namespace=["test_ns"]))
|
||||
table_names = list(await db.table_names(namespace_path=["test_ns"]))
|
||||
assert "table1" in table_names
|
||||
assert "table2" in table_names
|
||||
assert len(table_names) == 2
|
||||
|
||||
# Drop one table
|
||||
await db.drop_table("table1", namespace=["test_ns"])
|
||||
await db.drop_table("table1", namespace_path=["test_ns"])
|
||||
|
||||
# Verify only table2 remains
|
||||
table_names = list(await db.table_names(namespace=["test_ns"]))
|
||||
table_names = list(await db.table_names(namespace_path=["test_ns"]))
|
||||
assert "table1" not in table_names
|
||||
assert "table2" in table_names
|
||||
assert len(table_names) == 1
|
||||
@@ -589,20 +605,24 @@ class TestAsyncNamespaceConnection:
|
||||
]
|
||||
)
|
||||
table = await db.create_table(
|
||||
"test_table", schema=schema, namespace=["test_namespace"]
|
||||
"test_table", schema=schema, namespace_path=["test_namespace"]
|
||||
)
|
||||
assert table is not None
|
||||
|
||||
# Verify table exists in namespace
|
||||
tables_in_namespace = list(await db.table_names(namespace=["test_namespace"]))
|
||||
tables_in_namespace = list(
|
||||
await db.table_names(namespace_path=["test_namespace"])
|
||||
)
|
||||
assert "test_table" in tables_in_namespace
|
||||
assert len(tables_in_namespace) == 1
|
||||
|
||||
# Drop table from namespace
|
||||
await db.drop_table("test_table", namespace=["test_namespace"])
|
||||
await db.drop_table("test_table", namespace_path=["test_namespace"])
|
||||
|
||||
# Verify table no longer exists in namespace
|
||||
tables_in_namespace = list(await db.table_names(namespace=["test_namespace"]))
|
||||
tables_in_namespace = list(
|
||||
await db.table_names(namespace_path=["test_namespace"])
|
||||
)
|
||||
assert len(tables_in_namespace) == 0
|
||||
|
||||
# Drop namespace
|
||||
@@ -627,15 +647,98 @@ class TestAsyncNamespaceConnection:
|
||||
]
|
||||
)
|
||||
for i in range(3):
|
||||
await db.create_table(f"table{i}", schema=schema, namespace=["test_ns"])
|
||||
await db.create_table(
|
||||
f"table{i}", schema=schema, namespace_path=["test_ns"]
|
||||
)
|
||||
|
||||
# Verify tables exist in child namespace
|
||||
table_names = await db.table_names(namespace=["test_ns"])
|
||||
table_names = await db.table_names(namespace_path=["test_ns"])
|
||||
assert len(list(table_names)) == 3
|
||||
|
||||
# Drop all tables in child namespace
|
||||
await db.drop_all_tables(namespace=["test_ns"])
|
||||
await db.drop_all_tables(namespace_path=["test_ns"])
|
||||
|
||||
# Verify all tables are gone from child namespace
|
||||
table_names = await db.table_names(namespace=["test_ns"])
|
||||
table_names = await db.table_names(namespace_path=["test_ns"])
|
||||
assert len(list(table_names)) == 0
|
||||
|
||||
|
||||
class TestPushdownOperations:
|
||||
"""Test pushdown operations on namespace connections."""
|
||||
|
||||
def setup_method(self):
|
||||
"""Set up test fixtures."""
|
||||
self.temp_dir = tempfile.mkdtemp()
|
||||
|
||||
def teardown_method(self):
|
||||
"""Clean up test fixtures."""
|
||||
shutil.rmtree(self.temp_dir, ignore_errors=True)
|
||||
|
||||
def test_query_table_pushdown_stored(self):
|
||||
"""Test that QueryTable pushdown is stored on sync connection."""
|
||||
db = lancedb.connect_namespace(
|
||||
"dir",
|
||||
{"root": self.temp_dir},
|
||||
namespace_client_pushdown_operations=["QueryTable"],
|
||||
)
|
||||
assert "QueryTable" in db._pushdown_operations
|
||||
|
||||
def test_create_table_pushdown_stored(self):
|
||||
"""Test that CreateTable pushdown is stored on sync connection."""
|
||||
db = lancedb.connect_namespace(
|
||||
"dir",
|
||||
{"root": self.temp_dir},
|
||||
namespace_client_pushdown_operations=["CreateTable"],
|
||||
)
|
||||
assert "CreateTable" in db._pushdown_operations
|
||||
|
||||
def test_both_pushdowns_stored(self):
|
||||
"""Test that both pushdown operations can be set together."""
|
||||
db = lancedb.connect_namespace(
|
||||
"dir",
|
||||
{"root": self.temp_dir},
|
||||
namespace_client_pushdown_operations=["QueryTable", "CreateTable"],
|
||||
)
|
||||
assert "QueryTable" in db._pushdown_operations
|
||||
assert "CreateTable" in db._pushdown_operations
|
||||
|
||||
def test_pushdown_defaults_to_empty(self):
|
||||
"""Test that pushdown operations default to empty."""
|
||||
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
|
||||
assert len(db._pushdown_operations) == 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
class TestAsyncPushdownOperations:
|
||||
"""Test pushdown operations on async namespace connections."""
|
||||
|
||||
def setup_method(self):
|
||||
"""Set up test fixtures."""
|
||||
self.temp_dir = tempfile.mkdtemp()
|
||||
|
||||
def teardown_method(self):
|
||||
"""Clean up test fixtures."""
|
||||
shutil.rmtree(self.temp_dir, ignore_errors=True)
|
||||
|
||||
async def test_async_query_table_pushdown_stored(self):
|
||||
"""Test that QueryTable pushdown is stored on async connection."""
|
||||
db = lancedb.connect_namespace_async(
|
||||
"dir",
|
||||
{"root": self.temp_dir},
|
||||
namespace_client_pushdown_operations=["QueryTable"],
|
||||
)
|
||||
assert "QueryTable" in db._pushdown_operations
|
||||
|
||||
async def test_async_create_table_pushdown_stored(self):
|
||||
"""Test that CreateTable pushdown is stored on async connection."""
|
||||
db = lancedb.connect_namespace_async(
|
||||
"dir",
|
||||
{"root": self.temp_dir},
|
||||
namespace_client_pushdown_operations=["CreateTable"],
|
||||
)
|
||||
assert "CreateTable" in db._pushdown_operations
|
||||
|
||||
async def test_async_pushdown_defaults_to_empty(self):
|
||||
"""Test that pushdown operations default to empty on async connection."""
|
||||
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
|
||||
assert len(db._pushdown_operations) == 0
|
||||
|
||||
@@ -4,9 +4,11 @@
|
||||
"""
|
||||
Integration tests for LanceDB Namespace with S3 and credential refresh.
|
||||
|
||||
This test simulates a namespace server that returns incrementing credentials
|
||||
and verifies that the credential refresh mechanism works correctly for both
|
||||
create_table and open_table operations.
|
||||
This test uses DirectoryNamespace with native ops_metrics and vend_input_storage_options
|
||||
features to track API calls and test credential refresh mechanisms.
|
||||
|
||||
Tests are parameterized to run with both DirectoryNamespace and a CustomNamespace
|
||||
wrapper to verify Python-Rust binding works correctly for custom implementations.
|
||||
|
||||
Tests verify:
|
||||
- Storage options provider is auto-created and used
|
||||
@@ -18,22 +20,136 @@ Tests verify:
|
||||
import copy
|
||||
import time
|
||||
import uuid
|
||||
from threading import Lock
|
||||
from typing import Dict
|
||||
from typing import Dict, Optional
|
||||
|
||||
import pyarrow as pa
|
||||
import pytest
|
||||
from lance_namespace import (
|
||||
CreateEmptyTableRequest,
|
||||
CreateEmptyTableResponse,
|
||||
from lance.namespace import (
|
||||
DeclareTableRequest,
|
||||
DeclareTableResponse,
|
||||
DescribeTableRequest,
|
||||
DescribeTableResponse,
|
||||
DirectoryNamespace,
|
||||
LanceNamespace,
|
||||
)
|
||||
from lance_namespace import (
|
||||
CreateNamespaceRequest,
|
||||
CreateNamespaceResponse,
|
||||
CreateTableRequest,
|
||||
CreateTableResponse,
|
||||
CreateTableVersionRequest,
|
||||
CreateTableVersionResponse,
|
||||
DeregisterTableRequest,
|
||||
DeregisterTableResponse,
|
||||
DescribeNamespaceRequest,
|
||||
DescribeNamespaceResponse,
|
||||
DescribeTableVersionRequest,
|
||||
DescribeTableVersionResponse,
|
||||
DropNamespaceRequest,
|
||||
DropNamespaceResponse,
|
||||
DropTableRequest,
|
||||
DropTableResponse,
|
||||
ListNamespacesRequest,
|
||||
ListNamespacesResponse,
|
||||
ListTablesRequest,
|
||||
ListTablesResponse,
|
||||
ListTableVersionsRequest,
|
||||
ListTableVersionsResponse,
|
||||
NamespaceExistsRequest,
|
||||
RegisterTableRequest,
|
||||
RegisterTableResponse,
|
||||
TableExistsRequest,
|
||||
)
|
||||
from lancedb.namespace import LanceNamespaceDBConnection
|
||||
|
||||
|
||||
class CustomNamespace(LanceNamespace):
|
||||
"""A custom namespace wrapper that delegates to DirectoryNamespace.
|
||||
|
||||
This class verifies that the Python-Rust binding works correctly for
|
||||
custom namespace implementations that wrap the native DirectoryNamespace.
|
||||
All methods simply delegate to the underlying DirectoryNamespace instance.
|
||||
"""
|
||||
|
||||
def __init__(self, inner: DirectoryNamespace):
|
||||
self._inner = inner
|
||||
|
||||
def namespace_id(self) -> str:
|
||||
return f"CustomNamespace[{self._inner.namespace_id()}]"
|
||||
|
||||
def create_namespace(
|
||||
self, request: CreateNamespaceRequest
|
||||
) -> CreateNamespaceResponse:
|
||||
return self._inner.create_namespace(request)
|
||||
|
||||
def describe_namespace(
|
||||
self, request: DescribeNamespaceRequest
|
||||
) -> DescribeNamespaceResponse:
|
||||
return self._inner.describe_namespace(request)
|
||||
|
||||
def namespace_exists(self, request: NamespaceExistsRequest) -> None:
|
||||
return self._inner.namespace_exists(request)
|
||||
|
||||
def drop_namespace(self, request: DropNamespaceRequest) -> DropNamespaceResponse:
|
||||
return self._inner.drop_namespace(request)
|
||||
|
||||
def list_namespaces(self, request: ListNamespacesRequest) -> ListNamespacesResponse:
|
||||
return self._inner.list_namespaces(request)
|
||||
|
||||
def create_table(
|
||||
self, request: CreateTableRequest, data: bytes
|
||||
) -> CreateTableResponse:
|
||||
return self._inner.create_table(request, data)
|
||||
|
||||
def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
|
||||
return self._inner.declare_table(request)
|
||||
|
||||
def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse:
|
||||
return self._inner.describe_table(request)
|
||||
|
||||
def table_exists(self, request: TableExistsRequest) -> None:
|
||||
return self._inner.table_exists(request)
|
||||
|
||||
def drop_table(self, request: DropTableRequest) -> DropTableResponse:
|
||||
return self._inner.drop_table(request)
|
||||
|
||||
def list_tables(self, request: ListTablesRequest) -> ListTablesResponse:
|
||||
return self._inner.list_tables(request)
|
||||
|
||||
def register_table(self, request: RegisterTableRequest) -> RegisterTableResponse:
|
||||
return self._inner.register_table(request)
|
||||
|
||||
def deregister_table(
|
||||
self, request: DeregisterTableRequest
|
||||
) -> DeregisterTableResponse:
|
||||
return self._inner.deregister_table(request)
|
||||
|
||||
def list_table_versions(
|
||||
self, request: ListTableVersionsRequest
|
||||
) -> ListTableVersionsResponse:
|
||||
return self._inner.list_table_versions(request)
|
||||
|
||||
def describe_table_version(
|
||||
self, request: DescribeTableVersionRequest
|
||||
) -> DescribeTableVersionResponse:
|
||||
return self._inner.describe_table_version(request)
|
||||
|
||||
def create_table_version(
|
||||
self, request: CreateTableVersionRequest
|
||||
) -> CreateTableVersionResponse:
|
||||
return self._inner.create_table_version(request)
|
||||
|
||||
def retrieve_ops_metrics(self) -> Optional[Dict[str, int]]:
|
||||
return self._inner.retrieve_ops_metrics()
|
||||
|
||||
|
||||
def _wrap_if_custom(ns_client: DirectoryNamespace, use_custom: bool):
|
||||
"""Wrap namespace client in CustomNamespace if use_custom is True."""
|
||||
if use_custom:
|
||||
return CustomNamespace(ns_client)
|
||||
return ns_client
|
||||
|
||||
|
||||
# LocalStack S3 configuration
|
||||
CONFIG = {
|
||||
"allow_http": "true",
|
||||
@@ -89,162 +205,88 @@ def delete_bucket(s3, bucket_name):
|
||||
pass
|
||||
|
||||
|
||||
class TrackingNamespace(LanceNamespace):
|
||||
def create_tracking_namespace(
|
||||
bucket_name: str,
|
||||
storage_options: dict,
|
||||
credential_expires_in_seconds: int = 60,
|
||||
use_custom: bool = False,
|
||||
):
|
||||
"""Create a DirectoryNamespace with ops metrics and credential vending enabled.
|
||||
|
||||
Uses native DirectoryNamespace features:
|
||||
- ops_metrics_enabled=true: Tracks API call counts via retrieve_ops_metrics()
|
||||
- vend_input_storage_options=true: Returns input storage options in responses
|
||||
- vend_input_storage_options_refresh_interval_millis: Adds expires_at_millis
|
||||
|
||||
Args:
|
||||
bucket_name: S3 bucket name or local path
|
||||
storage_options: Storage options to pass through (credentials, endpoint, etc.)
|
||||
credential_expires_in_seconds: Interval in seconds for credential expiration
|
||||
use_custom: If True, wrap in CustomNamespace for testing custom implementations
|
||||
|
||||
Returns:
|
||||
Tuple of (namespace_client, inner_namespace_client) where inner is always
|
||||
the DirectoryNamespace (used for metrics retrieval)
|
||||
"""
|
||||
Mock namespace that wraps DirectoryNamespace and tracks API calls.
|
||||
# Add refresh_offset_millis to storage options so that credentials are not
|
||||
# considered expired immediately. Set to 1 second (1000ms) so that refresh
|
||||
# checks work correctly with short-lived credentials in tests.
|
||||
storage_options_with_refresh = dict(storage_options)
|
||||
storage_options_with_refresh["refresh_offset_millis"] = "1000"
|
||||
|
||||
This namespace returns incrementing credentials with each API call to simulate
|
||||
credential rotation. It also tracks the number of times each API is called
|
||||
to verify caching behavior.
|
||||
"""
|
||||
dir_props = {f"storage.{k}": v for k, v in storage_options_with_refresh.items()}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
bucket_name: str,
|
||||
storage_options: Dict[str, str],
|
||||
credential_expires_in_seconds: int = 60,
|
||||
):
|
||||
from lance.namespace import DirectoryNamespace
|
||||
if bucket_name.startswith("/") or bucket_name.startswith("file://"):
|
||||
dir_props["root"] = f"{bucket_name}/namespace_root"
|
||||
else:
|
||||
dir_props["root"] = f"s3://{bucket_name}/namespace_root"
|
||||
|
||||
self.bucket_name = bucket_name
|
||||
self.base_storage_options = storage_options
|
||||
self.credential_expires_in_seconds = credential_expires_in_seconds
|
||||
self.describe_call_count = 0
|
||||
self.create_call_count = 0
|
||||
self.lock = Lock()
|
||||
# Enable ops metrics tracking
|
||||
dir_props["ops_metrics_enabled"] = "true"
|
||||
# Enable storage options vending
|
||||
dir_props["vend_input_storage_options"] = "true"
|
||||
# Set refresh interval in milliseconds
|
||||
dir_props["vend_input_storage_options_refresh_interval_millis"] = str(
|
||||
credential_expires_in_seconds * 1000
|
||||
)
|
||||
|
||||
# Create underlying DirectoryNamespace with storage options
|
||||
dir_props = {f"storage.{k}": v for k, v in storage_options.items()}
|
||||
inner_ns_client = DirectoryNamespace(**dir_props)
|
||||
ns_client = _wrap_if_custom(inner_ns_client, use_custom)
|
||||
return ns_client, inner_ns_client
|
||||
|
||||
# Use S3 path for bucket name, local path for file paths
|
||||
if bucket_name.startswith("/") or bucket_name.startswith("file://"):
|
||||
dir_props["root"] = f"{bucket_name}/namespace_root"
|
||||
else:
|
||||
dir_props["root"] = f"s3://{bucket_name}/namespace_root"
|
||||
|
||||
self.inner = DirectoryNamespace(**dir_props)
|
||||
def get_describe_call_count(namespace_client) -> int:
|
||||
"""Get the number of describe_table calls made to the namespace client."""
|
||||
return namespace_client.retrieve_ops_metrics().get("describe_table", 0)
|
||||
|
||||
def get_describe_call_count(self) -> int:
|
||||
"""Thread-safe getter for describe call count."""
|
||||
with self.lock:
|
||||
return self.describe_call_count
|
||||
|
||||
def get_create_call_count(self) -> int:
|
||||
"""Thread-safe getter for create call count."""
|
||||
with self.lock:
|
||||
return self.create_call_count
|
||||
|
||||
def namespace_id(self) -> str:
|
||||
"""Return namespace identifier."""
|
||||
return f"TrackingNamespace {{ inner: {self.inner.namespace_id()} }}"
|
||||
|
||||
def _modify_storage_options(
|
||||
self, storage_options: Dict[str, str], count: int
|
||||
) -> Dict[str, str]:
|
||||
"""
|
||||
Add incrementing credentials with expiration timestamp.
|
||||
|
||||
This simulates a credential rotation system where each call returns
|
||||
new credentials that expire after credential_expires_in_seconds.
|
||||
"""
|
||||
# Start from base storage options (endpoint, region, allow_http, etc.)
|
||||
# because DirectoryNamespace returns None for storage_options from
|
||||
# describe_table/declare_table when no credential vendor is configured.
|
||||
modified = copy.deepcopy(self.base_storage_options)
|
||||
if storage_options:
|
||||
modified.update(storage_options)
|
||||
|
||||
# Increment credentials to simulate rotation
|
||||
modified["aws_access_key_id"] = f"AKID_{count}"
|
||||
modified["aws_secret_access_key"] = f"SECRET_{count}"
|
||||
modified["aws_session_token"] = f"TOKEN_{count}"
|
||||
|
||||
# Set expiration time
|
||||
expires_at_millis = int(
|
||||
(time.time() + self.credential_expires_in_seconds) * 1000
|
||||
)
|
||||
modified["expires_at_millis"] = str(expires_at_millis)
|
||||
|
||||
return modified
|
||||
|
||||
def declare_table(self, request: DeclareTableRequest) -> DeclareTableResponse:
|
||||
"""Track declare_table calls and inject rotating credentials."""
|
||||
with self.lock:
|
||||
self.create_call_count += 1
|
||||
count = self.create_call_count
|
||||
|
||||
response = self.inner.declare_table(request)
|
||||
response.storage_options = self._modify_storage_options(
|
||||
response.storage_options, count
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
def create_empty_table(
|
||||
self, request: CreateEmptyTableRequest
|
||||
) -> CreateEmptyTableResponse:
|
||||
"""Track create_empty_table calls and inject rotating credentials."""
|
||||
with self.lock:
|
||||
self.create_call_count += 1
|
||||
count = self.create_call_count
|
||||
|
||||
response = self.inner.create_empty_table(request)
|
||||
response.storage_options = self._modify_storage_options(
|
||||
response.storage_options, count
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse:
|
||||
"""Track describe_table calls and inject rotating credentials."""
|
||||
with self.lock:
|
||||
self.describe_call_count += 1
|
||||
count = self.describe_call_count
|
||||
|
||||
response = self.inner.describe_table(request)
|
||||
response.storage_options = self._modify_storage_options(
|
||||
response.storage_options, count
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
# Pass through other methods to inner namespace
|
||||
def list_tables(self, request):
|
||||
return self.inner.list_tables(request)
|
||||
|
||||
def drop_table(self, request):
|
||||
return self.inner.drop_table(request)
|
||||
|
||||
def list_namespaces(self, request):
|
||||
return self.inner.list_namespaces(request)
|
||||
|
||||
def create_namespace(self, request):
|
||||
return self.inner.create_namespace(request)
|
||||
|
||||
def drop_namespace(self, request):
|
||||
return self.inner.drop_namespace(request)
|
||||
def get_declare_call_count(namespace_client) -> int:
|
||||
"""Get the number of declare_table calls made to the namespace client."""
|
||||
return namespace_client.retrieve_ops_metrics().get("declare_table", 0)
|
||||
|
||||
|
||||
@pytest.mark.s3_test
|
||||
def test_namespace_create_table_with_provider(s3_bucket: str):
|
||||
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
|
||||
def test_namespace_create_table_with_provider(s3_bucket: str, use_custom: bool):
|
||||
"""
|
||||
Test creating a table through namespace with storage options provider.
|
||||
|
||||
Verifies:
|
||||
- create_empty_table is called once to reserve location
|
||||
- declare_table is called once to reserve location
|
||||
- Storage options provider is auto-created
|
||||
- Table can be written successfully
|
||||
- Credentials are cached during write operations
|
||||
"""
|
||||
storage_options = copy.deepcopy(CONFIG)
|
||||
|
||||
namespace = TrackingNamespace(
|
||||
ns_client, inner_ns_client = create_tracking_namespace(
|
||||
bucket_name=s3_bucket,
|
||||
storage_options=storage_options,
|
||||
credential_expires_in_seconds=3600, # 1 hour
|
||||
use_custom=use_custom,
|
||||
)
|
||||
|
||||
db = LanceNamespaceDBConnection(namespace)
|
||||
db = LanceNamespaceDBConnection(ns_client)
|
||||
|
||||
# Create unique namespace for this test
|
||||
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
|
||||
@@ -254,8 +296,8 @@ def test_namespace_create_table_with_provider(s3_bucket: str):
|
||||
namespace_path = [namespace_name]
|
||||
|
||||
# Verify initial state
|
||||
assert namespace.get_create_call_count() == 0
|
||||
assert namespace.get_describe_call_count() == 0
|
||||
assert get_declare_call_count(inner_ns_client) == 0
|
||||
assert get_describe_call_count(inner_ns_client) == 0
|
||||
|
||||
# Create table with data
|
||||
data = pa.table(
|
||||
@@ -266,12 +308,12 @@ def test_namespace_create_table_with_provider(s3_bucket: str):
|
||||
}
|
||||
)
|
||||
|
||||
table = db.create_table(table_name, data, namespace=namespace_path)
|
||||
table = db.create_table(table_name, data, namespace_path=namespace_path)
|
||||
|
||||
# Verify create_empty_table was called exactly once
|
||||
assert namespace.get_create_call_count() == 1
|
||||
# Verify declare_table was called exactly once
|
||||
assert get_declare_call_count(inner_ns_client) == 1
|
||||
# describe_table should NOT be called during create in create mode
|
||||
assert namespace.get_describe_call_count() == 0
|
||||
assert get_describe_call_count(inner_ns_client) == 0
|
||||
|
||||
# Verify table was created successfully
|
||||
assert table.name == table_name
|
||||
@@ -281,7 +323,8 @@ def test_namespace_create_table_with_provider(s3_bucket: str):
|
||||
|
||||
|
||||
@pytest.mark.s3_test
|
||||
def test_namespace_open_table_with_provider(s3_bucket: str):
|
||||
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
|
||||
def test_namespace_open_table_with_provider(s3_bucket: str, use_custom: bool):
|
||||
"""
|
||||
Test opening a table through namespace with storage options provider.
|
||||
|
||||
@@ -293,13 +336,14 @@ def test_namespace_open_table_with_provider(s3_bucket: str):
|
||||
"""
|
||||
storage_options = copy.deepcopy(CONFIG)
|
||||
|
||||
namespace = TrackingNamespace(
|
||||
ns_client, inner_ns_client = create_tracking_namespace(
|
||||
bucket_name=s3_bucket,
|
||||
storage_options=storage_options,
|
||||
credential_expires_in_seconds=3600,
|
||||
use_custom=use_custom,
|
||||
)
|
||||
|
||||
db = LanceNamespaceDBConnection(namespace)
|
||||
db = LanceNamespaceDBConnection(ns_client)
|
||||
|
||||
# Create unique namespace for this test
|
||||
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
|
||||
@@ -317,21 +361,21 @@ def test_namespace_open_table_with_provider(s3_bucket: str):
|
||||
}
|
||||
)
|
||||
|
||||
db.create_table(table_name, data, namespace=namespace_path)
|
||||
db.create_table(table_name, data, namespace_path=namespace_path)
|
||||
|
||||
initial_create_count = namespace.get_create_call_count()
|
||||
assert initial_create_count == 1
|
||||
initial_declare_count = get_declare_call_count(inner_ns_client)
|
||||
assert initial_declare_count == 1
|
||||
|
||||
# Open the table
|
||||
opened_table = db.open_table(table_name, namespace=namespace_path)
|
||||
opened_table = db.open_table(table_name, namespace_path=namespace_path)
|
||||
|
||||
# Verify describe_table was called exactly once
|
||||
assert namespace.get_describe_call_count() == 1
|
||||
# create_empty_table should not be called again
|
||||
assert namespace.get_create_call_count() == initial_create_count
|
||||
assert get_describe_call_count(inner_ns_client) == 1
|
||||
# declare_table should not be called again
|
||||
assert get_declare_call_count(inner_ns_client) == initial_declare_count
|
||||
|
||||
# Perform multiple read operations
|
||||
describe_count_after_open = namespace.get_describe_call_count()
|
||||
describe_count_after_open = get_describe_call_count(inner_ns_client)
|
||||
|
||||
for _ in range(3):
|
||||
result = opened_table.to_pandas()
|
||||
@@ -340,11 +384,12 @@ def test_namespace_open_table_with_provider(s3_bucket: str):
|
||||
assert count == 5
|
||||
|
||||
# Verify credentials were cached (no additional describe_table calls)
|
||||
assert namespace.get_describe_call_count() == describe_count_after_open
|
||||
assert get_describe_call_count(inner_ns_client) == describe_count_after_open
|
||||
|
||||
|
||||
@pytest.mark.s3_test
|
||||
def test_namespace_credential_refresh_on_read(s3_bucket: str):
|
||||
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
|
||||
def test_namespace_credential_refresh_on_read(s3_bucket: str, use_custom: bool):
|
||||
"""
|
||||
Test credential refresh when credentials expire during read operations.
|
||||
|
||||
@@ -355,13 +400,14 @@ def test_namespace_credential_refresh_on_read(s3_bucket: str):
|
||||
"""
|
||||
storage_options = copy.deepcopy(CONFIG)
|
||||
|
||||
namespace = TrackingNamespace(
|
||||
ns_client, inner_ns_client = create_tracking_namespace(
|
||||
bucket_name=s3_bucket,
|
||||
storage_options=storage_options,
|
||||
credential_expires_in_seconds=3, # Short expiration for testing
|
||||
use_custom=use_custom,
|
||||
)
|
||||
|
||||
db = LanceNamespaceDBConnection(namespace)
|
||||
db = LanceNamespaceDBConnection(ns_client)
|
||||
|
||||
# Create unique namespace for this test
|
||||
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
|
||||
@@ -378,16 +424,16 @@ def test_namespace_credential_refresh_on_read(s3_bucket: str):
|
||||
}
|
||||
)
|
||||
|
||||
db.create_table(table_name, data, namespace=namespace_path)
|
||||
db.create_table(table_name, data, namespace_path=namespace_path)
|
||||
|
||||
# Open table (triggers describe_table)
|
||||
opened_table = db.open_table(table_name, namespace=namespace_path)
|
||||
opened_table = db.open_table(table_name, namespace_path=namespace_path)
|
||||
|
||||
# Perform an immediate read (should use credentials from open)
|
||||
result = opened_table.to_pandas()
|
||||
assert len(result) == 3
|
||||
|
||||
describe_count_after_first_read = namespace.get_describe_call_count()
|
||||
describe_count_after_first_read = get_describe_call_count(inner_ns_client)
|
||||
|
||||
# Wait for credentials to expire (3 seconds + buffer)
|
||||
time.sleep(5)
|
||||
@@ -396,7 +442,7 @@ def test_namespace_credential_refresh_on_read(s3_bucket: str):
|
||||
result = opened_table.to_pandas()
|
||||
assert len(result) == 3
|
||||
|
||||
describe_count_after_refresh = namespace.get_describe_call_count()
|
||||
describe_count_after_refresh = get_describe_call_count(inner_ns_client)
|
||||
# Verify describe_table was called again (credential refresh)
|
||||
refresh_delta = describe_count_after_refresh - describe_count_after_first_read
|
||||
|
||||
@@ -409,7 +455,8 @@ def test_namespace_credential_refresh_on_read(s3_bucket: str):
|
||||
|
||||
|
||||
@pytest.mark.s3_test
|
||||
def test_namespace_credential_refresh_on_write(s3_bucket: str):
|
||||
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
|
||||
def test_namespace_credential_refresh_on_write(s3_bucket: str, use_custom: bool):
|
||||
"""
|
||||
Test credential refresh when credentials expire during write operations.
|
||||
|
||||
@@ -420,13 +467,14 @@ def test_namespace_credential_refresh_on_write(s3_bucket: str):
|
||||
"""
|
||||
storage_options = copy.deepcopy(CONFIG)
|
||||
|
||||
namespace = TrackingNamespace(
|
||||
ns_client, inner_ns_client = create_tracking_namespace(
|
||||
bucket_name=s3_bucket,
|
||||
storage_options=storage_options,
|
||||
credential_expires_in_seconds=3, # Short expiration
|
||||
use_custom=use_custom,
|
||||
)
|
||||
|
||||
db = LanceNamespaceDBConnection(namespace)
|
||||
db = LanceNamespaceDBConnection(ns_client)
|
||||
|
||||
# Create unique namespace for this test
|
||||
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
|
||||
@@ -443,7 +491,7 @@ def test_namespace_credential_refresh_on_write(s3_bucket: str):
|
||||
}
|
||||
)
|
||||
|
||||
table = db.create_table(table_name, initial_data, namespace=namespace_path)
|
||||
table = db.create_table(table_name, initial_data, namespace_path=namespace_path)
|
||||
|
||||
# Add more data (should use cached credentials)
|
||||
new_data = pa.table(
|
||||
@@ -471,24 +519,26 @@ def test_namespace_credential_refresh_on_write(s3_bucket: str):
|
||||
|
||||
|
||||
@pytest.mark.s3_test
|
||||
def test_namespace_overwrite_mode(s3_bucket: str):
|
||||
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
|
||||
def test_namespace_overwrite_mode(s3_bucket: str, use_custom: bool):
|
||||
"""
|
||||
Test creating table in overwrite mode with credential tracking.
|
||||
|
||||
Verifies:
|
||||
- First create calls create_empty_table exactly once
|
||||
- First create calls declare_table exactly once
|
||||
- Overwrite mode calls describe_table exactly once to check existence
|
||||
- Storage options provider works in overwrite mode
|
||||
"""
|
||||
storage_options = copy.deepcopy(CONFIG)
|
||||
|
||||
namespace = TrackingNamespace(
|
||||
ns_client, inner_ns_client = create_tracking_namespace(
|
||||
bucket_name=s3_bucket,
|
||||
storage_options=storage_options,
|
||||
credential_expires_in_seconds=3600,
|
||||
use_custom=use_custom,
|
||||
)
|
||||
|
||||
db = LanceNamespaceDBConnection(namespace)
|
||||
db = LanceNamespaceDBConnection(ns_client)
|
||||
|
||||
# Create unique namespace for this test
|
||||
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
|
||||
@@ -505,11 +555,11 @@ def test_namespace_overwrite_mode(s3_bucket: str):
|
||||
}
|
||||
)
|
||||
|
||||
table = db.create_table(table_name, data1, namespace=namespace_path)
|
||||
# Exactly one create_empty_table call for initial create
|
||||
assert namespace.get_create_call_count() == 1
|
||||
table = db.create_table(table_name, data1, namespace_path=namespace_path)
|
||||
# Exactly one declare_table call for initial create
|
||||
assert get_declare_call_count(inner_ns_client) == 1
|
||||
# No describe_table calls in create mode
|
||||
assert namespace.get_describe_call_count() == 0
|
||||
assert get_describe_call_count(inner_ns_client) == 0
|
||||
assert table.count_rows() == 2
|
||||
|
||||
# Overwrite the table
|
||||
@@ -521,14 +571,14 @@ def test_namespace_overwrite_mode(s3_bucket: str):
|
||||
)
|
||||
|
||||
table2 = db.create_table(
|
||||
table_name, data2, namespace=namespace_path, mode="overwrite"
|
||||
table_name, data2, namespace_path=namespace_path, mode="overwrite"
|
||||
)
|
||||
|
||||
# Should still have only 1 create_empty_table call
|
||||
# Should still have only 1 declare_table call
|
||||
# (overwrite reuses location from describe_table)
|
||||
assert namespace.get_create_call_count() == 1
|
||||
assert get_declare_call_count(inner_ns_client) == 1
|
||||
# Should have called describe_table exactly once to get existing table location
|
||||
assert namespace.get_describe_call_count() == 1
|
||||
assert get_describe_call_count(inner_ns_client) == 1
|
||||
|
||||
# Verify new data
|
||||
assert table2.count_rows() == 3
|
||||
@@ -537,7 +587,8 @@ def test_namespace_overwrite_mode(s3_bucket: str):
|
||||
|
||||
|
||||
@pytest.mark.s3_test
|
||||
def test_namespace_multiple_tables(s3_bucket: str):
|
||||
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
|
||||
def test_namespace_multiple_tables(s3_bucket: str, use_custom: bool):
|
||||
"""
|
||||
Test creating and opening multiple tables in the same namespace.
|
||||
|
||||
@@ -548,13 +599,14 @@ def test_namespace_multiple_tables(s3_bucket: str):
|
||||
"""
|
||||
storage_options = copy.deepcopy(CONFIG)
|
||||
|
||||
namespace = TrackingNamespace(
|
||||
ns_client, inner_ns_client = create_tracking_namespace(
|
||||
bucket_name=s3_bucket,
|
||||
storage_options=storage_options,
|
||||
credential_expires_in_seconds=3600,
|
||||
use_custom=use_custom,
|
||||
)
|
||||
|
||||
db = LanceNamespaceDBConnection(namespace)
|
||||
db = LanceNamespaceDBConnection(ns_client)
|
||||
|
||||
# Create unique namespace for this test
|
||||
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
|
||||
@@ -564,22 +616,22 @@ def test_namespace_multiple_tables(s3_bucket: str):
|
||||
# Create first table
|
||||
table1_name = f"table1_{uuid.uuid4().hex}"
|
||||
data1 = pa.table({"id": [1, 2], "value": [10, 20]})
|
||||
db.create_table(table1_name, data1, namespace=namespace_path)
|
||||
db.create_table(table1_name, data1, namespace_path=namespace_path)
|
||||
|
||||
# Create second table
|
||||
table2_name = f"table2_{uuid.uuid4().hex}"
|
||||
data2 = pa.table({"id": [3, 4], "value": [30, 40]})
|
||||
db.create_table(table2_name, data2, namespace=namespace_path)
|
||||
db.create_table(table2_name, data2, namespace_path=namespace_path)
|
||||
|
||||
# Should have 2 create calls (one per table)
|
||||
assert namespace.get_create_call_count() == 2
|
||||
# Should have 2 declare calls (one per table)
|
||||
assert get_declare_call_count(inner_ns_client) == 2
|
||||
|
||||
# Open both tables
|
||||
opened1 = db.open_table(table1_name, namespace=namespace_path)
|
||||
opened2 = db.open_table(table2_name, namespace=namespace_path)
|
||||
opened1 = db.open_table(table1_name, namespace_path=namespace_path)
|
||||
opened2 = db.open_table(table2_name, namespace_path=namespace_path)
|
||||
|
||||
# Should have 2 describe calls (one per open)
|
||||
assert namespace.get_describe_call_count() == 2
|
||||
assert get_describe_call_count(inner_ns_client) == 2
|
||||
|
||||
# Verify both tables work independently
|
||||
assert opened1.count_rows() == 2
|
||||
@@ -593,7 +645,8 @@ def test_namespace_multiple_tables(s3_bucket: str):
|
||||
|
||||
|
||||
@pytest.mark.s3_test
|
||||
def test_namespace_with_schema_only(s3_bucket: str):
|
||||
@pytest.mark.parametrize("use_custom", [False, True], ids=["DirectoryNS", "CustomNS"])
|
||||
def test_namespace_with_schema_only(s3_bucket: str, use_custom: bool):
|
||||
"""
|
||||
Test creating empty table with schema only (no data).
|
||||
|
||||
@@ -604,13 +657,14 @@ def test_namespace_with_schema_only(s3_bucket: str):
|
||||
"""
|
||||
storage_options = copy.deepcopy(CONFIG)
|
||||
|
||||
namespace = TrackingNamespace(
|
||||
ns_client, inner_ns_client = create_tracking_namespace(
|
||||
bucket_name=s3_bucket,
|
||||
storage_options=storage_options,
|
||||
credential_expires_in_seconds=3600,
|
||||
use_custom=use_custom,
|
||||
)
|
||||
|
||||
db = LanceNamespaceDBConnection(namespace)
|
||||
db = LanceNamespaceDBConnection(ns_client)
|
||||
|
||||
# Create unique namespace for this test
|
||||
namespace_name = f"test_ns_{uuid.uuid4().hex[:8]}"
|
||||
@@ -628,12 +682,12 @@ def test_namespace_with_schema_only(s3_bucket: str):
|
||||
]
|
||||
)
|
||||
|
||||
table = db.create_table(table_name, schema=schema, namespace=namespace_path)
|
||||
table = db.create_table(table_name, schema=schema, namespace_path=namespace_path)
|
||||
|
||||
# Should have called create_empty_table once
|
||||
assert namespace.get_create_call_count() == 1
|
||||
# Should have called declare_table once
|
||||
assert get_declare_call_count(inner_ns_client) == 1
|
||||
# Should NOT have called describe_table in create mode
|
||||
assert namespace.get_describe_call_count() == 0
|
||||
assert get_describe_call_count(inner_ns_client) == 0
|
||||
|
||||
# Verify empty table
|
||||
assert table.count_rows() == 0
|
||||
|
||||
@@ -2108,7 +2108,7 @@ def test_stats(mem_db: DBConnection):
|
||||
stats = table.stats()
|
||||
print(f"{stats=}")
|
||||
assert stats == {
|
||||
"total_bytes": 38,
|
||||
"total_bytes": 60,
|
||||
"num_rows": 2,
|
||||
"num_indices": 0,
|
||||
"fragment_stats": {
|
||||
|
||||
Reference in New Issue
Block a user