mirror of
https://github.com/lancedb/lancedb.git
synced 2026-07-02 18:40:40 +00:00
fix: ensure read freshness provider is built into namespace client (#3571)
By default the read freshness provider was not included in the namespace client, preventing the read freshness headers from being included in the request. This prevents checkout_latest() from working as expected when using the namespace client. This fix ensures the provided is built into the client when the namespace impl and properties are provided.
This commit is contained in:
@@ -373,6 +373,19 @@ def _convert_pyarrow_schema_to_json(schema: pa.Schema) -> JsonArrowSchema:
|
||||
return JsonArrowSchema(fields=fields, metadata=meta)
|
||||
|
||||
|
||||
def _builds_namespace_natively(
|
||||
namespace_client_impl: Optional[str],
|
||||
namespace_client_properties: Optional[Dict[str, str]],
|
||||
) -> bool:
|
||||
"""Whether ``connect_namespace_client`` builds the namespace client natively
|
||||
in Rust (installing the read-freshness context provider) rather than wrapping
|
||||
the pre-built Python client.
|
||||
|
||||
Must mirror Rust ``build_namespace_natively`` in ``python/src/connection.rs``.
|
||||
"""
|
||||
return namespace_client_impl == "rest" and bool(namespace_client_properties)
|
||||
|
||||
|
||||
class LanceNamespaceDBConnection(DBConnection):
|
||||
"""
|
||||
A LanceDB connection that uses a namespace for table management.
|
||||
@@ -432,6 +445,13 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
)
|
||||
self._namespace_client_impl = namespace_client_impl
|
||||
self._namespace_client_properties = namespace_client_properties
|
||||
# When the namespace client is built natively (see Rust
|
||||
# ``build_namespace_natively``), the underlying Rust table performs
|
||||
# QueryTable pushdown through the read-freshness context provider, which
|
||||
# the pure-Python ``query_table`` path bypasses.
|
||||
self._route_pushdown_to_rust = _builds_namespace_natively(
|
||||
namespace_client_impl, namespace_client_properties
|
||||
)
|
||||
self._inner = AsyncConnection(
|
||||
_connect_namespace_client(
|
||||
namespace_client,
|
||||
@@ -543,6 +563,7 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
namespace_path=namespace_path,
|
||||
namespace_client=self._namespace_client,
|
||||
pushdown_operations=self._namespace_client_pushdown_operations,
|
||||
route_pushdown_to_rust=self._route_pushdown_to_rust,
|
||||
_async=async_table,
|
||||
)
|
||||
|
||||
@@ -580,6 +601,7 @@ class LanceNamespaceDBConnection(DBConnection):
|
||||
namespace_path=namespace_path,
|
||||
namespace_client=self._namespace_client,
|
||||
pushdown_operations=self._namespace_client_pushdown_operations,
|
||||
route_pushdown_to_rust=self._route_pushdown_to_rust,
|
||||
_async=async_table,
|
||||
)
|
||||
if branch is not None:
|
||||
@@ -875,6 +897,8 @@ class AsyncLanceNamespaceDBConnection:
|
||||
storage_options: Optional[Dict[str, str]] = None,
|
||||
session: Optional[Session] = None,
|
||||
namespace_client_pushdown_operations: Optional[List[str]] = None,
|
||||
namespace_client_impl: Optional[str] = None,
|
||||
namespace_client_properties: Optional[Dict[str, str]] = None,
|
||||
):
|
||||
"""
|
||||
Initialize an async namespace-based LanceDB connection.
|
||||
@@ -900,6 +924,12 @@ class AsyncLanceNamespaceDBConnection:
|
||||
namespace.create_table() instead of using declare_table + local write.
|
||||
|
||||
Default is None (no pushdown, all operations run locally).
|
||||
namespace_client_impl : Optional[str]
|
||||
The namespace implementation name used to create this connection.
|
||||
Required (with ``namespace_client_properties``) for the Rust client to
|
||||
be built natively and install the read-freshness provider.
|
||||
namespace_client_properties : Optional[Dict[str, str]]
|
||||
The namespace properties used to create this connection.
|
||||
"""
|
||||
self._namespace_client = namespace_client
|
||||
self.read_consistency_interval = read_consistency_interval
|
||||
@@ -908,6 +938,14 @@ class AsyncLanceNamespaceDBConnection:
|
||||
self._namespace_client_pushdown_operations = set(
|
||||
namespace_client_pushdown_operations or []
|
||||
)
|
||||
self._namespace_client_impl = namespace_client_impl
|
||||
self._namespace_client_properties = namespace_client_properties
|
||||
# See LanceNamespaceDBConnection: when built natively the Rust table runs
|
||||
# QueryTable pushdown through the read-freshness provider, so defer to it
|
||||
# rather than the urllib3 client (which omits x-lancedb-min-timestamp).
|
||||
self._route_pushdown_to_rust = _builds_namespace_natively(
|
||||
namespace_client_impl, namespace_client_properties
|
||||
)
|
||||
self._inner = AsyncConnection(
|
||||
_connect_namespace_client(
|
||||
namespace_client,
|
||||
@@ -921,8 +959,8 @@ class AsyncLanceNamespaceDBConnection:
|
||||
namespace_client_pushdown_operations=(
|
||||
list(self._namespace_client_pushdown_operations)
|
||||
),
|
||||
namespace_client_impl=None,
|
||||
namespace_client_properties=None,
|
||||
namespace_client_impl=namespace_client_impl,
|
||||
namespace_client_properties=namespace_client_properties,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -992,6 +1030,7 @@ class AsyncLanceNamespaceDBConnection:
|
||||
namespace_path=namespace_path,
|
||||
namespace_client=self._namespace_client,
|
||||
pushdown_operations=self._namespace_client_pushdown_operations,
|
||||
route_pushdown_to_rust=self._route_pushdown_to_rust,
|
||||
)
|
||||
|
||||
async def open_table(
|
||||
@@ -1029,6 +1068,7 @@ class AsyncLanceNamespaceDBConnection:
|
||||
namespace_path=namespace_path,
|
||||
namespace_client=self._namespace_client,
|
||||
pushdown_operations=self._namespace_client_pushdown_operations,
|
||||
route_pushdown_to_rust=self._route_pushdown_to_rust,
|
||||
)
|
||||
|
||||
async def drop_table(self, name: str, namespace_path: Optional[List[str]] = None):
|
||||
@@ -1387,4 +1427,6 @@ def connect_namespace_async(
|
||||
storage_options=storage_options,
|
||||
session=session,
|
||||
namespace_client_pushdown_operations=namespace_client_pushdown_operations,
|
||||
namespace_client_impl=namespace_client_impl,
|
||||
namespace_client_properties=namespace_client_properties,
|
||||
)
|
||||
|
||||
@@ -2022,6 +2022,7 @@ class LanceTable(Table):
|
||||
namespace_client: Optional[Any] = None,
|
||||
managed_versioning: Optional[bool] = None,
|
||||
pushdown_operations: Optional[set] = None,
|
||||
route_pushdown_to_rust: bool = False,
|
||||
_async: AsyncTable = None,
|
||||
):
|
||||
if namespace_path is None:
|
||||
@@ -2031,6 +2032,14 @@ class LanceTable(Table):
|
||||
self._location = location # Store location for use in _dataset_path
|
||||
self._namespace_client = namespace_client
|
||||
self._pushdown_operations = pushdown_operations or set()
|
||||
# When the connection built the namespace client natively (e.g. an
|
||||
# enterprise "rest" connection), the underlying Rust table already
|
||||
# executes QueryTable pushdown itself -- and, unlike this Python urllib3
|
||||
# path, it routes through the read-freshness context provider that emits
|
||||
# the ``x-lancedb-min-timestamp`` header. So we must defer pushdown to
|
||||
# Rust instead of calling the Python ``namespace_client.query_table``
|
||||
# directly, or reads silently bypass read-freshness (stale results).
|
||||
self._route_pushdown_to_rust = route_pushdown_to_rust
|
||||
if _async is not None:
|
||||
self._table = _async
|
||||
else:
|
||||
@@ -2241,6 +2250,7 @@ class LanceTable(Table):
|
||||
namespace_path=self._namespace_path,
|
||||
namespace_client=self._namespace_client,
|
||||
pushdown_operations=self._pushdown_operations,
|
||||
route_pushdown_to_rust=self._route_pushdown_to_rust,
|
||||
location=self._location,
|
||||
_async=async_table,
|
||||
)
|
||||
@@ -2391,8 +2401,11 @@ class LanceTable(Table):
|
||||
Returns
|
||||
-------
|
||||
pa.Table"""
|
||||
if _should_push_down_query_table(
|
||||
self._namespace_client, self._pushdown_operations
|
||||
if (
|
||||
_should_push_down_query_table(
|
||||
self._namespace_client, self._pushdown_operations
|
||||
)
|
||||
and not self._route_pushdown_to_rust
|
||||
):
|
||||
return self._execute_query(Query()).read_all()
|
||||
|
||||
@@ -3344,6 +3357,7 @@ class LanceTable(Table):
|
||||
location: Optional[str] = None,
|
||||
namespace_client: Optional[Any] = None,
|
||||
pushdown_operations: Optional[set] = None,
|
||||
route_pushdown_to_rust: bool = False,
|
||||
):
|
||||
"""
|
||||
Create a new table.
|
||||
@@ -3406,6 +3420,7 @@ class LanceTable(Table):
|
||||
self._location = location
|
||||
self._namespace_client = namespace_client
|
||||
self._pushdown_operations = pushdown_operations or set()
|
||||
self._route_pushdown_to_rust = route_pushdown_to_rust
|
||||
|
||||
if data_storage_version is not None:
|
||||
warnings.warn(
|
||||
@@ -3519,6 +3534,7 @@ class LanceTable(Table):
|
||||
_should_push_down_query_table(
|
||||
self._namespace_client, self._pushdown_operations
|
||||
)
|
||||
and not self._route_pushdown_to_rust
|
||||
and self.current_branch() is None
|
||||
):
|
||||
from lancedb.namespace import _execute_server_side_query
|
||||
@@ -4260,6 +4276,7 @@ class AsyncTable:
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
namespace_client: Optional[Any] = None,
|
||||
pushdown_operations: Optional[set] = None,
|
||||
route_pushdown_to_rust: bool = False,
|
||||
):
|
||||
"""Create a new AsyncTable object.
|
||||
|
||||
@@ -4272,6 +4289,9 @@ class AsyncTable:
|
||||
self._namespace_path = namespace_path or []
|
||||
self._namespace_client = namespace_client
|
||||
self._pushdown_operations = pushdown_operations or set()
|
||||
# See LanceTable.__init__: defer QueryTable pushdown to Rust (which emits
|
||||
# the read-freshness header) for natively-built namespace clients.
|
||||
self._route_pushdown_to_rust = route_pushdown_to_rust
|
||||
|
||||
def _set_namespace_context(
|
||||
self,
|
||||
@@ -4279,10 +4299,12 @@ class AsyncTable:
|
||||
namespace_path: Optional[List[str]] = None,
|
||||
namespace_client: Optional[Any] = None,
|
||||
pushdown_operations: Optional[set] = None,
|
||||
route_pushdown_to_rust: bool = False,
|
||||
) -> "AsyncTable":
|
||||
self._namespace_path = namespace_path or []
|
||||
self._namespace_client = namespace_client
|
||||
self._pushdown_operations = pushdown_operations or set()
|
||||
self._route_pushdown_to_rust = route_pushdown_to_rust
|
||||
return self
|
||||
|
||||
def __repr__(self):
|
||||
@@ -4492,8 +4514,11 @@ class AsyncTable:
|
||||
-------
|
||||
pa.Table
|
||||
"""
|
||||
if _should_push_down_query_table(
|
||||
self._namespace_client, self._pushdown_operations
|
||||
if (
|
||||
_should_push_down_query_table(
|
||||
self._namespace_client, self._pushdown_operations
|
||||
)
|
||||
and not self._route_pushdown_to_rust
|
||||
):
|
||||
return (await self._execute_query(Query())).read_all()
|
||||
|
||||
@@ -5177,8 +5202,11 @@ class AsyncTable:
|
||||
batch_size: Optional[int] = None,
|
||||
timeout: Optional[timedelta] = None,
|
||||
) -> pa.RecordBatchReader:
|
||||
if _should_push_down_query_table(
|
||||
self._namespace_client, self._pushdown_operations
|
||||
if (
|
||||
_should_push_down_query_table(
|
||||
self._namespace_client, self._pushdown_operations
|
||||
)
|
||||
and not self._route_pushdown_to_rust
|
||||
):
|
||||
from lancedb.namespace import _execute_server_side_query
|
||||
|
||||
|
||||
@@ -65,6 +65,9 @@ def _namespace_lance_table(namespace_client: _NamespaceClient) -> LanceTable:
|
||||
table._namespace_path = ["geneva"]
|
||||
table._namespace_client = namespace_client
|
||||
table._pushdown_operations = {"QueryTable"}
|
||||
# This test exercises the Python-side pushdown path (non-native client), so
|
||||
# pushdown is not routed to Rust.
|
||||
table._route_pushdown_to_rust = False
|
||||
return table
|
||||
|
||||
|
||||
@@ -805,6 +808,37 @@ class TestPushdownOperations:
|
||||
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
|
||||
assert len(db._namespace_client_pushdown_operations) == 0
|
||||
|
||||
def test_route_pushdown_to_rust_for_native_rest(self):
|
||||
"""A natively-built rest connection must defer QueryTable pushdown to
|
||||
Rust so reads carry the x-lancedb-min-timestamp read-freshness header."""
|
||||
db = lancedb.connect_namespace(
|
||||
"rest",
|
||||
{"uri": "http://localhost:12345"},
|
||||
namespace_client_pushdown_operations=["QueryTable"],
|
||||
)
|
||||
assert db._route_pushdown_to_rust is True
|
||||
|
||||
def test_route_pushdown_to_rust_false_for_dir(self):
|
||||
"""A non-native (dir) connection keeps the Python pushdown path."""
|
||||
db = lancedb.connect_namespace("dir", {"root": self.temp_dir})
|
||||
assert db._route_pushdown_to_rust is False
|
||||
|
||||
def test_async_route_pushdown_to_rust_for_native_rest(self):
|
||||
"""The async connection must not silently bypass the read-freshness fix:
|
||||
a natively-built rest connection defers pushdown to Rust (regression test
|
||||
for the async path omitting the freshness header)."""
|
||||
db = lancedb.connect_namespace_async(
|
||||
"rest",
|
||||
{"uri": "http://localhost:12345"},
|
||||
namespace_client_pushdown_operations=["QueryTable"],
|
||||
)
|
||||
assert db._route_pushdown_to_rust is True
|
||||
|
||||
def test_async_route_pushdown_to_rust_false_for_dir(self):
|
||||
"""The async non-native (dir) connection keeps the Python pushdown path."""
|
||||
db = lancedb.connect_namespace_async("dir", {"root": self.temp_dir})
|
||||
assert db._route_pushdown_to_rust is False
|
||||
|
||||
def test_lance_table_to_arrow_uses_query_pushdown(self):
|
||||
namespace_client = _NamespaceClient()
|
||||
table = _namespace_lance_table(namespace_client)
|
||||
|
||||
Reference in New Issue
Block a user