diff --git a/python/python/lancedb/namespace.py b/python/python/lancedb/namespace.py index cb2d9b5a7..8825c1d61 100644 --- a/python/python/lancedb/namespace.py +++ b/python/python/lancedb/namespace.py @@ -71,6 +71,9 @@ from lancedb.embeddings import EmbeddingFunctionConfig from ._lancedb import Session +_MAX_QUERY_K = 2**31 - 1 + + def _query_to_namespace_request( table_id: List[str], query: "Query", @@ -148,7 +151,8 @@ def _query_to_namespace_request( if query.limit is not None: k = query.limit elif query.vector is None and query.full_text_query is None: - k = sys.maxsize + # limit k to max i32 value to avoid client overflows + k = _MAX_QUERY_K else: k = 10 diff --git a/python/python/tests/test_namespace.py b/python/python/tests/test_namespace.py index cd2b3ee44..e944df7bd 100644 --- a/python/python/tests/test_namespace.py +++ b/python/python/tests/test_namespace.py @@ -5,11 +5,11 @@ import tempfile import shutil -import sys import pytest import pyarrow as pa import lancedb from lance_namespace.errors import NamespaceNotEmptyError, TableNotFoundError +from lancedb.namespace import _MAX_QUERY_K from lancedb.table import AsyncTable, LanceTable @@ -816,10 +816,13 @@ class TestPushdownOperations: ["geneva", "hist"], ["geneva", "hist"], ] + # Unlimited reads cap k at i32::MAX (the namespace query_table `k` + # field is i32); sys.maxsize would overflow the Rust binding. assert [request.k for request in namespace_client.requests] == [ - sys.maxsize, - sys.maxsize, + _MAX_QUERY_K, + _MAX_QUERY_K, ] + assert all(r.k <= 2**31 - 1 for r in namespace_client.requests) @pytest.mark.asyncio @@ -874,10 +877,13 @@ class TestAsyncPushdownOperations: ["geneva", "hist"], ["geneva", "hist"], ] + # Unlimited reads cap k at i32::MAX (the namespace query_table `k` + # field is i32); sys.maxsize would overflow the Rust binding. assert [request.k for request in namespace_client.requests] == [ - sys.maxsize, - sys.maxsize, + _MAX_QUERY_K, + _MAX_QUERY_K, ] + assert all(r.k <= 2**31 - 1 for r in namespace_client.requests) def test_local_table_to_arrow_and_to_pandas_are_unchanged(tmp_path):