feat: add maximum and minimum nprobes properties (#2430)

This exposes the maximum_nprobes and minimum_nprobes feature that was
added in https://github.com/lancedb/lance/pull/3903

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
- Added support for specifying minimum and maximum probe counts in
vector search queries, allowing finer control over search behavior.
- Users can now independently set minimum and maximum probes for vector
and hybrid queries via new methods and parameters in Python, Node.js,
and Rust APIs.

- **Bug Fixes**
- Improved parameter validation to ensure correct usage of minimum and
maximum probe values.

- **Tests**
- Expanded test coverage to validate correct handling, serialization,
and error cases for the new probe parameters.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
Weston Pace
2025-06-13 15:18:29 -07:00
committed by GitHub
parent fec8d58f06
commit 59b57e30ed
12 changed files with 505 additions and 32 deletions

View File

@@ -496,6 +496,8 @@ def test_query_sync_minimal():
"ef": None,
"vector": [1.0, 2.0, 3.0],
"nprobes": 20,
"minimum_nprobes": 20,
"maximum_nprobes": 20,
"version": None,
}
@@ -536,6 +538,8 @@ def test_query_sync_maximal():
"refine_factor": 10,
"vector": [1.0, 2.0, 3.0],
"nprobes": 5,
"minimum_nprobes": 5,
"maximum_nprobes": 5,
"lower_bound": None,
"upper_bound": None,
"ef": None,
@@ -564,6 +568,66 @@ def test_query_sync_maximal():
)
def test_query_sync_nprobes():
def handler(body):
assert body == {
"distance_type": "l2",
"k": 10,
"prefilter": True,
"fast_search": True,
"vector_column": "vector2",
"refine_factor": None,
"lower_bound": None,
"upper_bound": None,
"ef": None,
"vector": [1.0, 2.0, 3.0],
"nprobes": 5,
"minimum_nprobes": 5,
"maximum_nprobes": 15,
"version": None,
}
return pa.table({"id": [1, 2, 3], "name": ["a", "b", "c"]})
with query_test_table(handler) as table:
(
table.search([1, 2, 3], vector_column_name="vector2", fast_search=True)
.minimum_nprobes(5)
.maximum_nprobes(15)
.to_list()
)
def test_query_sync_no_max_nprobes():
def handler(body):
assert body == {
"distance_type": "l2",
"k": 10,
"prefilter": True,
"fast_search": True,
"vector_column": "vector2",
"refine_factor": None,
"lower_bound": None,
"upper_bound": None,
"ef": None,
"vector": [1.0, 2.0, 3.0],
"nprobes": 5,
"minimum_nprobes": 5,
"maximum_nprobes": 0,
"version": None,
}
return pa.table({"id": [1, 2, 3], "name": ["a", "b", "c"]})
with query_test_table(handler) as table:
(
table.search([1, 2, 3], vector_column_name="vector2", fast_search=True)
.minimum_nprobes(5)
.maximum_nprobes(0)
.to_list()
)
@pytest.mark.parametrize("server_version", [Version("0.1.0"), Version("0.2.0")])
def test_query_sync_batch_queries(server_version):
def handler(body):
@@ -666,6 +730,8 @@ def test_query_sync_hybrid():
"refine_factor": None,
"vector": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
"nprobes": 20,
"minimum_nprobes": 20,
"maximum_nprobes": 20,
"lower_bound": None,
"upper_bound": None,
"ef": None,