mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-08 04:42:57 +00:00
feat: add maximum and minimum nprobes properties (#2430)
This exposes the `maximum_nprobes` and `minimum_nprobes` feature that was added in https://github.com/lancedb/lance/pull/3903.

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

- **New Features**
  - Added support for specifying minimum and maximum probe counts in vector search queries, allowing finer control over search behavior.
  - Users can now independently set minimum and maximum probes for vector and hybrid queries via new methods and parameters in Python, Node.js, and Rust APIs.
- **Bug Fixes**
  - Improved parameter validation to ensure correct usage of minimum and maximum probe values.
- **Tests**
  - Expanded test coverage to validate correct handling, serialization, and error cases for the new probe parameters.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
@@ -496,6 +496,8 @@ def test_query_sync_minimal():
|
||||
"ef": None,
|
||||
"vector": [1.0, 2.0, 3.0],
|
||||
"nprobes": 20,
|
||||
"minimum_nprobes": 20,
|
||||
"maximum_nprobes": 20,
|
||||
"version": None,
|
||||
}
|
||||
|
||||
@@ -536,6 +538,8 @@ def test_query_sync_maximal():
|
||||
"refine_factor": 10,
|
||||
"vector": [1.0, 2.0, 3.0],
|
||||
"nprobes": 5,
|
||||
"minimum_nprobes": 5,
|
||||
"maximum_nprobes": 5,
|
||||
"lower_bound": None,
|
||||
"upper_bound": None,
|
||||
"ef": None,
|
||||
@@ -564,6 +568,66 @@ def test_query_sync_maximal():
|
||||
)
|
||||
|
||||
|
||||
def test_query_sync_nprobes():
    """Check that distinct minimum/maximum nprobes values reach the query body.

    The search sets ``minimum_nprobes(5)`` and ``maximum_nprobes(15)``; the
    handler asserts that the body it receives carries those values (with
    ``nprobes`` equal to the minimum) alongside the remaining expected fields.
    """
    # Full payload the handler expects for this search.
    expected_body = {
        "distance_type": "l2",
        "k": 10,
        "prefilter": True,
        "fast_search": True,
        "vector_column": "vector2",
        "refine_factor": None,
        "lower_bound": None,
        "upper_bound": None,
        "ef": None,
        "vector": [1.0, 2.0, 3.0],
        "nprobes": 5,
        "minimum_nprobes": 5,
        "maximum_nprobes": 15,
        "version": None,
    }

    def handler(body):
        # NOTE(review): `body` is presumably the decoded request payload that
        # query_test_table forwards from its mock server — confirm there.
        assert body == expected_body

        return pa.table({"id": [1, 2, 3], "name": ["a", "b", "c"]})

    with query_test_table(handler) as table:
        query = table.search(
            [1, 2, 3], vector_column_name="vector2", fast_search=True
        )
        query = query.minimum_nprobes(5)
        query = query.maximum_nprobes(15)
        # Executing the query is what triggers the handler's assertion.
        query.to_list()
|
||||
|
||||
|
||||
def test_query_sync_no_max_nprobes():
    """Check that ``maximum_nprobes(0)`` is passed through in the query body.

    The search sets ``minimum_nprobes(5)`` and ``maximum_nprobes(0)``; the
    handler asserts the received body contains ``"maximum_nprobes": 0``
    unchanged. (The test name suggests 0 means "no maximum"; that meaning is
    not established by this test itself.)
    """
    # Full payload the handler expects for this search.
    expected_body = {
        "distance_type": "l2",
        "k": 10,
        "prefilter": True,
        "fast_search": True,
        "vector_column": "vector2",
        "refine_factor": None,
        "lower_bound": None,
        "upper_bound": None,
        "ef": None,
        "vector": [1.0, 2.0, 3.0],
        "nprobes": 5,
        "minimum_nprobes": 5,
        "maximum_nprobes": 0,
        "version": None,
    }

    def handler(body):
        # NOTE(review): `body` is presumably the decoded request payload that
        # query_test_table forwards from its mock server — confirm there.
        assert body == expected_body

        return pa.table({"id": [1, 2, 3], "name": ["a", "b", "c"]})

    with query_test_table(handler) as table:
        query = table.search(
            [1, 2, 3], vector_column_name="vector2", fast_search=True
        )
        query = query.minimum_nprobes(5)
        query = query.maximum_nprobes(0)
        # Executing the query is what triggers the handler's assertion.
        query.to_list()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("server_version", [Version("0.1.0"), Version("0.2.0")])
|
||||
def test_query_sync_batch_queries(server_version):
|
||||
def handler(body):
|
||||
@@ -666,6 +730,8 @@ def test_query_sync_hybrid():
|
||||
"refine_factor": None,
|
||||
"vector": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
|
||||
"nprobes": 20,
|
||||
"minimum_nprobes": 20,
|
||||
"maximum_nprobes": 20,
|
||||
"lower_bound": None,
|
||||
"upper_bound": None,
|
||||
"ef": None,
|
||||
|
||||
Reference in New Issue
Block a user