feat: add maximum and minimum nprobes properties (#2430)

This exposes the maximum_nprobes and minimum_nprobes feature that was added in https://github.com/lancedb/lance/pull/3903  ## Summary by CodeRabbit - **New Features** - Added support for specifying minimum and maximum probe counts in vector search queries, allowing finer control over search behavior. - Users can now independently set minimum and maximum probes for vector and hybrid queries via new methods and parameters in Python, Node.js, and Rust APIs. - **Bug Fixes** - Improved parameter validation to ensure correct usage of minimum and maximum probe values. - **Tests** - Expanded test coverage to validate correct handling, serialization, and error cases for the new probe parameters.
2026-01-08 21:02:58 +00:00 · 2025-06-13 15:18:29 -07:00
parent fec8d58f06
commit 59b57e30ed
12 changed files with 505 additions and 32 deletions
--- a/nodejs/test/table.test.ts
+++ b/nodejs/test/table.test.ts
@@ -559,6 +559,32 @@ describe("When creating an index", () => {
    rst = await tbl.query().limit(2).offset(1).nearestTo(queryVec).toArrow();
    expect(rst.numRows).toBe(1);

+    // test nprobes
+    rst = await tbl.query().nearestTo(queryVec).limit(2).nprobes(50).toArrow();
+    expect(rst.numRows).toBe(2);
+    rst = await tbl
+      .query()
+      .nearestTo(queryVec)
+      .limit(2)
+      .minimumNprobes(15)
+      .toArrow();
+    expect(rst.numRows).toBe(2);
+    rst = await tbl
+      .query()
+      .nearestTo(queryVec)
+      .limit(2)
+      .minimumNprobes(10)
+      .maximumNprobes(20)
+      .toArrow();
+    expect(rst.numRows).toBe(2);
+
+    expect(() => tbl.query().nearestTo(queryVec).minimumNprobes(0)).toThrow(
+      "Invalid input, minimum_nprobes must be greater than 0",
+    );
+    expect(() => tbl.query().nearestTo(queryVec).maximumNprobes(5)).toThrow(
+      "Invalid input, maximum_nprobes must be greater than minimum_nprobes",
+    );
+
    await tbl.dropIndex("vec_idx");
    const indices2 = await tbl.listIndices();
    expect(indices2.length).toBe(0);
--- a/nodejs/lancedb/query.ts
+++ b/nodejs/lancedb/query.ts
@@ -448,6 +448,10 @@ export class VectorQuery extends QueryBase<NativeVectorQuery> {
   * For best results we recommend tuning this parameter with a benchmark against
   * your actual data to find the smallest possible value that will still give
   * you the desired recall.
+   *
+   * For more fine grained control over behavior when you have a very narrow filter
+   * you can use `minimumNprobes` and `maximumNprobes`.  This method sets both
+   * the minimum and maximum to the same value.
   */
  nprobes(nprobes: number): VectorQuery {
    super.doCall((inner) => inner.nprobes(nprobes));
@@ -455,6 +459,33 @@ export class VectorQuery extends QueryBase<NativeVectorQuery> {
    return this;
  }

+  /**
+   * Set the minimum number of probes used.
+   *
+   * This controls the minimum number of partitions that will be searched.  This
+   * parameter will impact every query against a vector index, regardless of the
+   * filter.  See `nprobes` for more details.  Higher values will increase recall
+   * but will also increase latency.
+   */
+  minimumNprobes(minimumNprobes: number): VectorQuery {
+    super.doCall((inner) => inner.minimumNprobes(minimumNprobes));
+    return this;
+  }
+
+  /**
+   * Set the maximum number of probes used.
+   *
+   * This controls the maximum number of partitions that will be searched.  If this
+   * number is greater than minimumNprobes then the excess partitions will _only_ be
+   * searched if we have not found enough results.  This can be useful when there is
+   * a narrow filter to allow these queries to spend more time searching and avoid
+   * potential false negatives.
+   */
+  maximumNprobes(maximumNprobes: number): VectorQuery {
+    super.doCall((inner) => inner.maximumNprobes(maximumNprobes));
+    return this;
+  }
+
  /*
   * Set the distance range to use
   *
--- a/nodejs/src/query.rs
+++ b/nodejs/src/query.rs
@@ -178,6 +178,31 @@ impl VectorQuery {
        self.inner = self.inner.clone().nprobes(nprobe as usize);
    }

+    #[napi]
+    pub fn minimum_nprobes(&mut self, minimum_nprobe: u32) -> napi::Result<()> {
+        self.inner = self
+            .inner
+            .clone()
+            .minimum_nprobes(minimum_nprobe as usize)
+            .default_error()?;
+        Ok(())
+    }
+
+    #[napi]
+    pub fn maximum_nprobes(&mut self, maximum_nprobes: u32) -> napi::Result<()> {
+        let maximum_nprobes = if maximum_nprobes == 0 {
+            None
+        } else {
+            Some(maximum_nprobes as usize)
+        };
+        self.inner = self
+            .inner
+            .clone()
+            .maximum_nprobes(maximum_nprobes)
+            .default_error()?;
+        Ok(())
+    }
+
    #[napi]
    pub fn distance_range(&mut self, lower_bound: Option<f64>, upper_bound: Option<f64>) {
        // napi doesn't support f32, so we have to convert to f32
--- a/python/python/lancedb/_lancedb.pyi
+++ b/python/python/lancedb/_lancedb.pyi
@@ -143,6 +143,8 @@ class VectorQuery:
    def postfilter(self): ...
    def refine_factor(self, refine_factor: int): ...
    def nprobes(self, nprobes: int): ...
+    def minimum_nprobes(self, minimum_nprobes: int): ...
+    def maximum_nprobes(self, maximum_nprobes: int): ...
    def bypass_vector_index(self): ...
    def nearest_to_text(self, query: dict) -> HybridQuery: ...
    def to_query_request(self) -> PyQueryRequest: ...
@@ -158,6 +160,8 @@ class HybridQuery:
    def distance_type(self, distance_type: str): ...
    def refine_factor(self, refine_factor: int): ...
    def nprobes(self, nprobes: int): ...
+    def minimum_nprobes(self, minimum_nprobes: int): ...
+    def maximum_nprobes(self, maximum_nprobes: int): ...
    def bypass_vector_index(self): ...
    def to_vector_query(self) -> VectorQuery: ...
    def to_fts_query(self) -> FTSQuery: ...
@@ -178,7 +182,8 @@ class PyQueryRequest:
    with_row_id: Optional[bool]
    column: Optional[str]
    query_vector: Optional[List[pa.Array]]
-    nprobes: Optional[int]
+    minimum_nprobes: Optional[int]
+    maximum_nprobes: Optional[int]
    lower_bound: Optional[float]
    upper_bound: Optional[float]
    ef: Optional[int]
--- a/python/python/lancedb/query.py
+++ b/python/python/lancedb/query.py
@@ -437,8 +437,18 @@ class Query(pydantic.BaseModel):
    # which columns to return in the results
    columns: Optional[Union[List[str], Dict[str, str]]] = None

-    # number of IVF partitions to search
-    nprobes: Optional[int] = None
+    # minimum number of IVF partitions to search
+    #
+    # If None then a default value (20) will be used.
+    minimum_nprobes: Optional[int] = None
+
+    # maximum number of IVF partitions to search
+    #
+    # If None then a default value (20) will be used.
+    #
+    # If 0 then no limit will be applied and all partitions could be searched
+    # if needed to satisfy the limit.
+    maximum_nprobes: Optional[int] = None

    # lower bound for distance search
    lower_bound: Optional[float] = None
@@ -476,7 +486,8 @@ class Query(pydantic.BaseModel):
        query.vector_column = req.column
        query.vector = req.query_vector
        query.distance_type = req.distance_type
-        query.nprobes = req.nprobes
+        query.minimum_nprobes = req.minimum_nprobes
+        query.maximum_nprobes = req.maximum_nprobes
        query.lower_bound = req.lower_bound
        query.upper_bound = req.upper_bound
        query.ef = req.ef
@@ -1037,7 +1048,8 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
        super().__init__(table)
        self._query = query
        self._distance_type = None
-        self._nprobes = None
+        self._minimum_nprobes = None
+        self._maximum_nprobes = None
        self._lower_bound = None
        self._upper_bound = None
        self._refine_factor = None
@@ -1100,6 +1112,10 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
        See discussion in [Querying an ANN Index][querying-an-ann-index] for
        tuning advice.

+        This method sets both the minimum and maximum number of probes to the same
+        value. See `minimum_nprobes` and `maximum_nprobes` for more fine-grained
+        control.
+
        Parameters
        ----------
        nprobes: int
@@ -1110,7 +1126,36 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
        LanceVectorQueryBuilder
            The LanceQueryBuilder object.
        """
-        self._nprobes = nprobes
+        self._minimum_nprobes = nprobes
+        self._maximum_nprobes = nprobes
+        return self
+
+    def minimum_nprobes(self, minimum_nprobes: int) -> LanceVectorQueryBuilder:
+        """Set the minimum number of probes to use.
+
+        See `nprobes` for more details.
+
+        These partitions will be searched on every vector query and will increase recall
+        at the expense of latency.
+        """
+        self._minimum_nprobes = minimum_nprobes
+        return self
+
+    def maximum_nprobes(self, maximum_nprobes: int) -> LanceVectorQueryBuilder:
+        """Set the maximum number of probes to use.
+
+        See `nprobes` for more details.
+
+        If this value is greater than `minimum_nprobes` then the excess partitions
+        will be searched only if we have not found enough results.
+
+        This can be useful when there is a narrow filter to allow these queries to
+        spend more time searching and avoid potential false negatives.
+
+        If this value is 0 then no limit will be applied and all partitions could be
+        searched if needed to satisfy the limit.
+        """
+        self._maximum_nprobes = maximum_nprobes
        return self

    def distance_range(
@@ -1214,7 +1259,8 @@ class LanceVectorQueryBuilder(LanceQueryBuilder):
            limit=self._limit,
            distance_type=self._distance_type,
            columns=self._columns,
-            nprobes=self._nprobes,
+            minimum_nprobes=self._minimum_nprobes,
+            maximum_nprobes=self._maximum_nprobes,
            lower_bound=self._lower_bound,
            upper_bound=self._upper_bound,
            refine_factor=self._refine_factor,
@@ -1578,7 +1624,8 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
        self._fts_columns = fts_columns
        self._norm = None
        self._reranker = None
-        self._nprobes = None
+        self._minimum_nprobes = None
+        self._maximum_nprobes = None
        self._refine_factor = None
        self._distance_type = None
        self._phrase_query = None
@@ -1810,7 +1857,24 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
        LanceHybridQueryBuilder
            The LanceHybridQueryBuilder object.
        """
-        self._nprobes = nprobes
+        self._minimum_nprobes = nprobes
+        self._maximum_nprobes = nprobes
+        return self
+
+    def minimum_nprobes(self, minimum_nprobes: int) -> LanceHybridQueryBuilder:
+        """Set the minimum number of probes to use.
+
+        See `nprobes` for more details.
+        """
+        self._minimum_nprobes = minimum_nprobes
+        return self
+
+    def maximum_nprobes(self, maximum_nprobes: int) -> LanceHybridQueryBuilder:
+        """Set the maximum number of probes to use.
+
+        See `nprobes` for more details.
+        """
+        self._maximum_nprobes = maximum_nprobes
        return self

    def distance_range(
@@ -2039,8 +2103,10 @@ class LanceHybridQueryBuilder(LanceQueryBuilder):
            self._fts_query.phrase_query(True)
        if self._distance_type:
            self._vector_query.metric(self._distance_type)
-        if self._nprobes:
-            self._vector_query.nprobes(self._nprobes)
+        if self._minimum_nprobes:
+            self._vector_query.minimum_nprobes(self._minimum_nprobes)
+        if self._maximum_nprobes is not None:
+            self._vector_query.maximum_nprobes(self._maximum_nprobes)
        if self._refine_factor:
            self._vector_query.refine_factor(self._refine_factor)
        if self._ef:
@@ -2651,6 +2717,34 @@ class AsyncVectorQueryBase:
        self._inner.nprobes(nprobes)
        return self

+    def minimum_nprobes(self, minimum_nprobes: int) -> Self:
+        """Set the minimum number of probes to use.
+
+        See `nprobes` for more details.
+
+        These partitions will be searched on every indexed vector query and will
+        increase recall at the expense of latency.
+        """
+        self._inner.minimum_nprobes(minimum_nprobes)
+        return self
+
+    def maximum_nprobes(self, maximum_nprobes: int) -> Self:
+        """Set the maximum number of probes to use.
+
+        See `nprobes` for more details.
+
+        If this value is greater than `minimum_nprobes` then the excess partitions
+        will be searched only if we have not found enough results.
+
+        This can be useful when there is a narrow filter to allow these queries to
+        spend more time searching and avoid potential false negatives.
+
+        If this value is 0 then no limit will be applied and all partitions could be
+        searched if needed to satisfy the limit.
+        """
+        self._inner.maximum_nprobes(maximum_nprobes)
+        return self
+
    def distance_range(
        self, lower_bound: Optional[float] = None, upper_bound: Optional[float] = None
    ) -> Self:
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -3637,8 +3637,10 @@ class AsyncTable:
            )
            if query.distance_type is not None:
                async_query = async_query.distance_type(query.distance_type)
-            if query.nprobes is not None:
-                async_query = async_query.nprobes(query.nprobes)
+            if query.minimum_nprobes is not None:
+                async_query = async_query.minimum_nprobes(query.minimum_nprobes)
+            if query.maximum_nprobes is not None:
+                async_query = async_query.maximum_nprobes(query.maximum_nprobes)
            if query.refine_factor is not None:
                async_query = async_query.refine_factor(query.refine_factor)
            if query.vector_column:
--- a/python/python/tests/test_query.py
+++ b/python/python/tests/test_query.py
@@ -439,6 +439,33 @@ def test_query_builder_with_filter(table):
    assert all(np.array(rs[0]["vector"]) == [3, 4])


+def test_invalid_nprobes_sync(table):
+    with pytest.raises(ValueError, match="minimum_nprobes must be greater than 0"):
+        LanceVectorQueryBuilder(table, [0, 0], "vector").minimum_nprobes(0).to_list()
+    with pytest.raises(
+        ValueError, match="maximum_nprobes must be greater than minimum_nprobes"
+    ):
+        LanceVectorQueryBuilder(table, [0, 0], "vector").maximum_nprobes(5).to_list()
+    with pytest.raises(
+        ValueError, match="minimum_nprobes must be less or equal to maximum_nprobes"
+    ):
+        LanceVectorQueryBuilder(table, [0, 0], "vector").minimum_nprobes(100).to_list()
+
+
+@pytest.mark.asyncio
+async def test_invalid_nprobes_async(table_async: AsyncTable):
+    with pytest.raises(ValueError, match="minimum_nprobes must be greater than 0"):
+        await table_async.vector_search([0, 0]).minimum_nprobes(0).to_list()
+    with pytest.raises(
+        ValueError, match="maximum_nprobes must be greater than minimum_nprobes"
+    ):
+        await table_async.vector_search([0, 0]).maximum_nprobes(5).to_list()
+    with pytest.raises(
+        ValueError, match="minimum_nprobes must be less or equal to maximum_nprobes"
+    ):
+        await table_async.vector_search([0, 0]).minimum_nprobes(100).to_list()
+
+
 def test_query_builder_with_prefilter(table):
    df = (
        LanceVectorQueryBuilder(table, [0, 0], "vector")
@@ -585,6 +612,21 @@ async def test_query_async(table_async: AsyncTable):
        table_async.query().nearest_to(pa.array([1, 2])).nprobes(10),
        expected_num_rows=2,
    )
+    await check_query(
+        table_async.query().nearest_to(pa.array([1, 2])).minimum_nprobes(10),
+        expected_num_rows=2,
+    )
+    await check_query(
+        table_async.query().nearest_to(pa.array([1, 2])).maximum_nprobes(30),
+        expected_num_rows=2,
+    )
+    await check_query(
+        table_async.query()
+        .nearest_to(pa.array([1, 2]))
+        .minimum_nprobes(10)
+        .maximum_nprobes(20),
+        expected_num_rows=2,
+    )
    await check_query(
        table_async.query().nearest_to(pa.array([1, 2])).bypass_vector_index(),
        expected_num_rows=2,
@@ -911,7 +953,39 @@ def test_query_serialization_sync(table: lancedb.table.Table):

    q = table.search([5.0, 6.0]).nprobes(10).refine_factor(5).to_query_object()
    check_set_props(
-        q, vector_column="vector", vector=[5.0, 6.0], nprobes=10, refine_factor=5
+        q,
+        vector_column="vector",
+        vector=[5.0, 6.0],
+        minimum_nprobes=10,
+        maximum_nprobes=10,
+        refine_factor=5,
+    )
+
+    q = table.search([5.0, 6.0]).minimum_nprobes(10).to_query_object()
+    check_set_props(
+        q,
+        vector_column="vector",
+        vector=[5.0, 6.0],
+        minimum_nprobes=10,
+        maximum_nprobes=None,
+    )
+
+    q = table.search([5.0, 6.0]).nprobes(50).to_query_object()
+    check_set_props(
+        q,
+        vector_column="vector",
+        vector=[5.0, 6.0],
+        minimum_nprobes=50,
+        maximum_nprobes=50,
+    )
+
+    q = table.search([5.0, 6.0]).maximum_nprobes(10).to_query_object()
+    check_set_props(
+        q,
+        vector_column="vector",
+        vector=[5.0, 6.0],
+        maximum_nprobes=10,
+        minimum_nprobes=None,
    )

    q = table.search([5.0, 6.0]).distance_range(0.0, 1.0).to_query_object()
@@ -963,7 +1037,8 @@ async def test_query_serialization_async(table_async: AsyncTable):
        limit=10,
        vector=sample_vector,
        postfilter=False,
-        nprobes=20,
+        minimum_nprobes=20,
+        maximum_nprobes=20,
        with_row_id=False,
        bypass_vector_index=False,
    )
@@ -973,7 +1048,20 @@ async def test_query_serialization_async(table_async: AsyncTable):
        q,
        vector=sample_vector,
        postfilter=False,
-        nprobes=20,
+        minimum_nprobes=20,
+        maximum_nprobes=20,
+        with_row_id=False,
+        bypass_vector_index=False,
+        limit=10,
+    )
+
+    q = (await table_async.search([5.0, 6.0])).nprobes(50).to_query_object()
+    check_set_props(
+        q,
+        vector=sample_vector,
+        postfilter=False,
+        minimum_nprobes=50,
+        maximum_nprobes=50,
        with_row_id=False,
        bypass_vector_index=False,
        limit=10,
@@ -992,7 +1080,8 @@ async def test_query_serialization_async(table_async: AsyncTable):
        filter="id = 1",
        postfilter=True,
        vector=sample_vector,
-        nprobes=20,
+        minimum_nprobes=20,
+        maximum_nprobes=20,
        with_row_id=False,
        bypass_vector_index=False,
    )
@@ -1006,7 +1095,8 @@ async def test_query_serialization_async(table_async: AsyncTable):
    check_set_props(
        q,
        vector=sample_vector,
-        nprobes=10,
+        minimum_nprobes=10,
+        maximum_nprobes=10,
        refine_factor=5,
        postfilter=False,
        with_row_id=False,
@@ -1014,6 +1104,18 @@ async def test_query_serialization_async(table_async: AsyncTable):
        limit=10,
    )

+    q = (await table_async.search([5.0, 6.0])).minimum_nprobes(5).to_query_object()
+    check_set_props(
+        q,
+        vector=sample_vector,
+        minimum_nprobes=5,
+        maximum_nprobes=20,
+        postfilter=False,
+        with_row_id=False,
+        bypass_vector_index=False,
+        limit=10,
+    )
+
    q = (
        (await table_async.search([5.0, 6.0]))
        .distance_range(0.0, 1.0)
@@ -1025,7 +1127,8 @@ async def test_query_serialization_async(table_async: AsyncTable):
        lower_bound=0.0,
        upper_bound=1.0,
        postfilter=False,
-        nprobes=20,
+        minimum_nprobes=20,
+        maximum_nprobes=20,
        with_row_id=False,
        bypass_vector_index=False,
        limit=10,
@@ -1037,7 +1140,8 @@ async def test_query_serialization_async(table_async: AsyncTable):
        distance_type="cosine",
        vector=sample_vector,
        postfilter=False,
-        nprobes=20,
+        minimum_nprobes=20,
+        maximum_nprobes=20,
        with_row_id=False,
        bypass_vector_index=False,
        limit=10,
@@ -1049,7 +1153,8 @@ async def test_query_serialization_async(table_async: AsyncTable):
        ef=7,
        vector=sample_vector,
        postfilter=False,
-        nprobes=20,
+        minimum_nprobes=20,
+        maximum_nprobes=20,
        with_row_id=False,
        bypass_vector_index=False,
        limit=10,
@@ -1061,7 +1166,8 @@ async def test_query_serialization_async(table_async: AsyncTable):
        bypass_vector_index=True,
        vector=sample_vector,
        postfilter=False,
-        nprobes=20,
+        minimum_nprobes=20,
+        maximum_nprobes=20,
        with_row_id=False,
        limit=10,
    )
--- a/python/python/tests/test_remote_db.py
+++ b/python/python/tests/test_remote_db.py
@@ -496,6 +496,8 @@ def test_query_sync_minimal():
            "ef": None,
            "vector": [1.0, 2.0, 3.0],
            "nprobes": 20,
+            "minimum_nprobes": 20,
+            "maximum_nprobes": 20,
            "version": None,
        }

@@ -536,6 +538,8 @@ def test_query_sync_maximal():
            "refine_factor": 10,
            "vector": [1.0, 2.0, 3.0],
            "nprobes": 5,
+            "minimum_nprobes": 5,
+            "maximum_nprobes": 5,
            "lower_bound": None,
            "upper_bound": None,
            "ef": None,
@@ -564,6 +568,66 @@ def test_query_sync_maximal():
        )


+def test_query_sync_nprobes():
+    def handler(body):
+        assert body == {
+            "distance_type": "l2",
+            "k": 10,
+            "prefilter": True,
+            "fast_search": True,
+            "vector_column": "vector2",
+            "refine_factor": None,
+            "lower_bound": None,
+            "upper_bound": None,
+            "ef": None,
+            "vector": [1.0, 2.0, 3.0],
+            "nprobes": 5,
+            "minimum_nprobes": 5,
+            "maximum_nprobes": 15,
+            "version": None,
+        }
+
+        return pa.table({"id": [1, 2, 3], "name": ["a", "b", "c"]})
+
+    with query_test_table(handler) as table:
+        (
+            table.search([1, 2, 3], vector_column_name="vector2", fast_search=True)
+            .minimum_nprobes(5)
+            .maximum_nprobes(15)
+            .to_list()
+        )
+
+
+def test_query_sync_no_max_nprobes():
+    def handler(body):
+        assert body == {
+            "distance_type": "l2",
+            "k": 10,
+            "prefilter": True,
+            "fast_search": True,
+            "vector_column": "vector2",
+            "refine_factor": None,
+            "lower_bound": None,
+            "upper_bound": None,
+            "ef": None,
+            "vector": [1.0, 2.0, 3.0],
+            "nprobes": 5,
+            "minimum_nprobes": 5,
+            "maximum_nprobes": 0,
+            "version": None,
+        }
+
+        return pa.table({"id": [1, 2, 3], "name": ["a", "b", "c"]})
+
+    with query_test_table(handler) as table:
+        (
+            table.search([1, 2, 3], vector_column_name="vector2", fast_search=True)
+            .minimum_nprobes(5)
+            .maximum_nprobes(0)
+            .to_list()
+        )
+
+
@pytest.mark.parametrize("server_version", [Version("0.1.0"), Version("0.2.0")])
 def test_query_sync_batch_queries(server_version):
    def handler(body):
@@ -666,6 +730,8 @@ def test_query_sync_hybrid():
                "refine_factor": None,
                "vector": [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                "nprobes": 20,
+                "minimum_nprobes": 20,
+                "maximum_nprobes": 20,
                "lower_bound": None,
                "upper_bound": None,
                "ef": None,
--- a/python/src/query.rs
+++ b/python/src/query.rs
@@ -235,7 +235,10 @@ pub struct PyQueryRequest {
    pub with_row_id: Option<bool>,
    pub column: Option<String>,
    pub query_vector: Option<PyQueryVectors>,
-    pub nprobes: Option<usize>,
+    pub minimum_nprobes: Option<usize>,
+    // None means user did not set it and default shoud be used (currenty 20)
+    // Some(0) means user set it to None and there is no limit
+    pub maximum_nprobes: Option<usize>,
    pub lower_bound: Option<f32>,
    pub upper_bound: Option<f32>,
    pub ef: Option<usize>,
@@ -261,7 +264,8 @@ impl From<AnyQuery> for PyQueryRequest {
                with_row_id: Some(query_request.with_row_id),
                column: None,
                query_vector: None,
-                nprobes: None,
+                minimum_nprobes: None,
+                maximum_nprobes: None,
                lower_bound: None,
                upper_bound: None,
                ef: None,
@@ -281,7 +285,11 @@ impl From<AnyQuery> for PyQueryRequest {
                with_row_id: Some(vector_query.base.with_row_id),
                column: vector_query.column,
                query_vector: Some(PyQueryVectors(vector_query.query_vector)),
-                nprobes: Some(vector_query.nprobes),
+                minimum_nprobes: Some(vector_query.minimum_nprobes),
+                maximum_nprobes: match vector_query.maximum_nprobes {
+                    None => Some(0),
+                    Some(value) => Some(value),
+                },
                lower_bound: vector_query.lower_bound,
                upper_bound: vector_query.upper_bound,
                ef: vector_query.ef,
@@ -655,6 +663,29 @@ impl VectorQuery {
        self.inner = self.inner.clone().nprobes(nprobe as usize);
    }

+    pub fn minimum_nprobes(&mut self, minimum_nprobes: u32) -> PyResult<()> {
+        self.inner = self
+            .inner
+            .clone()
+            .minimum_nprobes(minimum_nprobes as usize)
+            .infer_error()?;
+        Ok(())
+    }
+
+    pub fn maximum_nprobes(&mut self, maximum_nprobes: u32) -> PyResult<()> {
+        let maximum_nprobes = if maximum_nprobes == 0 {
+            None
+        } else {
+            Some(maximum_nprobes as usize)
+        };
+        self.inner = self
+            .inner
+            .clone()
+            .maximum_nprobes(maximum_nprobes)
+            .infer_error()?;
+        Ok(())
+    }
+
    #[pyo3(signature = (lower_bound=None, upper_bound=None))]
    pub fn distance_range(&mut self, lower_bound: Option<f32>, upper_bound: Option<f32>) {
        self.inner = self.inner.clone().distance_range(lower_bound, upper_bound);
--- a/rust/lancedb/src/query.rs
+++ b/rust/lancedb/src/query.rs
@@ -796,8 +796,10 @@ pub struct VectorQueryRequest {
    pub column: Option<String>,
    /// The vector(s) to search for
    pub query_vector: Vec<Arc<dyn Array>>,
-    /// The number of partitions to search
-    pub nprobes: usize,
+    /// The minimum number of partitions to search
+    pub minimum_nprobes: usize,
+    /// The maximum number of partitions to search
+    pub maximum_nprobes: Option<usize>,
    /// The lower bound (inclusive) of the distance to search for.
    pub lower_bound: Option<f32>,
    /// The upper bound (exclusive) of the distance to search for.
@@ -819,7 +821,8 @@ impl Default for VectorQueryRequest {
            base: QueryRequest::default(),
            column: None,
            query_vector: Vec::new(),
-            nprobes: 20,
+            minimum_nprobes: 20,
+            maximum_nprobes: Some(20),
            lower_bound: None,
            upper_bound: None,
            ef: None,
@@ -925,11 +928,75 @@ impl VectorQuery {
    /// For best results we recommend tuning this parameter with a benchmark against
    /// your actual data to find the smallest possible value that will still give
    /// you the desired recall.
+    ///
+    /// This method sets both the minimum and maximum number of partitions to search.
+    /// For more fine-grained control see [`VectorQuery::minimum_nprobes`] and
+    /// [`VectorQuery::maximum_nprobes`].
    pub fn nprobes(mut self, nprobes: usize) -> Self {
-        self.request.nprobes = nprobes;
+        self.request.minimum_nprobes = nprobes;
+        self.request.maximum_nprobes = Some(nprobes);
        self
    }

+    /// Set the minimum number of partitions to search
+    ///
+    /// This argument is only used when the vector column has an IVF PQ index.
+    /// If there is no index then this value is ignored.
+    ///
+    /// See [`VectorQuery::nprobes`] for more details.
+    ///
+    /// These partitions will be searched on every indexed vector query.
+    ///
+    /// Will return an error if the value is not greater than 0 or if maximum_nprobes
+    /// has been set and is less than the minimum_nprobes.
+    pub fn minimum_nprobes(mut self, minimum_nprobes: usize) -> Result<Self> {
+        if minimum_nprobes == 0 {
+            return Err(Error::InvalidInput {
+                message: "minimum_nprobes must be greater than 0".to_string(),
+            });
+        }
+        if let Some(maximum_nprobes) = self.request.maximum_nprobes {
+            if minimum_nprobes > maximum_nprobes {
+                return Err(Error::InvalidInput {
+                    message: "minimum_nprobes must be less or equal to maximum_nprobes".to_string(),
+                });
+            }
+        }
+        self.request.minimum_nprobes = minimum_nprobes;
+        Ok(self)
+    }
+
+    /// Set the maximum number of partitions to search
+    ///
+    /// This argument is only used when the vector column has an IVF PQ index.
+    /// If there is no index then this value is ignored.
+    ///
+    /// See [`VectorQuery::nprobes`] for more details.
+    ///
+    /// If this value is greater than minimum_nprobes then the excess partitions will
+    /// only be searched if the initial search does not return enough results.
+    ///
+    /// This can be useful when there is a narrow filter to allow these queries to
+    /// spend more time searching and avoid potential false negatives.
+    ///
+    /// Set to None to search all partitions, if needed, to satsify the limit
+    pub fn maximum_nprobes(mut self, maximum_nprobes: Option<usize>) -> Result<Self> {
+        if let Some(maximum_nprobes) = maximum_nprobes {
+            if maximum_nprobes == 0 {
+                return Err(Error::InvalidInput {
+                    message: "maximum_nprobes must be greater than 0".to_string(),
+                });
+            }
+            if maximum_nprobes < self.request.minimum_nprobes {
+                return Err(Error::InvalidInput {
+                    message: "maximum_nprobes must be greater than minimum_nprobes".to_string(),
+                });
+            }
+        }
+        self.request.maximum_nprobes = maximum_nprobes;
+        Ok(self)
+    }
+
    /// Set the distance range for vector search,
    /// only rows with distances in the range [lower_bound, upper_bound) will be returned
    pub fn distance_range(mut self, lower_bound: Option<f32>, upper_bound: Option<f32>) -> Self {
@@ -1208,7 +1275,8 @@ mod tests {
        );
        assert_eq!(query.request.base.limit.unwrap(), 100);
        assert_eq!(query.request.base.offset.unwrap(), 1);
-        assert_eq!(query.request.nprobes, 1000);
+        assert_eq!(query.request.minimum_nprobes, 1000);
+        assert_eq!(query.request.maximum_nprobes, Some(1000));
        assert!(query.request.use_index);
        assert_eq!(query.request.distance_type, Some(DistanceType::Cosine));
        assert_eq!(query.request.refine_factor, Some(999));
--- a/rust/lancedb/src/remote/table.rs
+++ b/rust/lancedb/src/remote/table.rs
@@ -32,6 +32,7 @@ use lance::dataset::{ColumnAlteration, NewColumnTransform, Version};
 use lance_datafusion::exec::{execute_plan, OneShotExec};
 use reqwest::{RequestBuilder, Response};
 use serde::{Deserialize, Serialize};
+use serde_json::Number;
 use std::collections::HashMap;
 use std::io::Cursor;
 use std::pin::Pin;
@@ -438,7 +439,18 @@ impl<S: HttpSend> RemoteTable<S> {

        // Apply general parameters, before we dispatch based on number of query vectors.
        body["distance_type"] = serde_json::json!(query.distance_type.unwrap_or_default());
-        body["nprobes"] = query.nprobes.into();
+        // In 0.23.1 we migrated from `nprobes` to `minimum_nprobes` and `maximum_nprobes`.
+        // Old client / new server: since minimum_nprobes is missing, fallback to nprobes
+        // New client / old server: old server will only see nprobes, make sure to set both
+        //                          nprobes and minimum_nprobes
+        // New client / new server: since minimum_nprobes is present, server can ignore nprobes
+        body["nprobes"] = query.minimum_nprobes.into();
+        body["minimum_nprobes"] = query.minimum_nprobes.into();
+        if let Some(maximum_nprobes) = query.maximum_nprobes {
+            body["maximum_nprobes"] = maximum_nprobes.into();
+        } else {
+            body["maximum_nprobes"] = serde_json::Value::Number(Number::from_u128(0).unwrap())
+        }
        body["lower_bound"] = query.lower_bound.into();
        body["upper_bound"] = query.upper_bound.into();
        body["ef"] = query.ef.into();
@@ -2075,6 +2087,8 @@ mod tests {
                "prefilter": true,
                "distance_type": "l2",
                "nprobes": 20,
+                "minimum_nprobes": 20,
+                "maximum_nprobes": 20,
                "lower_bound": Option::<f32>::None,
                "upper_bound": Option::<f32>::None,
                "k": 10,
@@ -2175,6 +2189,8 @@ mod tests {
                "bypass_vector_index": true,
                "columns": ["a", "b"],
                "nprobes": 12,
+                "minimum_nprobes": 12,
+                "maximum_nprobes": 12,
                "lower_bound": Option::<f32>::None,
                "upper_bound": Option::<f32>::None,
                "ef": Option::<usize>::None,
--- a/rust/lancedb/src/table.rs
+++ b/rust/lancedb/src/table.rs
@@ -2354,12 +2354,15 @@ impl BaseTable for NativeTable {
                    query.base.limit.unwrap_or(DEFAULT_TOP_K),
                )?;
            }
+            scanner.minimum_nprobes(query.minimum_nprobes);
+            if let Some(maximum_nprobes) = query.maximum_nprobes {
+                scanner.maximum_nprobes(maximum_nprobes);
+            }
        }
        scanner.limit(
            query.base.limit.map(|limit| limit as i64),
            query.base.offset.map(|offset| offset as i64),
        )?;
-        scanner.nprobs(query.nprobes);
        if let Some(ef) = query.ef {
            scanner.ef(ef);
        }