diff --git a/nodejs/__test__/table.test.ts b/nodejs/__test__/table.test.ts index 5bf01dad..9c6a6d06 100644 --- a/nodejs/__test__/table.test.ts +++ b/nodejs/__test__/table.test.ts @@ -402,6 +402,29 @@ describe("When creating an index", () => { expect(rst.numRows).toBe(1); }); + it("should be able to query unindexed data", async () => { + await tbl.createIndex("vec"); + await tbl.add([ + { + id: 300, + vec: Array(32) + .fill(1) + .map(() => Math.random()), + tags: [], + }, + ]); + + const plan1 = await tbl.query().nearestTo(queryVec).explainPlan(true); + expect(plan1).toMatch("LanceScan"); + + const plan2 = await tbl + .query() + .nearestTo(queryVec) + .fastSearch() + .explainPlan(true); + expect(plan2).not.toMatch("LanceScan"); + }); + it("should allow parameters to be specified", async () => { await tbl.createIndex("vec", { config: Index.ivfPq({ diff --git a/nodejs/lancedb/query.ts b/nodejs/lancedb/query.ts index 8c0f51cf..00e0b5fb 100644 --- a/nodejs/lancedb/query.ts +++ b/nodejs/lancedb/query.ts @@ -239,6 +239,17 @@ export class QueryBase return this; } + /** + * Skip searching un-indexed data. This can make search faster, but will miss + * any data that is not yet indexed. + * + * Use {@link lancedb.Table#optimize} to index all un-indexed data. 
+ */ + fastSearch(): this { + this.doCall((inner: NativeQueryType) => inner.fastSearch()); + return this; + } + protected nativeExecute( options?: Partial, ): Promise { diff --git a/nodejs/src/query.rs b/nodejs/src/query.rs index d0132699..6ae95142 100644 --- a/nodejs/src/query.rs +++ b/nodejs/src/query.rs @@ -80,6 +80,11 @@ impl Query { Ok(VectorQuery { inner }) } + #[napi] + /* Skip searching un-indexed data: replaces self.inner with a clone that has fast_search() applied. */ pub fn fast_search(&mut self) { + self.inner = self.inner.clone().fast_search(); + } + #[napi(catch_unwind)] pub async fn execute( &self, @@ -183,6 +188,11 @@ impl VectorQuery { self.inner = self.inner.clone().offset(offset as usize); } + #[napi] + /* Skip searching un-indexed data: replaces self.inner with a clone that has fast_search() applied. */ pub fn fast_search(&mut self) { + self.inner = self.inner.clone().fast_search(); + } + #[napi(catch_unwind)] pub async fn execute( &self, diff --git a/python/python/lancedb/query.py b/python/python/lancedb/query.py index 1062289e..9e62101f 100644 --- a/python/python/lancedb/query.py +++ b/python/python/lancedb/query.py @@ -1315,6 +1315,20 @@ class AsyncQueryBase(object): self._inner.offset(offset) return self + def fast_search(self) -> AsyncQuery: + """ + Skip searching un-indexed data. + + This can make queries faster, but will miss any data that has not been + indexed. + + !!! tip + You can add new data into an existing index by calling + [AsyncTable.optimize][lancedb.table.AsyncTable.optimize]. 
+ """ + self._inner.fast_search() + return self + async def to_batches( self, *, max_batch_length: Optional[int] = None ) -> AsyncRecordBatchReader: diff --git a/python/python/tests/test_query.py b/python/python/tests/test_query.py index 11750e4d..75733bf6 100644 --- a/python/python/tests/test_query.py +++ b/python/python/tests/test_query.py @@ -17,6 +17,7 @@ from typing import Optional import lance import lancedb +from lancedb.index import IvfPq import numpy as np import pandas.testing as tm import pyarrow as pa @@ -358,6 +359,25 @@ async def test_query_to_pandas_async(table_async: AsyncTable): assert df.shape == (0, 4) +@pytest.mark.asyncio +async def test_fast_search_async(tmp_path): + db = await lancedb.connect_async(tmp_path) + vectors = pa.FixedShapeTensorArray.from_numpy_ndarray( + np.random.rand(256, 32) + ).storage + table = await db.create_table("test", pa.table({"vector": vectors})) + await table.create_index( + "vector", config=IvfPq(num_partitions=1, num_sub_vectors=1) + ) + await table.add(pa.table({"vector": vectors})) + + q = [1.0] * 32 + plan = await table.query().nearest_to(q).explain_plan(True) + assert "LanceScan" in plan + plan = await table.query().nearest_to(q).fast_search().explain_plan(True) + assert "LanceScan" not in plan + + def test_explain_plan(table): q = LanceVectorQueryBuilder(table, [0, 0], "vector") plan = q.explain_plan(verbose=True) diff --git a/python/src/query.rs b/python/src/query.rs index 42bd4a13..b68e96b7 100644 --- a/python/src/query.rs +++ b/python/src/query.rs @@ -68,6 +68,10 @@ impl Query { self.inner = self.inner.clone().offset(offset as usize); } + /* Skip searching un-indexed data: replaces self.inner with a clone that has fast_search() applied. */ pub fn fast_search(&mut self) { + self.inner = self.inner.clone().fast_search(); + } + pub fn nearest_to(&mut self, vector: Bound<'_, PyAny>) -> PyResult { let data: ArrayData = ArrayData::from_pyarrow_bound(&vector)?; let array = make_array(data); @@ -146,6 +150,10 @@ impl VectorQuery { self.inner = self.inner.clone().offset(offset as usize); } + /* Skip searching un-indexed data: replaces self.inner with a clone that has fast_search() applied. */ pub fn fast_search(&mut 
self) { + self.inner = self.inner.clone().fast_search(); + } + pub fn column(&mut self, column: String) { self.inner = self.inner.clone().column(&column); }