feat: fast_search in Python and Node (#1623)

Sometimes it is acceptable to users to only search indexed data and skip
and new un-indexed data. For example, if un-indexed data will be shortly
indexed and they don't mind the delay. In these cases, we can save a lot
of CPU time in search, and provide better latency. Users can activate
this on queries using `fast_search()`.
This commit is contained in:
Will Jones
2024-11-01 09:29:09 -07:00
committed by GitHub
parent f3fc339ef6
commit 96181ab421
6 changed files with 86 additions and 0 deletions

View File

@@ -17,6 +17,7 @@ from typing import Optional
import lance
import lancedb
from lancedb.index import IvfPq
import numpy as np
import pandas.testing as tm
import pyarrow as pa
@@ -358,6 +359,25 @@ async def test_query_to_pandas_async(table_async: AsyncTable):
assert df.shape == (0, 4)
@pytest.mark.asyncio
async def test_fast_search_async(tmp_path):
db = await lancedb.connect_async(tmp_path)
vectors = pa.FixedShapeTensorArray.from_numpy_ndarray(
np.random.rand(256, 32)
).storage
table = await db.create_table("test", pa.table({"vector": vectors}))
await table.create_index(
"vector", config=IvfPq(num_partitions=1, num_sub_vectors=1)
)
await table.add(pa.table({"vector": vectors}))
q = [1.0] * 32
plan = await table.query().nearest_to(q).explain_plan(True)
assert "LanceScan" in plan
plan = await table.query().nearest_to(q).fast_search().explain_plan(True)
assert "LanceScan" not in plan
def test_explain_plan(table):
q = LanceVectorQueryBuilder(table, [0, 0], "vector")
plan = q.explain_plan(verbose=True)