mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-26 14:49:57 +00:00
SDK: Python. Description: Exposes the PyArrow batch API during query execution. This is relevant when there is no vector search query, the dataset is large, and the filtered result is larger than memory. --------- Co-authored-by: Ishani Ghose <isghose@amazon.com> Co-authored-by: Chang She <759245+changhiskhan@users.noreply.github.com>
This commit is contained in:
committed by
Weston Pace
parent
968c62cb8f
commit
0838e12b30
@@ -13,6 +13,7 @@
|
||||
|
||||
import unittest.mock as mock
|
||||
from datetime import timedelta
|
||||
from typing import Optional
|
||||
|
||||
import lance
|
||||
import lancedb
|
||||
@@ -35,9 +36,9 @@ class MockTable:
|
||||
def to_lance(self):
|
||||
return lance.dataset(self.uri)
|
||||
|
||||
def _execute_query(self, query):
|
||||
def _execute_query(self, query, batch_size: Optional[int] = None):
|
||||
ds = self.to_lance()
|
||||
return ds.to_table(
|
||||
return ds.scanner(
|
||||
columns=query.columns,
|
||||
filter=query.filter,
|
||||
prefilter=query.prefilter,
|
||||
@@ -49,7 +50,8 @@ class MockTable:
|
||||
"nprobes": query.nprobes,
|
||||
"refine_factor": query.refine_factor,
|
||||
},
|
||||
)
|
||||
batch_size=batch_size,
|
||||
).to_reader()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -115,6 +117,25 @@ def test_query_builder(table):
|
||||
assert all(np.array(rs[0]["vector"]) == [1, 2])
|
||||
|
||||
|
||||
def test_query_builder_batches(table):
    """Check that a vector query can be consumed as Arrow record batches.

    Builds a query against ``table`` for the vector ``[0, 0]`` on the
    ``"vector"`` column, limited to two rows and projecting ``id`` and
    ``vector``, then iterates the result of ``to_batches(1)``.
    NOTE(review): the argument ``1`` is presumably the requested batch
    size -- confirm against ``to_batches``.
    """
    builder = LanceVectorQueryBuilder(table, [0, 0], "vector")
    reader = builder.limit(2).select(["id", "vector"]).to_batches(1)

    # Drain the reader, checking the type of every yielded item as we go.
    batches = []
    for batch in reader:
        assert isinstance(batch, pa.RecordBatch)
        batches.append(batch)

    # Both rows are expected to arrive together in one batch.
    assert len(batches) == 1
    only_batch = batches[0]
    assert len(only_batch["id"]) == 2

    # Convert once and compare each row against the expected values.
    frame = only_batch.to_pandas()
    assert all(frame["vector"][0] == [1.0, 2.0])
    assert frame["id"][0] == 1
    assert all(frame["vector"][1] == [3.0, 4.0])
    assert frame["id"][1] == 2
|
||||
|
||||
|
||||
def test_dynamic_projection(table):
|
||||
rs = (
|
||||
LanceVectorQueryBuilder(table, [0, 0], "vector")
|
||||
@@ -199,7 +220,8 @@ def test_query_builder_with_different_vector_column():
|
||||
nprobes=20,
|
||||
refine_factor=None,
|
||||
vector_column="foo_vector",
|
||||
)
|
||||
),
|
||||
None,
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user