From 2616a50502d1cfabedcb8db4e6b610b990e119c6 Mon Sep 17 00:00:00 2001 From: QianZhu Date: Tue, 26 Nov 2024 16:03:16 -0800 Subject: [PATCH] fix: test errors after setting default limit (#1891) --- nodejs/__test__/connection.test.ts | 5 ++++- nodejs/__test__/table.test.ts | 4 ++-- python/python/lancedb/query.py | 9 +++++---- python/python/tests/test_db.py | 4 +++- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/nodejs/__test__/connection.test.ts b/nodejs/__test__/connection.test.ts index 070121f7..38b5afad 100644 --- a/nodejs/__test__/connection.test.ts +++ b/nodejs/__test__/connection.test.ts @@ -110,7 +110,10 @@ describe("given a connection", () => { let table = await db.createTable("test", data, { useLegacyFormat: true }); const isV2 = async (table: Table) => { - const data = await table.query().toArrow({ maxBatchLength: 100000 }); + const data = await table + .query() + .limit(10000) + .toArrow({ maxBatchLength: 100000 }); console.log(data.batches.length); return data.batches.length < 5; }; diff --git a/nodejs/__test__/table.test.ts b/nodejs/__test__/table.test.ts index 6e002361..456fda15 100644 --- a/nodejs/__test__/table.test.ts +++ b/nodejs/__test__/table.test.ts @@ -585,11 +585,11 @@ describe("When creating an index", () => { expect(fs.readdirSync(indexDir)).toHaveLength(1); for await (const r of tbl.query().where("id > 1").select(["id"])) { - expect(r.numRows).toBe(298); + expect(r.numRows).toBe(10); } // should also work with 'filter' alias for await (const r of tbl.query().filter("id > 1").select(["id"])) { - expect(r.numRows).toBe(298); + expect(r.numRows).toBe(10); } }); diff --git a/python/python/lancedb/query.py b/python/python/lancedb/query.py index e81e1e02..dbd0295c 100644 --- a/python/python/lancedb/query.py +++ b/python/python/lancedb/query.py @@ -1502,10 +1502,11 @@ class AsyncQueryBase(object): ... print(plan) >>> asyncio.run(doctest_example()) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE ProjectionExec: expr=[vector@0 as vector, _distance@2 as _distance] - FilterExec: _distance@2 IS NOT NULL - SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false] - KNNVectorDistance: metric=l2 - LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false + GlobalLimitExec: skip=0, fetch=10 + FilterExec: _distance@2 IS NOT NULL + SortExec: TopK(fetch=10), expr=[_distance@2 ASC NULLS LAST], preserve_partitioning=[false] + KNNVectorDistance: metric=l2 + LanceScan: uri=..., projection=[vector], row_id=true, row_addr=false, ordered=false Parameters ---------- diff --git a/python/python/tests/test_db.py b/python/python/tests/test_db.py index 2e01343b..93cd2aa8 100644 --- a/python/python/tests/test_db.py +++ b/python/python/tests/test_db.py @@ -599,7 +599,9 @@ async def test_create_in_v2_mode(tmp_path): ) async def is_in_v2_mode(tbl): - batches = await tbl.query().to_batches(max_batch_length=1024 * 10) + batches = ( + await tbl.query().limit(10 * 1024).to_batches(max_batch_length=1024 * 10) + ) num_batches = 0 async for batch in batches: num_batches += 1