From f09db4a6d6ad1867e14ceb008ba336ccbdd8c20f Mon Sep 17 00:00:00 2001 From: Lei Xu Date: Tue, 18 Jul 2023 17:11:17 -0700 Subject: [PATCH] [Python] Do not return Table count for every add operation (#328) `Table::count()` will be linearly slower with more fragments ingested. --- python/lancedb/remote/table.py | 1 - python/lancedb/table.py | 11 ++--------- python/tests/test_table.py | 4 ++-- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/python/lancedb/remote/table.py b/python/lancedb/remote/table.py index c4a4f2a7..14e1818d 100644 --- a/python/lancedb/remote/table.py +++ b/python/lancedb/remote/table.py @@ -79,7 +79,6 @@ class RemoteTable(Table): content_type=ARROW_STREAM_CONTENT_TYPE, ) ) - return len(data) def search( self, query: Union[VEC, str], vector_column: str = VECTOR_COLUMN_NAME diff --git a/python/lancedb/table.py b/python/lancedb/table.py index e2b3fec1..2f8dd5d8 100644 --- a/python/lancedb/table.py +++ b/python/lancedb/table.py @@ -74,7 +74,6 @@ class Table(ABC): Can append new data with [Table.add()][lancedb.table.Table.add]. >>> table.add([{"vector": [0.5, 1.3], "b": 4}]) - 2 Can query the table with [Table.search][lancedb.table.Table.search]. @@ -151,7 +150,7 @@ class Table(ABC): mode: str = "append", on_bad_vectors: str = "error", fill_value: float = 0.0, - ) -> int: + ): """Add more data to the [Table](Table). Parameters @@ -167,10 +166,6 @@ class Table(ABC): fill_value: float, default 0. The value to use when filling vectors. Only used if on_bad_vectors="fill". - Returns - ------- - int - The number of vectors in the table. """ raise NotImplementedError @@ -307,7 +302,6 @@ class LanceTable(Table): vector type 0 [1.1, 0.9] vector >>> table.add([{"vector": [0.5, 0.2], "type": "vector"}]) - 2 >>> table.version 2 >>> table.checkout(1) @@ -409,7 +403,7 @@ class LanceTable(Table): mode: str = "append", on_bad_vectors: str = "error", fill_value: float = 0.0, - ) -> int: + ): """Add data to the table. Parameters @@ -436,7 +430,6 @@ class LanceTable(Table): ) lance.write_dataset(data, self._dataset_uri, mode=mode) self._reset_dataset() - return len(self) def search( self, query: Union[VEC, str], vector_column_name=VECTOR_COLUMN_NAME diff --git a/python/tests/test_table.py b/python/tests/test_table.py index a3695d43..f7239590 100644 --- a/python/tests/test_table.py +++ b/python/tests/test_table.py @@ -139,8 +139,8 @@ def _add(table, schema): # table = LanceTable(db, "test") assert len(table) == 2 - count = table.add([{"vector": [6.3, 100.5], "item": "new", "price": 30.0}]) - assert count == 3 + table.add([{"vector": [6.3, 100.5], "item": "new", "price": 30.0}]) + assert len(table) == 3 expected = pa.Table.from_arrays( [