From bbfadfe58d720baf69b09ce5904fc0b72cfcdb47 Mon Sep 17 00:00:00 2001 From: Ayush Chaurasia Date: Sat, 5 Aug 2023 01:19:44 +0530 Subject: [PATCH] [python] Allow adding via iterators (#391) Makes the following work, so that all data formats accepted by `create_table()` are also accepted by `add()`: ``` import lancedb import pyarrow as pa db = lancedb.connect("/tmp") def make_batches(): for i in range(5): yield pa.RecordBatch.from_arrays( [ pa.array([[3.1, 4.1], [5.9, 26.5]]), pa.array(["foo", "bar"]), pa.array([10.0, 20.0]), ], ["vector", "item", "price"], ) schema = pa.schema([ pa.field("vector", pa.list_(pa.float32())), pa.field("item", pa.utf8()), pa.field("price", pa.float32()), ]) tbl = db.create_table("table4", make_batches(), schema=schema) tbl.add(make_batches()) ``` --- python/lancedb/table.py | 2 +- python/tests/test_db.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/python/lancedb/table.py b/python/lancedb/table.py index 3a08af53..66ed83bc 100644 --- a/python/lancedb/table.py +++ b/python/lancedb/table.py @@ -434,7 +434,7 @@ class LanceTable(Table): data = _sanitize_data( data, self.schema, on_bad_vectors=on_bad_vectors, fill_value=fill_value ) - lance.write_dataset(data, self._dataset_uri, mode=mode) + lance.write_dataset(data, self._dataset_uri, schema=self.schema, mode=mode) self._reset_dataset() def search( diff --git a/python/tests/test_db.py b/python/tests/test_db.py index d3173a76..8e71e43d 100644 --- a/python/tests/test_db.py +++ b/python/tests/test_db.py @@ -101,6 +101,11 @@ def test_ingest_record_batch_iterator(tmp_path): ), ) + tbl_len = len(tbl) + tbl.add(batch_reader()) + assert len(tbl) == tbl_len * 2 + assert len(tbl.list_versions()) == 2 + def test_create_mode(tmp_path): db = lancedb.connect(tmp_path)