feat: support creating a table from a record batch iterator (#1593)

This commit is contained in:
BubbleCal
2024-09-06 10:41:38 +08:00
committed by GitHub
parent 1d61717d0e
commit 8dcd328dce
6 changed files with 119 additions and 91 deletions

View File

@@ -233,6 +233,43 @@ def test_create_mode(tmp_path):
assert tbl.to_pandas().item.tolist() == ["fizz", "buzz"]
def test_create_table_from_iterator(tmp_path):
    """Create a table from a generator of record batches (sync API).

    The generator yields ten single-row batches with the columns
    ``vector``, ``item`` and ``price``; the created table is expected to
    report ten rows.
    """
    num_batches = 10
    column_names = ["vector", "item", "price"]

    def batch_stream():
        # A fresh one-row batch is built on every iteration so the table is
        # fed from a lazily evaluated iterator, not a materialized list.
        produced = 0
        while produced < num_batches:
            vector_col = pa.array([[3.1, 4.1]], pa.list_(pa.float32(), 2))
            item_col = pa.array(["foo"])
            price_col = pa.array([10.0])
            yield pa.RecordBatch.from_arrays(
                [vector_col, item_col, price_col],
                column_names,
            )
            produced += 1

    conn = lancedb.connect(tmp_path)
    tbl = conn.create_table("test", data=batch_stream())
    assert tbl.count_rows() == num_batches
@pytest.mark.asyncio
async def test_create_table_from_iterator_async(tmp_path):
    """Create a table from a generator of record batches (async API).

    The generator yields ten single-row batches with the columns
    ``vector``, ``item`` and ``price``; the created table is expected to
    report ten rows.
    """
    num_batches = 10
    column_names = ["vector", "item", "price"]

    def batch_stream():
        # A plain (synchronous) generator: one new single-row batch per step.
        produced = 0
        while produced < num_batches:
            vector_col = pa.array([[3.1, 4.1]], pa.list_(pa.float32(), 2))
            item_col = pa.array(["foo"])
            price_col = pa.array([10.0])
            yield pa.RecordBatch.from_arrays(
                [vector_col, item_col, price_col],
                column_names,
            )
            produced += 1

    conn = await lancedb.connect_async(tmp_path)
    tbl = await conn.create_table("test", data=batch_stream())
    row_count = await tbl.count_rows()
    assert row_count == num_batches
def test_create_exist_ok(tmp_path):
db = lancedb.connect(tmp_path)
data = pd.DataFrame(