mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-26 22:59:57 +00:00
fix: handle empty list with schema in table creation (#2548)
## Summary

Fixes IndexError when creating tables with empty list data and a provided schema. Previously, `_into_pyarrow_reader()` would attempt to access `data[0]` on empty lists, causing an IndexError. Now properly handles empty lists by using the provided schema.

Also adds regression tests for GitHub issues #1968 and #303 to prevent future regressions with empty table scenarios.

## Changes

- Fix IndexError in `_into_pyarrow_reader()` for empty list + schema case
- Add Optional[pa.Schema] parameter to handle empty data gracefully
- Add `test_create_table_empty_list_with_schema` for the IndexError fix
- Add `test_create_empty_then_add_data` for issue #1968
- Add `test_search_empty_table` for issue #303

## Test plan

- [x] All new regression tests pass
- [x] Existing tests continue to pass
- [x] Code formatted with `make format`
This commit is contained in:
@@ -1804,3 +1804,45 @@ def test_stats(mem_db: DBConnection):
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def test_create_table_empty_list_with_schema(mem_db: DBConnection):
    """Create a table from an empty list when an explicit schema is given.

    Regression test for ``IndexError: list index out of range`` raised by
    ``create_table(name, data=[], schema=schema)``.
    """
    vector_field = pa.field("vector", pa.list_(pa.float32(), 2))
    id_field = pa.field("id", pa.int64())
    expected_schema = pa.schema([vector_field, id_field])

    tbl = mem_db.create_table("test_empty_list", data=[], schema=expected_schema)

    # The table must exist, hold no rows, and carry exactly the given schema.
    assert tbl.count_rows() == 0
    assert tbl.schema == expected_schema
|
||||
|
||||
|
||||
def test_create_table_empty_list_no_schema_error(mem_db: DBConnection):
    """An empty list without a schema must be rejected with a ValueError."""
    expected_message = "Cannot create table from empty list without a schema"
    with pytest.raises(ValueError, match=expected_message):
        mem_db.create_table("test_empty_no_schema", data=[])
|
||||
|
||||
|
||||
def test_add_table_with_empty_embeddings(tmp_path):
    """Create a table from a schema only, then add a row to it.

    Regression test for issue #1968:
    https://github.com/lancedb/lancedb/issues/1968
    """

    class MySchema(LanceModel):
        text: str
        embedding: Vector(16)

    conn = lancedb.connect(tmp_path)
    tbl = conn.create_table("test", schema=MySchema)

    # Single row whose embedding matches the declared vector width.
    rows = [{"text": "bar", "embedding": [0.1] * 16}]
    tbl.add(rows, on_bad_vectors="drop")

    assert tbl.count_rows() == 1
|
||||
|
||||
Reference in New Issue
Block a user