diff --git a/python/python/lancedb/common.py b/python/python/lancedb/common.py
index cc894a72..80e3254d 100644
--- a/python/python/lancedb/common.py
+++ b/python/python/lancedb/common.py
@@ -20,7 +20,7 @@ from .util import safe_import_pandas
 
 pd = safe_import_pandas()
 
-DATA = Union[List[dict], dict, "pd.DataFrame", pa.Table, Iterable[pa.RecordBatch]]
+DATA = Union[List[dict], "pd.DataFrame", pa.Table, Iterable[pa.RecordBatch]]
 VEC = Union[list, np.ndarray, pa.Array, pa.ChunkedArray]
 URI = Union[str, Path]
 VECTOR_COLUMN_NAME = "vector"
diff --git a/python/python/lancedb/db.py b/python/python/lancedb/db.py
index 4a72b4b0..6af4cdb8 100644
--- a/python/python/lancedb/db.py
+++ b/python/python/lancedb/db.py
@@ -96,7 +96,7 @@ class DBConnection(EnforceOverrides):
             User must provide at least one of `data` or `schema`.
             Acceptable types are:
 
-            - dict or list-of-dict
+            - list-of-dict
 
             - pandas.DataFrame
 
@@ -579,7 +579,7 @@ class AsyncConnection(object):
             User must provide at least one of `data` or `schema`.
             Acceptable types are:
 
-            - dict or list-of-dict
+            - list-of-dict
 
             - pandas.DataFrame
 
diff --git a/python/python/lancedb/table.py b/python/python/lancedb/table.py
index b4f8a2e6..b7e01190 100644
--- a/python/python/lancedb/table.py
+++ b/python/python/lancedb/table.py
@@ -31,7 +31,6 @@ import pyarrow.compute as pc
 import pyarrow.fs as pa_fs
 from lance import LanceDataset
 from lance.dependencies import _check_for_hugging_face
-from lance.vector import vec_to_table
 
 from .common import DATA, VEC, VECTOR_COLUMN_NAME
 from .embeddings import EmbeddingFunctionConfig, EmbeddingFunctionRegistry
@@ -87,6 +86,9 @@ def _coerce_to_table(data, schema: Optional[pa.Schema] = None) -> pa.Table:
     if isinstance(data, LanceModel):
         raise ValueError("Cannot add a single LanceModel to a table. Use a list.")
 
+    if isinstance(data, dict):
+        raise ValueError("Cannot add a single dictionary to a table. Use a list.")
+
     if isinstance(data, list):
         # convert to list of dict if data is a bunch of LanceModels
         if isinstance(data[0], LanceModel):
@@ -98,8 +100,6 @@ def _coerce_to_table(data, schema: Optional[pa.Schema] = None) -> pa.Table:
             return pa.Table.from_batches(data, schema=schema)
         else:
             return pa.Table.from_pylist(data)
-    elif isinstance(data, dict):
-        return vec_to_table(data)
     elif _check_for_pandas(data) and isinstance(data, pd.DataFrame):
         # Do not add schema here, since schema may contains the vector column
         table = pa.Table.from_pandas(data, preserve_index=False)
@@ -554,7 +554,7 @@ class Table(ABC):
         data: DATA
             The data to insert into the table. Acceptable types are:
 
-            - dict or list-of-dict
+            - list-of-dict
 
             - pandas.DataFrame
 
@@ -1409,7 +1409,7 @@ class LanceTable(Table):
 
         Parameters
         ----------
-        data: list-of-dict, dict, pd.DataFrame
+        data: list-of-dict, pd.DataFrame
             The data to insert into the table.
         mode: str
             The mode to use when writing the data. Valid values are
@@ -2348,7 +2348,7 @@ class AsyncTable:
         data: DATA
             The data to insert into the table. Acceptable types are:
 
-            - dict or list-of-dict
+            - list-of-dict
 
             - pandas.DataFrame
 
diff --git a/python/python/tests/test_table.py b/python/python/tests/test_table.py
index cc5ecbd2..65ec7b3c 100644
--- a/python/python/tests/test_table.py
+++ b/python/python/tests/test_table.py
@@ -193,6 +193,24 @@ def test_empty_table(db):
     tbl.add(data=data)
 
 
+def test_add_dictionary(db):
+    schema = pa.schema(
+        [
+            pa.field("vector", pa.list_(pa.float32(), 2)),
+            pa.field("item", pa.string()),
+            pa.field("price", pa.float32()),
+        ]
+    )
+    tbl = LanceTable.create(db, "test", schema=schema)
+    data = {"vector": [3.1, 4.1], "item": "foo", "price": 10.0}
+    with pytest.raises(ValueError) as excep_info:
+        tbl.add(data=data)
+    assert (
+        str(excep_info.value)
+        == "Cannot add a single dictionary to a table. Use a list."
+    )
+
+
 def test_add(db):
     schema = pa.schema(
         [