mirror of
https://github.com/lancedb/lancedb.git
synced 2025-12-27 23:12:58 +00:00
feat(python): drop support for dictionary in Table.add (#1725)
PR closes #1706
This commit is contained in:
@@ -20,7 +20,7 @@ from .util import safe_import_pandas
|
||||
|
||||
pd = safe_import_pandas()
|
||||
|
||||
DATA = Union[List[dict], dict, "pd.DataFrame", pa.Table, Iterable[pa.RecordBatch]]
|
||||
DATA = Union[List[dict], "pd.DataFrame", pa.Table, Iterable[pa.RecordBatch]]
|
||||
VEC = Union[list, np.ndarray, pa.Array, pa.ChunkedArray]
|
||||
URI = Union[str, Path]
|
||||
VECTOR_COLUMN_NAME = "vector"
|
||||
|
||||
@@ -96,7 +96,7 @@ class DBConnection(EnforceOverrides):
|
||||
User must provide at least one of `data` or `schema`.
|
||||
Acceptable types are:
|
||||
|
||||
- dict or list-of-dict
|
||||
- list-of-dict
|
||||
|
||||
- pandas.DataFrame
|
||||
|
||||
@@ -579,7 +579,7 @@ class AsyncConnection(object):
|
||||
User must provide at least one of `data` or `schema`.
|
||||
Acceptable types are:
|
||||
|
||||
- dict or list-of-dict
|
||||
- list-of-dict
|
||||
|
||||
- pandas.DataFrame
|
||||
|
||||
|
||||
@@ -31,7 +31,6 @@ import pyarrow.compute as pc
|
||||
import pyarrow.fs as pa_fs
|
||||
from lance import LanceDataset
|
||||
from lance.dependencies import _check_for_hugging_face
|
||||
from lance.vector import vec_to_table
|
||||
|
||||
from .common import DATA, VEC, VECTOR_COLUMN_NAME
|
||||
from .embeddings import EmbeddingFunctionConfig, EmbeddingFunctionRegistry
|
||||
@@ -87,6 +86,9 @@ def _coerce_to_table(data, schema: Optional[pa.Schema] = None) -> pa.Table:
|
||||
if isinstance(data, LanceModel):
|
||||
raise ValueError("Cannot add a single LanceModel to a table. Use a list.")
|
||||
|
||||
if isinstance(data, dict):
|
||||
raise ValueError("Cannot add a single dictionary to a table. Use a list.")
|
||||
|
||||
if isinstance(data, list):
|
||||
# convert to list of dict if data is a bunch of LanceModels
|
||||
if isinstance(data[0], LanceModel):
|
||||
@@ -98,8 +100,6 @@ def _coerce_to_table(data, schema: Optional[pa.Schema] = None) -> pa.Table:
|
||||
return pa.Table.from_batches(data, schema=schema)
|
||||
else:
|
||||
return pa.Table.from_pylist(data)
|
||||
elif isinstance(data, dict):
|
||||
return vec_to_table(data)
|
||||
elif _check_for_pandas(data) and isinstance(data, pd.DataFrame):
|
||||
# Do not add schema here, since schema may contains the vector column
|
||||
table = pa.Table.from_pandas(data, preserve_index=False)
|
||||
@@ -554,7 +554,7 @@ class Table(ABC):
|
||||
data: DATA
|
||||
The data to insert into the table. Acceptable types are:
|
||||
|
||||
- dict or list-of-dict
|
||||
- list-of-dict
|
||||
|
||||
- pandas.DataFrame
|
||||
|
||||
@@ -1409,7 +1409,7 @@ class LanceTable(Table):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
data: list-of-dict, dict, pd.DataFrame
|
||||
data: list-of-dict, pd.DataFrame
|
||||
The data to insert into the table.
|
||||
mode: str
|
||||
The mode to use when writing the data. Valid values are
|
||||
@@ -2348,7 +2348,7 @@ class AsyncTable:
|
||||
data: DATA
|
||||
The data to insert into the table. Acceptable types are:
|
||||
|
||||
- dict or list-of-dict
|
||||
- list-of-dict
|
||||
|
||||
- pandas.DataFrame
|
||||
|
||||
|
||||
@@ -193,6 +193,24 @@ def test_empty_table(db):
|
||||
tbl.add(data=data)
|
||||
|
||||
|
||||
def test_add_dictionary(db):
|
||||
schema = pa.schema(
|
||||
[
|
||||
pa.field("vector", pa.list_(pa.float32(), 2)),
|
||||
pa.field("item", pa.string()),
|
||||
pa.field("price", pa.float32()),
|
||||
]
|
||||
)
|
||||
tbl = LanceTable.create(db, "test", schema=schema)
|
||||
data = {"vector": [3.1, 4.1], "item": "foo", "price": 10.0}
|
||||
with pytest.raises(ValueError) as excep_info:
|
||||
tbl.add(data=data)
|
||||
assert (
|
||||
str(excep_info.value)
|
||||
== "Cannot add a single dictionary to a table. Use a list."
|
||||
)
|
||||
|
||||
|
||||
def test_add(db):
|
||||
schema = pa.schema(
|
||||
[
|
||||
|
||||
Reference in New Issue
Block a user