feat(python): drop support for dictionary in Table.add (#1725)

PR closes #1706
This commit is contained in:
Gagan Bhullar
2024-10-08 20:41:08 -06:00
committed by GitHub
parent e61ba7f4e2
commit 4d458d5829
4 changed files with 27 additions and 9 deletions

View File

@@ -20,7 +20,7 @@ from .util import safe_import_pandas
pd = safe_import_pandas()
DATA = Union[List[dict], dict, "pd.DataFrame", pa.Table, Iterable[pa.RecordBatch]]
DATA = Union[List[dict], "pd.DataFrame", pa.Table, Iterable[pa.RecordBatch]]
VEC = Union[list, np.ndarray, pa.Array, pa.ChunkedArray]
URI = Union[str, Path]
VECTOR_COLUMN_NAME = "vector"

View File

@@ -96,7 +96,7 @@ class DBConnection(EnforceOverrides):
User must provide at least one of `data` or `schema`.
Acceptable types are:
- dict or list-of-dict
- list-of-dict
- pandas.DataFrame
@@ -579,7 +579,7 @@ class AsyncConnection(object):
User must provide at least one of `data` or `schema`.
Acceptable types are:
- dict or list-of-dict
- list-of-dict
- pandas.DataFrame

View File

@@ -31,7 +31,6 @@ import pyarrow.compute as pc
import pyarrow.fs as pa_fs
from lance import LanceDataset
from lance.dependencies import _check_for_hugging_face
from lance.vector import vec_to_table
from .common import DATA, VEC, VECTOR_COLUMN_NAME
from .embeddings import EmbeddingFunctionConfig, EmbeddingFunctionRegistry
@@ -87,6 +86,9 @@ def _coerce_to_table(data, schema: Optional[pa.Schema] = None) -> pa.Table:
if isinstance(data, LanceModel):
raise ValueError("Cannot add a single LanceModel to a table. Use a list.")
if isinstance(data, dict):
raise ValueError("Cannot add a single dictionary to a table. Use a list.")
if isinstance(data, list):
# convert to list of dict if data is a bunch of LanceModels
if isinstance(data[0], LanceModel):
@@ -98,8 +100,6 @@ def _coerce_to_table(data, schema: Optional[pa.Schema] = None) -> pa.Table:
return pa.Table.from_batches(data, schema=schema)
else:
return pa.Table.from_pylist(data)
elif isinstance(data, dict):
return vec_to_table(data)
elif _check_for_pandas(data) and isinstance(data, pd.DataFrame):
# Do not add schema here, since schema may contains the vector column
table = pa.Table.from_pandas(data, preserve_index=False)
@@ -554,7 +554,7 @@ class Table(ABC):
data: DATA
The data to insert into the table. Acceptable types are:
- dict or list-of-dict
- list-of-dict
- pandas.DataFrame
@@ -1409,7 +1409,7 @@ class LanceTable(Table):
Parameters
----------
data: list-of-dict, dict, pd.DataFrame
data: list-of-dict, pd.DataFrame
The data to insert into the table.
mode: str
The mode to use when writing the data. Valid values are
@@ -2348,7 +2348,7 @@ class AsyncTable:
data: DATA
The data to insert into the table. Acceptable types are:
- dict or list-of-dict
- list-of-dict
- pandas.DataFrame

View File

@@ -193,6 +193,24 @@ def test_empty_table(db):
tbl.add(data=data)
def test_add_dictionary(db):
    """Check that ``add`` rejects a single dict instead of a list of dicts.

    A table is created from a schema only, then ``tbl.add`` is called with one
    bare dictionary. The call must raise ``ValueError`` carrying the exact
    message that tells the caller to pass a list.
    """
    expected_message = "Cannot add a single dictionary to a table. Use a list."
    columns = [
        pa.field("vector", pa.list_(pa.float32(), 2)),
        pa.field("item", pa.string()),
        pa.field("price", pa.float32()),
    ]
    tbl = LanceTable.create(db, "test", schema=pa.schema(columns))
    single_row = {"vector": [3.1, 4.1], "item": "foo", "price": 10.0}

    with pytest.raises(ValueError) as raised:
        tbl.add(data=single_row)

    # Checked after the ``with`` block: a statement placed inside it, below the
    # raising call, would never execute.
    assert str(raised.value) == expected_message
def test_add(db):
schema = pa.schema(
[