feat: schema evolution APIs in all SDKs (#1851)

* Support `add_columns`, `alter_columns`, `drop_columns` in Remote SDK
and async Python
* Add `data_type` parameter to node
* Docs updates
This commit is contained in:
Will Jones
2024-12-04 14:47:50 -08:00
committed by GitHub
parent bd82e1f66d
commit 79eaa52184
10 changed files with 535 additions and 44 deletions

View File

@@ -1292,6 +1292,19 @@ def test_add_columns(tmp_path):
assert table.to_arrow().column_names == ["id", "new_col"]
assert table.to_arrow()["new_col"].to_pylist() == [2, 3]
table.add_columns({"null_int": "cast(null as bigint)"})
assert table.schema.field("null_int").type == pa.int64()
@pytest.mark.asyncio
async def test_add_columns_async(db_async: AsyncConnection):
data = pa.table({"id": [0, 1]})
table = await db_async.create_table("my_table", data=data)
await table.add_columns({"new_col": "id + 2"})
data = await table.to_arrow()
assert data.column_names == ["id", "new_col"]
assert data["new_col"].to_pylist() == [2, 3]
def test_alter_columns(tmp_path):
db = lancedb.connect(tmp_path)
@@ -1301,6 +1314,18 @@ def test_alter_columns(tmp_path):
assert table.to_arrow().column_names == ["new_id"]
@pytest.mark.asyncio
async def test_alter_columns_async(db_async: AsyncConnection):
data = pa.table({"id": [0, 1]})
table = await db_async.create_table("my_table", data=data)
await table.alter_columns({"path": "id", "rename": "new_id"})
assert (await table.to_arrow()).column_names == ["new_id"]
await table.alter_columns(dict(path="new_id", data_type=pa.int16(), nullable=True))
data = await table.to_arrow()
assert data.column(0).type == pa.int16()
assert data.schema.field(0).nullable
def test_drop_columns(tmp_path):
db = lancedb.connect(tmp_path)
data = pa.table({"id": [0, 1], "category": ["a", "b"]})
@@ -1309,6 +1334,14 @@ def test_drop_columns(tmp_path):
assert table.to_arrow().column_names == ["id"]
@pytest.mark.asyncio
async def test_drop_columns_async(db_async: AsyncConnection):
data = pa.table({"id": [0, 1], "category": ["a", "b"]})
table = await db_async.create_table("my_table", data=data)
await table.drop_columns(["category"])
assert (await table.to_arrow()).column_names == ["id"]
@pytest.mark.asyncio
async def test_time_travel(db_async: AsyncConnection):
# Setup