feat(node): parse arrow types in alterColumns() (#2208)

Previously, users could only specify new data types in `alterColumns` as
strings:

```ts
await tbl.alterColumns([
  {
    path: "price",
    dataType: "float",
  },
]);
```

But this has some problems:

1. It wasn't clear which types were valid.
2. It was impossible to specify nested types, like lists and vector
columns.

This PR changes the `dataType` field of `alterColumns` to take an Arrow data
type, similar to how the Python API works. This allows casting vector types:

```ts
await tbl.alterColumns([
  {
    path: "vector",
    dataType: new arrow.FixedSizeList(
      2,
      new arrow.Field("item", new arrow.Float16(), false),
    ),
  },
]);
```

Closes #2185
This commit is contained in:
Will Jones
2025-03-12 09:57:36 -07:00
committed by GitHub
parent c9d6fc43a6
commit 7747c9bcbf
10 changed files with 365 additions and 12 deletions

View File

@@ -83,6 +83,21 @@ def test_quickstart(tmp_path):
}
)
# --8<-- [end:alter_columns]
# --8<-- [start:alter_columns_vector]
tbl.alter_columns(
{
"path": "vector",
"data_type": pa.list_(pa.float16(), list_size=2),
}
)
# --8<-- [end:alter_columns_vector]
# Change it back since we can get a panic with fp16
tbl.alter_columns(
{
"path": "vector",
"data_type": pa.list_(pa.float32(), list_size=2),
}
)
# --8<-- [start:drop_columns]
tbl.drop_columns(["dbl_price"])
# --8<-- [end:drop_columns]
@@ -162,6 +177,21 @@ async def test_quickstart_async(tmp_path):
}
)
# --8<-- [end:alter_columns_async]
# --8<-- [start:alter_columns_async_vector]
await tbl.alter_columns(
{
"path": "vector",
"data_type": pa.list_(pa.float16(), list_size=2),
}
)
# --8<-- [end:alter_columns_async_vector]
# Change it back since we can get a panic with fp16
await tbl.alter_columns(
{
"path": "vector",
"data_type": pa.list_(pa.float32(), list_size=2),
}
)
# --8<-- [start:drop_columns_async]
await tbl.drop_columns(["dbl_price"])
# --8<-- [end:drop_columns_async]