feat: schema evolution APIs in all SDKs (#1851)

* Support `add_columns`, `alter_columns`, `drop_columns` in Remote SDK
and async Python
* Add `data_type` parameter to node
* Docs updates
This commit is contained in:
Will Jones
2024-12-04 14:47:50 -08:00
committed by GitHub
parent bd82e1f66d
commit 79eaa52184
10 changed files with 535 additions and 44 deletions

View File

@@ -825,6 +825,18 @@ describe("schema evolution", function () {
new Field("price", new Float64(), true),
]);
expect(await table.schema()).toEqual(expectedSchema);
await table.alterColumns([{ path: "new_id", dataType: "int32" }]);
const expectedSchema2 = new Schema([
new Field("new_id", new Int32(), true),
new Field(
"vector",
new FixedSizeList(2, new Field("item", new Float32(), true)),
true,
),
new Field("price", new Float64(), true),
]);
expect(await table.schema()).toEqual(expectedSchema2);
});
it("can drop a column from the schema", async function () {

View File

@@ -116,6 +116,26 @@ test("basic table examples", async () => {
await tbl.add(data);
// --8<-- [end:add_data]
}
{
// --8<-- [start:add_columns]
await tbl.addColumns([{ name: "double_price", valueSql: "price * 2" }]);
// --8<-- [end:add_columns]
// --8<-- [start:alter_columns]
await tbl.alterColumns([
{
path: "double_price",
rename: "dbl_price",
dataType: "float",
nullable: true,
},
]);
// --8<-- [end:alter_columns]
// --8<-- [start:drop_columns]
await tbl.dropColumns(["dbl_price"]);
// --8<-- [end:drop_columns]
}
{
// --8<-- [start:vector_search]
const res = await tbl.search([100, 100]).limit(2).toArray();

View File

@@ -178,16 +178,20 @@ impl Table {
#[napi(catch_unwind)]
pub async fn alter_columns(&self, alterations: Vec<ColumnAlteration>) -> napi::Result<()> {
for alteration in &alterations {
if alteration.rename.is_none() && alteration.nullable.is_none() {
if alteration.rename.is_none()
&& alteration.nullable.is_none()
&& alteration.data_type.is_none()
{
return Err(napi::Error::from_reason(
"Alteration must have a 'rename' or 'nullable' field.",
"Alteration must have a 'rename', 'dataType', or 'nullable' field.",
));
}
}
let alterations = alterations
.into_iter()
.map(LanceColumnAlteration::from)
.collect::<Vec<_>>();
.map(LanceColumnAlteration::try_from)
.collect::<std::result::Result<Vec<_>, String>>()
.map_err(napi::Error::from_reason)?;
self.inner_ref()?
.alter_columns(&alterations)
@@ -433,24 +437,43 @@ pub struct ColumnAlteration {
/// The new name of the column. If not provided then the name will not be changed.
/// This must be distinct from the names of all other columns in the table.
pub rename: Option<String>,
/// A new data type for the column. If not provided then the data type will not be changed.
/// Changing data types is limited to casting to the same general type. For example, these
/// changes are valid:
/// * `int32` -> `int64` (integers)
/// * `double` -> `float` (floats)
/// * `string` -> `large_string` (strings)
/// But these changes are not:
/// * `int32` -> `double` (mix integers and floats)
/// * `string` -> `int32` (mix strings and integers)
pub data_type: Option<String>,
/// Set the new nullability. Note that a nullable column cannot be made non-nullable.
pub nullable: Option<bool>,
}
impl From<ColumnAlteration> for LanceColumnAlteration {
fn from(js: ColumnAlteration) -> Self {
impl TryFrom<ColumnAlteration> for LanceColumnAlteration {
type Error = String;
fn try_from(js: ColumnAlteration) -> std::result::Result<Self, Self::Error> {
let ColumnAlteration {
path,
rename,
nullable,
data_type,
} = js;
Self {
let data_type = if let Some(data_type) = data_type {
Some(
lancedb::utils::string_to_datatype(&data_type)
.ok_or_else(|| format!("Invalid data type: {}", data_type))?,
)
} else {
None
};
Ok(Self {
path,
rename,
nullable,
// TODO: wire up this field
data_type: None,
}
data_type,
})
}
}