feat(nodejs): support field/data type input in add_columns() method (#3114)

Add support for passing field/data type information into add_columns()
method, bringing parity with Python bindings. The method now accepts:

- AddColumnsSql[] - SQL expressions (existing functionality)
- Field - single Arrow field with explicit data type
- Field[] - array of Arrow fields with explicit data types
- Schema - Arrow schema with explicit data types

New columns added via Field/Schema are initialized with null values. All
field-based columns must be nullable due to null initialization.

Resolves #3107

---------

Signed-off-by: Pratik <pratikrocks.dey11@gmail.com>
Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Pratik Dey
2026-03-14 01:27:14 +05:30
committed by GitHub
parent c2e543f1b7
commit d1d720d08a
4 changed files with 158 additions and 13 deletions

View File

@@ -71,11 +71,12 @@ Add new columns with defined values.
#### Parameters
* **newColumnTransforms**: [`AddColumnsSql`](../interfaces/AddColumnsSql.md)[]
pairs of column names and
the SQL expression to use to calculate the value of the new column. These
expressions will be evaluated for each row in the table, and can
reference existing columns in the table.
* **newColumnTransforms**: `Field`&lt;`any`&gt; \| `Field`&lt;`any`&gt;[] \| `Schema`&lt;`any`&gt; \| [`AddColumnsSql`](../interfaces/AddColumnsSql.md)[]
Either:
- An array of objects with column names and SQL expressions to calculate values
- A single Arrow Field defining one column with its data type (column will be initialized with null values)
- An array of Arrow Fields defining columns with their data types (columns will be initialized with null values)
- An Arrow Schema defining columns with their data types (columns will be initialized with null values)
#### Returns

View File

@@ -1259,6 +1259,98 @@ describe("schema evolution", function () {
expect(await table.schema()).toEqual(expectedSchema);
});
it("can add columns with schema for explicit data types", async function () {
const con = await connect(tmpDir.name);
const table = await con.createTable("vectors", [
{ id: 1n, vector: [0.1, 0.2] },
]);
// Define schema for new columns with explicit data types
// Note: All columns must be nullable when using addColumns with Schema
// because they are initially populated with null values
const newColumnsSchema = new Schema([
new Field("price", new Float64(), true),
new Field("category", new Utf8(), true),
new Field("rating", new Int32(), true),
]);
const result = await table.addColumns(newColumnsSchema);
expect(result).toHaveProperty("version");
expect(result.version).toBe(2);
const expectedSchema = new Schema([
new Field("id", new Int64(), true),
new Field(
"vector",
new FixedSizeList(2, new Field("item", new Float32(), true)),
true,
),
new Field("price", new Float64(), true),
new Field("category", new Utf8(), true),
new Field("rating", new Int32(), true),
]);
expect(await table.schema()).toEqual(expectedSchema);
// Verify that new columns are populated with null values
const results = await table.query().toArray();
expect(results).toHaveLength(1);
expect(results[0].price).toBeNull();
expect(results[0].category).toBeNull();
expect(results[0].rating).toBeNull();
});
it("can add a single column using Field", async function () {
const con = await connect(tmpDir.name);
const table = await con.createTable("vectors", [
{ id: 1n, vector: [0.1, 0.2] },
]);
// Add a single field
const priceField = new Field("price", new Float64(), true);
const result = await table.addColumns(priceField);
expect(result).toHaveProperty("version");
expect(result.version).toBe(2);
const expectedSchema = new Schema([
new Field("id", new Int64(), true),
new Field(
"vector",
new FixedSizeList(2, new Field("item", new Float32(), true)),
true,
),
new Field("price", new Float64(), true),
]);
expect(await table.schema()).toEqual(expectedSchema);
});
it("can add multiple columns using array of Fields", async function () {
const con = await connect(tmpDir.name);
const table = await con.createTable("vectors", [
{ id: 1n, vector: [0.1, 0.2] },
]);
// Add multiple fields as array
const fields = [
new Field("price", new Float64(), true),
new Field("category", new Utf8(), true),
];
const result = await table.addColumns(fields);
expect(result).toHaveProperty("version");
expect(result.version).toBe(2);
const expectedSchema = new Schema([
new Field("id", new Int64(), true),
new Field(
"vector",
new FixedSizeList(2, new Field("item", new Float32(), true)),
true,
),
new Field("price", new Float64(), true),
new Field("category", new Utf8(), true),
]);
expect(await table.schema()).toEqual(expectedSchema);
});
it("can alter the columns in the schema", async function () {
const con = await connect(tmpDir.name);
const schema = new Schema([

View File

@@ -5,12 +5,15 @@ import {
Table as ArrowTable,
Data,
DataType,
Field,
IntoVector,
MultiVector,
Schema,
dataTypeToJson,
fromDataToBuffer,
fromTableToBuffer,
isMultiVector,
makeEmptyTable,
tableFromIPC,
} from "./arrow";
@@ -391,15 +394,16 @@ export abstract class Table {
abstract vectorSearch(vector: IntoVector | MultiVector): VectorQuery;
/**
* Add new columns with defined values.
* @param {AddColumnsSql[]} newColumnTransforms pairs of column names and
* the SQL expression to use to calculate the value of the new column. These
* expressions will be evaluated for each row in the table, and can
* reference existing columns in the table.
* @param {AddColumnsSql[] | Field | Field[] | Schema} newColumnTransforms Either:
* - An array of objects with column names and SQL expressions to calculate values
* - A single Arrow Field defining one column with its data type (column will be initialized with null values)
* - An array of Arrow Fields defining columns with their data types (columns will be initialized with null values)
* - An Arrow Schema defining columns with their data types (columns will be initialized with null values)
* @returns {Promise<AddColumnsResult>} A promise that resolves to an object
* containing the new version number of the table after adding the columns.
*/
abstract addColumns(
newColumnTransforms: AddColumnsSql[],
newColumnTransforms: AddColumnsSql[] | Field | Field[] | Schema,
): Promise<AddColumnsResult>;
/**
@@ -804,9 +808,40 @@ export class LocalTable extends Table {
// TODO: Support BatchUDF
async addColumns(
newColumnTransforms: AddColumnsSql[],
newColumnTransforms: AddColumnsSql[] | Field | Field[] | Schema,
): Promise<AddColumnsResult> {
return await this.inner.addColumns(newColumnTransforms);
// Handle single Field -> convert to array of Fields
if (newColumnTransforms instanceof Field) {
newColumnTransforms = [newColumnTransforms];
}
// Handle array of Fields -> convert to Schema
if (
Array.isArray(newColumnTransforms) &&
newColumnTransforms.length > 0 &&
newColumnTransforms[0] instanceof Field
) {
const fields = newColumnTransforms as Field[];
newColumnTransforms = new Schema(fields);
}
// Handle Schema -> use schema-based approach
if (newColumnTransforms instanceof Schema) {
const schema = newColumnTransforms;
// Convert schema to buffer using Arrow IPC format
const emptyTable = makeEmptyTable(schema);
const schemaBuf = await fromTableToBuffer(emptyTable);
return await this.inner.addColumnsWithSchema(schemaBuf);
}
// Handle SQL expressions (existing functionality)
if (Array.isArray(newColumnTransforms)) {
return await this.inner.addColumns(
newColumnTransforms as AddColumnsSql[],
);
}
throw new Error("Invalid input type for addColumns");
}
async alterColumns(

View File

@@ -3,7 +3,7 @@
use std::collections::HashMap;
use lancedb::ipc::ipc_file_to_batches;
use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema};
use lancedb::table::{
AddDataMode, ColumnAlteration as LanceColumnAlteration, Duration, NewColumnTransform,
OptimizeAction, OptimizeOptions, Table as LanceDbTable,
@@ -279,6 +279,23 @@ impl Table {
Ok(res.into())
}
#[napi(catch_unwind)]
pub async fn add_columns_with_schema(
&self,
schema_buf: Buffer,
) -> napi::Result<AddColumnsResult> {
let schema = ipc_file_to_schema(schema_buf.to_vec())
.map_err(|e| napi::Error::from_reason(format!("Failed to read IPC schema: {}", e)))?;
let transforms = NewColumnTransform::AllNulls(schema);
let res = self
.inner_ref()?
.add_columns(transforms, None)
.await
.default_error()?;
Ok(res.into())
}
#[napi(catch_unwind)]
pub async fn alter_columns(
&self,