diff --git a/docs/src/js/classes/Table.md b/docs/src/js/classes/Table.md
index e3b575948..5b6b55089 100644
--- a/docs/src/js/classes/Table.md
+++ b/docs/src/js/classes/Table.md
@@ -71,11 +71,12 @@ Add new columns with defined values.

 #### Parameters

-* **newColumnTransforms**: [`AddColumnsSql`](../interfaces/AddColumnsSql.md)[]
-  pairs of column names and
-  the SQL expression to use to calculate the value of the new column. These
-  expressions will be evaluated for each row in the table, and can
-  reference existing columns in the table.
+* **newColumnTransforms**: `Field`<`any`> \| `Field`<`any`>[] \| `Schema`<`any`> \| [`AddColumnsSql`](../interfaces/AddColumnsSql.md)[]
+  Either:
+  - An array of objects with column names and SQL expressions to calculate values
+  - A single Arrow Field defining one column with its data type (column will be initialized with null values)
+  - An array of Arrow Fields defining columns with their data types (columns will be initialized with null values)
+  - An Arrow Schema defining columns with their data types (columns will be initialized with null values)

 #### Returns

diff --git a/nodejs/__test__/table.test.ts b/nodejs/__test__/table.test.ts
index 0930ffc8a..2f25606bc 100644
--- a/nodejs/__test__/table.test.ts
+++ b/nodejs/__test__/table.test.ts
@@ -1259,6 +1259,98 @@ describe("schema evolution", function () {
     expect(await table.schema()).toEqual(expectedSchema);
   });

+  it("can add columns with schema for explicit data types", async function () {
+    const con = await connect(tmpDir.name);
+    const table = await con.createTable("vectors", [
+      { id: 1n, vector: [0.1, 0.2] },
+    ]);
+
+    // Define schema for new columns with explicit data types
+    // Note: All columns must be nullable when using addColumns with Schema
+    // because they are initially populated with null values
+    const newColumnsSchema = new Schema([
+      new Field("price", new Float64(), true), // third argument: nullable
+      new Field("category", new Utf8(), true),
+      new Field("rating", new Int32(), true),
+    ]);
+
+    const result = await table.addColumns(newColumnsSchema); // Schema form
+    expect(result).toHaveProperty("version");
+    expect(result.version).toBe(2); // addColumns resolves to the table's new version number
+
+    const expectedSchema = new Schema([
+      new Field("id", new Int64(), true),
+      new Field(
+        "vector",
+        new FixedSizeList(2, new Field("item", new Float32(), true)),
+        true,
+      ),
+      new Field("price", new Float64(), true), // new columns follow the existing ones
+      new Field("category", new Utf8(), true),
+      new Field("rating", new Int32(), true),
+    ]);
+    expect(await table.schema()).toEqual(expectedSchema);
+
+    // Verify that new columns are populated with null values
+    const results = await table.query().toArray();
+    expect(results).toHaveLength(1); // the single row inserted by createTable above
+    expect(results[0].price).toBeNull();
+    expect(results[0].category).toBeNull();
+    expect(results[0].rating).toBeNull();
+  });
+
+  it("can add a single column using Field", async function () {
+    const con = await connect(tmpDir.name);
+    const table = await con.createTable("vectors", [
+      { id: 1n, vector: [0.1, 0.2] },
+    ]);
+
+    // Add a single field
+    const priceField = new Field("price", new Float64(), true);
+    const result = await table.addColumns(priceField); // single-Field form
+    expect(result).toHaveProperty("version");
+    expect(result.version).toBe(2);
+
+    const expectedSchema = new Schema([
+      new Field("id", new Int64(), true),
+      new Field(
+        "vector",
+        new FixedSizeList(2, new Field("item", new Float32(), true)),
+        true,
+      ),
+      new Field("price", new Float64(), true),
+    ]);
+    expect(await table.schema()).toEqual(expectedSchema);
+  });
+
+  it("can add multiple columns using array of Fields", async function () {
+    const con = await connect(tmpDir.name);
+    const table = await con.createTable("vectors", [
+      { id: 1n, vector: [0.1, 0.2] },
+    ]);
+
+    // Add multiple fields as array
+    const fields = [
+      new Field("price", new Float64(), true),
+      new Field("category", new Utf8(), true),
+    ];
+    const result = await table.addColumns(fields); // Field[] form
+    expect(result).toHaveProperty("version");
+    expect(result.version).toBe(2);
+
+    const expectedSchema = new Schema([
+      new Field("id", new Int64(), true),
+      new Field(
+        "vector",
+        new FixedSizeList(2, new Field("item", new Float32(), true)),
+        true,
+      ),
+      new Field("price", new Float64(), true),
+      new Field("category", new Utf8(), true),
+    ]);
+    expect(await table.schema()).toEqual(expectedSchema);
+  });
+
   it("can alter the columns in the schema", async function () {
     const con = await connect(tmpDir.name);
     const schema = new Schema([
diff --git a/nodejs/lancedb/table.ts b/nodejs/lancedb/table.ts
index 3e48e8f05..ca33504e4 100644
--- a/nodejs/lancedb/table.ts
+++ b/nodejs/lancedb/table.ts
@@ -5,12 +5,15 @@ import {
   Table as ArrowTable,
   Data,
   DataType,
+  Field,
   IntoVector,
   MultiVector,
   Schema,
   dataTypeToJson,
   fromDataToBuffer,
+  fromTableToBuffer,
   isMultiVector,
+  makeEmptyTable,
   tableFromIPC,
 } from "./arrow";

@@ -391,15 +394,16 @@ export abstract class Table {
   abstract vectorSearch(vector: IntoVector | MultiVector): VectorQuery;
   /**
    * Add new columns with defined values.
-   * @param {AddColumnsSql[]} newColumnTransforms pairs of column names and
-   * the SQL expression to use to calculate the value of the new column. These
-   * expressions will be evaluated for each row in the table, and can
-   * reference existing columns in the table.
+   * @param {AddColumnsSql[] | Field | Field[] | Schema} newColumnTransforms Either:
+   * - An array of objects with column names and SQL expressions to calculate values
+   * - A single Arrow Field defining one column with its data type (column will be initialized with null values)
+   * - An array of Arrow Fields defining columns with their data types (columns will be initialized with null values)
+   * - An Arrow Schema defining columns with their data types (columns will be initialized with null values)
    * @returns {Promise} A promise that resolves to an object
    * containing the new version number of the table after adding the columns.
*/
   abstract addColumns(
-    newColumnTransforms: AddColumnsSql[],
+    newColumnTransforms: AddColumnsSql[] | Field | Field[] | Schema,
   ): Promise;

   /**
@@ -804,9 +808,55 @@ export class LocalTable extends Table {

   // TODO: Support BatchUDF
+  /**
+   * Add new columns, either computed from SQL expressions or declared with
+   * Arrow types (in which case they are initialized with null values).
+   * @throws {Error} if an array mixes Arrow Fields with SQL expressions, or
+   *   if the input is none of the supported types.
+   */
   async addColumns(
-    newColumnTransforms: AddColumnsSql[],
+    newColumnTransforms: AddColumnsSql[] | Field | Field[] | Schema,
   ): Promise {
-    return await this.inner.addColumns(newColumnTransforms);
+    // Handle single Field -> convert to array of Fields
+    if (newColumnTransforms instanceof Field) {
+      newColumnTransforms = [newColumnTransforms];
+    }
+
+    // Handle array of Fields -> convert to Schema.
+    // Every element is inspected, not only the first, so that an array mixing
+    // Fields with SQL expressions is rejected here with a clear message
+    // instead of being forwarded to whichever native call the first element
+    // happened to select.
+    if (Array.isArray(newColumnTransforms)) {
+      const items = newColumnTransforms as (AddColumnsSql | Field)[];
+      const fieldCount = items.filter((item) => item instanceof Field).length;
+      if (fieldCount > 0 && fieldCount < items.length) {
+        throw new Error(
+          "addColumns cannot mix Arrow Fields and SQL expressions in one array",
+        );
+      }
+      if (fieldCount > 0) {
+        newColumnTransforms = new Schema(items as Field[]);
+      }
+    }
+
+    // Handle Schema -> use schema-based approach
+    if (newColumnTransforms instanceof Schema) {
+      const schema = newColumnTransforms;
+      // Convert schema to buffer using Arrow IPC format
+      const emptyTable = makeEmptyTable(schema);
+      const schemaBuf = await fromTableToBuffer(emptyTable);
+      return await this.inner.addColumnsWithSchema(schemaBuf);
+    }
+
+    // Handle SQL expressions (existing functionality); an empty array also
+    // takes this path, exactly as before.
+    if (Array.isArray(newColumnTransforms)) {
+      return await this.inner.addColumns(
+        newColumnTransforms as AddColumnsSql[],
+      );
+    }
+
+    throw new Error("Invalid input type for addColumns");
   }

   async alterColumns(
diff --git a/nodejs/src/table.rs b/nodejs/src/table.rs
index 69788e1e9..cad21838a 100644
--- a/nodejs/src/table.rs
+++ b/nodejs/src/table.rs
@@ -3,7 +3,7 @@

 use std::collections::HashMap;

-use lancedb::ipc::ipc_file_to_batches;
+use lancedb::ipc::{ipc_file_to_batches, ipc_file_to_schema};
 use lancedb::table::{
     AddDataMode, ColumnAlteration as LanceColumnAlteration, Duration, NewColumnTransform,
     OptimizeAction, OptimizeOptions, Table as LanceDbTable,
@@ -279,6 +279,23 @@ impl Table {
         Ok(res.into())
     }

+
#[napi(catch_unwind)] // Adds the columns described by an Arrow schema; each new column starts out all-null.
+    pub async fn add_columns_with_schema(
+        &self,
+        schema_buf: Buffer, // bytes from which an Arrow schema is decoded via the IPC file reader below
+    ) -> napi::Result {
+        let schema = ipc_file_to_schema(schema_buf.to_vec()) // to_vec hands the decoder an owned copy of the bytes
+            .map_err(|e| napi::Error::from_reason(format!("Failed to read IPC schema: {}", e)))?; // decode failures surface as a napi error carrying this message
+
+        let transforms = NewColumnTransform::AllNulls(schema); // no SQL expression involved: columns come from the schema and are null-filled
+        let res = self
+            .inner_ref()? // propagates the error if the inner table handle is unavailable
+            .add_columns(transforms, None) // NOTE(review): meaning of the second argument (None) is not visible here — confirm
+            .await
+            .default_error()?; // presumably maps the lancedb error into a napi error — helper defined elsewhere, confirm
+        Ok(res.into()) // convert the result into this method's napi-facing return type
+    }
+
     #[napi(catch_unwind)]
     pub async fn alter_columns(
         &self,