diff --git a/nodejs/__test__/arrow.test.ts b/nodejs/__test__/arrow.test.ts
index 26b1fdbf..e002ae3d 100644
--- a/nodejs/__test__/arrow.test.ts
+++ b/nodejs/__test__/arrow.test.ts
@@ -1039,3 +1039,33 @@ describe.each([arrow15, arrow16, arrow17, arrow18])(
     });
   },
 );
+
+// Regression test for the undefined values bug fix: undefined cells in nullable columns must read back as null
+describe("undefined values handling", () => {
+  it("should handle mixed undefined and actual values", () => {
+    const schema = new Schema([
+      new Field("text", new Utf8(), true), // nullable
+      new Field("number", new Int32(), true), // nullable
+      new Field("bool", new Bool(), true), // nullable
+    ]);
+
+    const data = [
+      { text: undefined, number: 42, bool: true },
+      { text: "hello", number: undefined, bool: false },
+      { text: "world", number: 123, bool: undefined },
+    ];
+    const table = makeArrowTable(data, { schema });
+
+    const result = table.toArray();
+    expect(result).toHaveLength(3);
+    expect(result[0].text).toBe(null);
+    expect(result[0].number).toBe(42);
+    expect(result[0].bool).toBe(true);
+    expect(result[1].text).toBe("hello");
+    expect(result[1].number).toBe(null);
+    expect(result[1].bool).toBe(false);
+    expect(result[2].text).toBe("world");
+    expect(result[2].number).toBe(123);
+    expect(result[2].bool).toBe(null);
+  });
+});
diff --git a/nodejs/lancedb/arrow.ts b/nodejs/lancedb/arrow.ts
index 306926b8..e73f4ba2 100644
--- a/nodejs/lancedb/arrow.ts
+++ b/nodejs/lancedb/arrow.ts
@@ -705,7 +705,7 @@ function transposeData(
       }
       return current;
     });
-    return makeVector(values, field.type);
+    return makeVector(values, field.type, undefined, field.nullable);
   }
 }
 
@@ -752,9 +752,15 @@ function makeVector(
   values: unknown[],
   type?: DataType,
   stringAsDictionary?: boolean,
+  nullable?: boolean, // when truthy, undefined entries in `values` are replaced with null
   // biome-ignore lint/suspicious/noExplicitAny: skip
 ): Vector {
   if (type !== undefined) {
+    // Convert undefined values to null for nullable fields (`values` is rebound to a new array; the caller's array is not modified)
+    if (nullable) {
+      values = values.map((v) => (v === undefined ? null : v));
+    }
+
     // workaround for: https://github.com/apache/arrow-js/issues/68
     if (DataType.isBool(type)) {
       const hasNonNullValue = values.some((v) => v !== null && v !== undefined);
@@ -769,6 +775,7 @@ function makeVector(
         return arrowMakeVector(data);
       }
     }
+
     // No need for inference, let Arrow create it
     if (type instanceof Int) {
       if (DataType.isInt(type) && type.bitWidth === 64) {
@@ -893,7 +900,12 @@ async function applyEmbeddingsFromMetadata(
   for (const field of schema.fields) {
     if (!(field.name in columns)) {
       const nullValues = new Array(table.numRows).fill(null);
-      columns[field.name] = makeVector(nullValues, field.type);
+      columns[field.name] = makeVector(
+        nullValues,
+        field.type,
+        undefined, // stringAsDictionary: not specified
+        field.nullable,
+      );
     }
   }
 
@@ -957,7 +969,12 @@ async function applyEmbeddings(
     } else if (schema != null) {
       const destField = schema.fields.find((f) => f.name === destColumn);
       if (destField != null) {
-        newColumns[destColumn] = makeVector([], destField.type);
+        newColumns[destColumn] = makeVector(
+          [],
+          destField.type,
+          undefined, // stringAsDictionary: not specified
+          destField.nullable,
+        );
       } else {
         throw new Error(
           `Attempt to apply embeddings to an empty table failed because schema was missing embedding column '${destColumn}'`,