diff --git a/nodejs/__test__/table.test.ts b/nodejs/__test__/table.test.ts index 9e1946ee..145e7a10 100644 --- a/nodejs/__test__/table.test.ts +++ b/nodejs/__test__/table.test.ts @@ -211,8 +211,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])( }, ); - // TODO: https://github.com/lancedb/lancedb/issues/1832 - it.skip("should be able to omit nullable fields", async () => { + it("should be able to omit nullable fields", async () => { const db = await connect(tmpDir.name); const schema = new arrow.Schema([ new arrow.Field( @@ -236,23 +235,36 @@ describe.each([arrow15, arrow16, arrow17, arrow18])( await table.add([data3]); let res = await table.query().limit(10).toArray(); - const resVector = res.map((r) => r.get("vector").toArray()); + const resVector = res.map((r) => + r.vector ? Array.from(r.vector) : null, + ); expect(resVector).toEqual([null, data2.vector, data3.vector]); - const resItem = res.map((r) => r.get("item").toArray()); + const resItem = res.map((r) => r.item); expect(resItem).toEqual(["foo", null, "bar"]); - const resPrice = res.map((r) => r.get("price").toArray()); + const resPrice = res.map((r) => r.price); expect(resPrice).toEqual([10.0, 2.0, 3.0]); const data4 = { item: "foo" }; // We can't omit a column if it's not nullable - await expect(table.add([data4])).rejects.toThrow("Invalid user input"); + await expect(table.add([data4])).rejects.toThrow( + "Append with different schema", + ); // But we can alter columns to make them nullable await table.alterColumns([{ path: "price", nullable: true }]); await table.add([data4]); - res = (await table.query().limit(10).toArray()).map((r) => r.toJSON()); - expect(res).toEqual([data1, data2, data3, data4]); + res = (await table.query().limit(10).toArray()).map((r) => ({ + ...r.toJSON(), + vector: r.vector ? Array.from(r.vector) : null, + })); + // Rust fills missing nullable fields with null + expect(res).toEqual([ + { ...data1, vector: null }, + { ...data2, item: null }, + data3, + { ...data4, price: null, vector: null }, + ]); }); it("should be able to insert nullable data for non-nullable fields", async () => { @@ -330,6 +342,43 @@ describe.each([arrow15, arrow16, arrow17, arrow18])( const table = await db.createTable("my_table", data); expect(await table.countRows()).toEqual(2); }); + + it("should allow undefined and omitted nullable vector fields", async () => { + // Test for the bug: can't pass undefined or omit vector column + const db = await connect("memory://"); + const schema = new arrow.Schema([ + new arrow.Field("id", new arrow.Int32(), true), + new arrow.Field( + "vector", + new arrow.FixedSizeList( + 32, + new arrow.Field("item", new arrow.Float32(), true), + ), + true, // nullable = true + ), + ]); + const table = await db.createEmptyTable("test_table", schema); + + // Should not throw error for undefined value + await table.add([{ id: 0, vector: undefined }]); + + // Should not throw error for omitted field + await table.add([{ id: 1 }]); + + // Should still work for null + await table.add([{ id: 2, vector: null }]); + + // Should still work for actual vector + const testVector = new Array(32).fill(0.5); + await table.add([{ id: 3, vector: testVector }]); + expect(await table.countRows()).toEqual(4); + + const res = await table.query().limit(10).toArray(); + const resVector = res.map((r) => + r.vector ? Array.from(r.vector) : null, + ); + expect(resVector).toEqual([null, null, null, testVector]); + }); }, ); @@ -1454,7 +1503,9 @@ describe("when optimizing a dataset", () => { it("delete unverified", async () => { const version = await table.version(); - const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${version - 1}.manifest`; + const versionFile = `${tmpDir.name}/${table.name}.lance/_versions/${ + version - 1 + }.manifest`; fs.rmSync(versionFile); let stats = await table.optimize({ deleteUnverified: false }); diff --git a/nodejs/lancedb/arrow.ts b/nodejs/lancedb/arrow.ts index 9c1d9575..90cefc0f 100644 --- a/nodejs/lancedb/arrow.ts +++ b/nodejs/lancedb/arrow.ts @@ -1285,19 +1285,36 @@ function validateSchemaEmbeddings( if (isFixedSizeList(field.type)) { field = sanitizeField(field); if (data.length !== 0 && data?.[0]?.[field.name] === undefined) { + // Check if there's an embedding function registered for this field + let hasEmbeddingFunction = false; + + // Check schema metadata for embedding functions if (schema.metadata.has("embedding_functions")) { const embeddings = JSON.parse( schema.metadata.get("embedding_functions")!, ); - if ( - // biome-ignore lint/suspicious/noExplicitAny: we don't know the type of `f` - embeddings.find((f: any) => f["vectorColumn"] === field.name) === - undefined - ) { + // biome-ignore lint/suspicious/noExplicitAny: we don't know the type of `f` + if (embeddings.find((f: any) => f["vectorColumn"] === field.name)) { + hasEmbeddingFunction = true; + } + } + + // Check passed embedding function parameter + if (embeddings && embeddings.vectorColumn === field.name) { + hasEmbeddingFunction = true; + } + + // If the field is nullable AND there's no embedding function, allow undefined/omitted values + if (field.nullable && !hasEmbeddingFunction) { + fields.push(field); + } else { + // Either not nullable OR has embedding function - require explicit values + if (hasEmbeddingFunction) { + // Don't add to missingEmbeddingFields since this is expected to be filled by embedding function + fields.push(field); + } else { missingEmbeddingFields.push(field); } - } else { - missingEmbeddingFields.push(field); } } else { fields.push(field);