diff --git a/Cargo.lock b/Cargo.lock index f219d625..71da4efe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4135,7 +4135,7 @@ dependencies = [ [[package]] name = "lancedb" -version = "0.19.1-beta.0" +version = "0.19.1-beta.1" dependencies = [ "arrow", "arrow-array", @@ -4222,7 +4222,7 @@ dependencies = [ [[package]] name = "lancedb-node" -version = "0.19.1-beta.0" +version = "0.19.1-beta.1" dependencies = [ "arrow-array", "arrow-ipc", @@ -4247,7 +4247,7 @@ dependencies = [ [[package]] name = "lancedb-nodejs" -version = "0.19.1-beta.0" +version = "0.19.1-beta.1" dependencies = [ "arrow-array", "arrow-ipc", @@ -4266,7 +4266,7 @@ dependencies = [ [[package]] name = "lancedb-python" -version = "0.22.1-beta.0" +version = "0.22.1-beta.1" dependencies = [ "arrow", "env_logger", diff --git a/nodejs/__test__/arrow.test.ts b/nodejs/__test__/arrow.test.ts index 0cb23866..78aef4c8 100644 --- a/nodejs/__test__/arrow.test.ts +++ b/nodejs/__test__/arrow.test.ts @@ -374,6 +374,71 @@ describe.each([arrow15, arrow16, arrow17, arrow18])( expect(table2.numRows).toBe(4); expect(table2.schema).toEqual(schema); }); + + it("should correctly retain values in nested struct fields", async function () { + // Define test data with nested struct + const testData = [ + { + id: "doc1", + vector: [1, 2, 3], + metadata: { + filePath: "/path/to/file1.ts", + startLine: 10, + endLine: 20, + text: "function test() { return true; }", + }, + }, + { + id: "doc2", + vector: [4, 5, 6], + metadata: { + filePath: "/path/to/file2.ts", + startLine: 30, + endLine: 40, + text: "function test2() { return false; }", + }, + }, + ]; + + // Create Arrow table from the data + const table = makeArrowTable(testData); + + // Verify schema has the nested struct fields + const metadataField = table.schema.fields.find( + (f) => f.name === "metadata", + ); + expect(metadataField).toBeDefined(); + // biome-ignore lint/suspicious/noExplicitAny: accessing fields in different Arrow versions + const childNames = metadataField?.type.children.map((c: any) => c.name); + expect(childNames).toEqual([ + "filePath", + "startLine", + "endLine", + "text", + ]); + + // Convert to buffer and back (simulating storage and retrieval) + const buf = await fromTableToBuffer(table); + const retrievedTable = tableFromIPC(buf); + + // Verify the retrieved table has the same structure + const rows = []; + for (let i = 0; i < retrievedTable.numRows; i++) { + rows.push(retrievedTable.get(i)); + } + + // Check values in the first row + const firstRow = rows[0]; + expect(firstRow.id).toBe("doc1"); + expect(firstRow.vector.toJSON()).toEqual([1, 2, 3]); + + // Verify metadata values are preserved (this is where the bug is) + expect(firstRow.metadata).toBeDefined(); + expect(firstRow.metadata.filePath).toBe("/path/to/file1.ts"); + expect(firstRow.metadata.startLine).toBe(10); + expect(firstRow.metadata.endLine).toBe(20); + expect(firstRow.metadata.text).toBe("function test() { return true; }"); + }); }); class DummyEmbedding extends EmbeddingFunction { diff --git a/nodejs/lancedb/arrow.ts b/nodejs/lancedb/arrow.ts index 4b3be487..944f62f7 100644 --- a/nodejs/lancedb/arrow.ts +++ b/nodejs/lancedb/arrow.ts @@ -639,8 +639,9 @@ function transposeData( ): Vector { if (field.type instanceof Struct) { const childFields = field.type.children; + const fullPath = [...path, field.name]; const childVectors = childFields.map((child) => { - return transposeData(data, child, [...path, child.name]); + return transposeData(data, child, fullPath); }); const structData = makeData({ type: field.type, @@ -652,7 +653,14 @@ function transposeData( const values = data.map((datum) => { let current: unknown = datum; for (const key of valuesPath) { - if (isObject(current) && Object.hasOwn(current, key)) { + if (current == null) { + return null; + } + + if ( + isObject(current) && + (Object.hasOwn(current, key) || key in current) + ) { current = current[key]; } else { return null;