diff --git a/nodejs/__test__/arrow.test.ts b/nodejs/__test__/arrow.test.ts index e9e07c867..417c51c30 100644 --- a/nodejs/__test__/arrow.test.ts +++ b/nodejs/__test__/arrow.test.ts @@ -63,6 +63,7 @@ describe.each([arrow15, arrow16, arrow17, arrow18])( tableFromIPC, DataType, Dictionary, + Uint8: ArrowUint8, // biome-ignore lint/suspicious/noExplicitAny: } = arrow; type Schema = ApacheArrow["Schema"]; @@ -362,6 +363,38 @@ describe.each([arrow15, arrow16, arrow17, arrow18])( ).toEqual(new Float64().toString()); }); + it("will infer FixedSizeList from Float32Array values", async function () { + const table = makeArrowTable([ + { id: "a", vector: new Float32Array([0.1, 0.2, 0.3]) }, + { id: "b", vector: new Float32Array([0.4, 0.5, 0.6]) }, + ]); + + expect(DataType.isFixedSizeList(table.getChild("vector")?.type)).toBe( + true, + ); + const vectorType = table.getChild("vector")?.type; + expect(vectorType.listSize).toBe(3); + expect(vectorType.children[0].type.toString()).toEqual( + new Float32().toString(), + ); + }); + + it("will infer FixedSizeList from Uint8Array values", async function () { + const table = makeArrowTable([ + { id: "a", vector: new Uint8Array([1, 2, 3]) }, + { id: "b", vector: new Uint8Array([4, 5, 6]) }, + ]); + + expect(DataType.isFixedSizeList(table.getChild("vector")?.type)).toBe( + true, + ); + const vectorType = table.getChild("vector")?.type; + expect(vectorType.listSize).toBe(3); + expect(vectorType.children[0].type.toString()).toEqual( + new ArrowUint8().toString(), + ); + }); + it("will use dictionary encoded strings if asked", async function () { const table = makeArrowTable([{ str: "hello" }]); expect(DataType.isUtf8(table.getChild("str")?.type)).toBe(true); diff --git a/nodejs/__test__/table.test.ts b/nodejs/__test__/table.test.ts index dc741dde6..0930ffc8a 100644 --- a/nodejs/__test__/table.test.ts +++ b/nodejs/__test__/table.test.ts @@ -2204,3 +2204,36 @@ describe("when creating an empty table", () => { expect((actualSchema.fields[1].type as Float64).precision).toBe(2); }); }); + +// Ensure we can create float32 arrays without using Arrow +// by utilizing native JS TypedArray support +// +// https://github.com/lancedb/lancedb/issues/3115 +describe("when creating a table with Float32Array vectors", () => { + let tmpDir: tmp.DirResult; + beforeEach(() => { + tmpDir = tmp.dirSync({ unsafeCleanup: true }); + }); + afterEach(() => { + tmpDir.removeCallback(); + }); + + it("should persist Float32Array as FixedSizeList in the LanceDB schema", async () => { + const db = await connect(tmpDir.name); + const table = await db.createTable("test", [ + { id: "a", vector: new Float32Array([0.1, 0.2, 0.3]) }, + { id: "b", vector: new Float32Array([0.4, 0.5, 0.6]) }, + ]); + + const schema = await table.schema(); + const vectorField = schema.fields.find((f) => f.name === "vector"); + expect(vectorField).toBeDefined(); + expect(vectorField!.type).toBeInstanceOf(FixedSizeList); + + const fsl = vectorField!.type as FixedSizeList; + expect(fsl.listSize).toBe(3); + expect(fsl.children[0].type.typeId).toBe(Type.Float); + // precision: HALF=0, SINGLE=1, DOUBLE=2 + expect((fsl.children[0].type as Float32).precision).toBe(1); + }); +}); diff --git a/nodejs/lancedb/arrow.ts b/nodejs/lancedb/arrow.ts index 0d1a25369..7fff42f47 100644 --- a/nodejs/lancedb/arrow.ts +++ b/nodejs/lancedb/arrow.ts @@ -20,6 +20,8 @@ import { Float32, Float64, Int, + Int8, + Int16, Int32, Int64, LargeBinary, @@ -35,6 +37,8 @@ import { Timestamp, Type, Uint8, + Uint16, + Uint32, Utf8, Vector, makeVector as arrowMakeVector, @@ -529,7 +533,8 @@ function isObject(value: unknown): value is Record { !(value instanceof Date) && !(value instanceof Set) && !(value instanceof Map) && - !(value instanceof Buffer) + !(value instanceof Buffer) && + !ArrayBuffer.isView(value) ); } @@ -588,6 +593,13 @@ function inferType( return new Bool(); } else if (value instanceof Buffer) { return new Binary(); + } else if (ArrayBuffer.isView(value) && !(value instanceof DataView)) { + const info = typedArrayToArrowType(value); + if (info !== undefined) { + const child = new Field("item", info.elementType, true); + return new FixedSizeList(info.length, child); + } + return undefined; } else if (Array.isArray(value)) { if (value.length === 0) { return undefined; // Without any values we can't infer the type @@ -746,6 +758,32 @@ function makeListVector(lists: unknown[][]): Vector { return listBuilder.finish().toVector(); } +/** + * Map a JS TypedArray instance to the corresponding Arrow element DataType + * and its length. Returns undefined if the value is not a recognized TypedArray. + */ +function typedArrayToArrowType( + value: ArrayBufferView, +): { elementType: DataType; length: number } | undefined { + if (value instanceof Float32Array) + return { elementType: new Float32(), length: value.length }; + if (value instanceof Float64Array) + return { elementType: new Float64(), length: value.length }; + if (value instanceof Uint8Array) + return { elementType: new Uint8(), length: value.length }; + if (value instanceof Uint16Array) + return { elementType: new Uint16(), length: value.length }; + if (value instanceof Uint32Array) + return { elementType: new Uint32(), length: value.length }; + if (value instanceof Int8Array) + return { elementType: new Int8(), length: value.length }; + if (value instanceof Int16Array) + return { elementType: new Int16(), length: value.length }; + if (value instanceof Int32Array) + return { elementType: new Int32(), length: value.length }; + return undefined; +} + /** Helper function to convert an Array of JS values to an Arrow Vector */ function makeVector( values: unknown[], @@ -814,6 +852,16 @@ function makeVector( "makeVector cannot infer the type if all values are null or undefined", ); } + if (ArrayBuffer.isView(sampleValue) && !(sampleValue instanceof DataView)) { + const info = typedArrayToArrowType(sampleValue); + if (info !== undefined) { + const fslType = new FixedSizeList( + info.length, + new Field("item", info.elementType, true), + ); + return vectorFromArray(values, fslType); + } + } if (Array.isArray(sampleValue)) { // Default Arrow inference doesn't handle list types return makeListVector(values as unknown[][]);