diff --git a/node/src/arrow.ts b/node/src/arrow.ts index 09f83116..7d427596 100644 --- a/node/src/arrow.ts +++ b/node/src/arrow.ts @@ -13,6 +13,7 @@ // limitations under the License. import { + Int64, Field, type FixedSizeListBuilder, Float32, @@ -30,7 +31,8 @@ import { RecordBatch, makeData, Struct, - type Float + type Float, + type DataType } from 'apache-arrow' import { type EmbeddingFunction } from './index' @@ -142,15 +144,18 @@ export function makeArrowTable ( // TODO: sample dataset to find missing columns const columnNames = Object.keys(data[0]) for (const colName of columnNames) { - const values = data.map((datum) => datum[colName]) + let values = data.map((datum) => datum[colName]) let vector: Vector if (opt.schema !== undefined) { // Explicit schema is provided, highest priority - vector = vectorFromArray( - values, - opt.schema?.fields.filter((f) => f.name === colName)[0]?.type - ) + const fieldType: DataType | undefined = opt.schema.fields.filter((f) => f.name === colName)[0]?.type as DataType + if (fieldType instanceof Int64) { + // wrap in BigInt to avoid bug: https://github.com/apache/arrow/issues/40051 + // eslint-disable-next-line @typescript-eslint/no-unsafe-argument + values = values.map((v) => BigInt(v)) + } + vector = vectorFromArray(values, fieldType) } else { const vectorColumnOptions = opt.vectorColumns[colName] if (vectorColumnOptions !== undefined) { diff --git a/nodejs/__test__/arrow.test.ts b/nodejs/__test__/arrow.test.ts index 28b598ee..907e25b1 100644 --- a/nodejs/__test__/arrow.test.ts +++ b/nodejs/__test__/arrow.test.ts @@ -14,6 +14,7 @@ import { makeArrowTable, toBuffer } from "../vectordb/arrow"; import { + Int64, Field, FixedSizeList, Float16, @@ -104,3 +105,16 @@ test("2 vector columns", function () { const actualSchema = actual.schema; expect(actualSchema.toString()).toEqual(schema.toString()); }); + +test("handles int64", function() { + // https://github.com/lancedb/lancedb/issues/960 + const schema = new Schema([ + new Field("x", new Int64(), true) + ]); + const table = makeArrowTable([ + { x: 1 }, + { x: 2 }, + { x: 3 } + ], { schema }); + expect(table.schema).toEqual(schema); +}) \ No newline at end of file diff --git a/nodejs/vectordb/arrow.ts b/nodejs/vectordb/arrow.ts index d0456f12..1923eaf0 100644 --- a/nodejs/vectordb/arrow.ts +++ b/nodejs/vectordb/arrow.ts @@ -13,6 +13,7 @@ // limitations under the License. import { + Int64, Field, FixedSizeList, Float, @@ -23,6 +24,7 @@ import { Vector, vectorFromArray, tableToIPC, + DataType, } from "apache-arrow"; /** Data type accepted by NodeJS SDK */ @@ -137,15 +139,18 @@ export function makeArrowTable( const columnNames = Object.keys(data[0]); for (const colName of columnNames) { // eslint-disable-next-line @typescript-eslint/no-unsafe-return - const values = data.map((datum) => datum[colName]); + let values = data.map((datum) => datum[colName]); let vector: Vector; if (opt.schema !== undefined) { // Explicit schema is provided, highest priority - vector = vectorFromArray( - values, - opt.schema?.fields.filter((f) => f.name === colName)[0]?.type - ); + const fieldType: DataType | undefined = opt.schema.fields.filter((f) => f.name === colName)[0]?.type as DataType; + if (fieldType instanceof Int64) { + // wrap in BigInt to avoid bug: https://github.com/apache/arrow/issues/40051 + // eslint-disable-next-line @typescript-eslint/no-unsafe-argument + values = values.map((v) => BigInt(v)); + } + vector = vectorFromArray(values, fieldType); } else { const vectorColumnOptions = opt.vectorColumns[colName]; if (vectorColumnOptions !== undefined) {