fix: wrap in BigInt to avoid upstream bug (#962)

Closes #960
This commit is contained in:
Will Jones
2024-02-13 08:13:56 -08:00
committed by Weston Pace
parent f78fe721db
commit 68115f1369
3 changed files with 35 additions and 11 deletions

View File

@@ -13,6 +13,7 @@
// limitations under the License.
import {
Int64,
Field,
type FixedSizeListBuilder,
Float32,
@@ -30,7 +31,8 @@ import {
RecordBatch,
makeData,
Struct,
type Float
type Float,
type DataType
} from 'apache-arrow'
import { type EmbeddingFunction } from './index'
@@ -142,15 +144,18 @@ export function makeArrowTable (
// TODO: sample dataset to find missing columns
const columnNames = Object.keys(data[0])
for (const colName of columnNames) {
const values = data.map((datum) => datum[colName])
let values = data.map((datum) => datum[colName])
let vector: Vector
if (opt.schema !== undefined) {
// Explicit schema is provided, highest priority
vector = vectorFromArray(
values,
opt.schema?.fields.filter((f) => f.name === colName)[0]?.type
)
const fieldType: DataType | undefined = opt.schema.fields.filter((f) => f.name === colName)[0]?.type as DataType
if (fieldType instanceof Int64) {
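// Wrap each value in BigInt to work around an upstream apache-arrow bug: https://github.com/apache/arrow/issues/40051
// NOTE(review): BigInt(null) and BigInt(undefined) throw a TypeError, so missing values in a nullable Int64 column would need a guard here.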
// eslint-disable-next-line @typescript-eslint/no-unsafe-argument
values = values.map((v) => BigInt(v))
}
vector = vectorFromArray(values, fieldType)
} else {
const vectorColumnOptions = opt.vectorColumns[colName]
if (vectorColumnOptions !== undefined) {

View File

@@ -14,6 +14,7 @@
import { makeArrowTable, toBuffer } from "../vectordb/arrow";
import {
Int64,
Field,
FixedSizeList,
Float16,
@@ -104,3 +105,16 @@ test("2 vector columns", function () {
const actualSchema = actual.schema;
expect(actualSchema.toString()).toEqual(schema.toString());
});
test("handles int64", function () {
  // Regression test for https://github.com/lancedb/lancedb/issues/960:
  // plain JS numbers supplied for an Int64 column must be accepted when an
  // explicit schema is given.
  const schema = new Schema([new Field("x", new Int64(), true)]);
  const table = makeArrowTable([{ x: 1 }, { x: 2 }, { x: 3 }], { schema });

  expect(table.schema).toEqual(schema);

  // Check the data as well as the schema, so that a conversion which drops or
  // corrupts values cannot pass unnoticed. Int64 values are read back as bigint.
  expect(table.numRows).toEqual(3);
  expect(table.getChild("x")?.toJSON()).toEqual([
    BigInt(1),
    BigInt(2),
    BigInt(3),
  ]);
});

View File

@@ -13,6 +13,7 @@
// limitations under the License.
import {
Int64,
Field,
FixedSizeList,
Float,
@@ -23,6 +24,7 @@ import {
Vector,
vectorFromArray,
tableToIPC,
DataType,
} from "apache-arrow";
/** Data type accepted by NodeJS SDK */
@@ -137,15 +139,18 @@ export function makeArrowTable(
const columnNames = Object.keys(data[0]);
for (const colName of columnNames) {
// eslint-disable-next-line @typescript-eslint/no-unsafe-return
const values = data.map((datum) => datum[colName]);
let values = data.map((datum) => datum[colName]);
let vector: Vector;
if (opt.schema !== undefined) {
// Explicit schema is provided, highest priority
vector = vectorFromArray(
values,
opt.schema?.fields.filter((f) => f.name === colName)[0]?.type
);
const fieldType: DataType | undefined = opt.schema.fields.filter((f) => f.name === colName)[0]?.type as DataType;
if (fieldType instanceof Int64) {
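// Wrap each value in BigInt to work around an upstream apache-arrow bug: https://github.com/apache/arrow/issues/40051
// NOTE(review): BigInt(null) and BigInt(undefined) throw a TypeError, so missing values in a nullable Int64 column would need a guard here.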
// eslint-disable-next-line @typescript-eslint/no-unsafe-argument
values = values.map((v) => BigInt(v));
}
vector = vectorFromArray(values, fieldType);
} else {
const vectorColumnOptions = opt.vectorColumns[colName];
if (vectorColumnOptions !== undefined) {