feat(js): support list of string input (#755)

Add support for adding columns whose values are lists of strings (e.g., a
list of categorical labels).

Follow-up items: #757 #758
This commit is contained in:
Chang She
2024-01-02 20:55:33 -08:00
committed by Weston Pace
parent 24afea8c56
commit cd791a366b
2 changed files with 46 additions and 1 deletions

View File

@@ -20,7 +20,7 @@ import {
Utf8,
type Vector,
FixedSizeList,
vectorFromArray, type Schema, Table as ArrowTable, RecordBatchStreamWriter
vectorFromArray, type Schema, Table as ArrowTable, RecordBatchStreamWriter, List, Float64
} from 'apache-arrow'
import { type EmbeddingFunction } from './index'
@@ -59,6 +59,24 @@ export async function convertToTable<T> (data: Array<Record<string, unknown>>, e
if (typeof values[0] === 'string') {
// `vectorFromArray` converts strings into dictionary vectors, forcing it back to a string column
records[columnsKey] = vectorFromArray(values, new Utf8())
} else if (Array.isArray(values[0])) {
const elementType = getElementType(values[0])
let innerType
if (elementType === 'string') {
innerType = new Utf8()
} else if (elementType === 'number') {
innerType = new Float64()
} else {
// TODO: pass in schema if it exists, else keep going to the next element
throw new Error(`Unsupported array element type ${elementType}`)
}
const listBuilder = makeBuilder({
type: new List(new Field('item', innerType, true))
})
for (const value of values) {
listBuilder.append(value)
}
records[columnsKey] = listBuilder.finish().toVector()
} else {
records[columnsKey] = vectorFromArray(values)
}
@@ -68,6 +86,14 @@ export async function convertToTable<T> (data: Array<Record<string, unknown>>, e
return new ArrowTable(records)
}
/**
 * Determines the JavaScript `typeof` tag of the elements held in a list value.
 *
 * The type is taken from the first element that is neither `null` nor
 * `undefined`, so a leading missing entry (e.g. `[null, 'a']`) does not hide
 * the real element type behind `typeof null === 'object'`.
 *
 * @param arr the list whose element type should be inspected
 * @returns the `typeof` string of the first non-null element, or
 *          `'undefined'` when the list is empty or holds only null/undefined
 */
function getElementType (arr: any[]): string {
  for (const item of arr) {
    // Skip missing entries; they carry no information about the element type.
    if (item !== null && item !== undefined) {
      return typeof item
    }
  }
  return 'undefined'
}
// Creates a new Arrow ListBuilder that stores a Vector column
function newVectorBuilder (dim: number): FixedSizeListBuilder<Float32> {
return makeBuilder({