diff --git a/node/src/arrow.ts b/node/src/arrow.ts index 0a593088..36f278e6 100644 --- a/node/src/arrow.ts +++ b/node/src/arrow.ts @@ -18,7 +18,7 @@ import { List, makeBuilder, RecordBatchFileWriter, - Table, + Table, Utf8, type Vector, vectorFromArray } from 'apache-arrow' @@ -52,7 +52,12 @@ export function convertToTable (data: Array>): Table { for (const datum of data) { values.push(datum[columnsKey]) } - records[columnsKey] = vectorFromArray(values) + if (typeof values[0] === 'string') { + // `vectorFromArray` converts strings into dictionary vectors; passing an explicit `Utf8` type forces it back to a string column + records[columnsKey] = vectorFromArray(values, new Utf8()) + } else { + records[columnsKey] = vectorFromArray(values) + } } } diff --git a/node/src/test/test.ts b/node/src/test/test.ts index 9ab570e7..b185e17d 100644 --- a/node/src/test/test.ts +++ b/node/src/test/test.ts @@ -96,8 +96,8 @@ describe('LanceDB client', function () { const con = await lancedb.connect(dir) const data = [ - { id: 1, vector: [0.1, 0.2], price: 10 }, - { id: 2, vector: [1.1, 1.2], price: 50 } + { id: 1, vector: [0.1, 0.2], price: 10, name: 'a' }, + { id: 2, vector: [1.1, 1.2], price: 50, name: 'b' } ] const table = await con.createTable('vectors', data) @@ -105,8 +105,8 @@ describe('LanceDB client', function () { assert.equal(results.length, 2) const dataAdd = [ - { id: 3, vector: [2.1, 2.2], price: 10 }, - { id: 4, vector: [3.1, 3.2], price: 50 } + { id: 3, vector: [2.1, 2.2], price: 10, name: 'c' }, + { id: 4, vector: [3.1, 3.2], price: 50, name: 'd' } ] await table.add(dataAdd) const resultsAdd = await table.search([0.1, 0.3]).execute()