mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-07 20:32:59 +00:00
bugfix: string columns should be converted to Utf8Array (#94)
This commit is contained in:
@@ -18,7 +18,7 @@ import {
|
||||
List,
|
||||
makeBuilder,
|
||||
RecordBatchFileWriter,
|
||||
Table,
|
||||
Table, Utf8,
|
||||
type Vector,
|
||||
vectorFromArray
|
||||
} from 'apache-arrow'
|
||||
@@ -52,7 +52,12 @@ export function convertToTable (data: Array<Record<string, unknown>>): Table {
|
||||
for (const datum of data) {
|
||||
values.push(datum[columnsKey])
|
||||
}
|
||||
records[columnsKey] = vectorFromArray(values)
|
||||
if (typeof values[0] === 'string') {
|
||||
// `vectorFromArray` converts strings into dictionary vectors, forcing it back to a string column
|
||||
records[columnsKey] = vectorFromArray(values, new Utf8())
|
||||
} else {
|
||||
records[columnsKey] = vectorFromArray(values)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -96,8 +96,8 @@ describe('LanceDB client', function () {
|
||||
const con = await lancedb.connect(dir)
|
||||
|
||||
const data = [
|
||||
{ id: 1, vector: [0.1, 0.2], price: 10 },
|
||||
{ id: 2, vector: [1.1, 1.2], price: 50 }
|
||||
{ id: 1, vector: [0.1, 0.2], price: 10, name: 'a' },
|
||||
{ id: 2, vector: [1.1, 1.2], price: 50, name: 'b' }
|
||||
]
|
||||
|
||||
const table = await con.createTable('vectors', data)
|
||||
@@ -105,8 +105,8 @@ describe('LanceDB client', function () {
|
||||
assert.equal(results.length, 2)
|
||||
|
||||
const dataAdd = [
|
||||
{ id: 3, vector: [2.1, 2.2], price: 10 },
|
||||
{ id: 4, vector: [3.1, 3.2], price: 50 }
|
||||
{ id: 3, vector: [2.1, 2.2], price: 10, name: 'c' },
|
||||
{ id: 4, vector: [3.1, 3.2], price: 50, name: 'd' }
|
||||
]
|
||||
await table.add(dataAdd)
|
||||
const resultsAdd = await table.search([0.1, 0.3]).execute()
|
||||
|
||||
Reference in New Issue
Block a user