nodejs append records api (#85)

This commit is contained in:
gsilvestrin
2023-05-18 15:13:57 -07:00
committed by GitHub
parent 61b9479bd9
commit e28fe7b468
8 changed files with 283 additions and 73 deletions

66
node/src/arrow.ts Normal file
View File

@@ -0,0 +1,66 @@
// Copyright 2023 Lance Developers.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import {
Field,
Float32,
List,
makeBuilder,
RecordBatchFileWriter,
Table,
type Vector,
vectorFromArray
} from 'apache-arrow'
export function convertToTable (data: Array<Record<string, unknown>>): Table {
if (data.length === 0) {
throw new Error('At least one record needs to be provided')
}
const columns = Object.keys(data[0])
const records: Record<string, Vector> = {}
for (const columnsKey of columns) {
if (columnsKey === 'vector') {
const children = new Field<Float32>('item', new Float32())
const list = new List(children)
const listBuilder = makeBuilder({
type: list
})
const vectorSize = (data[0].vector as any[]).length
for (const datum of data) {
if ((datum[columnsKey] as any[]).length !== vectorSize) {
throw new Error(`Invalid vector size, expected ${vectorSize}`)
}
listBuilder.append(datum[columnsKey])
}
records[columnsKey] = listBuilder.finish().toVector()
} else {
const values = []
for (const datum of data) {
values.push(datum[columnsKey])
}
records[columnsKey] = vectorFromArray(values)
}
}
return new Table(records)
}
export async function fromRecordsToBuffer (data: Array<Record<string, unknown>>): Promise<Buffer> {
const table = convertToTable(data)
const writer = RecordBatchFileWriter.writeAll(table)
return Buffer.from(await writer.toUint8Array())
}

View File

@@ -13,19 +13,15 @@
// limitations under the License.
import {
Field,
Float32,
List,
makeBuilder,
RecordBatchFileWriter,
Table as ArrowTable,
type Table as ArrowTable,
tableFromIPC,
Vector,
vectorFromArray
Vector
} from 'apache-arrow'
import { fromRecordsToBuffer } from './arrow'
// eslint-disable-next-line @typescript-eslint/no-var-requires
const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch } = require('../native.js')
const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch, tableAdd } = require('../native.js')
/**
* Connect to a LanceDB instance at the given URI
@@ -68,40 +64,7 @@ export class Connection {
}
async createTable (name: string, data: Array<Record<string, unknown>>): Promise<Table> {
if (data.length === 0) {
throw new Error('At least one record needs to be provided')
}
const columns = Object.keys(data[0])
const records: Record<string, Vector> = {}
for (const columnsKey of columns) {
if (columnsKey === 'vector') {
const children = new Field<Float32>('item', new Float32())
const list = new List(children)
const listBuilder = makeBuilder({
type: list
})
const vectorSize = (data[0].vector as any[]).length
for (const datum of data) {
if ((datum[columnsKey] as any[]).length !== vectorSize) {
throw new Error(`Invalid vector size, expected ${vectorSize}`)
}
listBuilder.append(datum[columnsKey])
}
records[columnsKey] = listBuilder.finish().toVector()
} else {
const values = []
for (const datum of data) {
values.push(datum[columnsKey])
}
records[columnsKey] = vectorFromArray(values)
}
}
const table = new ArrowTable(records)
await this.createTableArrow(name, table)
await tableCreate.call(this._db, name, await fromRecordsToBuffer(data))
return await this.openTable(name)
}
@@ -135,6 +98,21 @@ export class Table {
search (queryVector: number[]): Query {
return new Query(this._tbl, queryVector)
}
/**
* Insert records into this Table
* @param data Records to be inserted into the Table
*
* @param mode Append / Overwrite existing records. Default: Append
* @return The number of rows added to the table
*/
async add (data: Array<Record<string, unknown>>): Promise<number> {
return tableAdd.call(this._tbl, await fromRecordsToBuffer(data), WriteMode.Append.toString())
}
async overwrite (data: Array<Record<string, unknown>>): Promise<number> {
return tableAdd.call(this._tbl, await fromRecordsToBuffer(data), WriteMode.Overwrite.toString())
}
}
/**
@@ -194,3 +172,8 @@ export class Query {
})
}
}
export enum WriteMode {
Overwrite = 'overwrite',
Append = 'append'
}

View File

@@ -90,6 +90,45 @@ describe('LanceDB client', function () {
const results = await table.search([0.1, 0.3]).execute()
assert.equal(results.length, 2)
})
it('appends records to an existing table ', async function () {
const dir = await track().mkdir('lancejs')
const con = await lancedb.connect(dir)
const data = [
{ id: 1, vector: [0.1, 0.2], price: 10 },
{ id: 2, vector: [1.1, 1.2], price: 50 }
]
const table = await con.createTable('vectors', data)
const results = await table.search([0.1, 0.3]).execute()
assert.equal(results.length, 2)
const dataAdd = [
{ id: 3, vector: [2.1, 2.2], price: 10 },
{ id: 4, vector: [3.1, 3.2], price: 50 }
]
await table.add(dataAdd)
const resultsAdd = await table.search([0.1, 0.3]).execute()
assert.equal(resultsAdd.length, 4)
})
it('overwrite all records in a table', async function () {
const uri = await createTestDB()
const con = await lancedb.connect(uri)
const table = await con.openTable('vectors')
const results = await table.search([0.1, 0.3]).execute()
assert.equal(results.length, 2)
const dataOver = [
{ vector: [2.1, 2.2], price: 10, name: 'foo' },
{ vector: [3.1, 3.2], price: 50, name: 'bar' }
]
await table.overwrite(dataOver)
const resultsAdd = await table.search([0.1, 0.3]).execute()
assert.equal(resultsAdd.length, 2)
})
})
})