mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-23 23:10:40 +00:00
nodejs append records api (#85)
This commit is contained in:
66
node/src/arrow.ts
Normal file
66
node/src/arrow.ts
Normal file
@@ -0,0 +1,66 @@
|
||||
// Copyright 2023 Lance Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import {
|
||||
Field,
|
||||
Float32,
|
||||
List,
|
||||
makeBuilder,
|
||||
RecordBatchFileWriter,
|
||||
Table,
|
||||
type Vector,
|
||||
vectorFromArray
|
||||
} from 'apache-arrow'
|
||||
|
||||
export function convertToTable (data: Array<Record<string, unknown>>): Table {
|
||||
if (data.length === 0) {
|
||||
throw new Error('At least one record needs to be provided')
|
||||
}
|
||||
|
||||
const columns = Object.keys(data[0])
|
||||
const records: Record<string, Vector> = {}
|
||||
|
||||
for (const columnsKey of columns) {
|
||||
if (columnsKey === 'vector') {
|
||||
const children = new Field<Float32>('item', new Float32())
|
||||
const list = new List(children)
|
||||
const listBuilder = makeBuilder({
|
||||
type: list
|
||||
})
|
||||
const vectorSize = (data[0].vector as any[]).length
|
||||
for (const datum of data) {
|
||||
if ((datum[columnsKey] as any[]).length !== vectorSize) {
|
||||
throw new Error(`Invalid vector size, expected ${vectorSize}`)
|
||||
}
|
||||
|
||||
listBuilder.append(datum[columnsKey])
|
||||
}
|
||||
records[columnsKey] = listBuilder.finish().toVector()
|
||||
} else {
|
||||
const values = []
|
||||
for (const datum of data) {
|
||||
values.push(datum[columnsKey])
|
||||
}
|
||||
records[columnsKey] = vectorFromArray(values)
|
||||
}
|
||||
}
|
||||
|
||||
return new Table(records)
|
||||
}
|
||||
|
||||
export async function fromRecordsToBuffer (data: Array<Record<string, unknown>>): Promise<Buffer> {
|
||||
const table = convertToTable(data)
|
||||
const writer = RecordBatchFileWriter.writeAll(table)
|
||||
return Buffer.from(await writer.toUint8Array())
|
||||
}
|
||||
@@ -13,19 +13,15 @@
|
||||
// limitations under the License.
|
||||
|
||||
import {
|
||||
Field,
|
||||
Float32,
|
||||
List,
|
||||
makeBuilder,
|
||||
RecordBatchFileWriter,
|
||||
Table as ArrowTable,
|
||||
type Table as ArrowTable,
|
||||
tableFromIPC,
|
||||
Vector,
|
||||
vectorFromArray
|
||||
Vector
|
||||
} from 'apache-arrow'
|
||||
import { fromRecordsToBuffer } from './arrow'
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
||||
const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch } = require('../native.js')
|
||||
const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch, tableAdd } = require('../native.js')
|
||||
|
||||
/**
|
||||
* Connect to a LanceDB instance at the given URI
|
||||
@@ -68,40 +64,7 @@ export class Connection {
|
||||
}
|
||||
|
||||
async createTable (name: string, data: Array<Record<string, unknown>>): Promise<Table> {
|
||||
if (data.length === 0) {
|
||||
throw new Error('At least one record needs to be provided')
|
||||
}
|
||||
|
||||
const columns = Object.keys(data[0])
|
||||
const records: Record<string, Vector> = {}
|
||||
|
||||
for (const columnsKey of columns) {
|
||||
if (columnsKey === 'vector') {
|
||||
const children = new Field<Float32>('item', new Float32())
|
||||
const list = new List(children)
|
||||
const listBuilder = makeBuilder({
|
||||
type: list
|
||||
})
|
||||
const vectorSize = (data[0].vector as any[]).length
|
||||
for (const datum of data) {
|
||||
if ((datum[columnsKey] as any[]).length !== vectorSize) {
|
||||
throw new Error(`Invalid vector size, expected ${vectorSize}`)
|
||||
}
|
||||
|
||||
listBuilder.append(datum[columnsKey])
|
||||
}
|
||||
records[columnsKey] = listBuilder.finish().toVector()
|
||||
} else {
|
||||
const values = []
|
||||
for (const datum of data) {
|
||||
values.push(datum[columnsKey])
|
||||
}
|
||||
records[columnsKey] = vectorFromArray(values)
|
||||
}
|
||||
}
|
||||
|
||||
const table = new ArrowTable(records)
|
||||
await this.createTableArrow(name, table)
|
||||
await tableCreate.call(this._db, name, await fromRecordsToBuffer(data))
|
||||
return await this.openTable(name)
|
||||
}
|
||||
|
||||
@@ -135,6 +98,21 @@ export class Table {
|
||||
search (queryVector: number[]): Query {
|
||||
return new Query(this._tbl, queryVector)
|
||||
}
|
||||
|
||||
/**
|
||||
* Insert records into this Table
|
||||
* @param data Records to be inserted into the Table
|
||||
*
|
||||
* @param mode Append / Overwrite existing records. Default: Append
|
||||
* @return The number of rows added to the table
|
||||
*/
|
||||
async add (data: Array<Record<string, unknown>>): Promise<number> {
|
||||
return tableAdd.call(this._tbl, await fromRecordsToBuffer(data), WriteMode.Append.toString())
|
||||
}
|
||||
|
||||
async overwrite (data: Array<Record<string, unknown>>): Promise<number> {
|
||||
return tableAdd.call(this._tbl, await fromRecordsToBuffer(data), WriteMode.Overwrite.toString())
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -194,3 +172,8 @@ export class Query {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
export enum WriteMode {
|
||||
Overwrite = 'overwrite',
|
||||
Append = 'append'
|
||||
}
|
||||
|
||||
@@ -90,6 +90,45 @@ describe('LanceDB client', function () {
|
||||
const results = await table.search([0.1, 0.3]).execute()
|
||||
assert.equal(results.length, 2)
|
||||
})
|
||||
|
||||
it('appends records to an existing table ', async function () {
|
||||
const dir = await track().mkdir('lancejs')
|
||||
const con = await lancedb.connect(dir)
|
||||
|
||||
const data = [
|
||||
{ id: 1, vector: [0.1, 0.2], price: 10 },
|
||||
{ id: 2, vector: [1.1, 1.2], price: 50 }
|
||||
]
|
||||
|
||||
const table = await con.createTable('vectors', data)
|
||||
const results = await table.search([0.1, 0.3]).execute()
|
||||
assert.equal(results.length, 2)
|
||||
|
||||
const dataAdd = [
|
||||
{ id: 3, vector: [2.1, 2.2], price: 10 },
|
||||
{ id: 4, vector: [3.1, 3.2], price: 50 }
|
||||
]
|
||||
await table.add(dataAdd)
|
||||
const resultsAdd = await table.search([0.1, 0.3]).execute()
|
||||
assert.equal(resultsAdd.length, 4)
|
||||
})
|
||||
|
||||
it('overwrite all records in a table', async function () {
|
||||
const uri = await createTestDB()
|
||||
const con = await lancedb.connect(uri)
|
||||
|
||||
const table = await con.openTable('vectors')
|
||||
const results = await table.search([0.1, 0.3]).execute()
|
||||
assert.equal(results.length, 2)
|
||||
|
||||
const dataOver = [
|
||||
{ vector: [2.1, 2.2], price: 10, name: 'foo' },
|
||||
{ vector: [3.1, 3.2], price: 50, name: 'bar' }
|
||||
]
|
||||
await table.overwrite(dataOver)
|
||||
const resultsAdd = await table.search([0.1, 0.3]).execute()
|
||||
assert.equal(resultsAdd.length, 2)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
|
||||
Reference in New Issue
Block a user