nodejs create_table (#75)

This commit is contained in:
gsilvestrin
2023-05-15 19:00:17 -07:00
committed by GitHub
parent 92d810eac4
commit 395c7460d5
14 changed files with 404 additions and 90 deletions

View File

@@ -12,16 +12,26 @@
// See the License for the specific language governing permissions and
// limitations under the License.
import { tableFromIPC, Vector } from 'apache-arrow'
import {
Field,
Float32,
List,
makeBuilder,
RecordBatchFileWriter,
Table as ArrowTable,
tableFromIPC,
Vector,
vectorFromArray
} from 'apache-arrow'
// eslint-disable-next-line @typescript-eslint/no-var-requires
const { databaseNew, databaseTableNames, databaseOpenTable, tableSearch } = require('../index.node')
const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch } = require('../index.node')
/**
* Connect to a LanceDB instance at the given URI
* @param uri The URI of the database.
*/
export function connect (uri: string): Connection {
export async function connect (uri: string): Promise<Connection> {
return new Connection(uri)
}
@@ -44,7 +54,7 @@ export class Connection {
/**
* Get the names of all tables in the database.
*/
tableNames (): string[] {
async tableNames (): Promise<string[]> {
return databaseTableNames.call(this._db)
}
@@ -56,6 +66,50 @@ export class Connection {
const tbl = await databaseOpenTable.call(this._db, name)
return new Table(tbl, name)
}
/**
 * Create a new table from an array of plain JavaScript records and open it.
 *
 * Column names are taken from the keys of the first record. The column named
 * `vector` is built as an Arrow list of Float32 items, and every record must
 * supply a vector with the same length as the first record's vector. All other
 * columns are converted with `vectorFromArray`.
 *
 * @param name The name of the table to create.
 * @param data The records to store; at least one record is required.
 * @returns The table returned by `createTableArrow` for the newly written data.
 * @throws Error if `data` is empty, if the first record's vector is missing, or
 *         if any record's vector is missing or differs in length from the first.
 */
async createTable (name: string, data: Array<Record<string, unknown>>): Promise<Table> {
  if (data.length === 0) {
    throw new Error('At least one record needs to be provided')
  }

  const columns = Object.keys(data[0])
  const records: Record<string, Vector> = {}

  for (const columnsKey of columns) {
    if (columnsKey === 'vector') {
      const children = new Field<Float32>('item', new Float32())
      const list = new List(children)
      const listBuilder = makeBuilder({
        type: list
      })

      // The first record defines the expected dimensionality of every vector.
      const firstVector = data[0][columnsKey] as ArrayLike<unknown> | null | undefined
      if (firstVector == null) {
        throw new Error('The first record must provide a vector')
      }
      const vectorSize = firstVector.length

      for (const datum of data) {
        const vector = datum[columnsKey] as ArrayLike<unknown> | null | undefined
        // A missing vector is reported the same way as a wrongly sized one,
        // instead of surfacing as a TypeError from reading `.length`.
        if (vector == null || vector.length !== vectorSize) {
          throw new Error(`Invalid vector size, expected ${vectorSize}`)
        }
        listBuilder.append(vector)
      }
      records[columnsKey] = listBuilder.finish().toVector()
    } else {
      const values = data.map((datum) => datum[columnsKey])
      records[columnsKey] = vectorFromArray(values)
    }
  }

  const table = new ArrowTable(records)
  // createTableArrow already opens the table it just wrote, so its result is
  // returned directly rather than opening the table a second time.
  return await this.createTableArrow(name, table)
}
/**
 * Create a new table from an Arrow table and open it.
 *
 * The Arrow table is written out with `RecordBatchFileWriter`, the resulting
 * bytes are wrapped in a Node.js `Buffer` and handed to the native
 * `tableCreate` binding, and the table is then opened by name.
 *
 * @param name The name of the table to create.
 * @param table The Arrow table holding the data to write.
 * @returns The table opened via `openTable` after the native call completes.
 */
async createTableArrow (name: string, table: ArrowTable): Promise<Table> {
  const serialized = await RecordBatchFileWriter.writeAll(table).toUint8Array()
  const payload = Buffer.from(serialized)
  await tableCreate.call(this._db, name, payload)
  return await this.openTable(name)
}
}
/**
@@ -93,7 +147,7 @@ export class Query {
private readonly _refine_factor?: number
private readonly _nprobes: number
private readonly _columns?: string[]
private readonly _where?: string
private _filter?: string
private readonly _metric = 'L2'
constructor (tbl: any, queryVector: number[]) {
@@ -103,22 +157,29 @@ export class Query {
this._nprobes = 20
this._refine_factor = undefined
this._columns = undefined
this._where = undefined
this._filter = undefined
}
set limit (value: number) {
/**
 * Store the given value as this query's limit.
 * The stored value is what `execute` passes to the native search call.
 * @param value The limit to store.
 * @returns This query, so calls can be chained.
 */
limit (value: number): Query {
this._limit = value
return this
}
get limit (): number {
return this._limit
/**
 * Store a filter expression for this query.
 * When a filter is set, `execute` forwards it to the native search call as an
 * extra argument.
 * @param value The filter expression. NOTE(review): the accepted syntax is
 * defined by the native layer and is not visible here — confirm before documenting it further.
 * @returns This query, so calls can be chained.
 */
filter (value: string): Query {
this._filter = value
return this
}
/**
* Execute the query and return the results as an Array of Objects
*/
async execute (): Promise<unknown[]> {
const buffer = await tableSearch.call(this._tbl, this._query_vector, this._limit)
async execute<T = Record<string, unknown>> (): Promise<T[]> {
let buffer;
if (this._filter != null) {
buffer = await tableSearch.call(this._tbl, this._query_vector, this._limit, this._filter)
} else {
buffer = await tableSearch.call(this._tbl, this._query_vector, this._limit)
}
const data = tableFromIPC(buffer)
return data.toArray().map((entry: Record<string, unknown>) => {
const newObject: Record<string, unknown> = {}
@@ -129,14 +190,7 @@ export class Query {
newObject[key] = entry[key]
}
})
return newObject
return newObject as unknown as T
})
}
/**
* Execute the query and return the results as an Array of the generic type provided
*/
async execute_cast<T>(): Promise<T[]> {
return await this.execute() as T[]
}
}

View File

@@ -14,67 +14,94 @@
import { describe } from 'mocha'
import { assert } from 'chai'
import { track } from 'temp'
import * as lancedb from '../index'
describe('LanceDB client', function () {
describe('open a connection to lancedb', function () {
const con = lancedb.connect('.../../sample-lancedb')
it('should have a valid url', function () {
assert.equal(con.uri, '.../../sample-lancedb')
describe('when creating a connection to lancedb', function () {
it('should have a valid url', async function () {
const uri = await createTestDB()
const con = await lancedb.connect(uri)
assert.equal(con.uri, uri)
})
it('should return the existing table names', function () {
assert.deepEqual(con.tableNames(), ['my_table'])
it('should return the existing table names', async function () {
const uri = await createTestDB()
const con = await lancedb.connect(uri)
assert.deepEqual(await con.tableNames(), ['vectors'])
})
})
describe('when querying an existing dataset', function () {
it('should open a table', async function () {
const uri = await createTestDB()
const con = await lancedb.connect(uri)
const table = await con.openTable('vectors')
assert.equal(table.name, 'vectors')
})
describe('open a table from a connection', function () {
const tablePromise = con.openTable('my_table')
it('execute a query', async function () {
const uri = await createTestDB()
const con = await lancedb.connect(uri)
const table = await con.openTable('vectors')
const results = await table.search([0.1, 0.3]).execute()
it('should have a valid name', async function () {
const table = await tablePromise
assert.equal(table.name, 'my_table')
})
assert.equal(results.length, 2)
assert.equal(results[0].price, 10)
const vector = results[0].vector as Float32Array
assert.approximately(vector[0], 0.0, 0.2)
assert.approximately(vector[0], 0.1, 0.3)
})
class MyResult {
vector: Float32Array = new Float32Array(0)
price: number = 0
item: string = ''
}
it('limits # of results', async function () {
const uri = await createTestDB()
const con = await lancedb.connect(uri)
const table = await con.openTable('vectors')
const results = await table.search([0.1, 0.3]).limit(1).execute()
assert.equal(results.length, 1)
assert.equal(results[0].id, 1)
})
it('execute a query', async function () {
const table = await tablePromise
const builder = table.search([0.1, 0.3])
const results = await builder.execute() as MyResult[]
it('uses a filter', async function () {
const uri = await createTestDB()
const con = await lancedb.connect(uri)
const table = await con.openTable('vectors')
const results = await table.search([0.1, 0.3]).filter('id == 2').execute()
assert.equal(results.length, 1)
assert.equal(results[0].id, 2)
})
})
assert.equal(results.length, 2)
assert.equal(results[0].item, 'foo')
assert.equal(results[0].price, 10)
assert.approximately(results[0].vector[0], 3.1, 0.1)
assert.approximately(results[0].vector[1], 4.1, 0.1)
})
describe('when creating a new dataset', function () {
it('creates a new table from javascript objects', async function () {
const dir = await track().mkdir('lancejs')
const con = await lancedb.connect(dir)
it('execute a query and type cast the result', async function () {
const table = await tablePromise
const data = [
{ id: 1, vector: [0.1, 0.2], price: 10 },
{ id: 2, vector: [1.1, 1.2], price: 50 }
]
const builder = table.search([0.1, 0.3])
const results = await builder.execute_cast<MyResult>()
assert.equal(results.length, 2)
assert.equal(results[0].item, 'foo')
assert.equal(results[0].price, 10)
assert.approximately(results[0].vector[0], 3.1, 0.1)
assert.approximately(results[0].vector[1], 4.1, 0.1)
})
const tableName = `vectors_${Math.floor(Math.random() * 100)}`
const table = await con.createTable(tableName, data)
assert.equal(table.name, tableName)
it('limits # of results', async function () {
const table = await tablePromise
const builder = table.search([0.1, 0.3])
builder.limit = 1
const results = await builder.execute() as MyResult[]
assert.equal(results.length, 1)
})
const results = await table.search([0.1, 0.3]).execute()
assert.equal(results.length, 2)
})
})
})
/**
 * Build a throwaway database for a test case.
 *
 * Creates a tracked temporary directory, connects to it, and creates a
 * `vectors` table holding two fixed records.
 *
 * @returns The path of the temporary directory, usable as a connection URI.
 */
async function createTestDB (): Promise<string> {
  const dbDir = await track().mkdir('lancejs')
  const connection = await lancedb.connect(dbDir)

  await connection.createTable('vectors', [
    { id: 1, vector: [0.1, 0.2], name: 'foo', price: 10, is_active: true },
    { id: 2, vector: [1.1, 1.2], name: 'bar', price: 50, is_active: false }
  ])

  return dbDir
}