mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-27 08:50:39 +00:00
nodejs create_table (#75)
This commit is contained in:
@@ -12,16 +12,26 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
import { tableFromIPC, Vector } from 'apache-arrow'
|
||||
import {
|
||||
Field,
|
||||
Float32,
|
||||
List,
|
||||
makeBuilder,
|
||||
RecordBatchFileWriter,
|
||||
Table as ArrowTable,
|
||||
tableFromIPC,
|
||||
Vector,
|
||||
vectorFromArray
|
||||
} from 'apache-arrow'
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
||||
const { databaseNew, databaseTableNames, databaseOpenTable, tableSearch } = require('../index.node')
|
||||
const { databaseNew, databaseTableNames, databaseOpenTable, tableCreate, tableSearch } = require('../index.node')
|
||||
|
||||
/**
|
||||
* Connect to a LanceDB instance at the given URI
|
||||
* @param uri The uri of the database.
|
||||
*/
|
||||
export function connect (uri: string): Connection {
|
||||
export async function connect (uri: string): Promise<Connection> {
|
||||
return new Connection(uri)
|
||||
}
|
||||
|
||||
@@ -44,7 +54,7 @@ export class Connection {
|
||||
/**
|
||||
* Get the names of all tables in the database.
|
||||
*/
|
||||
tableNames (): string[] {
|
||||
async tableNames (): Promise<string[]> {
|
||||
return databaseTableNames.call(this._db)
|
||||
}
|
||||
|
||||
@@ -56,6 +66,50 @@ export class Connection {
|
||||
const tbl = await databaseOpenTable.call(this._db, name)
|
||||
return new Table(tbl, name)
|
||||
}
|
||||
|
||||
/**
 * Create a new table from an array of plain JavaScript records.
 *
 * The keys of the first record define the columns of the table. A column
 * named `vector` is encoded as an Arrow list of Float32 values, and every
 * record must supply a vector of the same length as the first record's.
 * All other columns are converted with `vectorFromArray`.
 *
 * @param name The name of the table to create.
 * @param data The records to store; at least one record is required.
 * @returns The newly created table.
 * @throws Error if `data` is empty or if the vectors differ in length.
 */
async createTable (name: string, data: Array<Record<string, unknown>>): Promise<Table> {
  if (data.length === 0) {
    throw new Error('At least one record needs to be provided')
  }

  // Only the first record is inspected to discover the column names.
  const columns = Object.keys(data[0])
  const records: Record<string, Vector> = {}

  for (const columnsKey of columns) {
    if (columnsKey === 'vector') {
      const listBuilder = makeBuilder({
        type: new List(new Field<Float32>('item', new Float32()))
      })
      // The first record fixes the expected length for every other vector.
      const vectorSize = (data[0][columnsKey] as ArrayLike<number>).length
      for (const datum of data) {
        const vector = datum[columnsKey] as ArrayLike<number> | null | undefined
        // A missing vector is reported as a size error instead of surfacing
        // as an opaque TypeError on `.length`.
        if (vector == null || vector.length !== vectorSize) {
          throw new Error(`Invalid vector size, expected ${vectorSize}`)
        }

        listBuilder.append(vector)
      }
      records[columnsKey] = listBuilder.finish().toVector()
    } else {
      const values: unknown[] = data.map((datum) => datum[columnsKey])
      records[columnsKey] = vectorFromArray(values)
    }
  }

  // createTableArrow already opens the table it has just written and returns
  // it, so its result is returned directly instead of opening the table twice.
  return await this.createTableArrow(name, new ArrowTable(records))
}
|
||||
|
||||
/**
 * Create a new table from an Apache Arrow table.
 *
 * The Arrow table is serialized with `RecordBatchFileWriter`, handed to the
 * native `tableCreate` binding as a Node `Buffer`, and the table is then
 * opened by name.
 *
 * @param name The name of the table to create.
 * @param table The Arrow table holding the data to store.
 * @returns The table, as returned by `openTable`.
 */
async createTableArrow (name: string, table: ArrowTable): Promise<Table> {
  const ipcBytes = await RecordBatchFileWriter.writeAll(table).toUint8Array()
  const payload = Buffer.from(ipcBytes)
  await tableCreate.call(this._db, name, payload)
  return await this.openTable(name)
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -93,7 +147,7 @@ export class Query {
|
||||
private readonly _refine_factor?: number
|
||||
private readonly _nprobes: number
|
||||
private readonly _columns?: string[]
|
||||
private readonly _where?: string
|
||||
private _filter?: string
|
||||
private readonly _metric = 'L2'
|
||||
|
||||
constructor (tbl: any, queryVector: number[]) {
|
||||
@@ -103,22 +157,29 @@ export class Query {
|
||||
this._nprobes = 20
|
||||
this._refine_factor = undefined
|
||||
this._columns = undefined
|
||||
this._where = undefined
|
||||
this._filter = undefined
|
||||
}
|
||||
|
||||
set limit (value: number) {
|
||||
limit (value: number): Query {
|
||||
this._limit = value
|
||||
return this
|
||||
}
|
||||
|
||||
get limit (): number {
|
||||
return this._limit
|
||||
filter (value: string): Query {
|
||||
this._filter = value
|
||||
return this
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute the query and return the results as an Array of Objects
|
||||
*/
|
||||
async execute (): Promise<unknown[]> {
|
||||
const buffer = await tableSearch.call(this._tbl, this._query_vector, this._limit)
|
||||
async execute<T = Record<string, unknown>> (): Promise<T[]> {
|
||||
let buffer;
|
||||
if (this._filter != null) {
|
||||
buffer = await tableSearch.call(this._tbl, this._query_vector, this._limit, this._filter)
|
||||
} else {
|
||||
buffer = await tableSearch.call(this._tbl, this._query_vector, this._limit)
|
||||
}
|
||||
const data = tableFromIPC(buffer)
|
||||
return data.toArray().map((entry: Record<string, unknown>) => {
|
||||
const newObject: Record<string, unknown> = {}
|
||||
@@ -129,14 +190,7 @@ export class Query {
|
||||
newObject[key] = entry[key]
|
||||
}
|
||||
})
|
||||
return newObject
|
||||
return newObject as unknown as T
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute the query and return the results as an Array of the generic type provided
|
||||
*/
|
||||
async execute_cast<T>(): Promise<T[]> {
|
||||
return await this.execute() as T[]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,67 +14,94 @@
|
||||
|
||||
import { describe } from 'mocha'
|
||||
import { assert } from 'chai'
|
||||
import { track } from 'temp'
|
||||
|
||||
import * as lancedb from '../index'
|
||||
|
||||
describe('LanceDB client', function () {
|
||||
describe('open a connection to lancedb', function () {
|
||||
const con = lancedb.connect('.../../sample-lancedb')
|
||||
|
||||
it('should have a valid url', function () {
|
||||
assert.equal(con.uri, '.../../sample-lancedb')
|
||||
describe('when creating a connection to lancedb', function () {
|
||||
it('should have a valid url', async function () {
|
||||
const uri = await createTestDB()
|
||||
const con = await lancedb.connect(uri)
|
||||
assert.equal(con.uri, uri)
|
||||
})
|
||||
|
||||
it('should return the existing table names', function () {
|
||||
assert.deepEqual(con.tableNames(), ['my_table'])
|
||||
it('should return the existing table names', async function () {
|
||||
const uri = await createTestDB()
|
||||
const con = await lancedb.connect(uri)
|
||||
assert.deepEqual(await con.tableNames(), ['vectors'])
|
||||
})
|
||||
})
|
||||
|
||||
describe('when querying an existing dataset', function () {
|
||||
it('should open a table', async function () {
|
||||
const uri = await createTestDB()
|
||||
const con = await lancedb.connect(uri)
|
||||
const table = await con.openTable('vectors')
|
||||
assert.equal(table.name, 'vectors')
|
||||
})
|
||||
|
||||
describe('open a table from a connection', function () {
|
||||
const tablePromise = con.openTable('my_table')
|
||||
it('execute a query', async function () {
|
||||
const uri = await createTestDB()
|
||||
const con = await lancedb.connect(uri)
|
||||
const table = await con.openTable('vectors')
|
||||
const results = await table.search([0.1, 0.3]).execute()
|
||||
|
||||
it('should have a valid name', async function () {
|
||||
const table = await tablePromise
|
||||
assert.equal(table.name, 'my_table')
|
||||
})
|
||||
assert.equal(results.length, 2)
|
||||
assert.equal(results[0].price, 10)
|
||||
const vector = results[0].vector as Float32Array
|
||||
assert.approximately(vector[0], 0.0, 0.2)
|
||||
assert.approximately(vector[0], 0.1, 0.3)
|
||||
})
|
||||
|
||||
class MyResult {
|
||||
vector: Float32Array = new Float32Array(0)
|
||||
price: number = 0
|
||||
item: string = ''
|
||||
}
|
||||
it('limits # of results', async function () {
|
||||
const uri = await createTestDB()
|
||||
const con = await lancedb.connect(uri)
|
||||
const table = await con.openTable('vectors')
|
||||
const results = await table.search([0.1, 0.3]).limit(1).execute()
|
||||
assert.equal(results.length, 1)
|
||||
assert.equal(results[0].id, 1)
|
||||
})
|
||||
|
||||
it('execute a query', async function () {
|
||||
const table = await tablePromise
|
||||
const builder = table.search([0.1, 0.3])
|
||||
const results = await builder.execute() as MyResult[]
|
||||
it('uses a filter', async function () {
|
||||
const uri = await createTestDB()
|
||||
const con = await lancedb.connect(uri)
|
||||
const table = await con.openTable('vectors')
|
||||
const results = await table.search([0.1, 0.3]).filter('id == 2').execute()
|
||||
assert.equal(results.length, 1)
|
||||
assert.equal(results[0].id, 2)
|
||||
})
|
||||
})
|
||||
|
||||
assert.equal(results.length, 2)
|
||||
assert.equal(results[0].item, 'foo')
|
||||
assert.equal(results[0].price, 10)
|
||||
assert.approximately(results[0].vector[0], 3.1, 0.1)
|
||||
assert.approximately(results[0].vector[1], 4.1, 0.1)
|
||||
})
|
||||
describe('when creating a new dataset', function () {
|
||||
it('creates a new table from javascript objects', async function () {
|
||||
const dir = await track().mkdir('lancejs')
|
||||
const con = await lancedb.connect(dir)
|
||||
|
||||
it('execute a query and type cast the result', async function () {
|
||||
const table = await tablePromise
|
||||
const data = [
|
||||
{ id: 1, vector: [0.1, 0.2], price: 10 },
|
||||
{ id: 2, vector: [1.1, 1.2], price: 50 }
|
||||
]
|
||||
|
||||
const builder = table.search([0.1, 0.3])
|
||||
const results = await builder.execute_cast<MyResult>()
|
||||
assert.equal(results.length, 2)
|
||||
assert.equal(results[0].item, 'foo')
|
||||
assert.equal(results[0].price, 10)
|
||||
assert.approximately(results[0].vector[0], 3.1, 0.1)
|
||||
assert.approximately(results[0].vector[1], 4.1, 0.1)
|
||||
})
|
||||
const tableName = `vectors_${Math.floor(Math.random() * 100)}`
|
||||
const table = await con.createTable(tableName, data)
|
||||
assert.equal(table.name, tableName)
|
||||
|
||||
it('limits # of results', async function () {
|
||||
const table = await tablePromise
|
||||
const builder = table.search([0.1, 0.3])
|
||||
builder.limit = 1
|
||||
const results = await builder.execute() as MyResult[]
|
||||
|
||||
assert.equal(results.length, 1)
|
||||
})
|
||||
const results = await table.search([0.1, 0.3]).execute()
|
||||
assert.equal(results.length, 2)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
/**
 * Test fixture: creates a fresh database in a directory obtained from
 * `track().mkdir('lancejs')` and populates it with a single `vectors` table
 * containing two records.
 *
 * @returns The path of the directory that holds the database, usable as the
 *          uri passed to `lancedb.connect`.
 */
async function createTestDB (): Promise<string> {
  const dbPath = await track().mkdir('lancejs')
  const connection = await lancedb.connect(dbPath)

  await connection.createTable('vectors', [
    { id: 1, vector: [0.1, 0.2], name: 'foo', price: 10, is_active: true },
    { id: 2, vector: [1.1, 1.2], name: 'bar', price: 50, is_active: false }
  ])

  return dbPath
}
|
||||
|
||||
Reference in New Issue
Block a user