mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-06 20:02:58 +00:00
feat(node): Create empty tables / Arrow Tables (#399)
- Supports creating an empty table as long as an Arrow Schema is provided - Supports creating a table from an Arrow Table (can be passed as data) - Simplified some Arrow code in the TS/FFI side - removed createTableArrow method, it was never documented / tested.
This commit is contained in:
@@ -13,10 +13,10 @@
|
||||
// limitations under the License.
|
||||
|
||||
import {
|
||||
RecordBatchFileWriter,
|
||||
type Table as ArrowTable
|
||||
type Schema,
|
||||
Table as ArrowTable
|
||||
} from 'apache-arrow'
|
||||
import { fromRecordsToBuffer } from './arrow'
|
||||
import { createEmptyTable, fromRecordsToBuffer, fromTableToBuffer } from './arrow'
|
||||
import type { EmbeddingFunction } from './embedding/embedding_function'
|
||||
import { RemoteConnection } from './remote'
|
||||
import { Query } from './query'
|
||||
@@ -51,6 +51,23 @@ export interface ConnectionOptions {
|
||||
hostOverride?: string
|
||||
}
|
||||
|
||||
export interface CreateTableOptions<T> {
|
||||
// Name of Table
|
||||
name: string
|
||||
|
||||
// Data to insert into the Table
|
||||
data?: Array<Record<string, unknown>> | ArrowTable | undefined
|
||||
|
||||
// Optional Arrow Schema for this table
|
||||
schema?: Schema | undefined
|
||||
|
||||
// Optional embedding function used to create embeddings
|
||||
embeddingFunction?: EmbeddingFunction<T> | undefined
|
||||
|
||||
// WriteOptions for this operation
|
||||
writeOptions?: WriteOptions | undefined
|
||||
}
|
||||
|
||||
/**
|
||||
* Connect to a LanceDB instance at the given URI
|
||||
* @param uri The uri of the database.
|
||||
@@ -97,6 +114,17 @@ export interface Connection {
|
||||
*/
|
||||
openTable<T>(name: string, embeddings?: EmbeddingFunction<T>): Promise<Table<T>>
|
||||
|
||||
/**
|
||||
* Creates a new Table, optionally initializing it with new data.
|
||||
*
|
||||
* @param {string} name - The name of the table.
|
||||
* @param data - Array of Records to be inserted into the table
|
||||
* @param schema - An Arrow Schema that describe this table columns
|
||||
* @param {EmbeddingFunction} embeddings - An embedding function to use on this table
|
||||
* @param {WriteOptions} writeOptions - The write options to use when creating the table.
|
||||
*/
|
||||
createTable<T> ({ name, data, schema, embeddingFunction, writeOptions }: CreateTableOptions<T>): Promise<Table<T>>
|
||||
|
||||
/**
|
||||
* Creates a new Table and initialize it with new data.
|
||||
*
|
||||
@@ -132,8 +160,6 @@ export interface Connection {
|
||||
*/
|
||||
createTable<T> (name: string, data: Array<Record<string, unknown>>, embeddings: EmbeddingFunction<T>, options: WriteOptions): Promise<Table<T>>
|
||||
|
||||
createTableArrow(name: string, table: ArrowTable): Promise<Table>
|
||||
|
||||
/**
|
||||
* Drop an existing table.
|
||||
* @param name The name of the table to drop.
|
||||
@@ -256,59 +282,80 @@ export class LocalConnection implements Connection {
|
||||
async openTable<T> (name: string, embeddings: EmbeddingFunction<T>): Promise<Table<T>>
|
||||
async openTable<T> (name: string, embeddings?: EmbeddingFunction<T>): Promise<Table<T>>
|
||||
async openTable<T> (name: string, embeddings?: EmbeddingFunction<T>): Promise<Table<T>> {
|
||||
const tbl = await databaseOpenTable.call(this._db, name, ...this.awsParams())
|
||||
if (embeddings !== undefined) {
|
||||
return new LocalTable(tbl, name, this._options(), embeddings)
|
||||
} else {
|
||||
return new LocalTable(tbl, name, this._options())
|
||||
}
|
||||
}
|
||||
|
||||
async createTable<T> (name: string | CreateTableOptions<T>, data?: Array<Record<string, unknown>>, optsOrEmbedding?: WriteOptions | EmbeddingFunction<T>, opt?: WriteOptions): Promise<Table<T>> {
|
||||
if (typeof name === 'string') {
|
||||
let writeOptions: WriteOptions = new DefaultWriteOptions()
|
||||
if (opt !== undefined && isWriteOptions(opt)) {
|
||||
writeOptions = opt
|
||||
} else if (optsOrEmbedding !== undefined && isWriteOptions(optsOrEmbedding)) {
|
||||
writeOptions = optsOrEmbedding
|
||||
}
|
||||
|
||||
let embeddings: undefined | EmbeddingFunction<T>
|
||||
if (optsOrEmbedding !== undefined && isEmbeddingFunction(optsOrEmbedding)) {
|
||||
embeddings = optsOrEmbedding
|
||||
}
|
||||
return await this.createTableImpl({ name, data, embeddingFunction: embeddings, writeOptions })
|
||||
}
|
||||
return await this.createTableImpl(name)
|
||||
}
|
||||
|
||||
private async createTableImpl<T> ({ name, data, schema, embeddingFunction, writeOptions = new DefaultWriteOptions() }: {
|
||||
name: string
|
||||
data?: Array<Record<string, unknown>> | ArrowTable | undefined
|
||||
schema?: Schema | undefined
|
||||
embeddingFunction?: EmbeddingFunction<T> | undefined
|
||||
writeOptions?: WriteOptions | undefined
|
||||
}): Promise<Table<T>> {
|
||||
let buffer: Buffer
|
||||
|
||||
function isEmpty (data: Array<Record<string, unknown>> | ArrowTable<any>): boolean {
|
||||
if (data instanceof ArrowTable) {
|
||||
return data.data.length === 0
|
||||
}
|
||||
return data.length === 0
|
||||
}
|
||||
|
||||
if ((data === undefined) || isEmpty(data)) {
|
||||
if (schema === undefined) {
|
||||
throw new Error('Either data or schema needs to defined')
|
||||
}
|
||||
buffer = await fromTableToBuffer(createEmptyTable(schema))
|
||||
} else if (data instanceof ArrowTable) {
|
||||
buffer = await fromTableToBuffer(data, embeddingFunction)
|
||||
} else {
|
||||
// data is Array<Record<...>>
|
||||
buffer = await fromRecordsToBuffer(data, embeddingFunction)
|
||||
}
|
||||
|
||||
const tbl = await tableCreate.call(this._db, name, buffer, writeOptions?.writeMode?.toString(), ...this.awsParams())
|
||||
if (embeddingFunction !== undefined) {
|
||||
return new LocalTable(tbl, name, this._options(), embeddingFunction)
|
||||
} else {
|
||||
return new LocalTable(tbl, name, this._options())
|
||||
}
|
||||
}
|
||||
|
||||
private awsParams (): any[] {
|
||||
// TODO: move this thing into rust
|
||||
const callArgs = [this._db, name]
|
||||
const awsCredentials = this._options().awsCredentials
|
||||
const params = []
|
||||
if (awsCredentials !== undefined) {
|
||||
callArgs.push(awsCredentials.accessKeyId)
|
||||
callArgs.push(awsCredentials.secretKey)
|
||||
params.push(awsCredentials.accessKeyId)
|
||||
params.push(awsCredentials.secretKey)
|
||||
if (awsCredentials.sessionToken !== undefined) {
|
||||
callArgs.push(awsCredentials.sessionToken)
|
||||
params.push(awsCredentials.sessionToken)
|
||||
}
|
||||
}
|
||||
const tbl = await databaseOpenTable.call(...callArgs)
|
||||
if (embeddings !== undefined) {
|
||||
return new LocalTable(tbl, name, this._options(), embeddings)
|
||||
} else {
|
||||
return new LocalTable(tbl, name, this._options())
|
||||
}
|
||||
}
|
||||
|
||||
async createTable<T> (name: string, data: Array<Record<string, unknown>>, optsOrEmbedding?: WriteOptions | EmbeddingFunction<T>, opt?: WriteOptions): Promise<Table<T>> {
|
||||
let writeOptions: WriteOptions = new DefaultWriteOptions()
|
||||
if (opt !== undefined && isWriteOptions(opt)) {
|
||||
writeOptions = opt
|
||||
} else if (optsOrEmbedding !== undefined && isWriteOptions(optsOrEmbedding)) {
|
||||
writeOptions = optsOrEmbedding
|
||||
}
|
||||
|
||||
let embeddings: undefined | EmbeddingFunction<T>
|
||||
if (optsOrEmbedding !== undefined && isEmbeddingFunction(optsOrEmbedding)) {
|
||||
embeddings = optsOrEmbedding
|
||||
}
|
||||
const createArgs = [this._db, name, await fromRecordsToBuffer(data, embeddings), writeOptions.writeMode?.toString()]
|
||||
const awsCredentials = this._options().awsCredentials
|
||||
if (awsCredentials !== undefined) {
|
||||
createArgs.push(awsCredentials.accessKeyId)
|
||||
createArgs.push(awsCredentials.secretKey)
|
||||
if (awsCredentials.sessionToken !== undefined) {
|
||||
createArgs.push(awsCredentials.sessionToken)
|
||||
}
|
||||
}
|
||||
|
||||
const tbl = await tableCreate.call(...createArgs)
|
||||
|
||||
if (embeddings !== undefined) {
|
||||
return new LocalTable(tbl, name, this._options(), embeddings)
|
||||
} else {
|
||||
return new LocalTable(tbl, name, this._options())
|
||||
}
|
||||
}
|
||||
|
||||
async createTableArrow (name: string, table: ArrowTable): Promise<Table> {
|
||||
const writer = RecordBatchFileWriter.writeAll(table)
|
||||
await tableCreate.call(this._db, name, Buffer.from(await writer.toUint8Array()))
|
||||
return await this.openTable(name)
|
||||
return params
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user