From e926819e570ad0920dd7c81ecfb161d9dd56c4b5 Mon Sep 17 00:00:00 2001 From: albertlockett Date: Thu, 14 Dec 2023 12:57:31 -0500 Subject: [PATCH] WIP hastily moved types around --- .../_deletions/0-1-880176755671022891.arrow | Bin 0 -> 538 bytes node/.lancedb/my_table.lance/_latest.manifest | Bin 0 -> 338 bytes ...0-7f115968-624e-4139-befb-ab37ccbbe5f5.txn | Bin 0 -> 205 bytes ...1-f50352e0-0986-4b19-b1d9-b834e2788159.txn | Bin 0 -> 169 bytes .../my_table.lance/_versions/1.manifest | Bin 0 -> 259 bytes .../my_table.lance/_versions/2.manifest | Bin 0 -> 338 bytes ...0d380567-0703-475a-85f2-4ee0366d5203.lance | Bin 0 -> 690 bytes ...5d31021d-96e4-45c4-8d8f-b39ce155aa3c.lance | Bin 0 -> 675 bytes node/src/index.ts | 377 +---------- node/src/integration_test/test.ts | 180 ----- node/src/query.ts | 10 +- node/src/remote/index.ts | 13 +- node/src/test/embedding/openai.ts | 57 -- node/src/test/io.ts | 76 --- node/src/test/test.ts | 616 ------------------ node/src/test/util.ts | 45 -- node/src/types.ts | 375 +++++++++++ 17 files changed, 400 insertions(+), 1349 deletions(-) create mode 100644 node/.lancedb/my_table.lance/_deletions/0-1-880176755671022891.arrow create mode 100644 node/.lancedb/my_table.lance/_latest.manifest create mode 100644 node/.lancedb/my_table.lance/_transactions/0-7f115968-624e-4139-befb-ab37ccbbe5f5.txn create mode 100644 node/.lancedb/my_table.lance/_transactions/1-f50352e0-0986-4b19-b1d9-b834e2788159.txn create mode 100644 node/.lancedb/my_table.lance/_versions/1.manifest create mode 100644 node/.lancedb/my_table.lance/_versions/2.manifest create mode 100644 node/.lancedb/my_table.lance/data/0d380567-0703-475a-85f2-4ee0366d5203.lance create mode 100644 node/.lancedb/my_table.lance/data/5d31021d-96e4-45c4-8d8f-b39ce155aa3c.lance delete mode 100644 node/src/integration_test/test.ts delete mode 100644 node/src/test/embedding/openai.ts delete mode 100644 node/src/test/io.ts delete mode 100644 node/src/test/test.ts delete mode 100644 node/src/test/util.ts create mode 100644 node/src/types.ts diff --git a/node/.lancedb/my_table.lance/_deletions/0-1-880176755671022891.arrow b/node/.lancedb/my_table.lance/_deletions/0-1-880176755671022891.arrow new file mode 100644 index 0000000000000000000000000000000000000000..010ddcad4b9780dfca57ea3b52c201a0c8c20a36 GIT binary patch literal 538 zcmcIiI|{-;5PgY@8d>mzND7M-Uce*d1VRujZEOS~V50}H@E9J$(wpgfv%3VecEX#P zH!m|gSyDI6X1503v@3uK0gT{@0**e`GKV1_uogU!kqxTNm;y8TbA-=Y@o_kZ`SeeQ(pWge(vRCo}z G{SzNI=q`8w literal 0 HcmV?d00001 diff --git a/node/.lancedb/my_table.lance/_latest.manifest b/node/.lancedb/my_table.lance/_latest.manifest new file mode 100644 index 0000000000000000000000000000000000000000..887baf2b381efacee5d7db28b39cad0b214f514b GIT binary patch literal 338 zcmYk0F-pWh7=`nX8kQ)!Ok)&;)PV^znVDpgLJ$!wL@e!=Br}mnOkk6h#nxu;;1Tu) zc3!|s*ogKPHo788Uh%5$eY``kEN2g+N_$^_0pHf`EU%3MhhqEI6n#5&VF>J&)!gVw zSKXOOU3Gn0)@|0O;c9V!*PHCdxM;}RT|cdwD~0e0W{yuaS5!tZp)ut|#4;mFmLUS%CJp$KWnMpWhbuPY+ujcnHTM_z_9X84Z~xNn`{OvJgaRRg#=1 zg<(==87~GR4_uUPjy@h=miG8{@jg3+3%G>bXQY&r%g|6l6BQAWvxMYKuS#)Y!dNLL blc1kB*E`O}?=zbQDqxBAAF$4j&X2zVQ|4yI literal 0 HcmV?d00001 diff --git a/node/.lancedb/my_table.lance/_transactions/0-7f115968-624e-4139-befb-ab37ccbbe5f5.txn b/node/.lancedb/my_table.lance/_transactions/0-7f115968-624e-4139-befb-ab37ccbbe5f5.txn new file mode 100644 index 0000000000000000000000000000000000000000..cde8be808e5c960c8d07201547cfc52c8e304a8c GIT binary patch literal 205 zcmYkzyAFad7(iiL4Fh8raWG+UBB?2qUf^X&FMmxFgaptSU&lA_1=YobGk=GiR47Z) zNKIu`1FcKmPz%9QW4Ut9S`b0JlNU+_Dbs>^u9=XEGQ~Kpl`vF`P|+HIaVfp17(e%B za6q;jgmX*|1V^8L{-GlA?UQ+#;&9RU3&EgAng9R* literal 0 HcmV?d00001 diff --git a/node/.lancedb/my_table.lance/_transactions/1-f50352e0-0986-4b19-b1d9-b834e2788159.txn b/node/.lancedb/my_table.lance/_transactions/1-f50352e0-0986-4b19-b1d9-b834e2788159.txn new file mode 100644 index 0000000000000000000000000000000000000000..5b2b2b7730fe7794a80f9f27e6896a56e56b3f51 GIT binary patch literal 169 zcmYk!u?@m75CA|NB!mz`$Q4SJf-2IZx8HBS2VfA#b}GaOG&JnM2u#5O3_-~VR6s{_ z<;|7ej*P^%B488`ZZHuBQ@#kfYJ-U3_?(^eUYkXag#_6Lz<>lUa021cf~ytD`@$A! zcSy%v^>m_|cC}WU+xzjlobT6Vr@E%)U&DnAT2mnU>cDY!AVo$PDCTOIQ=#X1N#7gCa@Y0_@uDz`fwN%N0HY75sYhp*%t_yjlj7CwNR zs~)Iu-~9bP_m7MQ(?{_ZeOa~!UQyW&76|hRkhB*k1StoVZIR`4JV%`o9XX(jr Sitxe|qS-(G10}cDtD7IuUR1XL literal 0 HcmV?d00001 diff --git a/node/.lancedb/my_table.lance/_versions/2.manifest b/node/.lancedb/my_table.lance/_versions/2.manifest new file mode 100644 index 0000000000000000000000000000000000000000..887baf2b381efacee5d7db28b39cad0b214f514b GIT binary patch literal 338 zcmYk0F-pWh7=`nX8kQ)!Ok)&;)PV^znVDpgLJ$!wL@e!=Br}mnOkk6h#nxu;;1Tu) zc3!|s*ogKPHo788Uh%5$eY``kEN2g+N_$^_0pHf`EU%3MhhqEI6n#5&VF>J&)!gVw zSKXOOU3Gn0)@|0O;c9V!*PHCdxM;}RT|cdwD~0e0W{yuaS5!tZp)ut|#4;mFmLUS%CJp$KWnMpWhbuPY+ujcnHTM_z_9X84Z~xNn`{OvJgaRRg#=1 zg<(==87~GR4_uUPjy@h=miG8{@jg3+3%G>bXQY&r%g|6l6BQAWvxMYKuS#)Y!dNLL blc1kB*E`O}?=zbQDqxBAAF$4j&X2zVQ|4yI literal 0 HcmV?d00001 diff --git a/node/.lancedb/my_table.lance/data/0d380567-0703-475a-85f2-4ee0366d5203.lance b/node/.lancedb/my_table.lance/data/0d380567-0703-475a-85f2-4ee0366d5203.lance new file mode 100644 index 0000000000000000000000000000000000000000..a2daa9bb0ee884bfe30c50d03567be530fb2d9dd GIT binary patch literal 690 zcmZvYu}%Up9LC#nci=6W4pIga!r;I_kQjpuig8tUrptL0Q!Tv&dW1M2lcR%=V0;7; zAI1l8^$oNq|6;w^+~wD=-`D$kMJOMir$0cbZytD;hRgndUy3_kN9)l5?gK3dOm=`) z+nm)lzoy%|Udg+jAoL6z@E4f90*g?vfPu4uUKBLQ4IT@=ZMNUDy;eV%d|0DI4C`NZ z;ASvPvunw_IBe5k#uJ@R7BJ8B`j>BDw+r`aUqQfc;RVHPWq?ytTH+`Y> z(V?lV(yg&j(JYpk>2*$@8}S^Bl4CH6JIwp9r1q!8uY12WHMb@(&gQSo4LU_-*P<1K UNy+nrGF#y{d+i9Li?gfqZxM8j+W-In literal 0 HcmV?d00001 diff --git a/node/.lancedb/my_table.lance/data/5d31021d-96e4-45c4-8d8f-b39ce155aa3c.lance b/node/.lancedb/my_table.lance/data/5d31021d-96e4-45c4-8d8f-b39ce155aa3c.lance new file mode 100644 index 0000000000000000000000000000000000000000..13944b14ba6e29fb85486fc49ddf21d1939066f3 GIT binary patch literal 675 zcmZvYF;2rU6o&oMBn??W9*8p_RV+vys!$(lFdJA(^uct@4IF(wwtKPUpNX#>D>*lKz1N1OFG3+L%x!KYyie&;s@C2uBFKo*#sLvV}Ku?Fi82`SryY D { - // Name of Table - name: string - - // Data to insert into the Table - data?: Array> | ArrowTable | undefined - - // Optional Arrow Schema for this table - schema?: Schema | undefined - - // Optional embedding function used to create embeddings - embeddingFunction?: EmbeddingFunction | undefined - - // WriteOptions for this operation - writeOptions?: WriteOptions | undefined -} - /** * Connect to a LanceDB instance at the given URI * @param uri The uri of the database. @@ -116,235 +83,6 @@ export async function connect (arg: string | Partial): Promis return new LocalConnection(db, opts) } -/** - * A LanceDB Connection that allows you to open tables and create new ones. - * - * Connection could be local against filesystem or remote against a server. - */ -export interface Connection { - uri: string - - tableNames(): Promise - - /** - * Open a table in the database. - * - * @param name The name of the table. - * @param embeddings An embedding function to use on this table - */ - openTable(name: string, embeddings?: EmbeddingFunction): Promise> - - /** - * Creates a new Table, optionally initializing it with new data. - * - * @param {string} name - The name of the table. - * @param data - Array of Records to be inserted into the table - * @param schema - An Arrow Schema that describe this table columns - * @param {EmbeddingFunction} embeddings - An embedding function to use on this table - * @param {WriteOptions} writeOptions - The write options to use when creating the table. - */ - createTable ({ name, data, schema, embeddingFunction, writeOptions }: CreateTableOptions): Promise> - - /** - * Creates a new Table and initialize it with new data. - * - * @param {string} name - The name of the table. - * @param data - Non-empty Array of Records to be inserted into the table - */ - createTable (name: string, data: Array>): Promise - - /** - * Creates a new Table and initialize it with new data. - * - * @param {string} name - The name of the table. - * @param data - Non-empty Array of Records to be inserted into the table - * @param {WriteOptions} options - The write options to use when creating the table. - */ - createTable (name: string, data: Array>, options: WriteOptions): Promise
- - /** - * Creates a new Table and initialize it with new data. - * - * @param {string} name - The name of the table. - * @param data - Non-empty Array of Records to be inserted into the table - * @param {EmbeddingFunction} embeddings - An embedding function to use on this table - */ - createTable (name: string, data: Array>, embeddings: EmbeddingFunction): Promise> - /** - * Creates a new Table and initialize it with new data. - * - * @param {string} name - The name of the table. - * @param data - Non-empty Array of Records to be inserted into the table - * @param {EmbeddingFunction} embeddings - An embedding function to use on this table - * @param {WriteOptions} options - The write options to use when creating the table. - */ - createTable (name: string, data: Array>, embeddings: EmbeddingFunction, options: WriteOptions): Promise> - - /** - * Drop an existing table. - * @param name The name of the table to drop. - */ - dropTable(name: string): Promise - -} - -/** - * A LanceDB Table is the collection of Records. Each Record has one or more vector fields. - */ -export interface Table { - name: string - - /** - * Creates a search query to find the nearest neighbors of the given search term - * @param query The query search term - */ - search: (query: T) => Query - - /** - * Insert records into this Table. - * - * @param data Records to be inserted into the Table - * @return The number of rows added to the table - */ - add: (data: Array>) => Promise - - /** - * Insert records into this Table, replacing its contents. - * - * @param data Records to be inserted into the Table - * @return The number of rows added to the table - */ - overwrite: (data: Array>) => Promise - - /** - * Create an ANN index on this Table vector index. - * - * @param indexParams The parameters of this Index, @see VectorIndexParams. - */ - createIndex: (indexParams: VectorIndexParams) => Promise - - /** - * Returns the number of rows in this table. - */ - countRows: () => Promise - - /** - * Delete rows from this table. - * - * This can be used to delete a single row, many rows, all rows, or - * sometimes no rows (if your predicate matches nothing). - * - * @param filter A filter in the same format used by a sql WHERE clause. The - * filter must not be empty. - * - * @examples - * - * ```ts - * const con = await lancedb.connect("./.lancedb") - * const data = [ - * {id: 1, vector: [1, 2]}, - * {id: 2, vector: [3, 4]}, - * {id: 3, vector: [5, 6]}, - * ]; - * const tbl = await con.createTable("my_table", data) - * await tbl.delete("id = 2") - * await tbl.countRows() // Returns 2 - * ``` - * - * If you have a list of values to delete, you can combine them into a - * stringified list and use the `IN` operator: - * - * ```ts - * const to_remove = [1, 5]; - * await tbl.delete(`id IN (${to_remove.join(",")})`) - * await tbl.countRows() // Returns 1 - * ``` - */ - delete: (filter: string) => Promise - - /** - * Update rows in this table. - * - * This can be used to update a single row, many rows, all rows, or - * sometimes no rows (if your predicate matches nothing). - * - * @param args see {@link UpdateArgs} and {@link UpdateSqlArgs} for more details - * - * @examples - * - * ```ts - * const con = await lancedb.connect("./.lancedb") - * const data = [ - * {id: 1, vector: [3, 3], name: 'Ye'}, - * {id: 2, vector: [4, 4], name: 'Mike'}, - * ]; - * const tbl = await con.createTable("my_table", data) - * - * await tbl.update({ - * filter: "id = 2", - * updates: { vector: [2, 2], name: "Michael" }, - * }) - * - * let results = await tbl.search([1, 1]).execute(); - * // Returns [ - * // {id: 2, vector: [2, 2], name: 'Michael'} - * // {id: 1, vector: [3, 3], name: 'Ye'} - * // ] - * ``` - * - */ - update: (args: UpdateArgs | UpdateSqlArgs) => Promise - - /** - * List the indicies on this table. - */ - listIndices: () => Promise - - /** - * Get statistics about an index. - */ - indexStats: (indexUuid: string) => Promise -} - -export interface UpdateArgs { - /** - * A filter in the same format used by a sql WHERE clause. The filter may be empty, - * in which case all rows will be updated. - */ - where?: string - - /** - * A key-value map of updates. The keys are the column names, and the values are the - * new values to set - */ - values: Record -} - -export interface UpdateSqlArgs { - /** - * A filter in the same format used by a sql WHERE clause. The filter may be empty, - * in which case all rows will be updated. - */ - where?: string - - /** - * A key-value map of updates. The keys are the column names, and the values are the - * new values to set as SQL expressions. - */ - valuesSql: Record -} - -export interface VectorIndex { - columns: string[] - name: string - uuid: string -} - -export interface IndexStats { - numIndexedRows: number | null - numUnindexedRows: number | null -} - /** * A connection to a LanceDB database. */ @@ -692,83 +430,6 @@ export interface CompactionMetrics { filesAdded: number } -/// Config to build IVF_PQ index. -/// -export interface IvfPQIndexConfig { - /** - * The column to be indexed - */ - column?: string - - /** - * A unique name for the index - */ - index_name?: string - - /** - * Metric type, L2 or Cosine - */ - metric_type?: MetricType - - /** - * The number of partitions this index - */ - num_partitions?: number - - /** - * The max number of iterations for kmeans training. - */ - max_iters?: number - - /** - * Train as optimized product quantization. - */ - use_opq?: boolean - - /** - * Number of subvectors to build PQ code - */ - num_sub_vectors?: number - /** - * The number of bits to present one PQ centroid. - */ - num_bits?: number - - /** - * Max number of iterations to train OPQ, if `use_opq` is true. - */ - max_opq_iters?: number - - /** - * Replace an existing index with the same name if it exists. - */ - replace?: boolean - - type: 'ivf_pq' -} - -export type VectorIndexParams = IvfPQIndexConfig - -/** - * Write mode for writing a table. - */ -export enum WriteMode { - /** Create a new {@link Table}. */ - Create = 'create', - /** Overwrite the existing {@link Table} if presented. */ - Overwrite = 'overwrite', - /** Append new data to the table. */ - Append = 'append' -} - -/** - * Write options when creating a Table. - */ -export interface WriteOptions { - /** A {@link WriteMode} to use on this operation */ - writeMode?: WriteMode -} - export class DefaultWriteOptions implements WriteOptions { writeMode = WriteMode.Create } @@ -777,23 +438,3 @@ export function isWriteOptions (value: any): value is WriteOptions { return Object.keys(value).length === 1 && (value.writeMode === undefined || typeof value.writeMode === 'string') } - -/** - * Distance metrics type. - */ -export enum MetricType { - /** - * Euclidean distance - */ - L2 = 'l2', - - /** - * Cosine distance - */ - Cosine = 'cosine', - - /** - * Dot product - */ - Dot = 'dot' -} diff --git a/node/src/integration_test/test.ts b/node/src/integration_test/test.ts deleted file mode 100644 index 7a09db67f..000000000 --- a/node/src/integration_test/test.ts +++ /dev/null @@ -1,180 +0,0 @@ -// Copyright 2023 LanceDB Developers. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -import { describe } from 'mocha' -import * as chai from 'chai' -import * as chaiAsPromised from 'chai-as-promised' -import { v4 as uuidv4 } from 'uuid' - -import * as lancedb from '../index' -import { tmpdir } from 'os' -import * as fs from 'fs' -import * as path from 'path' - -const assert = chai.assert -chai.use(chaiAsPromised) - -describe('LanceDB AWS Integration test', function () { - it('s3+ddb schema is processed correctly', async function () { - this.timeout(15000) - - // WARNING: specifying engine is NOT a publicly supported feature in lancedb yet - // THE API WILL CHANGE - const conn = await lancedb.connect('s3://lancedb-integtest?engine=ddb&ddbTableName=lancedb-integtest') - const data = [{ vector: Array(128).fill(1.0) }] - - const tableName = uuidv4() - let table = await conn.createTable(tableName, data, { writeMode: lancedb.WriteMode.Overwrite }) - - const futs = [table.add(data), table.add(data), table.add(data), table.add(data), table.add(data)] - await Promise.allSettled(futs) - - table = await conn.openTable(tableName) - assert.equal(await table.countRows(), 6) - }) -}) - -describe('LanceDB Mirrored Store Integration test', function () { - it('s3://...?mirroredStore=... param is processed correctly', async function () { - this.timeout(600000) - - const dir = tmpdir() - console.log(dir) - const conn = await lancedb.connect(`s3://lancedb-integtest?mirroredStore=${dir}`) - const data = Array(200).fill({ vector: Array(128).fill(1.0), id: 0 }) - data.push(...Array(200).fill({ vector: Array(128).fill(1.0), id: 1 })) - data.push(...Array(200).fill({ vector: Array(128).fill(1.0), id: 2 })) - data.push(...Array(200).fill({ vector: Array(128).fill(1.0), id: 3 })) - - const tableName = uuidv4() - - // try create table and check if it's mirrored - const t = await conn.createTable(tableName, data, { writeMode: lancedb.WriteMode.Overwrite }) - - const mirroredPath = path.join(dir, `${tableName}.lance`) - fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => { - if (err != null) throw err - // there should be three dirs - assert.equal(files.length, 3) - assert.isTrue(files[0].isDirectory()) - assert.isTrue(files[1].isDirectory()) - - fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => { - if (err != null) throw err - assert.equal(files.length, 1) - assert.isTrue(files[0].name.endsWith('.txn')) - }) - - fs.readdir(path.join(mirroredPath, '_versions'), { withFileTypes: true }, (err, files) => { - if (err != null) throw err - assert.equal(files.length, 1) - assert.isTrue(files[0].name.endsWith('.manifest')) - }) - - fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => { - if (err != null) throw err - assert.equal(files.length, 1) - assert.isTrue(files[0].name.endsWith('.lance')) - }) - }) - - // try create index and check if it's mirrored - await t.createIndex({ column: 'vector', type: 'ivf_pq' }) - - fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => { - if (err != null) throw err - // there should be four dirs - assert.equal(files.length, 4) - assert.isTrue(files[0].isDirectory()) - assert.isTrue(files[1].isDirectory()) - assert.isTrue(files[2].isDirectory()) - - // Two TXs now - fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => { - if (err != null) throw err - assert.equal(files.length, 2) - assert.isTrue(files[0].name.endsWith('.txn')) - assert.isTrue(files[1].name.endsWith('.txn')) - }) - - fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => { - if (err != null) throw err - assert.equal(files.length, 1) - assert.isTrue(files[0].name.endsWith('.lance')) - }) - - fs.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true }, (err, files) => { - if (err != null) throw err - assert.equal(files.length, 1) - assert.isTrue(files[0].isDirectory()) - - fs.readdir(path.join(mirroredPath, '_indices', files[0].name), { withFileTypes: true }, (err, files) => { - if (err != null) throw err - - assert.equal(files.length, 1) - assert.isTrue(files[0].isFile()) - assert.isTrue(files[0].name.endsWith('.idx')) - }) - }) - }) - - // try delete and check if it's mirrored - await t.delete('id = 0') - - fs.readdir(mirroredPath, { withFileTypes: true }, (err, files) => { - if (err != null) throw err - // there should be five dirs - assert.equal(files.length, 5) - assert.isTrue(files[0].isDirectory()) - assert.isTrue(files[1].isDirectory()) - assert.isTrue(files[2].isDirectory()) - assert.isTrue(files[3].isDirectory()) - assert.isTrue(files[4].isDirectory()) - - // Three TXs now - fs.readdir(path.join(mirroredPath, '_transactions'), { withFileTypes: true }, (err, files) => { - if (err != null) throw err - assert.equal(files.length, 3) - assert.isTrue(files[0].name.endsWith('.txn')) - assert.isTrue(files[1].name.endsWith('.txn')) - }) - - fs.readdir(path.join(mirroredPath, 'data'), { withFileTypes: true }, (err, files) => { - if (err != null) throw err - assert.equal(files.length, 1) - assert.isTrue(files[0].name.endsWith('.lance')) - }) - - fs.readdir(path.join(mirroredPath, '_indices'), { withFileTypes: true }, (err, files) => { - if (err != null) throw err - assert.equal(files.length, 1) - assert.isTrue(files[0].isDirectory()) - - fs.readdir(path.join(mirroredPath, '_indices', files[0].name), { withFileTypes: true }, (err, files) => { - if (err != null) throw err - - assert.equal(files.length, 1) - assert.isTrue(files[0].isFile()) - assert.isTrue(files[0].name.endsWith('.idx')) - }) - }) - - fs.readdir(path.join(mirroredPath, '_deletions'), { withFileTypes: true }, (err, files) => { - if (err != null) throw err - assert.equal(files.length, 1) - assert.isTrue(files[0].name.endsWith('.arrow')) - }) - }) - }) -}) diff --git a/node/src/query.ts b/node/src/query.ts index 932adddba..490b05f1f 100644 --- a/node/src/query.ts +++ b/node/src/query.ts @@ -14,10 +14,16 @@ import { Vector, tableFromIPC } from 'apache-arrow' import { type EmbeddingFunction } from './embedding/embedding_function' -import { type MetricType } from '.' +import { type MetricType } from './types' // eslint-disable-next-line @typescript-eslint/no-var-requires -const { tableSearch } = require('../native.js') +// const { tableSearch } = require('../native.js') + +const tableSearch = async function (args: any, arg2: any): Promise { + return await new Promise((resolve, reject) => { + resolve('') + }) +} /** * A builder for nearest neighbor queries for LanceDB. diff --git a/node/src/remote/index.ts b/node/src/remote/index.ts index b1eee6da5..be5af4fc5 100644 --- a/node/src/remote/index.ts +++ b/node/src/remote/index.ts @@ -13,12 +13,15 @@ // limitations under the License. import { - type EmbeddingFunction, type Table, type VectorIndexParams, type Connection, - type ConnectionOptions, type CreateTableOptions, type VectorIndex, - type WriteOptions, + type Table, type VectorIndexParams, + type VectorIndex, type IndexStats, - type UpdateArgs, type UpdateSqlArgs -} from '../index' + type UpdateArgs, type UpdateSqlArgs, + type Connection, + type ConnectionOptions, type CreateTableOptions, + type WriteOptions +} from '../types' +import { type EmbeddingFunction } from '../embedding/embedding_function' import { Query } from '../query' import { Vector, Table as ArrowTable } from 'apache-arrow' diff --git a/node/src/test/embedding/openai.ts b/node/src/test/embedding/openai.ts deleted file mode 100644 index 2c07393e4..000000000 --- a/node/src/test/embedding/openai.ts +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2023 Lance Developers. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -import { describe } from 'mocha' -import { assert } from 'chai' - -import { OpenAIEmbeddingFunction } from '../../embedding/openai' -import { isEmbeddingFunction } from '../../embedding/embedding_function' - -// eslint-disable-next-line @typescript-eslint/no-var-requires -const { OpenAIApi } = require('openai') -// eslint-disable-next-line @typescript-eslint/no-var-requires -const { stub } = require('sinon') - -describe('OpenAPIEmbeddings', function () { - const stubValue = { - data: { - data: [ - { - embedding: Array(1536).fill(1.0) - }, - { - embedding: Array(1536).fill(2.0) - } - ] - } - } - - describe('#embed', function () { - it('should create vector embeddings', async function () { - const openAIStub = stub(OpenAIApi.prototype, 'createEmbedding').returns(stubValue) - const f = new OpenAIEmbeddingFunction('text', 'sk-key') - const vectors = await f.embed(['abc', 'def']) - assert.isTrue(openAIStub.calledOnce) - assert.equal(vectors.length, 2) - assert.deepEqual(vectors[0], stubValue.data.data[0].embedding) - assert.deepEqual(vectors[1], stubValue.data.data[1].embedding) - }) - }) - - describe('isEmbeddingFunction', function () { - it('should match the isEmbeddingFunction guard', function () { - assert.isTrue(isEmbeddingFunction(new OpenAIEmbeddingFunction('text', 'sk-key'))) - }) - }) -}) diff --git a/node/src/test/io.ts b/node/src/test/io.ts deleted file mode 100644 index 238c44a1e..000000000 --- a/node/src/test/io.ts +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2023 Lance Developers. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// IO tests - -import { describe } from 'mocha' -import { assert } from 'chai' - -import * as lancedb from '../index' -import { type ConnectionOptions } from '../index' - -describe('LanceDB S3 client', function () { - if (process.env.TEST_S3_BASE_URL != null) { - const baseUri = process.env.TEST_S3_BASE_URL - it('should have a valid url', async function () { - const opts = { uri: `${baseUri}/valid_url` } - const table = await createTestDB(opts, 2, 20) - const con = await lancedb.connect(opts) - assert.equal(con.uri, opts.uri) - - const results = await table.search([0.1, 0.3]).limit(5).execute() - assert.equal(results.length, 5) - }).timeout(10_000) - } else { - describe.skip('Skip S3 test', function () {}) - } - - if (process.env.TEST_S3_BASE_URL != null && process.env.TEST_AWS_ACCESS_KEY_ID != null && process.env.TEST_AWS_SECRET_ACCESS_KEY != null) { - const baseUri = process.env.TEST_S3_BASE_URL - it('use custom credentials', async function () { - const opts: ConnectionOptions = { - uri: `${baseUri}/custom_credentials`, - awsCredentials: { - accessKeyId: process.env.TEST_AWS_ACCESS_KEY_ID as string, - secretKey: process.env.TEST_AWS_SECRET_ACCESS_KEY as string - } - } - const table = await createTestDB(opts, 2, 20) - console.log(table) - const con = await lancedb.connect(opts) - console.log(con) - assert.equal(con.uri, opts.uri) - - const results = await table.search([0.1, 0.3]).limit(5).execute() - assert.equal(results.length, 5) - }).timeout(10_000) - } else { - describe.skip('Skip S3 test', function () {}) - } -}) - -async function createTestDB (opts: ConnectionOptions, numDimensions: number = 2, numRows: number = 2): Promise { - const con = await lancedb.connect(opts) - - const data = [] - for (let i = 0; i < numRows; i++) { - const vector = [] - for (let j = 0; j < numDimensions; j++) { - vector.push(i + (j * 0.1)) - } - data.push({ id: i + 1, name: `name_${i}`, price: i + 10, is_active: (i % 2 === 0), vector }) - } - - return await con.createTable('vectors_2', data) -} diff --git a/node/src/test/test.ts b/node/src/test/test.ts deleted file mode 100644 index 89a44c6cf..000000000 --- a/node/src/test/test.ts +++ /dev/null @@ -1,616 +0,0 @@ -// Copyright 2023 LanceDB Developers. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -import { describe } from 'mocha' -import { track } from 'temp' -import * as chai from 'chai' -import * as chaiAsPromised from 'chai-as-promised' - -import * as lancedb from '../index' -import { type AwsCredentials, type EmbeddingFunction, MetricType, Query, WriteMode, DefaultWriteOptions, isWriteOptions, type LocalTable } from '../index' -import { FixedSizeList, Field, Int32, makeVector, Schema, Utf8, Table as ArrowTable, vectorFromArray, Float32 } from 'apache-arrow' - -const expect = chai.expect -const assert = chai.assert -chai.use(chaiAsPromised) - -describe('LanceDB client', function () { - describe('when creating a connection to lancedb', function () { - it('should have a valid url', async function () { - const uri = await createTestDB() - const con = await lancedb.connect(uri) - assert.equal(con.uri, uri) - }) - - it('should accept an options object', async function () { - const uri = await createTestDB() - const con = await lancedb.connect({ uri }) - assert.equal(con.uri, uri) - }) - - it('should accept custom aws credentials', async function () { - const uri = await createTestDB() - const awsCredentials: AwsCredentials = { - accessKeyId: '', - secretKey: '' - } - const con = await lancedb.connect({ uri, awsCredentials }) - assert.equal(con.uri, uri) - }) - - it('should return the existing table names', async function () { - const uri = await createTestDB() - const con = await lancedb.connect(uri) - assert.deepEqual(await con.tableNames(), ['vectors']) - }) - }) - - describe('when querying an existing dataset', function () { - it('should open a table', async function () { - const uri = await createTestDB() - const con = await lancedb.connect(uri) - const table = await con.openTable('vectors') - assert.equal(table.name, 'vectors') - }) - - it('execute a query', async function () { - const uri = await createTestDB() - const con = await lancedb.connect(uri) - const table = await con.openTable('vectors') - const results = await table.search([0.1, 0.3]).execute() - - assert.equal(results.length, 2) - assert.equal(results[0].price, 10) - const vector = results[0].vector as Float32Array - assert.approximately(vector[0], 0.0, 0.2) - assert.approximately(vector[0], 0.1, 0.3) - }) - - it('limits # of results', async function () { - const uri = await createTestDB(2, 100) - const con = await lancedb.connect(uri) - const table = await con.openTable('vectors') - let results = await table.search([0.1, 0.3]).limit(1).execute() - assert.equal(results.length, 1) - assert.equal(results[0].id, 1) - - // there is a default limit if unspecified - results = await table.search([0.1, 0.3]).execute() - assert.equal(results.length, 10) - }) - - it('uses a filter / where clause without vector search', async function () { - // eslint-disable-next-line @typescript-eslint/explicit-function-return-type - const assertResults = (results: Array>) => { - assert.equal(results.length, 50) - } - - const uri = await createTestDB(2, 100) - const con = await lancedb.connect(uri) - const table = (await con.openTable('vectors')) as LocalTable - let results = await table.filter('id % 2 = 0').execute() - assertResults(results) - results = await table.where('id % 2 = 0').execute() - assertResults(results) - }) - - it('uses a filter / where clause', async function () { - // eslint-disable-next-line @typescript-eslint/explicit-function-return-type - const assertResults = (results: Array>) => { - assert.equal(results.length, 1) - assert.equal(results[0].id, 2) - } - - const uri = await createTestDB() - const con = await lancedb.connect(uri) - const table = await con.openTable('vectors') - let results = await table.search([0.1, 0.1]).filter('id == 2').execute() - assertResults(results) - results = await table.search([0.1, 0.1]).where('id == 2').execute() - assertResults(results) - }) - - it('should correctly process prefilter/postfilter', async function () { - const uri = await createTestDB(16, 300) - const con = await lancedb.connect(uri) - const table = await con.openTable('vectors') - await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 }) - // post filter should return less than the limit - let results = await table.search(new Array(16).fill(0.1)).limit(10).filter('id >= 10').prefilter(false).execute() - assert.isTrue(results.length < 10) - - // pre filter should return exactly the limit - results = await table.search(new Array(16).fill(0.1)).limit(10).filter('id >= 10').prefilter(true).execute() - assert.isTrue(results.length === 10) - }) - - it('select only a subset of columns', async function () { - const uri = await createTestDB() - const con = await lancedb.connect(uri) - const table = await con.openTable('vectors') - const results = await table.search([0.1, 0.1]).select(['is_active']).execute() - assert.equal(results.length, 2) - // vector and _distance are always returned - assert.isDefined(results[0].vector) - assert.isDefined(results[0]._distance) - assert.isDefined(results[0].is_active) - - assert.isUndefined(results[0].id) - assert.isUndefined(results[0].name) - assert.isUndefined(results[0].price) - }) - }) - - describe('when creating a new dataset', function () { - it('create an empty table', async function () { - const dir = await track().mkdir('lancejs') - const con = await lancedb.connect(dir) - - const schema = new Schema( - [new Field('id', new Int32()), new Field('name', new Utf8())] - ) - const table = await con.createTable({ name: 'vectors', schema }) - assert.equal(table.name, 'vectors') - assert.deepEqual(await con.tableNames(), ['vectors']) - }) - - it('create a table with a empty data array', async function () { - const dir = await track().mkdir('lancejs') - const con = await lancedb.connect(dir) - - const schema = new Schema( - [new Field('id', new Int32()), new Field('name', new Utf8())] - ) - const table = await con.createTable({ name: 'vectors', schema, data: [] }) - assert.equal(table.name, 'vectors') - assert.deepEqual(await con.tableNames(), ['vectors']) - }) - - it('create a table from an Arrow Table', async function () { - const dir = await track().mkdir('lancejs') - const con = await lancedb.connect(dir) - - const i32s = new Int32Array(new Array(10)) - const i32 = makeVector(i32s) - - const data = new ArrowTable({ vector: i32 }) - - const table = await con.createTable({ name: 'vectors', data }) - assert.equal(table.name, 'vectors') - assert.equal(await table.countRows(), 10) - assert.deepEqual(await con.tableNames(), ['vectors']) - }) - - it('creates a new table from javascript objects', async function () { - const dir = await track().mkdir('lancejs') - const con = await lancedb.connect(dir) - - const data = [ - { id: 1, vector: [0.1, 0.2], price: 10 }, - { id: 2, vector: [1.1, 1.2], price: 50 } - ] - - const tableName = `vectors_${Math.floor(Math.random() * 100)}` - const table = await con.createTable(tableName, data) - assert.equal(table.name, tableName) - assert.equal(await table.countRows(), 2) - }) - - it('fails to create a new table when the vector column is missing', async function () { - const dir = await track().mkdir('lancejs') - const con = await lancedb.connect(dir) - - const data = [ - { id: 1, price: 10 } - ] - - const create = con.createTable('missing_vector', data) - await expect(create).to.be.rejectedWith(Error, 'column \'vector\' is missing') - }) - - it('use overwrite flag to overwrite existing table', async function () { - const dir = await track().mkdir('lancejs') - const con = await lancedb.connect(dir) - - const data = [ - { id: 1, vector: [0.1, 0.2], price: 10 }, - { id: 2, vector: [1.1, 1.2], price: 50 } - ] - - const tableName = 'overwrite' - await con.createTable(tableName, data, { writeMode: WriteMode.Create }) - - const newData = [ - { id: 1, vector: [0.1, 0.2], price: 10 }, - { id: 2, vector: [1.1, 1.2], price: 50 }, - { id: 3, vector: [1.1, 1.2], price: 50 } - ] - - await expect(con.createTable(tableName, newData)).to.be.rejectedWith(Error, 'already exists') - - const table = await con.createTable(tableName, newData, { writeMode: WriteMode.Overwrite }) - assert.equal(table.name, tableName) - assert.equal(await table.countRows(), 3) - }) - - it('appends records to an existing table ', async function () { - const dir = await track().mkdir('lancejs') - const con = await lancedb.connect(dir) - - const data = [ - { id: 1, vector: [0.1, 0.2], price: 10, name: 'a' }, - { id: 2, vector: [1.1, 1.2], price: 50, name: 'b' } - ] - - const table = await con.createTable('vectors', data) - assert.equal(await table.countRows(), 2) - - const dataAdd = [ - { id: 3, vector: [2.1, 2.2], price: 10, name: 'c' }, - { id: 4, vector: [3.1, 3.2], price: 50, name: 'd' } - ] - await table.add(dataAdd) - assert.equal(await table.countRows(), 4) - }) - - it('overwrite all records in a table', async function () { - const uri = await createTestDB() - const con = await lancedb.connect(uri) - - const table = await con.openTable('vectors') - assert.equal(await table.countRows(), 2) - - const dataOver = [ - { vector: [2.1, 2.2], price: 10, name: 'foo' }, - { vector: [3.1, 3.2], price: 50, name: 'bar' } - ] - await table.overwrite(dataOver) - assert.equal(await table.countRows(), 2) - }) - - it('can update records in the table', async function () { - const uri = await createTestDB() - const con = await lancedb.connect(uri) - - const table = await con.openTable('vectors') - assert.equal(await table.countRows(), 2) - - await table.update({ where: 'price = 10', valuesSql: { price: '100' } }) - const results = await table.search([0.1, 0.2]).execute() - assert.equal(results[0].price, 100) - assert.equal(results[1].price, 11) - }) - - it('can update the records using a literal value', async function () { - const uri = await createTestDB() - const con = await lancedb.connect(uri) - - const table = await con.openTable('vectors') - assert.equal(await table.countRows(), 2) - - await table.update({ where: 'price = 10', values: { price: 100 } }) - const results = await table.search([0.1, 0.2]).execute() - assert.equal(results[0].price, 100) - assert.equal(results[1].price, 11) - }) - - it('can update every record in the table', async function () { - const uri = await createTestDB() - const con = await lancedb.connect(uri) - - const table = await con.openTable('vectors') - assert.equal(await table.countRows(), 2) - - await table.update({ valuesSql: { price: '100' } }) - const results = await table.search([0.1, 0.2]).execute() - - assert.equal(results[0].price, 100) - assert.equal(results[1].price, 100) - }) - - it('can delete records from a table', async function () { - const uri = await createTestDB() - const con = await lancedb.connect(uri) - - const table = await con.openTable('vectors') - assert.equal(await table.countRows(), 2) - - await table.delete('price = 10') - assert.equal(await table.countRows(), 1) - }) - }) - - describe('when searching an empty dataset', function () { - it('should not fail', async function () { - const dir = await track().mkdir('lancejs') - const con = await lancedb.connect(dir) - - const schema = new Schema( - [new Field('vector', new FixedSizeList(128, new Field('float32', new Float32())))] - ) - const table = await con.createTable({ name: 'vectors', schema }) - const result = await table.search(Array(128).fill(0.1)).execute() - assert.isEmpty(result) - }) - }) - - describe('when searching an empty-after-delete dataset', function () { - it('should not fail', async function () { - const dir = await track().mkdir('lancejs') - const con = await lancedb.connect(dir) - - const schema = new Schema( - [new Field('vector', new FixedSizeList(128, new Field('float32', new Float32())))] - ) - const table = await con.createTable({ name: 'vectors', schema }) - await table.add([{ vector: Array(128).fill(0.1) }]) - // https://github.com/lancedb/lance/issues/1635 - await table.delete('true') - const result = await table.search(Array(128).fill(0.1)).execute() - assert.isEmpty(result) - }) - }) - - describe('when creating a vector index', function () { - it('overwrite all records in a table', async function () { - const uri = await createTestDB(32, 300) - const con = await lancedb.connect(uri) - const table = await con.openTable('vectors') - await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 }) - }).timeout(10_000) // Timeout is high partially because GH macos runner is pretty slow - - it('replace an existing index', async function () { - const uri = await createTestDB(16, 300) - const con = await lancedb.connect(uri) - const table = await con.openTable('vectors') - - await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 }) - - // Replace should fail if the index already exists - await expect(table.createIndex({ - type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2, num_sub_vectors: 2, replace: false - }) - ).to.be.rejectedWith('LanceError(Index)') - - // Default replace = true - await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 }) - }).timeout(50_000) - - it('it should fail when the column is not a vector', async function () { - const uri = await createTestDB(32, 300) - const con = await lancedb.connect(uri) - const table = await con.openTable('vectors') - const createIndex = table.createIndex({ type: 'ivf_pq', column: 'name', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 }) - await expect(createIndex).to.be.rejectedWith(/VectorIndex requires the column data type to be fixed size list of float32s/) - }) - - it('it should fail when the column is not a vector', async function () { - const uri = await createTestDB(32, 300) - const con = await lancedb.connect(uri) - const table = await con.openTable('vectors') - const createIndex = table.createIndex({ type: 'ivf_pq', column: 'name', num_partitions: -1, max_iters: 2, num_sub_vectors: 2 }) - await expect(createIndex).to.be.rejectedWith('num_partitions: must be > 0') - }) - - it('should be able to list index and stats', async function () { - const uri = await createTestDB(32, 300) - const con = await lancedb.connect(uri) - const table = await con.openTable('vectors') - await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 }) - - const indices = await table.listIndices() - expect(indices).to.have.lengthOf(1) - expect(indices[0].name).to.equal('vector_idx') - expect(indices[0].uuid).to.not.be.equal(undefined) - expect(indices[0].columns).to.have.lengthOf(1) - expect(indices[0].columns[0]).to.equal('vector') - - const stats = await table.indexStats(indices[0].uuid) - expect(stats.numIndexedRows).to.equal(300) - expect(stats.numUnindexedRows).to.equal(0) - }).timeout(50_000) - }) - - describe('when using a custom embedding function', function () { - class TextEmbedding implements EmbeddingFunction { - sourceColumn: string - - constructor (targetColumn: string) { - this.sourceColumn = targetColumn - } - - _embedding_map = new Map([ - ['foo', [2.1, 2.2]], - ['bar', [3.1, 3.2]] - ]) - - async embed (data: string[]): Promise { - return data.map(datum => this._embedding_map.get(datum) ?? [0.0, 0.0]) - } - } - - it('should encode the original data into embeddings', async function () { - const dir = await track().mkdir('lancejs') - const con = await lancedb.connect(dir) - const embeddings = new TextEmbedding('name') - - const data = [ - { price: 10, name: 'foo' }, - { price: 50, name: 'bar' } - ] - const table = await con.createTable('vectors', data, embeddings, { writeMode: WriteMode.Create }) - const results = await table.search('foo').execute() - assert.equal(results.length, 2) - }) - - it('should create embeddings for Arrow Table', async function () { - const dir = await track().mkdir('lancejs') - const con = await lancedb.connect(dir) - const embeddingFunction = new TextEmbedding('name') - - const names = vectorFromArray(['foo', 'bar'], new Utf8()) - const data = new ArrowTable({ name: names }) - - const table = await con.createTable({ name: 'vectors', data, embeddingFunction }) - assert.equal(table.name, 'vectors') - const results = await table.search('foo').execute() - assert.equal(results.length, 2) - }) - }) -}) - -describe('Remote LanceDB client', function () { - describe('when the server is not reachable', function () { - it('produces a network error', async function () { - const con = await lancedb.connect({ - uri: 'db://test-1234', - region: 'asdfasfasfdf', - apiKey: 'some-api-key' - }) - - // GET - try { - await con.tableNames() - } catch (err) { - expect(err).to.have.property('message', 'Network Error: getaddrinfo ENOTFOUND test-1234.asdfasfasfdf.api.lancedb.com') - } - - // POST - try { - await con.createTable({ name: 'vectors', schema: new Schema([]) }) - } catch (err) { - expect(err).to.have.property('message', 'Network Error: getaddrinfo ENOTFOUND test-1234.asdfasfasfdf.api.lancedb.com') - } - - // Search - const table = await con.openTable('vectors') - try { - await table.search([0.1, 0.3]).execute() - } catch (err) { - expect(err).to.have.property('message', 'Network Error: getaddrinfo ENOTFOUND test-1234.asdfasfasfdf.api.lancedb.com') - } - }) - }) -}) - -describe('Query object', function () { - it('sets custom parameters', async function () { - const query = new Query([0.1, 0.3]) - .limit(1) - .metricType(MetricType.Cosine) - .refineFactor(100) - .select(['a', 'b']) - .nprobes(20) as Record - assert.equal(query._limit, 1) - assert.equal(query._metricType, MetricType.Cosine) - assert.equal(query._refineFactor, 100) - assert.equal(query._nprobes, 20) - assert.deepEqual(query._select, ['a', 'b']) - }) -}) - -async function createTestDB (numDimensions: number = 2, numRows: number = 2): Promise { - const dir = await track().mkdir('lancejs') - const con = await lancedb.connect(dir) - - const data = [] - for (let i = 0; i < numRows; i++) { - const vector = [] - for (let j = 0; j < numDimensions; j++) { - vector.push(i + (j * 0.1)) - } - data.push({ id: i + 1, name: `name_${i}`, price: i + 10, is_active: (i % 2 === 0), vector }) - } - - await con.createTable('vectors', data) - return dir -} - -describe('Drop table', function () { - it('drop a table', async function () { - const dir = await track().mkdir('lancejs') - const con = await lancedb.connect(dir) - - const data = [ - { price: 10, name: 'foo', vector: [1, 2, 3] }, - { price: 50, name: 'bar', vector: [4, 5, 6] } - ] - await con.createTable('t1', data) - await con.createTable('t2', data) - - assert.deepEqual(await con.tableNames(), ['t1', 't2']) - - await con.dropTable('t1') - assert.deepEqual(await con.tableNames(), ['t2']) - }) -}) - -describe('WriteOptions', function () { - context('#isWriteOptions', function () { - it('should not match empty object', function () { - assert.equal(isWriteOptions({}), false) - }) - it('should match write options', function () { - assert.equal(isWriteOptions({ writeMode: WriteMode.Create }), true) - }) - it('should match undefined write mode', function () { - assert.equal(isWriteOptions({ writeMode: undefined }), true) - }) - it('should match default write options', function () { - assert.equal(isWriteOptions(new DefaultWriteOptions()), true) - }) - }) -}) - -describe('Compact and cleanup', function () { - it('can cleanup after compaction', async function () { - const dir = await track().mkdir('lancejs') - const con = await lancedb.connect(dir) - - const data = [ - { price: 10, name: 'foo', vector: [1, 2, 3] }, - { price: 50, name: 'bar', vector: [4, 5, 6] } - ] - const table = await con.createTable('t1', data) as LocalTable - - const newData = [ - { price: 30, name: 'baz', vector: [7, 8, 9] } - ] - await table.add(newData) - - const compactionMetrics = await table.compactFiles({ - numThreads: 2 - }) - assert.equal(compactionMetrics.fragmentsRemoved, 2) - assert.equal(compactionMetrics.fragmentsAdded, 1) - assert.equal(await table.countRows(), 3) - - await table.cleanupOldVersions() - assert.equal(await table.countRows(), 3) - - // should have no effect, but this validates the arguments are parsed. - await table.compactFiles({ - targetRowsPerFragment: 102410, - maxRowsPerGroup: 1024, - materializeDeletions: true, - materializeDeletionsThreshold: 0.5, - numThreads: 2 - }) - - const cleanupMetrics = await table.cleanupOldVersions(0, true) - assert.isAtLeast(cleanupMetrics.bytesRemoved, 1) - assert.isAtLeast(cleanupMetrics.oldVersions, 1) - assert.equal(await table.countRows(), 3) - }) -}) diff --git a/node/src/test/util.ts b/node/src/test/util.ts deleted file mode 100644 index 07e96e0b1..000000000 --- a/node/src/test/util.ts +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2023 LanceDB Developers. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -import { toSQL } from '../util' -import * as chai from 'chai' - -const expect = chai.expect - -describe('toSQL', function () { - it('should turn string to SQL expression', function () { - expect(toSQL('foo')).to.equal("'foo'") - }) - - it('should turn number to SQL expression', function () { - expect(toSQL(123)).to.equal('123') - }) - - it('should turn boolean to SQL expression', function () { - expect(toSQL(true)).to.equal('TRUE') - }) - - it('should turn null to SQL expression', function () { - expect(toSQL(null)).to.equal('NULL') - }) - - it('should turn Date to SQL expression', function () { - const date = new Date('05 October 2011 14:48 UTC') - expect(toSQL(date)).to.equal("'2011-10-05T14:48:00.000Z'") - }) - - it('should turn array to SQL expression', function () { - expect(toSQL(['foo', 'bar', true, 1])).to.equal("['foo', 'bar', TRUE, 1]") - }) -}) diff --git a/node/src/types.ts b/node/src/types.ts new file mode 100644 index 000000000..36a1c0928 --- /dev/null +++ b/node/src/types.ts @@ -0,0 +1,375 @@ + +import { + type Schema, + type Table as ArrowTable +} from 'apache-arrow' + +import { type Literal } from './util' +import type { EmbeddingFunction } from './embedding/embedding_function' +import { type Query } from './query' + +export interface AwsCredentials { + accessKeyId: string + + secretKey: string + + sessionToken?: string +} + +/** + * Write options when creating a Table. + */ +export interface WriteOptions { + /** A {@link WriteMode} to use on this operation */ + writeMode?: WriteMode +} + +/** + * Write mode for writing a table. + */ +export enum WriteMode { + /** Create a new {@link Table}. */ + Create = 'create', + /** Overwrite the existing {@link Table} if presented. */ + Overwrite = 'overwrite', + /** Append new data to the table. */ + Append = 'append' +} + +/** + * A LanceDB Connection that allows you to open tables and create new ones. + * + * Connection could be local against filesystem or remote against a server. + */ +export interface Connection { + uri: string + + tableNames(): Promise + + /** + * Open a table in the database. + * + * @param name The name of the table. + * @param embeddings An embedding function to use on this table + */ + openTable(name: string, embeddings?: EmbeddingFunction): Promise> + + /** + * Creates a new Table, optionally initializing it with new data. + * + * @param {string} name - The name of the table. + * @param data - Array of Records to be inserted into the table + * @param schema - An Arrow Schema that describe this table columns + * @param {EmbeddingFunction} embeddings - An embedding function to use on this table + * @param {WriteOptions} writeOptions - The write options to use when creating the table. + */ + createTable ({ name, data, schema, embeddingFunction, writeOptions }: CreateTableOptions): Promise> + + /** + * Creates a new Table and initialize it with new data. + * + * @param {string} name - The name of the table. + * @param data - Non-empty Array of Records to be inserted into the table + */ + createTable (name: string, data: Array>): Promise
+ + /** + * Creates a new Table and initialize it with new data. + * + * @param {string} name - The name of the table. + * @param data - Non-empty Array of Records to be inserted into the table + * @param {WriteOptions} options - The write options to use when creating the table. + */ + createTable (name: string, data: Array>, options: WriteOptions): Promise
+ + /** + * Creates a new Table and initialize it with new data. + * + * @param {string} name - The name of the table. + * @param data - Non-empty Array of Records to be inserted into the table + * @param {EmbeddingFunction} embeddings - An embedding function to use on this table + */ + createTable (name: string, data: Array>, embeddings: EmbeddingFunction): Promise> + /** + * Creates a new Table and initialize it with new data. + * + * @param {string} name - The name of the table. + * @param data - Non-empty Array of Records to be inserted into the table + * @param {EmbeddingFunction} embeddings - An embedding function to use on this table + * @param {WriteOptions} options - The write options to use when creating the table. + */ + createTable (name: string, data: Array>, embeddings: EmbeddingFunction, options: WriteOptions): Promise> + + /** + * Drop an existing table. + * @param name The name of the table to drop. + */ + dropTable(name: string): Promise + +} + +export interface CreateTableOptions { + // Name of Table + name: string + + // Data to insert into the Table + data?: Array> | ArrowTable | undefined + + // Optional Arrow Schema for this table + schema?: Schema | undefined + + // Optional embedding function used to create embeddings + embeddingFunction?: EmbeddingFunction | undefined + + // WriteOptions for this operation + writeOptions?: WriteOptions | undefined +} + +export interface ConnectionOptions { + uri: string + + awsCredentials?: AwsCredentials + + awsRegion?: string + + // API key for the remote connections + apiKey?: string + // Region to connect + region?: string + + // override the host for the remote connections + hostOverride?: string +} + +/** + * Distance metrics type. + */ +export enum MetricType { + /** + * Euclidean distance + */ + L2 = 'l2', + + /** + * Cosine distance + */ + Cosine = 'cosine', + + /** + * Dot product + */ + Dot = 'dot' +} + +/// Config to build IVF_PQ index. +/// +export interface IvfPQIndexConfig { + /** + * The column to be indexed + */ + column?: string + + /** + * A unique name for the index + */ + index_name?: string + + /** + * Metric type, L2 or Cosine + */ + metric_type?: MetricType + + /** + * The number of partitions this index + */ + num_partitions?: number + + /** + * The max number of iterations for kmeans training. + */ + max_iters?: number + + /** + * Train as optimized product quantization. + */ + use_opq?: boolean + + /** + * Number of subvectors to build PQ code + */ + num_sub_vectors?: number + /** + * The number of bits to present one PQ centroid. + */ + num_bits?: number + + /** + * Max number of iterations to train OPQ, if `use_opq` is true. + */ + max_opq_iters?: number + + /** + * Replace an existing index with the same name if it exists. + */ + replace?: boolean + + type: 'ivf_pq' +} + +export type VectorIndexParams = IvfPQIndexConfig + +/** + * A LanceDB Table is the collection of Records. Each Record has one or more vector fields. + */ +export interface Table { + name: string + + /** + * Creates a search query to find the nearest neighbors of the given search term + * @param query The query search term + */ + search: (query: T) => Query + + /** + * Insert records into this Table. + * + * @param data Records to be inserted into the Table + * @return The number of rows added to the table + */ + add: (data: Array>) => Promise + + /** + * Insert records into this Table, replacing its contents. + * + * @param data Records to be inserted into the Table + * @return The number of rows added to the table + */ + overwrite: (data: Array>) => Promise + + /** + * Create an ANN index on this Table vector index. + * + * @param indexParams The parameters of this Index, @see VectorIndexParams. + */ + createIndex: (indexParams: VectorIndexParams) => Promise + + /** + * Returns the number of rows in this table. + */ + countRows: () => Promise + + /** + * Delete rows from this table. + * + * This can be used to delete a single row, many rows, all rows, or + * sometimes no rows (if your predicate matches nothing). + * + * @param filter A filter in the same format used by a sql WHERE clause. The + * filter must not be empty. + * + * @examples + * + * ```ts + * const con = await lancedb.connect("./.lancedb") + * const data = [ + * {id: 1, vector: [1, 2]}, + * {id: 2, vector: [3, 4]}, + * {id: 3, vector: [5, 6]}, + * ]; + * const tbl = await con.createTable("my_table", data) + * await tbl.delete("id = 2") + * await tbl.countRows() // Returns 2 + * ``` + * + * If you have a list of values to delete, you can combine them into a + * stringified list and use the `IN` operator: + * + * ```ts + * const to_remove = [1, 5]; + * await tbl.delete(`id IN (${to_remove.join(",")})`) + * await tbl.countRows() // Returns 1 + * ``` + */ + delete: (filter: string) => Promise + + /** + * Update rows in this table. + * + * This can be used to update a single row, many rows, all rows, or + * sometimes no rows (if your predicate matches nothing). + * + * @param args see {@link UpdateArgs} and {@link UpdateSqlArgs} for more details + * + * @examples + * + * ```ts + * const con = await lancedb.connect("./.lancedb") + * const data = [ + * {id: 1, vector: [3, 3], name: 'Ye'}, + * {id: 2, vector: [4, 4], name: 'Mike'}, + * ]; + * const tbl = await con.createTable("my_table", data) + * + * await tbl.update({ + * filter: "id = 2", + * updates: { vector: [2, 2], name: "Michael" }, + * }) + * + * let results = await tbl.search([1, 1]).execute(); + * // Returns [ + * // {id: 2, vector: [2, 2], name: 'Michael'} + * // {id: 1, vector: [3, 3], name: 'Ye'} + * // ] + * ``` + * + */ + update: (args: UpdateArgs | UpdateSqlArgs) => Promise + + /** + * List the indicies on this table. + */ + listIndices: () => Promise + + /** + * Get statistics about an index. + */ + indexStats: (indexUuid: string) => Promise +} +export interface UpdateArgs { + /** + * A filter in the same format used by a sql WHERE clause. The filter may be empty, + * in which case all rows will be updated. + */ + where?: string + + /** + * A key-value map of updates. The keys are the column names, and the values are the + * new values to set + */ + values: Record +} + +export interface UpdateSqlArgs { + /** + * A filter in the same format used by a sql WHERE clause. The filter may be empty, + * in which case all rows will be updated. + */ + where?: string + + /** + * A key-value map of updates. The keys are the column names, and the values are the + * new values to set as SQL expressions. + */ + valuesSql: Record +} + +export interface VectorIndex { + columns: string[] + name: string + uuid: string +} + +export interface IndexStats { + numIndexedRows: number | null + numUnindexedRows: number | null +}