From 5ca98c326fdd68a332fd454aaab6d5b6b2f5fc9d Mon Sep 17 00:00:00 2001 From: Bert Date: Thu, 26 Oct 2023 17:00:48 -0400 Subject: [PATCH] feat: added dataset stats api to node (#604) --- node/src/index.ts | 31 +++++++++++++- node/src/remote/index.ts | 21 ++++++++- node/src/test/test.ts | 18 ++++++++ rust/ffi/node/src/lib.rs | 2 + rust/ffi/node/src/table.rs | 87 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 157 insertions(+), 2 deletions(-) diff --git a/node/src/index.ts b/node/src/index.ts index 35085de0..646edbe3 100644 --- a/node/src/index.ts +++ b/node/src/index.ts @@ -23,7 +23,7 @@ import { Query } from './query' import { isEmbeddingFunction } from './embedding/embedding_function' // eslint-disable-next-line @typescript-eslint/no-var-requires -const { databaseNew, databaseTableNames, databaseOpenTable, databaseDropTable, tableCreate, tableAdd, tableCreateVectorIndex, tableCountRows, tableDelete, tableCleanupOldVersions, tableCompactFiles } = require('../native.js') +const { databaseNew, databaseTableNames, databaseOpenTable, databaseDropTable, tableCreate, tableAdd, tableCreateVectorIndex, tableCountRows, tableDelete, tableCleanupOldVersions, tableCompactFiles, tableListIndices, tableIndexStats } = require('../native.js') export { Query } export type { EmbeddingFunction } @@ -260,6 +260,27 @@ export interface Table { * ``` */ delete: (filter: string) => Promise + + /** + * List the indices on this table. + */ + listIndices: () => Promise + + /** + * Get statistics about an index.
+ */ + indexStats: (indexUuid: string) => Promise +} + +export interface VectorIndex { + columns: string[] + name: string + uuid: string +} + +export interface IndexStats { + numIndexedRows: number | null + numUnindexedRows: number | null } /** @@ -502,6 +523,14 @@ export class LocalTable implements Table { return res.metrics }) } + + async listIndices (): Promise { + return tableListIndices.call(this._tbl) + } + + async indexStats (indexUuid: string): Promise { + return tableIndexStats.call(this._tbl, indexUuid) + } } export interface CleanupStats { diff --git a/node/src/remote/index.ts b/node/src/remote/index.ts index abb3d8f0..25178c61 100644 --- a/node/src/remote/index.ts +++ b/node/src/remote/index.ts @@ -14,7 +14,9 @@ import { type EmbeddingFunction, type Table, type VectorIndexParams, type Connection, - type ConnectionOptions, type CreateTableOptions, type WriteOptions + type ConnectionOptions, type CreateTableOptions, type VectorIndex, + type WriteOptions, + type IndexStats } from '../index' import { Query } from '../query' @@ -241,4 +243,21 @@ export class RemoteTable implements Table { async delete (filter: string): Promise { await this._client.post(`/v1/table/${this._name}/delete/`, { predicate: filter }) } + + async listIndices (): Promise { + const results = await this._client.post(`/v1/table/${this._name}/index/list/`) + return results.data.indexes?.map((index: any) => ({ + columns: index.columns, + name: index.index_name, + uuid: index.index_uuid + })) + } + + async indexStats (indexUuid: string): Promise { + const results = await this._client.post(`/v1/table/${this._name}/index/${indexUuid}/stats/`) + return { + numIndexedRows: results.data.num_indexed_rows, + numUnindexedRows: results.data.num_unindexed_rows + } + } } diff --git a/node/src/test/test.ts b/node/src/test/test.ts index 28830589..777a250f 100644 --- a/node/src/test/test.ts +++ b/node/src/test/test.ts @@ -328,6 +328,24 @@ describe('LanceDB client', function () { const createIndex = 
table.createIndex({ type: 'ivf_pq', column: 'name', num_partitions: -1, max_iters: 2, num_sub_vectors: 2 }) await expect(createIndex).to.be.rejectedWith('num_partitions: must be > 0') }) + + it('should be able to list index and stats', async function () { + const uri = await createTestDB(32, 300) + const con = await lancedb.connect(uri) + const table = await con.openTable('vectors') + await table.createIndex({ type: 'ivf_pq', column: 'vector', num_partitions: 2, max_iters: 2, num_sub_vectors: 2 }) + + const indices = await table.listIndices() + expect(indices).to.have.lengthOf(1) + expect(indices[0].name).to.equal('vector_idx') + expect(indices[0].uuid).to.not.be.equal(undefined) + expect(indices[0].columns).to.have.lengthOf(1) + expect(indices[0].columns[0]).to.equal('vector') + + const stats = await table.indexStats(indices[0].uuid) + expect(stats.numIndexedRows).to.equal(300) + expect(stats.numUnindexedRows).to.equal(0) + }).timeout(50_000) }) describe('when using a custom embedding function', function () { diff --git a/rust/ffi/node/src/lib.rs b/rust/ffi/node/src/lib.rs index 97bfedec..6de89c31 100644 --- a/rust/ffi/node/src/lib.rs +++ b/rust/ffi/node/src/lib.rs @@ -239,6 +239,8 @@ fn main(mut cx: ModuleContext) -> NeonResult<()> { cx.export_function("tableDelete", JsTable::js_delete)?; cx.export_function("tableCleanupOldVersions", JsTable::js_cleanup)?; cx.export_function("tableCompactFiles", JsTable::js_compact)?; + cx.export_function("tableListIndices", JsTable::js_list_indices)?; + cx.export_function("tableIndexStats", JsTable::js_index_stats)?; cx.export_function( "tableCreateVectorIndex", index::vector::table_create_vector_index, diff --git a/rust/ffi/node/src/table.rs b/rust/ffi/node/src/table.rs index e6c4c138..f44a50bc 100644 --- a/rust/ffi/node/src/table.rs +++ b/rust/ffi/node/src/table.rs @@ -276,4 +276,91 @@ impl JsTable { }); Ok(promise) } + + pub(crate) fn js_list_indices(mut cx: FunctionContext) -> JsResult { + let js_table = 
cx.this().downcast_or_throw::, _>(&mut cx)?; + let rt = runtime(&mut cx)?; + let (deferred, promise) = cx.promise(); + // let predicate = cx.argument::(0)?.value(&mut cx); + let channel = cx.channel(); + let table = js_table.table.clone(); + + rt.spawn(async move { + let indices = table.load_indices().await; + + deferred.settle_with(&channel, move |mut cx| { + let indices = indices.or_throw(&mut cx)?; + + let output = JsArray::new(&mut cx, indices.len() as u32); + for (i, index) in indices.iter().enumerate() { + let js_index = JsObject::new(&mut cx); + let index_name = cx.string(index.index_name.clone()); + js_index.set(&mut cx, "name", index_name)?; + + let index_uuid = cx.string(index.index_uuid.clone()); + js_index.set(&mut cx, "uuid", index_uuid)?; + + let js_index_columns = JsArray::new(&mut cx, index.columns.len() as u32); + for (j, column) in index.columns.iter().enumerate() { + let js_column = cx.string(column.clone()); + js_index_columns.set(&mut cx, j as u32, js_column)?; + } + js_index.set(&mut cx, "columns", js_index_columns)?; + + output.set(&mut cx, i as u32, js_index)?; + } + + Ok(output) + }) + }); + Ok(promise) + } + + pub(crate) fn js_index_stats(mut cx: FunctionContext) -> JsResult { + let js_table = cx.this().downcast_or_throw::, _>(&mut cx)?; + let rt = runtime(&mut cx)?; + let (deferred, promise) = cx.promise(); + let index_uuid = cx.argument::(0)?.value(&mut cx); + let channel = cx.channel(); + let table = js_table.table.clone(); + + rt.spawn(async move { + let load_stats = futures::try_join!( + table.count_indexed_rows(&index_uuid), + table.count_unindexed_rows(&index_uuid) + ); + + deferred.settle_with(&channel, move |mut cx| { + let (indexed_rows, unindexed_rows) = load_stats.or_throw(&mut cx)?; + + let output = JsObject::new(&mut cx); + + match indexed_rows { + Some(x) => { + let i = cx.number(x as f64); + output.set(&mut cx, "numIndexedRows", i)?; + } + None => { + let null = cx.null(); + output.set(&mut cx, "numIndexedRows", null)?; + } 
+ }; + + match unindexed_rows { + Some(x) => { + let i = cx.number(x as f64); + output.set(&mut cx, "numUnindexedRows", i)?; + } + None => { + let null = cx.null(); + output.set(&mut cx, "numUnindexedRows", null)?; + } + }; + + Ok(output) + }) + }); + + Ok(promise) + } }