diff --git a/nodejs/__test__/embedding.test.ts b/nodejs/__test__/embedding.test.ts index bc03bc1c..2200aed9 100644 --- a/nodejs/__test__/embedding.test.ts +++ b/nodejs/__test__/embedding.test.ts @@ -230,7 +230,7 @@ describe("embedding functions", () => { }, ); - test.only.each([new Float16(), new Float32(), new Float64()])( + test.each([new Float16(), new Float32(), new Float64()])( "should be able to provide auto embeddings with multiple float datatypes", async (floatType) => { @register("test1") diff --git a/nodejs/__test__/table.test.ts b/nodejs/__test__/table.test.ts index a205647f..7ca86de0 100644 --- a/nodejs/__test__/table.test.ts +++ b/nodejs/__test__/table.test.ts @@ -368,6 +368,20 @@ describe("When creating an index", () => { } }); + test("should be able to get index stats", async () => { + await tbl.createIndex("id"); + + const stats = await tbl.indexStats("id_idx"); + expect(stats).toBeDefined(); + expect(stats?.numIndexedRows).toEqual(300); + expect(stats?.numUnindexedRows).toEqual(0); + }); + + test("when getting stats on non-existent index", async () => { + const stats = await tbl.indexStats("some non-existent index"); + expect(stats).toBeUndefined(); + }); + // TODO: Move this test to the query API test (making sure we can reject queries // when the dimension is incorrect) test("two columns with different dimensions", async () => { diff --git a/nodejs/lancedb/index.ts b/nodejs/lancedb/index.ts index 3c037b72..d8ba4373 100644 --- a/nodejs/lancedb/index.ts +++ b/nodejs/lancedb/index.ts @@ -31,6 +31,9 @@ export { AddColumnsSql, ColumnAlteration, ConnectionOptions, + IndexStatistics, + IndexMetadata, + IndexConfig, } from "./native.js"; export { @@ -56,7 +59,7 @@ export { export { Index, IndexOptions, IvfPqOptions } from "./indices"; -export { Table, AddDataOptions, IndexConfig, UpdateOptions } from "./table"; +export { Table, AddDataOptions, UpdateOptions } from "./table"; export * as embedding from "./embedding"; diff --git 
a/nodejs/lancedb/remote/table.ts b/nodejs/lancedb/remote/table.ts index 224bffcc..63def38f 100644 --- a/nodejs/lancedb/remote/table.ts +++ b/nodejs/lancedb/remote/table.ts @@ -16,6 +16,7 @@ import { Table as ArrowTable } from "apache-arrow"; import { Data, IntoVector } from "../arrow"; +import { IndexStatistics } from ".."; import { CreateTableOptions } from "../connection"; import { IndexOptions } from "../indices"; import { MergeInsertBuilder } from "../merge"; @@ -165,4 +166,7 @@ export class RemoteTable extends Table { mergeInsert(_on: string | string[]): MergeInsertBuilder { throw new Error("mergeInsert() is not yet supported on the LanceDB cloud"); } + async indexStats(_name: string): Promise { + throw new Error("indexStats() is not yet supported on the LanceDB cloud"); + } } diff --git a/nodejs/lancedb/table.ts b/nodejs/lancedb/table.ts index b85e719e..1ad5249a 100644 --- a/nodejs/lancedb/table.ts +++ b/nodejs/lancedb/table.ts @@ -33,11 +33,11 @@ import { AddColumnsSql, ColumnAlteration, IndexConfig, + IndexStatistics, OptimizeStats, Table as _NativeTable, } from "./native"; import { Query, VectorQuery } from "./query"; -export { IndexConfig } from "./native"; /** * Options for adding data to a table. @@ -160,6 +160,9 @@ export abstract class Table { * Indices on vector columns will speed up vector searches. * Indices on scalar columns will speed up filtering (in both * vector and non-vector searches) + * + * @note We currently don't support custom named indexes. + * The index name will always be `${column}_idx` * @example * // If the column has a vector (fixed size list) data type then * // an IvfPq vector index will be created. @@ -370,6 +373,13 @@ export abstract class Table { abstract mergeInsert(on: string | string[]): MergeInsertBuilder; + /** List all the stats of a specified index + * + * @param {string} name The name of the index. + * @returns {IndexStatistics | undefined} The stats of the index. 
If the index does not exist, it will return undefined + */ + abstract indexStats(name: string): Promise; + static async parseTableData( // biome-ignore lint/suspicious/noExplicitAny: data: Record[] | ArrowTable, @@ -569,6 +579,13 @@ export class LocalTable extends Table { return await this.query().toArrow(); } + async indexStats(name: string): Promise { + const stats = await this.inner.indexStats(name); + if (stats === null) { + return undefined; + } + return stats; + } mergeInsert(on: string | string[]): MergeInsertBuilder { on = Array.isArray(on) ? on : [on]; return new MergeInsertBuilder(this.inner.mergeInsert(on)); diff --git a/nodejs/src/table.rs b/nodejs/src/table.rs index 462b2fa3..664a46dc 100644 --- a/nodejs/src/table.rs +++ b/nodejs/src/table.rs @@ -330,6 +330,13 @@ impl Table { .collect::>()) } + #[napi] + pub async fn index_stats(&self, index_name: String) -> napi::Result> { + let tbl = self.inner_ref()?.as_native().unwrap(); + let stats = tbl.index_stats(&index_name).await.default_error()?; + Ok(stats.map(IndexStatistics::from)) + } + #[napi] pub fn merge_insert(&self, on: Vec) -> napi::Result { let on: Vec<_> = on.iter().map(String::as_str).collect(); @@ -346,7 +353,7 @@ pub struct IndexConfig { pub index_type: String, /// The columns in the index /// - /// Currently this is always an array of size 1. In the future there may + /// Currently this is always an array of size 1. In the future there may /// be more columns to represent composite indices. pub columns: Vec, } @@ -440,3 +447,40 @@ pub struct AddColumnsSql { /// The expression can reference other columns in the table. 
pub value_sql: String, } + +#[napi(object)] +pub struct IndexStatistics { + /// The number of rows indexed by the index + pub num_indexed_rows: f64, + /// The number of rows not indexed + pub num_unindexed_rows: f64, + /// The type of the index + pub index_type: Option, + /// The metadata for each index + pub indices: Vec, +} +impl From for IndexStatistics { + fn from(value: lancedb::index::IndexStatistics) -> Self { + Self { + num_indexed_rows: value.num_indexed_rows as f64, + num_unindexed_rows: value.num_unindexed_rows as f64, + index_type: value.index_type.map(|t| format!("{:?}", t)), + indices: value.indices.into_iter().map(Into::into).collect(), + } + } +} + +#[napi(object)] +pub struct IndexMetadata { + pub metric_type: Option, + pub index_type: Option, +} + +impl From for IndexMetadata { + fn from(value: lancedb::index::IndexMetadata) -> Self { + Self { + metric_type: value.metric_type, + index_type: value.index_type, + } + } +} diff --git a/rust/ffi/node/src/table.rs b/rust/ffi/node/src/table.rs index 1555526b..b47af535 100644 --- a/rust/ffi/node/src/table.rs +++ b/rust/ffi/node/src/table.rs @@ -463,6 +463,7 @@ impl JsTable { Ok(promise) } + #[allow(deprecated)] pub(crate) fn js_index_stats(mut cx: FunctionContext) -> JsResult { let js_table = cx.this().downcast_or_throw::, _>(&mut cx)?; let rt = runtime(&mut cx)?; diff --git a/rust/lancedb/src/table.rs b/rust/lancedb/src/table.rs index 34a5d1e7..c2a89bbb 100644 --- a/rust/lancedb/src/table.rs +++ b/rust/lancedb/src/table.rs @@ -1206,28 +1206,36 @@ impl NativeTable { .await) } + #[deprecated(since = "0.5.2", note = "Please use `index_stats` instead")] pub async fn count_indexed_rows(&self, index_uuid: &str) -> Result> { + #[allow(deprecated)] match self.load_index_stats(index_uuid).await? 
{ Some(stats) => Ok(Some(stats.num_indexed_rows)), None => Ok(None), } } + #[deprecated(since = "0.5.2", note = "Please use `index_stats` instead")] pub async fn count_unindexed_rows(&self, index_uuid: &str) -> Result> { + #[allow(deprecated)] match self.load_index_stats(index_uuid).await? { Some(stats) => Ok(Some(stats.num_unindexed_rows)), None => Ok(None), } } + #[deprecated(since = "0.5.2", note = "Please use `index_stats` instead")] pub async fn get_index_type(&self, index_uuid: &str) -> Result> { + #[allow(deprecated)] match self.load_index_stats(index_uuid).await? { Some(stats) => Ok(Some(stats.index_type.unwrap_or_default())), None => Ok(None), } } + #[deprecated(since = "0.5.2", note = "Please use `index_stats` instead")] pub async fn get_distance_type(&self, index_uuid: &str) -> Result> { + #[allow(deprecated)] match self.load_index_stats(index_uuid).await? { Some(stats) => Ok(Some( stats @@ -1240,16 +1248,8 @@ impl NativeTable { } } - pub async fn load_indices(&self) -> Result> { - let dataset = self.dataset.get().await?; - let (indices, mf) = futures::try_join!(dataset.load_indices(), dataset.latest_manifest())?; - Ok(indices - .iter() - .map(|i| VectorIndex::new_from_format(&mf, i)) - .collect()) - } - - async fn load_index_stats(&self, index_uuid: &str) -> Result> { + #[deprecated(since = "0.5.2", note = "Please use `index_stats` instead")] + pub async fn load_index_stats(&self, index_uuid: &str) -> Result> { let index = self .load_indices() .await? @@ -1268,6 +1268,35 @@ impl NativeTable { Ok(Some(index_stats)) } + /// Get statistics about an index. + /// Returns `None` if the index statistics cannot be loaded (e.g. the index does not exist). + pub async fn index_stats>( + &self, + index_name: S, + ) -> Result> { + self.dataset + .get() + .await? 
+ .index_statistics(index_name.as_ref()) + .await + .ok() + .map(|stats| { + serde_json::from_str(&stats).map_err(|e| Error::InvalidInput { + message: format!("error deserializing index statistics: {}", e), + }) + }) + .transpose() + } + + pub async fn load_indices(&self) -> Result> { + let dataset = self.dataset.get().await?; + let (indices, mf) = futures::try_join!(dataset.load_indices(), dataset.latest_manifest())?; + Ok(indices + .iter() + .map(|i| VectorIndex::new_from_format(&mf, i)) + .collect()) + } + async fn create_ivf_pq_index( &self, index: IvfPqIndexBuilder, @@ -1860,14 +1889,20 @@ impl TableInternal for NativeTable { } columns.push(field.name.clone()); } + let index_type = if is_vector { + crate::index::IndexType::IvfPq + } else { + crate::index::IndexType::BTree + }; + let name = idx.name.clone(); - let index_type = if is_vector { crate::index::IndexType::IvfPq } else { crate::index::IndexType::BTree }; Ok(IndexConfig { index_type, columns, name }) }).collect::>>() } } #[cfg(test)] +#[allow(deprecated)] mod tests { use std::iter; use std::sync::atomic::{AtomicBool, Ordering};