diff --git a/node/src/index.ts b/node/src/index.ts index 20cfcfc4..5b312c2a 100644 --- a/node/src/index.ts +++ b/node/src/index.ts @@ -724,9 +724,9 @@ export interface VectorIndex { export interface IndexStats { numIndexedRows: number | null numUnindexedRows: number | null - indexType: string | null - distanceType: string | null - completedAt: string | null + indexType: string + distanceType?: string + numIndices?: number } /** diff --git a/node/src/query.ts b/node/src/query.ts index 4c4e0216..23d8acca 100644 --- a/node/src/query.ts +++ b/node/src/query.ts @@ -112,7 +112,7 @@ export class Query { return this } - /** + /** * Skip searching un-indexed data. This can make search faster, but will miss * any data that is not yet indexed. */ diff --git a/node/src/remote/client.ts b/node/src/remote/client.ts index de70a9e3..01a11f07 100644 --- a/node/src/remote/client.ts +++ b/node/src/remote/client.ts @@ -17,7 +17,7 @@ import axios, { type AxiosResponse, type ResponseType } from 'axios' import { tableFromIPC, type Table as ArrowTable } from 'apache-arrow' import { type RemoteResponse, type RemoteRequest, Method } from '../middleware' -import { MetricType } from '..' +import type { MetricType } from '..' interface HttpLancedbClientMiddleware { onRemoteRequest( diff --git a/node/src/remote/index.ts b/node/src/remote/index.ts index df7caab3..8f01b48c 100644 --- a/node/src/remote/index.ts +++ b/node/src/remote/index.ts @@ -526,8 +526,7 @@ export class RemoteTable implements Table { numIndexedRows: body?.num_indexed_rows, numUnindexedRows: body?.num_unindexed_rows, indexType: body?.index_type, - distanceType: body?.distance_type, - completedAt: body?.completed_at + distanceType: body?.distance_type } } diff --git a/node/src/test/test.ts b/node/src/test/test.ts index 33f676dc..4f2b3218 100644 --- a/node/src/test/test.ts +++ b/node/src/test/test.ts @@ -888,9 +888,12 @@ describe("LanceDB client", function () { expect(indices[0].columns).to.have.lengthOf(1); expect(indices[0].columns[0]).to.equal("vector"); - const stats = await table.indexStats(indices[0].uuid); + const stats = await table.indexStats(indices[0].name); expect(stats.numIndexedRows).to.equal(300); expect(stats.numUnindexedRows).to.equal(0); + expect(stats.indexType).to.equal("IVF_PQ"); + expect(stats.distanceType).to.equal("l2"); + expect(stats.numIndices).to.equal(1); }).timeout(50_000); }); diff --git a/nodejs/__test__/table.test.ts b/nodejs/__test__/table.test.ts index 66dbbc5c..5bf01dad 100644 --- a/nodejs/__test__/table.test.ts +++ b/nodejs/__test__/table.test.ts @@ -479,6 +479,9 @@ describe("When creating an index", () => { expect(stats).toBeDefined(); expect(stats?.numIndexedRows).toEqual(300); expect(stats?.numUnindexedRows).toEqual(0); + expect(stats?.distanceType).toBeUndefined(); + expect(stats?.indexType).toEqual("BTREE"); + expect(stats?.numIndices).toEqual(1); }); test("when getting stats on non-existent index", async () => { diff --git a/nodejs/lancedb/index.ts b/nodejs/lancedb/index.ts index 1280286f..1e66bb8c 100644 --- a/nodejs/lancedb/index.ts +++ b/nodejs/lancedb/index.ts @@ -32,7 +32,6 @@ export { ColumnAlteration, ConnectionOptions, IndexStatistics, - IndexMetadata, IndexConfig, } from "./native.js"; diff --git a/nodejs/src/table.rs b/nodejs/src/table.rs index 7cf91357..f9b14e7f 100644 --- a/nodejs/src/table.rs +++ b/nodejs/src/table.rs @@ -337,7 +337,7 @@ impl Table { #[napi(catch_unwind)] pub async fn index_stats(&self, index_name: String) -> napi::Result> { - let tbl = self.inner_ref()?.as_native().unwrap(); + let tbl = self.inner_ref()?; let stats = tbl.index_stats(&index_name).await.default_error()?; Ok(stats.map(IndexStatistics::from)) } @@ -480,32 +480,22 @@ pub struct IndexStatistics { /// The number of rows not indexed pub num_unindexed_rows: f64, /// The type of the index - pub index_type: Option, - /// The metadata for each index - pub indices: Vec, + pub index_type: String, + /// The type of the distance function used by the index. This is only + /// present for vector indices. Scalar and full text search indices do + /// not have a distance function. + pub distance_type: Option, + /// The number of parts this index is split into. + pub num_indices: Option, } impl From for IndexStatistics { fn from(value: lancedb::index::IndexStatistics) -> Self { Self { num_indexed_rows: value.num_indexed_rows as f64, num_unindexed_rows: value.num_unindexed_rows as f64, - index_type: value.index_type.map(|t| format!("{:?}", t)), - indices: value.indices.into_iter().map(Into::into).collect(), - } - } -} - -#[napi(object)] -pub struct IndexMetadata { - pub metric_type: Option, - pub index_type: Option, -} - -impl From for IndexMetadata { - fn from(value: lancedb::index::IndexMetadata) -> Self { - Self { - metric_type: value.metric_type, - index_type: value.index_type, + index_type: value.index_type.to_string(), + distance_type: value.distance_type.map(|d| d.to_string()), + num_indices: value.num_indices, } } } diff --git a/python/python/lancedb/table.py b/python/python/lancedb/table.py index 11d923f5..59e0d465 100644 --- a/python/python/lancedb/table.py +++ b/python/python/lancedb/table.py @@ -2683,6 +2683,26 @@ class AsyncTable: """ return await self._inner.list_indices() + async def index_stats(self, index_name: str) -> Optional[IndexStatistics]: + """ + Retrieve statistics about an index + + Parameters + ---------- + index_name: str + The name of the index to retrieve statistics for + + Returns + ------- + IndexStatistics or None + The statistics about the index. Returns None if the index does not exist. + """ + stats = await self._inner.index_stats(index_name) + if stats is None: + return None + else: + return IndexStatistics(**stats) + async def uses_v2_manifest_paths(self) -> bool: """ Check if the table is using the new v2 manifest paths. @@ -2713,3 +2733,31 @@ class AsyncTable: to check if the table is already using the new path style. """ await self._inner.migrate_manifest_paths_v2() + + +@dataclass +class IndexStatistics: + """ + Statistics about an index. + + Attributes + ---------- + num_indexed_rows: int + The number of rows that are covered by this index. + num_unindexed_rows: int + The number of rows that are not covered by this index. + index_type: str + The type of index that was created. + distance_type: Optional[str] + The distance type used by the index. + num_indices: Optional[int] + The number of parts the index is split into. + """ + + num_indexed_rows: int + num_unindexed_rows: int + index_type: Literal[ + "IVF_PQ", "IVF_HNSW_PQ", "IVF_HNSW_SQ", "FTS", "BTREE", "BITMAP", "LABEL_LIST" + ] + distance_type: Optional[Literal["l2", "cosine", "dot"]] = None + num_indices: Optional[int] = None diff --git a/python/python/tests/test_index.py b/python/python/tests/test_index.py index 6ce6391b..b0646afe 100644 --- a/python/python/tests/test_index.py +++ b/python/python/tests/test_index.py @@ -66,6 +66,15 @@ async def test_create_bitmap_index(some_table: AsyncTable): # TODO: Fix via https://github.com/lancedb/lance/issues/2039 # indices = await some_table.list_indices() # assert str(indices) == '[Index(Bitmap, columns=["id"])]' + indices = await some_table.list_indices() + assert len(indices) == 1 + index_name = indices[0].name + stats = await some_table.index_stats(index_name) + assert stats.index_type == "BITMAP" + assert stats.distance_type is None + assert stats.num_indexed_rows == await some_table.count_rows() + assert stats.num_unindexed_rows == 0 + assert stats.num_indices == 1 @pytest.mark.asyncio @@ -91,6 +100,14 @@ async def test_create_vector_index(some_table: AsyncTable): assert len(indices) == 1 assert indices[0].index_type == "IvfPq" assert indices[0].columns == ["vector"] + assert indices[0].name == "vector_idx" + + stats = await some_table.index_stats("vector_idx") + assert stats.index_type == "IVF_PQ" + assert stats.distance_type == "l2" + assert stats.num_indexed_rows == await some_table.count_rows() + assert stats.num_unindexed_rows == 0 + assert stats.num_indices == 1 @pytest.mark.asyncio diff --git a/python/src/index.rs b/python/src/index.rs index aa8ffda8..d9eeccde 100644 --- a/python/src/index.rs +++ b/python/src/index.rs @@ -200,6 +200,8 @@ pub struct IndexConfig { /// Currently this is always a list of size 1. In the future there may /// be more columns to represent composite indices. pub columns: Vec, + /// Name of the index. + pub name: String, } #[pymethods] @@ -215,6 +217,7 @@ impl From for IndexConfig { Self { index_type, columns: value.columns, + name: value.name, } } } diff --git a/python/src/table.rs b/python/src/table.rs index ee949a4b..957bf76f 100644 --- a/python/src/table.rs +++ b/python/src/table.rs @@ -8,8 +8,8 @@ use lancedb::table::{ use pyo3::{ exceptions::{PyRuntimeError, PyValueError}, pyclass, pymethods, - types::{PyDict, PyString}, - Bound, PyAny, PyRef, PyResult, Python, + types::{PyDict, PyDictMethods, PyString}, + Bound, PyAny, PyRef, PyResult, Python, ToPyObject, }; use pyo3_asyncio_0_21::tokio::future_into_py; @@ -204,6 +204,33 @@ impl Table { }) } + pub fn index_stats(self_: PyRef<'_, Self>, index_name: String) -> PyResult> { + let inner = self_.inner_ref()?.clone(); + future_into_py(self_.py(), async move { + let stats = inner.index_stats(&index_name).await.infer_error()?; + if let Some(stats) = stats { + Python::with_gil(|py| { + let dict = PyDict::new_bound(py); + dict.set_item("num_indexed_rows", stats.num_indexed_rows)?; + dict.set_item("num_unindexed_rows", stats.num_unindexed_rows)?; + dict.set_item("index_type", stats.index_type.to_string())?; + + if let Some(distance_type) = stats.distance_type { + dict.set_item("distance_type", distance_type.to_string())?; + } + + if let Some(num_indices) = stats.num_indices { + dict.set_item("num_indices", num_indices)?; + } + + Ok(Some(dict.to_object(py))) + }) + } else { + Ok(None) + } + }) + } + pub fn __repr__(&self) -> String { match &self.inner { None => format!("ClosedTable({})", self.name), diff --git a/rust/ffi/node/src/table.rs b/rust/ffi/node/src/table.rs index 3e49d742..bbd59631 100644 --- a/rust/ffi/node/src/table.rs +++ b/rust/ffi/node/src/table.rs @@ -470,49 +470,42 @@ impl JsTable { Ok(promise) } - #[allow(deprecated)] pub(crate) fn js_index_stats(mut cx: FunctionContext) -> JsResult { let js_table = cx.this().downcast_or_throw::, _>(&mut cx)?; let rt = runtime(&mut cx)?; let (deferred, promise) = cx.promise(); - let index_uuid = cx.argument::(0)?.value(&mut cx); + let index_name = cx.argument::(0)?.value(&mut cx); let channel = cx.channel(); let table = js_table.table.clone(); rt.spawn(async move { - let load_stats = futures::try_join!( - table.as_native().unwrap().count_indexed_rows(&index_uuid), - table.as_native().unwrap().count_unindexed_rows(&index_uuid) - ); + let load_stats = table.index_stats(index_name).await; deferred.settle_with(&channel, move |mut cx| { - let (indexed_rows, unindexed_rows) = load_stats.or_throw(&mut cx)?; + let stats = load_stats.or_throw(&mut cx)?; - let output = JsObject::new(&mut cx); + if let Some(stats) = stats { + let output = JsObject::new(&mut cx); + let num_indexed_rows = cx.number(stats.num_indexed_rows as f64); + output.set(&mut cx, "numIndexedRows", num_indexed_rows)?; + let num_unindexed_rows = cx.number(stats.num_unindexed_rows as f64); + output.set(&mut cx, "numUnindexedRows", num_unindexed_rows)?; + if let Some(distance_type) = stats.distance_type { + let distance_type = cx.string(distance_type.to_string()); + output.set(&mut cx, "distanceType", distance_type)?; + } + let index_type = cx.string(stats.index_type.to_string()); + output.set(&mut cx, "indexType", index_type)?; - match indexed_rows { - Some(x) => { - let i = cx.number(x as f64); - output.set(&mut cx, "numIndexedRows", i)?; + if let Some(num_indices) = stats.num_indices { + let num_indices = cx.number(num_indices as f64); + output.set(&mut cx, "numIndices", num_indices)?; } - None => { - let null = cx.null(); - output.set(&mut cx, "numIndexedRows", null)?; - } - }; - match unindexed_rows { - Some(x) => { - let i = cx.number(x as f64); - output.set(&mut cx, "numUnindexedRows", i)?; - } - None => { - let null = cx.null(); - output.set(&mut cx, "numUnindexedRows", null)?; - } - }; - - Ok(output) + Ok(output.as_value(&mut cx)) + } else { + Ok(JsNull::new(&mut cx).as_value(&mut cx)) + } }) }); diff --git a/rust/lancedb/src/index.rs b/rust/lancedb/src/index.rs index 8af6b3de..21301a2b 100644 --- a/rust/lancedb/src/index.rs +++ b/rust/lancedb/src/index.rs @@ -18,7 +18,7 @@ use scalar::FtsIndexBuilder; use serde::Deserialize; use serde_with::skip_serializing_none; -use crate::{table::TableInternal, Result}; +use crate::{table::TableInternal, DistanceType, Result}; use self::{ scalar::{BTreeIndexBuilder, BitmapIndexBuilder, LabelListIndexBuilder}, @@ -102,19 +102,42 @@ impl IndexBuilder { } } -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Deserialize)] pub enum IndexType { // Vector + #[serde(alias = "IVF_PQ")] IvfPq, + #[serde(alias = "IVF_HNSW_PQ")] IvfHnswPq, + #[serde(alias = "IVF_HNSW_SQ")] IvfHnswSq, // Scalar + #[serde(alias = "BTREE")] BTree, + #[serde(alias = "BITMAP")] Bitmap, + #[serde(alias = "LABEL_LIST")] LabelList, + // FTS + FTS, +} + +impl std::fmt::Display for IndexType { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Self::IvfPq => write!(f, "IVF_PQ"), + Self::IvfHnswPq => write!(f, "IVF_HNSW_PQ"), + Self::IvfHnswSq => write!(f, "IVF_HNSW_SQ"), + Self::BTree => write!(f, "BTREE"), + Self::Bitmap => write!(f, "BITMAP"), + Self::LabelList => write!(f, "LABEL_LIST"), + Self::FTS => write!(f, "FTS"), + } + } } /// A description of an index currently configured on a column +#[derive(Debug, PartialEq, Clone)] pub struct IndexConfig { /// The name of the index pub name: String, @@ -129,16 +152,39 @@ pub struct IndexConfig { #[skip_serializing_none] #[derive(Debug, Deserialize)] -pub struct IndexMetadata { - pub metric_type: Option, - pub index_type: Option, +pub(crate) struct IndexMetadata { + pub metric_type: Option, + // Sometimes the index type is provided at this level. + pub index_type: Option, +} + +// This struct is used to deserialize the JSON data returned from the Lance API +// Dataset::index_statistics(). +#[skip_serializing_none] +#[derive(Debug, Deserialize)] +pub(crate) struct IndexStatisticsImpl { + pub num_indexed_rows: usize, + pub num_unindexed_rows: usize, + pub indices: Vec, + // Sometimes, the index type is provided at this level. + pub index_type: Option, + pub num_indices: Option, } #[skip_serializing_none] -#[derive(Debug, Deserialize)] +#[derive(Debug, Deserialize, PartialEq)] pub struct IndexStatistics { + /// The number of rows in the table that are covered by this index. pub num_indexed_rows: usize, + /// The number of rows in the table that are not covered by this index. + /// These are rows that haven't yet been added to the index. pub num_unindexed_rows: usize, - pub index_type: Option, - pub indices: Vec, + /// The type of the index. + pub index_type: IndexType, + /// The distance type used by the index. + /// + /// This is only present for vector indices. + pub distance_type: Option, + /// The number of parts this index is split into. + pub num_indices: Option, } diff --git a/rust/lancedb/src/remote/table.rs b/rust/lancedb/src/remote/table.rs index 7adea431..bdd59045 100644 --- a/rust/lancedb/src/remote/table.rs +++ b/rust/lancedb/src/remote/table.rs @@ -1,6 +1,7 @@ use std::sync::{Arc, Mutex}; use crate::index::Index; +use crate::index::IndexStatistics; use crate::query::Select; use crate::table::AddDataMode; use crate::utils::{supported_btree_data_type, supported_vector_data_type}; @@ -523,6 +524,26 @@ impl TableInternal for RemoteTable { message: "list_indices is not yet supported.".into(), }) } + async fn index_stats(&self, index_name: &str) -> Result> { + let request = self + .client + .post(&format!("/table/{}/index/{}/stats/", self.name, index_name)); + let response = self.client.send(request).await?; + + if response.status() == StatusCode::NOT_FOUND { + return Ok(None); + } + + let response = self.check_table_response(response).await?; + + let body = response.text().await?; + + let stats = serde_json::from_str(&body).map_err(|e| Error::Http { + message: format!("Failed to parse index statistics: {}", e), + })?; + + Ok(Some(stats)) + } async fn table_definition(&self) -> Result { Err(Error::NotSupported { message: "table_definition is not supported on LanceDB cloud.".into(), @@ -582,7 +603,7 @@ mod tests { use reqwest::Body; use crate::{ - index::{vector::IvfPqIndexBuilder, Index}, + index::{vector::IvfPqIndexBuilder, Index, IndexStatistics, IndexType}, query::{ExecutableQuery, QueryBase}, DistanceType, Error, Table, }; @@ -1152,4 +1173,49 @@ mod tests { table.create_index(&["a"], index).execute().await.unwrap(); } } + + #[tokio::test] + async fn test_index_stats() { + let table = Table::new_with_handler("my_table", |request| { + assert_eq!(request.method(), "POST"); + assert_eq!( + request.url().path(), + "/table/my_table/index/my_index/stats/" + ); + + let response_body = serde_json::json!({ + "num_indexed_rows": 100000, + "num_unindexed_rows": 0, + "index_type": "IVF_PQ", + "distance_type": "l2" + }); + let response_body = serde_json::to_string(&response_body).unwrap(); + + http::Response::builder() + .status(200) + .body(response_body) + .unwrap() + }); + let indices = table.index_stats("my_index").await.unwrap().unwrap(); + let expected = IndexStatistics { + num_indexed_rows: 100000, + num_unindexed_rows: 0, + index_type: IndexType::IvfPq, + distance_type: Some(DistanceType::L2), + num_indices: None, + }; + assert_eq!(indices, expected); + + let table = Table::new_with_handler("my_table", |request| { + assert_eq!(request.method(), "POST"); + assert_eq!( + request.url().path(), + "/table/my_table/index/my_index/stats/" + ); + + http::Response::builder().status(404).body("").unwrap() + }); + let indices = table.index_stats("my_index").await.unwrap(); + assert!(indices.is_none()); + } } diff --git a/rust/lancedb/src/table.rs b/rust/lancedb/src/table.rs index e4ec93d4..cd52c601 100644 --- a/rust/lancedb/src/table.rs +++ b/rust/lancedb/src/table.rs @@ -47,7 +47,6 @@ use lance_index::IndexType; use lance_table::io::commit::ManifestNamingScheme; use log::info; use serde::{Deserialize, Serialize}; -use snafu::whatever; use crate::arrow::IntoArrow; use crate::connection::NoData; @@ -58,12 +57,12 @@ use crate::index::vector::{ suggested_num_partitions_for_hnsw, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder, IvfPqIndexBuilder, VectorIndex, }; -use crate::index::IndexConfig; use crate::index::IndexStatistics; use crate::index::{ vector::{suggested_num_partitions, suggested_num_sub_vectors}, Index, IndexBuilder, }; +use crate::index::{IndexConfig, IndexStatisticsImpl}; use crate::query::{ IntoQueryVector, Query, QueryExecutionOptions, Select, VectorQuery, DEFAULT_TOP_K, }; @@ -405,6 +404,7 @@ pub(crate) trait TableInternal: std::fmt::Display + std::fmt::Debug + Send + Syn async fn update(&self, update: UpdateBuilder) -> Result; async fn create_index(&self, index: IndexBuilder) -> Result<()>; async fn list_indices(&self) -> Result>; + async fn index_stats(&self, index_name: &str) -> Result>; async fn merge_insert( &self, params: MergeInsertBuilder, @@ -962,6 +962,15 @@ impl Table { pub fn dataset_uri(&self) -> &str { self.inner.dataset_uri() } + + /// Get statistics about an index. + /// Returns None if the index does not exist. + pub async fn index_stats( + &self, + index_name: impl AsRef, + ) -> Result> { + self.inner.index_stats(index_name.as_ref()).await + } } impl From for Table { @@ -1250,91 +1259,6 @@ impl NativeTable { .await) } - #[deprecated(since = "0.5.2", note = "Please use `index_stats` instead")] - pub async fn count_indexed_rows(&self, index_uuid: &str) -> Result> { - #[allow(deprecated)] - match self.load_index_stats(index_uuid).await? { - Some(stats) => Ok(Some(stats.num_indexed_rows)), - None => Ok(None), - } - } - - #[deprecated(since = "0.5.2", note = "Please use `index_stats` instead")] - pub async fn count_unindexed_rows(&self, index_uuid: &str) -> Result> { - #[allow(deprecated)] - match self.load_index_stats(index_uuid).await? { - Some(stats) => Ok(Some(stats.num_unindexed_rows)), - None => Ok(None), - } - } - - #[deprecated(since = "0.5.2", note = "Please use `index_stats` instead")] - pub async fn get_index_type(&self, index_uuid: &str) -> Result> { - #[allow(deprecated)] - match self.load_index_stats(index_uuid).await? { - Some(stats) => Ok(Some(stats.index_type.unwrap_or_default())), - None => Ok(None), - } - } - - #[deprecated(since = "0.5.2", note = "Please use `index_stats` instead")] - pub async fn get_distance_type(&self, index_uuid: &str) -> Result> { - #[allow(deprecated)] - match self.load_index_stats(index_uuid).await? { - Some(stats) => Ok(Some( - stats - .indices - .iter() - .filter_map(|i| i.metric_type.clone()) - .collect(), - )), - None => Ok(None), - } - } - - #[deprecated(since = "0.5.2", note = "Please use `index_stats` instead")] - pub async fn load_index_stats(&self, index_uuid: &str) -> Result> { - let index = self - .load_indices() - .await? - .into_iter() - .find(|i| i.index_uuid == index_uuid); - if index.is_none() { - return Ok(None); - } - let dataset = self.dataset.get().await?; - let index_stats = dataset.index_statistics(&index.unwrap().index_name).await?; - let index_stats: IndexStatistics = whatever!( - serde_json::from_str(&index_stats), - "error deserializing index statistics {index_stats}", - ); - - Ok(Some(index_stats)) - } - - /// Get statistics about an index. - /// Returns an error if the index does not exist. - pub async fn index_stats( - &self, - index_name: impl AsRef, - ) -> Result> { - let stats = match self - .dataset - .get() - .await? - .index_statistics(index_name.as_ref()) - .await - { - Ok(stats) => stats, - Err(lance::error::Error::IndexNotFound { .. }) => return Ok(None), - Err(e) => return Err(Error::from(e)), - }; - - serde_json::from_str(&stats).map_err(|e| Error::InvalidInput { - message: format!("error deserializing index statistics: {}", e), - }) - } - pub async fn load_indices(&self) -> Result> { let dataset = self.dataset.get().await?; let (indices, mf) = futures::try_join!(dataset.load_indices(), dataset.latest_manifest())?; @@ -2126,6 +2050,44 @@ impl TableInternal for NativeTable { fn dataset_uri(&self) -> &str { self.uri.as_str() } + + async fn index_stats(&self, index_name: &str) -> Result> { + let stats = match self + .dataset + .get() + .await? + .index_statistics(index_name.as_ref()) + .await + { + Ok(stats) => stats, + Err(lance::error::Error::IndexNotFound { .. }) => return Ok(None), + Err(e) => return Err(Error::from(e)), + }; + + let mut stats: IndexStatisticsImpl = + serde_json::from_str(&stats).map_err(|e| Error::InvalidInput { + message: format!("error deserializing index statistics: {}", e), + })?; + + let first_index = stats.indices.pop().ok_or_else(|| Error::InvalidInput { + message: "index statistics is empty".to_string(), + })?; + // Index type should be present at one of the levels. + let index_type = + stats + .index_type + .or(first_index.index_type) + .ok_or_else(|| Error::InvalidInput { + message: "index statistics was missing index type".to_string(), + })?; + Ok(Some(IndexStatistics { + num_indexed_rows: stats.num_indexed_rows, + num_unindexed_rows: stats.num_unindexed_rows, + index_type, + distance_type: first_index.metric_type, + num_indices: stats.num_indices, + })) + } } #[cfg(test)] @@ -2763,24 +2725,7 @@ mod tests { let table = conn.create_table("test", batches).execute().await.unwrap(); - assert_eq!( - table - .as_native() - .unwrap() - .count_indexed_rows("my_index") - .await - .unwrap(), - None - ); - assert_eq!( - table - .as_native() - .unwrap() - .count_unindexed_rows("my_index") - .await - .unwrap(), - None - ); + assert_eq!(table.index_stats("my_index").await.unwrap(), None); table .create_index(&["embeddings"], Index::Auto) @@ -2797,43 +2742,12 @@ mod tests { assert_eq!(table.name(), "test"); let indices = table.as_native().unwrap().load_indices().await.unwrap(); - let index_uuid = &indices[0].index_uuid; - assert_eq!( - table - .as_native() - .unwrap() - .count_indexed_rows(index_uuid) - .await - .unwrap(), - Some(512) - ); - assert_eq!( - table - .as_native() - .unwrap() - .count_unindexed_rows(index_uuid) - .await - .unwrap(), - Some(0) - ); - assert_eq!( - table - .as_native() - .unwrap() - .get_index_type(index_uuid) - .await - .unwrap(), - Some("IVF_PQ".to_string()) - ); - assert_eq!( - table - .as_native() - .unwrap() - .get_distance_type(index_uuid) - .await - .unwrap(), - Some(crate::DistanceType::L2.to_string()) - ); + let index_name = &indices[0].index_name; + let stats = table.index_stats(index_name).await.unwrap().unwrap(); + assert_eq!(stats.num_indexed_rows, 512); + assert_eq!(stats.num_unindexed_rows, 0); + assert_eq!(stats.index_type, crate::index::IndexType::IvfPq); + assert_eq!(stats.distance_type, Some(crate::DistanceType::L2)); } #[tokio::test] @@ -2876,24 +2790,8 @@ mod tests { let table = conn.create_table("test", batches).execute().await.unwrap(); - assert_eq!( - table - .as_native() - .unwrap() - .count_indexed_rows("my_index") - .await - .unwrap(), - None - ); - assert_eq!( - table - .as_native() - .unwrap() - .count_unindexed_rows("my_index") - .await - .unwrap(), - None - ); + let stats = table.index_stats("my_index").await.unwrap(); + assert!(stats.is_none()); let index = IvfHnswSqIndexBuilder::default(); table @@ -2911,25 +2809,10 @@ mod tests { assert_eq!(table.name(), "test"); let indices = table.as_native().unwrap().load_indices().await.unwrap(); - let index_uuid = &indices[0].index_uuid; - assert_eq!( - table - .as_native() - .unwrap() - .count_indexed_rows(index_uuid) - .await - .unwrap(), - Some(512) - ); - assert_eq!( - table - .as_native() - .unwrap() - .count_unindexed_rows(index_uuid) - .await - .unwrap(), - Some(0) - ); + let index_name = &indices[0].index_name; + let stats = table.index_stats(index_name).await.unwrap().unwrap(); + assert_eq!(stats.num_indexed_rows, 512); + assert_eq!(stats.num_unindexed_rows, 0); } #[tokio::test] @@ -2971,25 +2854,8 @@ mod tests { ); let table = conn.create_table("test", batches).execute().await.unwrap(); - - assert_eq!( - table - .as_native() - .unwrap() - .count_indexed_rows("my_index") - .await - .unwrap(), - None - ); - assert_eq!( - table - .as_native() - .unwrap() - .count_unindexed_rows("my_index") - .await - .unwrap(), - None - ); + let stats = table.index_stats("my_index").await.unwrap(); + assert!(stats.is_none()); let index = IvfHnswPqIndexBuilder::default(); table @@ -3006,26 +2872,11 @@ mod tests { assert_eq!(table.count_rows(None).await.unwrap(), 512); assert_eq!(table.name(), "test"); - let indices = table.as_native().unwrap().load_indices().await.unwrap(); - let index_uuid = &indices[0].index_uuid; - assert_eq!( - table - .as_native() - .unwrap() - .count_indexed_rows(index_uuid) - .await - .unwrap(), - Some(512) - ); - assert_eq!( - table - .as_native() - .unwrap() - .count_unindexed_rows(index_uuid) - .await - .unwrap(), - Some(0) - ); + let indices: Vec = table.as_native().unwrap().load_indices().await.unwrap(); + let index_name = &indices[0].index_name; + let stats = table.index_stats(index_name).await.unwrap().unwrap(); + assert_eq!(stats.num_indexed_rows, 512); + assert_eq!(stats.num_unindexed_rows, 0); } fn create_fixed_size_list(values: T, list_size: i32) -> Result { @@ -3101,25 +2952,10 @@ mod tests { assert_eq!(index.columns, vec!["i".to_string()]); let indices = table.as_native().unwrap().load_indices().await.unwrap(); - let index_uuid = &indices[0].index_uuid; - assert_eq!( - table - .as_native() - .unwrap() - .count_indexed_rows(index_uuid) - .await - .unwrap(), - Some(1) - ); - assert_eq!( - table - .as_native() - .unwrap() - .count_unindexed_rows(index_uuid) - .await - .unwrap(), - Some(0) - ); + let index_name = &indices[0].index_name; + let stats = table.index_stats(index_name).await.unwrap().unwrap(); + assert_eq!(stats.num_indexed_rows, 1); + assert_eq!(stats.num_unindexed_rows, 0); } #[tokio::test]