feat: remote index stats (#1702)

BREAKING CHANGE: the return value of the `index_stats` method has changed,
and all `index_stats` APIs now take an index name instead of a UUID. Also,
several deprecated index statistics methods were removed.

* Removes deprecated methods for individual index statistics
* Aligns the public `IndexStatistics` struct with the API response from LanceDB
Cloud.
* Implements `index_stats` for the remote Rust SDK and the Python async API.
This commit is contained in:
Will Jones
2024-09-27 12:10:00 -07:00
committed by GitHub
parent c1d9d6f70b
commit f958f4d2e8
16 changed files with 337 additions and 307 deletions

View File

@@ -724,9 +724,9 @@ export interface VectorIndex {
export interface IndexStats {
numIndexedRows: number | null
numUnindexedRows: number | null
indexType: string | null
distanceType: string | null
completedAt: string | null
indexType: string
distanceType?: string
numIndices?: number
}
/**

View File

@@ -112,7 +112,7 @@ export class Query<T = number[]> {
return this
}
/**
/**
* Skip searching un-indexed data. This can make search faster, but will miss
* any data that is not yet indexed.
*/

View File

@@ -17,7 +17,7 @@ import axios, { type AxiosResponse, type ResponseType } from 'axios'
import { tableFromIPC, type Table as ArrowTable } from 'apache-arrow'
import { type RemoteResponse, type RemoteRequest, Method } from '../middleware'
import { MetricType } from '..'
import type { MetricType } from '..'
interface HttpLancedbClientMiddleware {
onRemoteRequest(

View File

@@ -526,8 +526,7 @@ export class RemoteTable<T = number[]> implements Table<T> {
numIndexedRows: body?.num_indexed_rows,
numUnindexedRows: body?.num_unindexed_rows,
indexType: body?.index_type,
distanceType: body?.distance_type,
completedAt: body?.completed_at
distanceType: body?.distance_type
}
}

View File

@@ -888,9 +888,12 @@ describe("LanceDB client", function () {
expect(indices[0].columns).to.have.lengthOf(1);
expect(indices[0].columns[0]).to.equal("vector");
const stats = await table.indexStats(indices[0].uuid);
const stats = await table.indexStats(indices[0].name);
expect(stats.numIndexedRows).to.equal(300);
expect(stats.numUnindexedRows).to.equal(0);
expect(stats.indexType).to.equal("IVF_PQ");
expect(stats.distanceType).to.equal("l2");
expect(stats.numIndices).to.equal(1);
}).timeout(50_000);
});

View File

@@ -479,6 +479,9 @@ describe("When creating an index", () => {
expect(stats).toBeDefined();
expect(stats?.numIndexedRows).toEqual(300);
expect(stats?.numUnindexedRows).toEqual(0);
expect(stats?.distanceType).toBeUndefined();
expect(stats?.indexType).toEqual("BTREE");
expect(stats?.numIndices).toEqual(1);
});
test("when getting stats on non-existent index", async () => {

View File

@@ -32,7 +32,6 @@ export {
ColumnAlteration,
ConnectionOptions,
IndexStatistics,
IndexMetadata,
IndexConfig,
} from "./native.js";

View File

@@ -337,7 +337,7 @@ impl Table {
#[napi(catch_unwind)]
pub async fn index_stats(&self, index_name: String) -> napi::Result<Option<IndexStatistics>> {
let tbl = self.inner_ref()?.as_native().unwrap();
let tbl = self.inner_ref()?;
let stats = tbl.index_stats(&index_name).await.default_error()?;
Ok(stats.map(IndexStatistics::from))
}
@@ -480,32 +480,22 @@ pub struct IndexStatistics {
/// The number of rows not indexed
pub num_unindexed_rows: f64,
/// The type of the index
pub index_type: Option<String>,
/// The metadata for each index
pub indices: Vec<IndexMetadata>,
pub index_type: String,
/// The type of the distance function used by the index. This is only
/// present for vector indices. Scalar and full text search indices do
/// not have a distance function.
pub distance_type: Option<String>,
/// The number of parts this index is split into.
pub num_indices: Option<u32>,
}
impl From<lancedb::index::IndexStatistics> for IndexStatistics {
fn from(value: lancedb::index::IndexStatistics) -> Self {
Self {
num_indexed_rows: value.num_indexed_rows as f64,
num_unindexed_rows: value.num_unindexed_rows as f64,
index_type: value.index_type.map(|t| format!("{:?}", t)),
indices: value.indices.into_iter().map(Into::into).collect(),
}
}
}
#[napi(object)]
pub struct IndexMetadata {
pub metric_type: Option<String>,
pub index_type: Option<String>,
}
impl From<lancedb::index::IndexMetadata> for IndexMetadata {
fn from(value: lancedb::index::IndexMetadata) -> Self {
Self {
metric_type: value.metric_type,
index_type: value.index_type,
index_type: value.index_type.to_string(),
distance_type: value.distance_type.map(|d| d.to_string()),
num_indices: value.num_indices,
}
}
}

View File

@@ -2683,6 +2683,26 @@ class AsyncTable:
"""
return await self._inner.list_indices()
async def index_stats(self, index_name: str) -> Optional[IndexStatistics]:
    """
    Look up the statistics of a single index by its name.

    Parameters
    ----------
    index_name: str
        Name of the index whose statistics are requested.

    Returns
    -------
    IndexStatistics or None
        Statistics for the index, or None when the inner table returns
        nothing for that name (the index does not exist).
    """
    raw_stats = await self._inner.index_stats(index_name)
    # The inner call yields either None or a mapping of field names to
    # values; the mapping is unpacked into the typed dataclass.
    return None if raw_stats is None else IndexStatistics(**raw_stats)
async def uses_v2_manifest_paths(self) -> bool:
"""
Check if the table is using the new v2 manifest paths.
@@ -2713,3 +2733,31 @@ class AsyncTable:
to check if the table is already using the new path style.
"""
await self._inner.migrate_manifest_paths_v2()
@dataclass
class IndexStatistics:
    """
    Summary of the state of one index on a table.

    Attributes
    ----------
    num_indexed_rows: int
        How many rows the index currently covers.
    num_unindexed_rows: int
        How many rows the index does not cover.
    index_type: str
        Kind of index that was built (for example "IVF_PQ" or "BTREE").
    distance_type: Optional[str]
        Distance type the index uses; None when no distance type is
        reported.
    num_indices: Optional[int]
        Number of parts the index is split into; None when not reported.
    """

    num_indexed_rows: int
    num_unindexed_rows: int
    index_type: Literal[
        "IVF_PQ",
        "IVF_HNSW_PQ",
        "IVF_HNSW_SQ",
        "FTS",
        "BTREE",
        "BITMAP",
        "LABEL_LIST",
    ]
    # The two optional fields default to None so callers may omit them.
    distance_type: Optional[Literal["l2", "cosine", "dot"]] = None
    num_indices: Optional[int] = None

View File

@@ -66,6 +66,15 @@ async def test_create_bitmap_index(some_table: AsyncTable):
# TODO: Fix via https://github.com/lancedb/lance/issues/2039
# indices = await some_table.list_indices()
# assert str(indices) == '[Index(Bitmap, columns=["id"])]'
indices = await some_table.list_indices()
assert len(indices) == 1
index_name = indices[0].name
stats = await some_table.index_stats(index_name)
assert stats.index_type == "BITMAP"
assert stats.distance_type is None
assert stats.num_indexed_rows == await some_table.count_rows()
assert stats.num_unindexed_rows == 0
assert stats.num_indices == 1
@pytest.mark.asyncio
@@ -91,6 +100,14 @@ async def test_create_vector_index(some_table: AsyncTable):
assert len(indices) == 1
assert indices[0].index_type == "IvfPq"
assert indices[0].columns == ["vector"]
assert indices[0].name == "vector_idx"
stats = await some_table.index_stats("vector_idx")
assert stats.index_type == "IVF_PQ"
assert stats.distance_type == "l2"
assert stats.num_indexed_rows == await some_table.count_rows()
assert stats.num_unindexed_rows == 0
assert stats.num_indices == 1
@pytest.mark.asyncio

View File

@@ -200,6 +200,8 @@ pub struct IndexConfig {
/// Currently this is always a list of size 1. In the future there may
/// be more columns to represent composite indices.
pub columns: Vec<String>,
/// Name of the index.
pub name: String,
}
#[pymethods]
@@ -215,6 +217,7 @@ impl From<lancedb::index::IndexConfig> for IndexConfig {
Self {
index_type,
columns: value.columns,
name: value.name,
}
}
}

View File

@@ -8,8 +8,8 @@ use lancedb::table::{
use pyo3::{
exceptions::{PyRuntimeError, PyValueError},
pyclass, pymethods,
types::{PyDict, PyString},
Bound, PyAny, PyRef, PyResult, Python,
types::{PyDict, PyDictMethods, PyString},
Bound, PyAny, PyRef, PyResult, Python, ToPyObject,
};
use pyo3_asyncio_0_21::tokio::future_into_py;
@@ -204,6 +204,33 @@ impl Table {
})
}
// Fetch statistics for the index called `index_name`.
//
// Returns a Python awaitable. It resolves to `None` when the inner table has
// no statistics for that name, and otherwise to a dict holding
// `num_indexed_rows`, `num_unindexed_rows` and `index_type`, plus
// `distance_type` and `num_indices` when those values are present.
pub fn index_stats(self_: PyRef<'_, Self>, index_name: String) -> PyResult<Bound<'_, PyAny>> {
    let table = self_.inner_ref()?.clone();
    future_into_py(self_.py(), async move {
        match table.index_stats(&index_name).await.infer_error()? {
            Some(found) => Python::with_gil(|py| {
                let fields = PyDict::new_bound(py);
                fields.set_item("num_indexed_rows", found.num_indexed_rows)?;
                fields.set_item("num_unindexed_rows", found.num_unindexed_rows)?;
                fields.set_item("index_type", found.index_type.to_string())?;
                // Absent optional values are left out of the dict entirely
                // instead of being stored as `None`.
                if let Some(distance) = found.distance_type {
                    fields.set_item("distance_type", distance.to_string())?;
                }
                if let Some(parts) = found.num_indices {
                    fields.set_item("num_indices", parts)?;
                }
                Ok(Some(fields.to_object(py)))
            }),
            None => Ok(None),
        }
    })
}
pub fn __repr__(&self) -> String {
match &self.inner {
None => format!("ClosedTable({})", self.name),

View File

@@ -470,49 +470,42 @@ impl JsTable {
Ok(promise)
}
#[allow(deprecated)]
pub(crate) fn js_index_stats(mut cx: FunctionContext) -> JsResult<JsPromise> {
let js_table = cx.this().downcast_or_throw::<JsBox<Self>, _>(&mut cx)?;
let rt = runtime(&mut cx)?;
let (deferred, promise) = cx.promise();
let index_uuid = cx.argument::<JsString>(0)?.value(&mut cx);
let index_name = cx.argument::<JsString>(0)?.value(&mut cx);
let channel = cx.channel();
let table = js_table.table.clone();
rt.spawn(async move {
let load_stats = futures::try_join!(
table.as_native().unwrap().count_indexed_rows(&index_uuid),
table.as_native().unwrap().count_unindexed_rows(&index_uuid)
);
let load_stats = table.index_stats(index_name).await;
deferred.settle_with(&channel, move |mut cx| {
let (indexed_rows, unindexed_rows) = load_stats.or_throw(&mut cx)?;
let stats = load_stats.or_throw(&mut cx)?;
let output = JsObject::new(&mut cx);
if let Some(stats) = stats {
let output = JsObject::new(&mut cx);
let num_indexed_rows = cx.number(stats.num_indexed_rows as f64);
output.set(&mut cx, "numIndexedRows", num_indexed_rows)?;
let num_unindexed_rows = cx.number(stats.num_unindexed_rows as f64);
output.set(&mut cx, "numUnindexedRows", num_unindexed_rows)?;
if let Some(distance_type) = stats.distance_type {
let distance_type = cx.string(distance_type.to_string());
output.set(&mut cx, "distanceType", distance_type)?;
}
let index_type = cx.string(stats.index_type.to_string());
output.set(&mut cx, "indexType", index_type)?;
match indexed_rows {
Some(x) => {
let i = cx.number(x as f64);
output.set(&mut cx, "numIndexedRows", i)?;
if let Some(num_indices) = stats.num_indices {
let num_indices = cx.number(num_indices as f64);
output.set(&mut cx, "numIndices", num_indices)?;
}
None => {
let null = cx.null();
output.set(&mut cx, "numIndexedRows", null)?;
}
};
match unindexed_rows {
Some(x) => {
let i = cx.number(x as f64);
output.set(&mut cx, "numUnindexedRows", i)?;
}
None => {
let null = cx.null();
output.set(&mut cx, "numUnindexedRows", null)?;
}
};
Ok(output)
Ok(output.as_value(&mut cx))
} else {
Ok(JsNull::new(&mut cx).as_value(&mut cx))
}
})
});

View File

@@ -18,7 +18,7 @@ use scalar::FtsIndexBuilder;
use serde::Deserialize;
use serde_with::skip_serializing_none;
use crate::{table::TableInternal, Result};
use crate::{table::TableInternal, DistanceType, Result};
use self::{
scalar::{BTreeIndexBuilder, BitmapIndexBuilder, LabelListIndexBuilder},
@@ -102,19 +102,42 @@ impl IndexBuilder {
}
}
#[derive(Debug, Clone, PartialEq)]
#[derive(Debug, Clone, PartialEq, Deserialize)]
pub enum IndexType {
// Vector
#[serde(alias = "IVF_PQ")]
IvfPq,
#[serde(alias = "IVF_HNSW_PQ")]
IvfHnswPq,
#[serde(alias = "IVF_HNSW_SQ")]
IvfHnswSq,
// Scalar
#[serde(alias = "BTREE")]
BTree,
#[serde(alias = "BITMAP")]
Bitmap,
#[serde(alias = "LABEL_LIST")]
LabelList,
// FTS
FTS,
}
impl std::fmt::Display for IndexType {
    /// Writes the upper-case, underscore-separated name of the variant
    /// (for example `IVF_PQ` or `LABEL_LIST`).
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let label = match self {
            Self::IvfPq => "IVF_PQ",
            Self::IvfHnswPq => "IVF_HNSW_PQ",
            Self::IvfHnswSq => "IVF_HNSW_SQ",
            Self::BTree => "BTREE",
            Self::Bitmap => "BITMAP",
            Self::LabelList => "LABEL_LIST",
            Self::FTS => "FTS",
        };
        f.write_str(label)
    }
}
/// A description of an index currently configured on a column
#[derive(Debug, PartialEq, Clone)]
pub struct IndexConfig {
/// The name of the index
pub name: String,
@@ -129,16 +152,39 @@ pub struct IndexConfig {
#[skip_serializing_none]
#[derive(Debug, Deserialize)]
pub struct IndexMetadata {
pub metric_type: Option<String>,
pub index_type: Option<String>,
pub(crate) struct IndexMetadata {
pub metric_type: Option<DistanceType>,
// Sometimes the index type is provided at this level.
pub index_type: Option<IndexType>,
}
// This struct is used to deserialize the JSON data returned from the Lance API
// Dataset::index_statistics().
//
// It is crate-private: the native table's `index_stats` parses into this shape
// and then builds the public `IndexStatistics` from it.
#[skip_serializing_none]
#[derive(Debug, Deserialize)]
pub(crate) struct IndexStatisticsImpl {
// Copied unchanged into `IndexStatistics::num_indexed_rows`.
pub num_indexed_rows: usize,
// Copied unchanged into `IndexStatistics::num_unindexed_rows`.
pub num_unindexed_rows: usize,
// Per-index metadata entries. The native `index_stats` pops one entry to
// obtain the metric type (and a fallback index type) and reports an error
// when this list is empty.
pub indices: Vec<IndexMetadata>,
// Sometimes, the index type is provided at this level.
// When it is absent here, the `index_type` of the popped `indices` entry is
// used instead.
pub index_type: Option<IndexType>,
// Copied unchanged into `IndexStatistics::num_indices`.
pub num_indices: Option<u32>,
}
#[skip_serializing_none]
#[derive(Debug, Deserialize)]
#[derive(Debug, Deserialize, PartialEq)]
pub struct IndexStatistics {
/// The number of rows in the table that are covered by this index.
pub num_indexed_rows: usize,
/// The number of rows in the table that are not covered by this index.
/// These are rows that haven't yet been added to the index.
pub num_unindexed_rows: usize,
pub index_type: Option<String>,
pub indices: Vec<IndexMetadata>,
/// The type of the index.
pub index_type: IndexType,
/// The distance type used by the index.
///
/// This is only present for vector indices.
pub distance_type: Option<DistanceType>,
/// The number of parts this index is split into.
pub num_indices: Option<u32>,
}

View File

@@ -1,6 +1,7 @@
use std::sync::{Arc, Mutex};
use crate::index::Index;
use crate::index::IndexStatistics;
use crate::query::Select;
use crate::table::AddDataMode;
use crate::utils::{supported_btree_data_type, supported_vector_data_type};
@@ -523,6 +524,26 @@ impl<S: HttpSend> TableInternal for RemoteTable<S> {
message: "list_indices is not yet supported.".into(),
})
}
async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>> {
let request = self
.client
.post(&format!("/table/{}/index/{}/stats/", self.name, index_name));
let response = self.client.send(request).await?;
if response.status() == StatusCode::NOT_FOUND {
return Ok(None);
}
let response = self.check_table_response(response).await?;
let body = response.text().await?;
let stats = serde_json::from_str(&body).map_err(|e| Error::Http {
message: format!("Failed to parse index statistics: {}", e),
})?;
Ok(Some(stats))
}
async fn table_definition(&self) -> Result<TableDefinition> {
Err(Error::NotSupported {
message: "table_definition is not supported on LanceDB cloud.".into(),
@@ -582,7 +603,7 @@ mod tests {
use reqwest::Body;
use crate::{
index::{vector::IvfPqIndexBuilder, Index},
index::{vector::IvfPqIndexBuilder, Index, IndexStatistics, IndexType},
query::{ExecutableQuery, QueryBase},
DistanceType, Error, Table,
};
@@ -1152,4 +1173,49 @@ mod tests {
table.create_index(&["a"], index).execute().await.unwrap();
}
}
#[tokio::test]
async fn test_index_stats() {
    // Both mock handlers expect the same request path.
    const STATS_PATH: &str = "/table/my_table/index/my_index/stats/";

    // Case 1: the service returns a JSON body describing the index.
    let found_table = Table::new_with_handler("my_table", |request| {
        assert_eq!(request.method(), "POST");
        assert_eq!(request.url().path(), STATS_PATH);

        let payload = serde_json::json!({
            "num_indexed_rows": 100000,
            "num_unindexed_rows": 0,
            "index_type": "IVF_PQ",
            "distance_type": "l2"
        })
        .to_string();

        http::Response::builder().status(200).body(payload).unwrap()
    });
    let stats = found_table.index_stats("my_index").await.unwrap().unwrap();
    // `num_indices` is not part of the mocked payload, so it must come back
    // as `None`.
    assert_eq!(
        stats,
        IndexStatistics {
            num_indexed_rows: 100000,
            num_unindexed_rows: 0,
            index_type: IndexType::IvfPq,
            distance_type: Some(DistanceType::L2),
            num_indices: None,
        }
    );

    // Case 2: the service answers 404, which must surface as `None`.
    let missing_table = Table::new_with_handler("my_table", |request| {
        assert_eq!(request.method(), "POST");
        assert_eq!(request.url().path(), STATS_PATH);

        http::Response::builder().status(404).body("").unwrap()
    });
    let stats = missing_table.index_stats("my_index").await.unwrap();
    assert!(stats.is_none());
}
}

View File

@@ -47,7 +47,6 @@ use lance_index::IndexType;
use lance_table::io::commit::ManifestNamingScheme;
use log::info;
use serde::{Deserialize, Serialize};
use snafu::whatever;
use crate::arrow::IntoArrow;
use crate::connection::NoData;
@@ -58,12 +57,12 @@ use crate::index::vector::{
suggested_num_partitions_for_hnsw, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder,
IvfPqIndexBuilder, VectorIndex,
};
use crate::index::IndexConfig;
use crate::index::IndexStatistics;
use crate::index::{
vector::{suggested_num_partitions, suggested_num_sub_vectors},
Index, IndexBuilder,
};
use crate::index::{IndexConfig, IndexStatisticsImpl};
use crate::query::{
IntoQueryVector, Query, QueryExecutionOptions, Select, VectorQuery, DEFAULT_TOP_K,
};
@@ -405,6 +404,7 @@ pub(crate) trait TableInternal: std::fmt::Display + std::fmt::Debug + Send + Syn
async fn update(&self, update: UpdateBuilder) -> Result<u64>;
async fn create_index(&self, index: IndexBuilder) -> Result<()>;
async fn list_indices(&self) -> Result<Vec<IndexConfig>>;
async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>>;
async fn merge_insert(
&self,
params: MergeInsertBuilder,
@@ -962,6 +962,15 @@ impl Table {
pub fn dataset_uri(&self) -> &str {
self.inner.dataset_uri()
}
/// Look up statistics for the index named `index_name`.
///
/// Resolves to `Ok(None)` when no index with that name exists.
pub async fn index_stats(
    &self,
    index_name: impl AsRef<str>,
) -> Result<Option<IndexStatistics>> {
    let name = index_name.as_ref();
    self.inner.index_stats(name).await
}
}
impl From<NativeTable> for Table {
@@ -1250,91 +1259,6 @@ impl NativeTable {
.await)
}
/// Number of rows covered by the index with UUID `index_uuid`, or `None`
/// when `load_index_stats` finds no such index.
#[deprecated(since = "0.5.2", note = "Please use `index_stats` instead")]
pub async fn count_indexed_rows(&self, index_uuid: &str) -> Result<Option<usize>> {
    #[allow(deprecated)]
    let loaded = self.load_index_stats(index_uuid).await?;
    Ok(loaded.map(|stats| stats.num_indexed_rows))
}
/// Number of rows not covered by the index with UUID `index_uuid`, or `None`
/// when `load_index_stats` finds no such index.
#[deprecated(since = "0.5.2", note = "Please use `index_stats` instead")]
pub async fn count_unindexed_rows(&self, index_uuid: &str) -> Result<Option<usize>> {
    #[allow(deprecated)]
    let loaded = self.load_index_stats(index_uuid).await?;
    Ok(loaded.map(|stats| stats.num_unindexed_rows))
}
/// Index type reported for the index with UUID `index_uuid`.
///
/// Yields `None` when `load_index_stats` finds no such index. When the index
/// exists but its statistics carry no index type, the type's default value
/// is returned instead.
#[deprecated(since = "0.5.2", note = "Please use `index_stats` instead")]
pub async fn get_index_type(&self, index_uuid: &str) -> Result<Option<String>> {
    #[allow(deprecated)]
    let loaded = self.load_index_stats(index_uuid).await?;
    Ok(loaded.map(|stats| stats.index_type.unwrap_or_default()))
}
/// Distance (metric) type reported for the index with UUID `index_uuid`.
///
/// Yields `None` when `load_index_stats` finds no such index. Otherwise the
/// metric types of all entries in `indices` that carry one are collected
/// into a single `String`.
#[deprecated(since = "0.5.2", note = "Please use `index_stats` instead")]
pub async fn get_distance_type(&self, index_uuid: &str) -> Result<Option<String>> {
    #[allow(deprecated)]
    let loaded = self.load_index_stats(index_uuid).await?;
    Ok(loaded.map(|stats| {
        stats
            .indices
            .iter()
            .filter_map(|entry| entry.metric_type.clone())
            .collect::<String>()
    }))
}
/// Load the statistics of the index whose UUID is `index_uuid`.
///
/// The UUID is first resolved to an index name via `load_indices`; `Ok(None)`
/// is returned when no index has that UUID. The statistics JSON obtained from
/// the dataset for that name is then deserialized into [`IndexStatistics`].
#[deprecated(since = "0.5.2", note = "Please use `index_stats` instead")]
pub async fn load_index_stats(&self, index_uuid: &str) -> Result<Option<IndexStatistics>> {
    let matching = self
        .load_indices()
        .await?
        .into_iter()
        .find(|candidate| candidate.index_uuid == index_uuid);
    let found = match matching {
        Some(found) => found,
        None => return Ok(None),
    };

    let dataset = self.dataset.get().await?;
    let index_stats = dataset.index_statistics(&found.index_name).await?;
    // The raw JSON is interpolated into the error message, so the binding
    // keeps the name the format string refers to.
    let index_stats: IndexStatistics = whatever!(
        serde_json::from_str(&index_stats),
        "error deserializing index statistics {index_stats}",
    );
    Ok(Some(index_stats))
}
/// Get statistics about an index.
///
/// Returns `Ok(None)` if the index does not exist: the dataset's
/// `IndexNotFound` error is translated into `None` rather than propagated.
///
/// # Errors
///
/// Any other dataset error is converted into this crate's `Error`, and a
/// statistics payload that cannot be deserialized is reported as
/// `Error::InvalidInput`.
pub async fn index_stats(
    &self,
    index_name: impl AsRef<str>,
) -> Result<Option<IndexStatistics>> {
    let stats = match self
        .dataset
        .get()
        .await?
        .index_statistics(index_name.as_ref())
        .await
    {
        Ok(stats) => stats,
        // A missing index is an expected outcome, not a failure.
        Err(lance::error::Error::IndexNotFound { .. }) => return Ok(None),
        Err(e) => return Err(Error::from(e)),
    };

    serde_json::from_str(&stats).map_err(|e| Error::InvalidInput {
        message: format!("error deserializing index statistics: {}", e),
    })
}
pub async fn load_indices(&self) -> Result<Vec<VectorIndex>> {
let dataset = self.dataset.get().await?;
let (indices, mf) = futures::try_join!(dataset.load_indices(), dataset.latest_manifest())?;
@@ -2126,6 +2050,44 @@ impl TableInternal for NativeTable {
fn dataset_uri(&self) -> &str {
self.uri.as_str()
}
async fn index_stats(&self, index_name: &str) -> Result<Option<IndexStatistics>> {
let stats = match self
.dataset
.get()
.await?
.index_statistics(index_name.as_ref())
.await
{
Ok(stats) => stats,
Err(lance::error::Error::IndexNotFound { .. }) => return Ok(None),
Err(e) => return Err(Error::from(e)),
};
let mut stats: IndexStatisticsImpl =
serde_json::from_str(&stats).map_err(|e| Error::InvalidInput {
message: format!("error deserializing index statistics: {}", e),
})?;
let first_index = stats.indices.pop().ok_or_else(|| Error::InvalidInput {
message: "index statistics is empty".to_string(),
})?;
// Index type should be present at one of the levels.
let index_type =
stats
.index_type
.or(first_index.index_type)
.ok_or_else(|| Error::InvalidInput {
message: "index statistics was missing index type".to_string(),
})?;
Ok(Some(IndexStatistics {
num_indexed_rows: stats.num_indexed_rows,
num_unindexed_rows: stats.num_unindexed_rows,
index_type,
distance_type: first_index.metric_type,
num_indices: stats.num_indices,
}))
}
}
#[cfg(test)]
@@ -2763,24 +2725,7 @@ mod tests {
let table = conn.create_table("test", batches).execute().await.unwrap();
assert_eq!(
table
.as_native()
.unwrap()
.count_indexed_rows("my_index")
.await
.unwrap(),
None
);
assert_eq!(
table
.as_native()
.unwrap()
.count_unindexed_rows("my_index")
.await
.unwrap(),
None
);
assert_eq!(table.index_stats("my_index").await.unwrap(), None);
table
.create_index(&["embeddings"], Index::Auto)
@@ -2797,43 +2742,12 @@ mod tests {
assert_eq!(table.name(), "test");
let indices = table.as_native().unwrap().load_indices().await.unwrap();
let index_uuid = &indices[0].index_uuid;
assert_eq!(
table
.as_native()
.unwrap()
.count_indexed_rows(index_uuid)
.await
.unwrap(),
Some(512)
);
assert_eq!(
table
.as_native()
.unwrap()
.count_unindexed_rows(index_uuid)
.await
.unwrap(),
Some(0)
);
assert_eq!(
table
.as_native()
.unwrap()
.get_index_type(index_uuid)
.await
.unwrap(),
Some("IVF_PQ".to_string())
);
assert_eq!(
table
.as_native()
.unwrap()
.get_distance_type(index_uuid)
.await
.unwrap(),
Some(crate::DistanceType::L2.to_string())
);
let index_name = &indices[0].index_name;
let stats = table.index_stats(index_name).await.unwrap().unwrap();
assert_eq!(stats.num_indexed_rows, 512);
assert_eq!(stats.num_unindexed_rows, 0);
assert_eq!(stats.index_type, crate::index::IndexType::IvfPq);
assert_eq!(stats.distance_type, Some(crate::DistanceType::L2));
}
#[tokio::test]
@@ -2876,24 +2790,8 @@ mod tests {
let table = conn.create_table("test", batches).execute().await.unwrap();
assert_eq!(
table
.as_native()
.unwrap()
.count_indexed_rows("my_index")
.await
.unwrap(),
None
);
assert_eq!(
table
.as_native()
.unwrap()
.count_unindexed_rows("my_index")
.await
.unwrap(),
None
);
let stats = table.index_stats("my_index").await.unwrap();
assert!(stats.is_none());
let index = IvfHnswSqIndexBuilder::default();
table
@@ -2911,25 +2809,10 @@ mod tests {
assert_eq!(table.name(), "test");
let indices = table.as_native().unwrap().load_indices().await.unwrap();
let index_uuid = &indices[0].index_uuid;
assert_eq!(
table
.as_native()
.unwrap()
.count_indexed_rows(index_uuid)
.await
.unwrap(),
Some(512)
);
assert_eq!(
table
.as_native()
.unwrap()
.count_unindexed_rows(index_uuid)
.await
.unwrap(),
Some(0)
);
let index_name = &indices[0].index_name;
let stats = table.index_stats(index_name).await.unwrap().unwrap();
assert_eq!(stats.num_indexed_rows, 512);
assert_eq!(stats.num_unindexed_rows, 0);
}
#[tokio::test]
@@ -2971,25 +2854,8 @@ mod tests {
);
let table = conn.create_table("test", batches).execute().await.unwrap();
assert_eq!(
table
.as_native()
.unwrap()
.count_indexed_rows("my_index")
.await
.unwrap(),
None
);
assert_eq!(
table
.as_native()
.unwrap()
.count_unindexed_rows("my_index")
.await
.unwrap(),
None
);
let stats = table.index_stats("my_index").await.unwrap();
assert!(stats.is_none());
let index = IvfHnswPqIndexBuilder::default();
table
@@ -3006,26 +2872,11 @@ mod tests {
assert_eq!(table.count_rows(None).await.unwrap(), 512);
assert_eq!(table.name(), "test");
let indices = table.as_native().unwrap().load_indices().await.unwrap();
let index_uuid = &indices[0].index_uuid;
assert_eq!(
table
.as_native()
.unwrap()
.count_indexed_rows(index_uuid)
.await
.unwrap(),
Some(512)
);
assert_eq!(
table
.as_native()
.unwrap()
.count_unindexed_rows(index_uuid)
.await
.unwrap(),
Some(0)
);
let indices: Vec<VectorIndex> = table.as_native().unwrap().load_indices().await.unwrap();
let index_name = &indices[0].index_name;
let stats = table.index_stats(index_name).await.unwrap().unwrap();
assert_eq!(stats.num_indexed_rows, 512);
assert_eq!(stats.num_unindexed_rows, 0);
}
fn create_fixed_size_list<T: Array>(values: T, list_size: i32) -> Result<FixedSizeListArray> {
@@ -3101,25 +2952,10 @@ mod tests {
assert_eq!(index.columns, vec!["i".to_string()]);
let indices = table.as_native().unwrap().load_indices().await.unwrap();
let index_uuid = &indices[0].index_uuid;
assert_eq!(
table
.as_native()
.unwrap()
.count_indexed_rows(index_uuid)
.await
.unwrap(),
Some(1)
);
assert_eq!(
table
.as_native()
.unwrap()
.count_unindexed_rows(index_uuid)
.await
.unwrap(),
Some(0)
);
let index_name = &indices[0].index_name;
let stats = table.index_stats(index_name).await.unwrap().unwrap();
assert_eq!(stats.num_indexed_rows, 1);
assert_eq!(stats.num_unindexed_rows, 0);
}
#[tokio::test]