mirror of
https://github.com/lancedb/lancedb.git
synced 2026-01-08 12:52:58 +00:00
feat: remote index stats (#1702)
BREAKING CHANGE: the return value of the `index_stats` method has changed, and all `index_stats` APIs now take the index name instead of the UUID. Several deprecated index statistics methods were also removed.

* Removes deprecated methods for individual index statistics.
* Aligns the public `IndexStatistics` struct with the API response from LanceDB Cloud.
* Implements `index_stats` for the remote Rust SDK and the Python async API.
This commit is contained in:
@@ -2683,6 +2683,26 @@ class AsyncTable:
|
||||
"""
|
||||
return await self._inner.list_indices()
|
||||
|
||||
async def index_stats(self, index_name: str) -> Optional[IndexStatistics]:
    """
    Retrieve statistics about an index

    Parameters
    ----------
    index_name: str
        The name of the index to retrieve statistics for

    Returns
    -------
    IndexStatistics or None
        The statistics about the index. Returns None if the index does not exist.
    """
    stats = await self._inner.index_stats(index_name)
    if stats is None:
        return None
    # The inner call yields a mapping of field names to values; expand it
    # into the typed IndexStatistics record.
    return IndexStatistics(**stats)
|
||||
|
||||
async def uses_v2_manifest_paths(self) -> bool:
|
||||
"""
|
||||
Check if the table is using the new v2 manifest paths.
|
||||
@@ -2713,3 +2733,31 @@ class AsyncTable:
|
||||
to check if the table is already using the new path style.
|
||||
"""
|
||||
await self._inner.migrate_manifest_paths_v2()
|
||||
|
||||
|
||||
@dataclass
class IndexStatistics:
    """
    Statistics about an index.

    Attributes
    ----------
    num_indexed_rows: int
        The number of rows that are covered by this index.
    num_unindexed_rows: int
        The number of rows that are not covered by this index.
    index_type: str
        The type of index that was created. One of "IVF_PQ", "IVF_HNSW_PQ",
        "IVF_HNSW_SQ", "FTS", "BTREE", "BITMAP" or "LABEL_LIST".
    distance_type: Optional[str]
        The distance type used by the index ("l2", "cosine" or "dot").
        Defaults to None.
    num_indices: Optional[int]
        The number of parts the index is split into. Defaults to None.
    """

    num_indexed_rows: int
    num_unindexed_rows: int
    index_type: Literal[
        "IVF_PQ", "IVF_HNSW_PQ", "IVF_HNSW_SQ", "FTS", "BTREE", "BITMAP", "LABEL_LIST"
    ]
    distance_type: Optional[Literal["l2", "cosine", "dot"]] = None
    num_indices: Optional[int] = None
|
||||
|
||||
@@ -66,6 +66,15 @@ async def test_create_bitmap_index(some_table: AsyncTable):
|
||||
# TODO: Fix via https://github.com/lancedb/lance/issues/2039
|
||||
# indices = await some_table.list_indices()
|
||||
# assert str(indices) == '[Index(Bitmap, columns=["id"])]'
|
||||
indices = await some_table.list_indices()
|
||||
assert len(indices) == 1
|
||||
index_name = indices[0].name
|
||||
stats = await some_table.index_stats(index_name)
|
||||
assert stats.index_type == "BITMAP"
|
||||
assert stats.distance_type is None
|
||||
assert stats.num_indexed_rows == await some_table.count_rows()
|
||||
assert stats.num_unindexed_rows == 0
|
||||
assert stats.num_indices == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -91,6 +100,14 @@ async def test_create_vector_index(some_table: AsyncTable):
|
||||
assert len(indices) == 1
|
||||
assert indices[0].index_type == "IvfPq"
|
||||
assert indices[0].columns == ["vector"]
|
||||
assert indices[0].name == "vector_idx"
|
||||
|
||||
stats = await some_table.index_stats("vector_idx")
|
||||
assert stats.index_type == "IVF_PQ"
|
||||
assert stats.distance_type == "l2"
|
||||
assert stats.num_indexed_rows == await some_table.count_rows()
|
||||
assert stats.num_unindexed_rows == 0
|
||||
assert stats.num_indices == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
@@ -200,6 +200,8 @@ pub struct IndexConfig {
|
||||
/// Currently this is always a list of size 1. In the future there may
|
||||
/// be more columns to represent composite indices.
|
||||
pub columns: Vec<String>,
|
||||
/// Name of the index.
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
@@ -215,6 +217,7 @@ impl From<lancedb::index::IndexConfig> for IndexConfig {
|
||||
Self {
|
||||
index_type,
|
||||
columns: value.columns,
|
||||
name: value.name,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,8 +8,8 @@ use lancedb::table::{
|
||||
use pyo3::{
|
||||
exceptions::{PyRuntimeError, PyValueError},
|
||||
pyclass, pymethods,
|
||||
types::{PyDict, PyString},
|
||||
Bound, PyAny, PyRef, PyResult, Python,
|
||||
types::{PyDict, PyDictMethods, PyString},
|
||||
Bound, PyAny, PyRef, PyResult, Python, ToPyObject,
|
||||
};
|
||||
use pyo3_asyncio_0_21::tokio::future_into_py;
|
||||
|
||||
@@ -204,6 +204,33 @@ impl Table {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn index_stats(self_: PyRef<'_, Self>, index_name: String) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
let stats = inner.index_stats(&index_name).await.infer_error()?;
|
||||
if let Some(stats) = stats {
|
||||
Python::with_gil(|py| {
|
||||
let dict = PyDict::new_bound(py);
|
||||
dict.set_item("num_indexed_rows", stats.num_indexed_rows)?;
|
||||
dict.set_item("num_unindexed_rows", stats.num_unindexed_rows)?;
|
||||
dict.set_item("index_type", stats.index_type.to_string())?;
|
||||
|
||||
if let Some(distance_type) = stats.distance_type {
|
||||
dict.set_item("distance_type", distance_type.to_string())?;
|
||||
}
|
||||
|
||||
if let Some(num_indices) = stats.num_indices {
|
||||
dict.set_item("num_indices", num_indices)?;
|
||||
}
|
||||
|
||||
Ok(Some(dict.to_object(py)))
|
||||
})
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn __repr__(&self) -> String {
|
||||
match &self.inner {
|
||||
None => format!("ClosedTable({})", self.name),
|
||||
|
||||
Reference in New Issue
Block a user