mirror of
https://github.com/lancedb/lancedb.git
synced 2026-05-14 10:30:40 +00:00
feat: remote index stats (#1702)
BREAKING CHANGE: the return value of the `index_stats` method has changed, and all `index_stats` APIs now take the index name instead of the index UUID. Several deprecated index statistics methods were also removed.
* Removes deprecated methods for individual index statistics.
* Aligns the public `IndexStatistics` struct with the API response from LanceDB Cloud.
* Implements `index_stats` for the remote Rust SDK and the Python async API.
This commit is contained in:
@@ -2683,6 +2683,26 @@ class AsyncTable:
|
||||
"""
|
||||
return await self._inner.list_indices()
|
||||
|
||||
async def index_stats(self, index_name: str) -> Optional[IndexStatistics]:
    """
    Look up the statistics of a single index.

    Parameters
    ----------
    index_name: str
        The name of the index whose statistics are requested.

    Returns
    -------
    IndexStatistics or None
        The statistics of the index, or None if the index does not exist.
    """
    raw_stats = await self._inner.index_stats(index_name)
    # Anything other than None is unpacked into keyword arguments, so its
    # keys must match the IndexStatistics field names.
    if raw_stats is not None:
        return IndexStatistics(**raw_stats)
    # None is handed back unchanged (documented above as "index not found").
    return None
|
||||
|
||||
async def uses_v2_manifest_paths(self) -> bool:
|
||||
"""
|
||||
Check if the table is using the new v2 manifest paths.
|
||||
@@ -2713,3 +2733,31 @@ class AsyncTable:
|
||||
to check if the table is already using the new path style.
|
||||
"""
|
||||
await self._inner.migrate_manifest_paths_v2()
|
||||
|
||||
|
||||
@dataclass
class IndexStatistics:
    """
    Statistics about an index.

    Attributes
    ----------
    num_indexed_rows: int
        The number of rows that are covered by this index.
    num_unindexed_rows: int
        The number of rows that are not covered by this index.
    index_type: str
        The type of index that was created. One of "IVF_PQ", "IVF_HNSW_PQ",
        "IVF_HNSW_SQ", "FTS", "BTREE", "BITMAP" or "LABEL_LIST".
    distance_type: Optional[str]
        The distance type used by the index: "l2", "cosine" or "dot".
        Defaults to None.
    num_indices: Optional[int]
        The number of parts the index is split into. Defaults to None.
    """

    # Field order is part of the constructor signature; keep it stable.
    num_indexed_rows: int
    num_unindexed_rows: int
    index_type: Literal[
        "IVF_PQ", "IVF_HNSW_PQ", "IVF_HNSW_SQ", "FTS", "BTREE", "BITMAP", "LABEL_LIST"
    ]
    distance_type: Optional[Literal["l2", "cosine", "dot"]] = None
    num_indices: Optional[int] = None
|
||||
|
||||
@@ -66,6 +66,15 @@ async def test_create_bitmap_index(some_table: AsyncTable):
|
||||
# TODO: Fix via https://github.com/lancedb/lance/issues/2039
|
||||
# indices = await some_table.list_indices()
|
||||
# assert str(indices) == '[Index(Bitmap, columns=["id"])]'
|
||||
indices = await some_table.list_indices()
|
||||
assert len(indices) == 1
|
||||
index_name = indices[0].name
|
||||
stats = await some_table.index_stats(index_name)
|
||||
assert stats.index_type == "BITMAP"
|
||||
assert stats.distance_type is None
|
||||
assert stats.num_indexed_rows == await some_table.count_rows()
|
||||
assert stats.num_unindexed_rows == 0
|
||||
assert stats.num_indices == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -91,6 +100,14 @@ async def test_create_vector_index(some_table: AsyncTable):
|
||||
assert len(indices) == 1
|
||||
assert indices[0].index_type == "IvfPq"
|
||||
assert indices[0].columns == ["vector"]
|
||||
assert indices[0].name == "vector_idx"
|
||||
|
||||
stats = await some_table.index_stats("vector_idx")
|
||||
assert stats.index_type == "IVF_PQ"
|
||||
assert stats.distance_type == "l2"
|
||||
assert stats.num_indexed_rows == await some_table.count_rows()
|
||||
assert stats.num_unindexed_rows == 0
|
||||
assert stats.num_indices == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
Reference in New Issue
Block a user