feat(bindings): expose new IndexConfig fields in Python and Node.js (#3534)

## Summary

Surfaces the rich per-index metadata added in #3497 to the Python and
Node.js language bindings. Closes #3495.

New optional fields exposed on `IndexConfig` in both bindings:

- `index_uuid` / `indexUuid` — UUID of the first index segment
- `type_url` / `typeUrl` — protobuf type URL for the index
- `created_at` / `createdAt` — creation timestamp (milliseconds since
Unix epoch; exposed as a timezone-aware UTC `datetime` in Python)
- `num_indexed_rows` / `numIndexedRows` — rows covered by the index
- `num_unindexed_rows` / `numUnindexedRows` — rows not yet indexed
- `size_bytes` / `sizeBytes` — total index file size in bytes
- `num_segments` / `numSegments` — number of index segments
- `index_version` / `indexVersion` — on-disk format version
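- `index_details` / `indexDetails` — type-specific details, stored as a JSON
string (the Python binding parses it into a Python object, falling back to
the raw string if it is not valid JSON)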

All fields are `None`/`undefined` for remote tables (which don't yet
surface this metadata through the server response).

## Changes

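- `python/src/index.rs`: extend `IndexConfig` pyclass; replace the `From`
impl with `IndexConfig::from_lancedb(py, ...)`, which takes a `Python`
token so it can parse `index_details`; update `__getitem__`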
- `python/python/lancedb/_lancedb.pyi`: add type hints for new fields
- `python/python/tests/test_table.py`: new `test_index_config_fields`
test
- `nodejs/src/table.rs`: extend `IndexConfig` napi struct; update `From`
impl
- `nodejs/__test__/table.test.ts`: new test; update existing `toEqual`
assertions to `expect.objectContaining` to accommodate new fields

## Test plan

- [x] Python: `uv run --extra tests pytest
python/tests/test_table.py::test_index_config_fields`
- [x] Node.js: `pnpm test __test__/table.test.ts`

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Will Jones
2026-06-11 13:37:39 -07:00
committed by GitHub
parent 40f3e22600
commit f8caef3aca
10 changed files with 359 additions and 45 deletions

View File

@@ -26,7 +26,8 @@ lance-namespace-impls.workspace = true
lance-io.workspace = true
env_logger.workspace = true
log.workspace = true
pyo3 = { version = "0.28", features = ["extension-module", "abi3-py39"] }
pyo3 = { version = "0.28", features = ["extension-module", "abi3-py39", "chrono"] }
chrono = { version = "0.4", default-features = false, features = ["clock"] }
pyo3-async-runtimes = { version = "0.28", features = [
"attributes",
"tokio-runtime",

View File

@@ -1,4 +1,4 @@
from datetime import timedelta
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple, Any, TypedDict, Union, Literal
import pyarrow as pa
@@ -259,6 +259,15 @@ class IndexConfig:
name: str
index_type: str
columns: List[str]
index_uuid: Optional[str]
type_url: Optional[str]
created_at: Optional[datetime]
num_indexed_rows: Optional[int]
num_unindexed_rows: Optional[int]
size_bytes: Optional[int]
num_segments: Optional[int]
index_version: Optional[int]
index_details: Optional[Any]
async def connect(
uri: str,

View File

@@ -2566,6 +2566,55 @@ def test_create_index_nested_field_paths(mem_db: DBConnection):
assert fts_results[0]["payload"]["text"] == "document 44"
def test_index_config_fields(mem_db: DBConnection):
    """Check the rich metadata fields exposed on ``IndexConfig``.

    Creates a 300-row table with a BTREE scalar index on ``x`` and a vector
    index on ``vector``, then inspects the entries returned by
    ``list_indices()`` through both attribute access and ``__getitem__``.
    """
    from datetime import datetime, timezone

    vectors = pa.array(
        [[float(i), float(i + 1)] for i in range(300)],
        pa.list_(pa.float32(), 2),
    )
    tbl = mem_db.create_table(
        "index_config_fields",
        data=pa.Table.from_pydict({"x": list(range(300)), "vector": vectors}),
    )
    tbl.create_scalar_index("x", index_type="BTREE")
    tbl.create_index(
        vector_column_name="vector",
        num_partitions=1,
        num_sub_vectors=1,
    )

    # Look indices up by name so the checks do not depend on listing order.
    by_name = {}
    for cfg in tbl.list_indices():
        by_name[cfg.name] = cfg

    btree = by_name["x_idx"]

    uuid = btree.index_uuid
    assert uuid is not None
    assert isinstance(uuid, str)

    indexed = btree.num_indexed_rows
    assert indexed is not None
    assert indexed == 300

    unindexed = btree.num_unindexed_rows
    assert unindexed is not None
    assert unindexed == 0

    segments = btree.num_segments
    assert segments is not None
    assert segments >= 1

    size = btree.size_bytes
    assert size is not None
    assert size > 0

    created = btree.created_at
    assert created is not None
    assert isinstance(created, datetime)
    assert created.tzinfo == timezone.utc

    # __getitem__ compatibility: subscripting mirrors attribute access.
    for key in ("index_uuid", "num_indexed_rows", "created_at"):
        assert btree[key] == getattr(btree, key)

    # index_details is parsed from JSON into a Python object
    details = btree.index_details
    assert details is not None
    assert isinstance(details, dict)
    assert btree["index_details"] == btree.index_details

    ivf = by_name["vector_idx"]
    assert ivf.index_uuid is not None
    assert ivf.num_indexed_rows == 300
    assert isinstance(ivf.index_details, dict)
def test_empty_query(mem_db: DBConnection):
table = mem_db.create_table(
"my_table",

View File

@@ -1,6 +1,7 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use chrono::{DateTime, Utc};
use lancedb::index::vector::{
IvfFlatIndexBuilder, IvfHnswFlatIndexBuilder, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder,
IvfPqIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder,
@@ -12,7 +13,7 @@ use lancedb::index::{
use pyo3::IntoPyObject;
use pyo3::types::PyStringMethods;
use pyo3::{
Bound, FromPyObject, PyAny, PyResult, Python,
Bound, FromPyObject, Py, PyAny, PyResult, Python,
exceptions::{PyKeyError, PyValueError},
intern, pyclass, pymethods,
types::{PyAnyMethods, PyString},
@@ -294,6 +295,26 @@ pub struct IndexConfig {
pub columns: Vec<String>,
/// Name of the index.
pub name: String,
/// The UUID of the first segment of the index.
pub index_uuid: Option<String>,
/// The protobuf type URL, a precise type identifier for the index.
pub type_url: Option<String>,
/// When the index was created.
pub created_at: Option<DateTime<Utc>>,
/// The number of rows indexed, across all segments.
pub num_indexed_rows: Option<u64>,
/// The number of rows not yet covered by this index.
pub num_unindexed_rows: Option<u64>,
/// The total size in bytes of all index files across all segments.
pub size_bytes: Option<u64>,
/// The number of segments that make up the index.
pub num_segments: Option<u32>,
/// The on-disk index format version.
pub index_version: Option<i32>,
/// Index-type-specific details parsed as a Python object (dict, list, etc.).
///
/// Falls back to a raw string if JSON parsing fails. `None` when unavailable.
pub index_details: Option<Py<PyAny>>,
}
#[pymethods]
@@ -312,18 +333,49 @@ impl IndexConfig {
"index_type" => Ok(self.index_type.clone().into_pyobject(py)?.into_any()),
"columns" => Ok(self.columns.clone().into_pyobject(py)?.into_any()),
"name" | "index_name" => Ok(self.name.clone().into_pyobject(py)?.into_any()),
"index_uuid" => Ok(self.index_uuid.clone().into_pyobject(py)?.into_any()),
"type_url" => Ok(self.type_url.clone().into_pyobject(py)?.into_any()),
"created_at" => Ok(self.created_at.into_pyobject(py)?.into_any()),
"num_indexed_rows" => Ok(self.num_indexed_rows.into_pyobject(py)?.into_any()),
"num_unindexed_rows" => Ok(self.num_unindexed_rows.into_pyobject(py)?.into_any()),
"size_bytes" => Ok(self.size_bytes.into_pyobject(py)?.into_any()),
"num_segments" => Ok(self.num_segments.into_pyobject(py)?.into_any()),
"index_version" => Ok(self.index_version.into_pyobject(py)?.into_any()),
"index_details" => Ok(self
.index_details
.as_ref()
.map(|obj| obj.clone_ref(py))
.into_pyobject(py)?
.into_any()),
_ => Err(PyKeyError::new_err(format!("Invalid key: {}", key))),
}
}
}
impl From<lancedb::index::IndexConfig> for IndexConfig {
fn from(value: lancedb::index::IndexConfig) -> Self {
/// Turn the index-details string into a Python object.
///
/// The string is handed to Python's `json.loads`. When that call returns an
/// error, the error is discarded and the original string is returned as a
/// Python `str` instead, so callers always receive a value.
///
/// # Panics
///
/// Panics if the Python `json` module cannot be imported.
fn parse_index_details(py: Python<'_>, s: String) -> Py<PyAny> {
    let decoded = py
        .import("json")
        .expect("json module is always available")
        .call_method1("loads", (s.as_str(),));
    if let Ok(parsed) = decoded {
        return parsed.into_any().unbind();
    }
    // Not decodable as JSON: expose the raw text unchanged.
    s.into_pyobject(py).unwrap().into_any().unbind()
}
impl IndexConfig {
    /// Build the Python-facing `IndexConfig` from a `lancedb::index::IndexConfig`.
    ///
    /// `index_type` is stored as the `Debug` rendering of the source value.
    /// `index_details`, when present, is converted with `parse_index_details`,
    /// which is why a `Python` token is required. Every other field is moved
    /// across unchanged.
    pub fn from_lancedb(py: Python<'_>, value: lancedb::index::IndexConfig) -> Self {
        let details = value
            .index_details
            .map(|raw| parse_index_details(py, raw));
        let type_name = format!("{:?}", value.index_type);
        Self {
            index_type: type_name,
            columns: value.columns,
            name: value.name,
            index_uuid: value.index_uuid,
            type_url: value.type_url,
            created_at: value.created_at,
            num_indexed_rows: value.num_indexed_rows,
            num_unindexed_rows: value.num_unindexed_rows,
            size_bytes: value.size_bytes,
            num_segments: value.num_segments,
            index_version: value.index_version,
            index_details: details,
        }
    }
}

View File

@@ -694,13 +694,13 @@ impl Table {
pub fn list_indices(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
let inner = self_.inner_ref()?.clone();
future_into_py(self_.py(), async move {
Ok(inner
.list_indices()
.await
.infer_error()?
.into_iter()
.map(IndexConfig::from)
.collect::<Vec<_>>())
let indices = inner.list_indices().await.infer_error()?;
Python::attach(|py| {
Ok(indices
.into_iter()
.map(|idx| IndexConfig::from_lancedb(py, idx))
.collect::<Vec<_>>())
})
})
}