mirror of
https://github.com/lancedb/lancedb.git
synced 2026-07-04 03:20:40 +00:00
feat(bindings): expose new IndexConfig fields in Python and Node.js (#3534)
## Summary

Surfaces the rich per-index metadata added in #3497 to the Python and Node.js language bindings. Closes #3495.

New optional fields exposed on `IndexConfig` in both bindings:

- `index_uuid` / `indexUuid` — UUID of the first index segment
- `type_url` / `typeUrl` — protobuf type URL for the index
- `created_at` / `createdAt` — creation timestamp (milliseconds since Unix epoch)
- `num_indexed_rows` / `numIndexedRows` — rows covered by the index
- `num_unindexed_rows` / `numUnindexedRows` — rows not yet indexed
- `size_bytes` / `sizeBytes` — total index file size in bytes
- `num_segments` / `numSegments` — number of index segments
- `index_version` / `indexVersion` — on-disk format version
- `index_details` / `indexDetails` — type-specific JSON details string

All fields are `None`/`undefined` for remote tables (which don't yet surface this metadata through the server response).

## Changes

- `python/src/index.rs`: extend `IndexConfig` pyclass; update `From` impl; update `__getitem__`
- `python/python/lancedb/_lancedb.pyi`: add type hints for new fields
- `python/python/tests/test_table.py`: new `test_index_config_fields` test
- `nodejs/src/table.rs`: extend `IndexConfig` napi struct; update `From` impl
- `nodejs/__test__/table.test.ts`: new test; update existing `toEqual` assertions to `expect.objectContaining` to accommodate new fields

## Test plan

- [x] Python: `uv run --extra tests pytest python/tests/test_table.py::test_index_config_fields`
- [x] Node.js: `pnpm test __test__/table.test.ts`

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -26,7 +26,8 @@ lance-namespace-impls.workspace = true
|
||||
lance-io.workspace = true
|
||||
env_logger.workspace = true
|
||||
log.workspace = true
|
||||
pyo3 = { version = "0.28", features = ["extension-module", "abi3-py39"] }
|
||||
pyo3 = { version = "0.28", features = ["extension-module", "abi3-py39", "chrono"] }
|
||||
chrono = { version = "0.4", default-features = false, features = ["clock"] }
|
||||
pyo3-async-runtimes = { version = "0.28", features = [
|
||||
"attributes",
|
||||
"tokio-runtime",
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from datetime import timedelta
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, List, Optional, Tuple, Any, TypedDict, Union, Literal
|
||||
|
||||
import pyarrow as pa
|
||||
@@ -259,6 +259,15 @@ class IndexConfig:
|
||||
name: str
|
||||
index_type: str
|
||||
columns: List[str]
|
||||
index_uuid: Optional[str]
|
||||
type_url: Optional[str]
|
||||
created_at: Optional[datetime]
|
||||
num_indexed_rows: Optional[int]
|
||||
num_unindexed_rows: Optional[int]
|
||||
size_bytes: Optional[int]
|
||||
num_segments: Optional[int]
|
||||
index_version: Optional[int]
|
||||
index_details: Optional[Any]
|
||||
|
||||
async def connect(
|
||||
uri: str,
|
||||
|
||||
@@ -2566,6 +2566,55 @@ def test_create_index_nested_field_paths(mem_db: DBConnection):
|
||||
assert fts_results[0]["payload"]["text"] == "document 44"
|
||||
|
||||
|
||||
def test_index_config_fields(mem_db: DBConnection):
    """Check the metadata attributes that IndexConfig reports for local indices.

    Creates a BTREE index on ``x`` and a vector index on ``vector`` over a
    300-row table, then verifies the metadata both as attributes and through
    ``__getitem__``.
    """
    from datetime import datetime, timezone

    row_count = 300
    vectors = pa.array(
        [[float(i), float(i + 1)] for i in range(row_count)],
        pa.list_(pa.float32(), 2),
    )
    data = pa.Table.from_pydict({"x": list(range(row_count)), "vector": vectors})
    table = mem_db.create_table("index_config_fields", data=data)
    table.create_scalar_index("x", index_type="BTREE")
    table.create_index(
        vector_column_name="vector",
        num_partitions=1,
        num_sub_vectors=1,
    )

    by_name = {cfg.name: cfg for cfg in table.list_indices()}

    # Scalar (BTREE) index: every metadata field is populated.
    scalar_idx = by_name["x_idx"]
    assert scalar_idx.index_uuid is not None
    assert isinstance(scalar_idx.index_uuid, str)
    assert scalar_idx.num_indexed_rows is not None
    assert scalar_idx.num_indexed_rows == row_count
    assert scalar_idx.num_unindexed_rows is not None
    assert scalar_idx.num_unindexed_rows == 0
    assert scalar_idx.num_segments is not None
    assert scalar_idx.num_segments >= 1
    assert scalar_idx.size_bytes is not None
    assert scalar_idx.size_bytes > 0
    assert scalar_idx.created_at is not None
    assert isinstance(scalar_idx.created_at, datetime)
    assert scalar_idx.created_at.tzinfo == timezone.utc

    # __getitem__ compatibility: key lookup mirrors attribute access.
    for key in ("index_uuid", "num_indexed_rows", "created_at"):
        assert scalar_idx[key] == getattr(scalar_idx, key)

    # index_details is parsed from JSON into a Python object.
    assert scalar_idx.index_details is not None
    assert isinstance(scalar_idx.index_details, dict)
    assert scalar_idx["index_details"] == scalar_idx.index_details

    # Vector index: spot-check the same metadata.
    vector_idx = by_name["vector_idx"]
    assert vector_idx.index_uuid is not None
    assert vector_idx.num_indexed_rows == row_count
    assert isinstance(vector_idx.index_details, dict)
|
||||
|
||||
|
||||
def test_empty_query(mem_db: DBConnection):
|
||||
table = mem_db.create_table(
|
||||
"my_table",
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use lancedb::index::vector::{
|
||||
IvfFlatIndexBuilder, IvfHnswFlatIndexBuilder, IvfHnswPqIndexBuilder, IvfHnswSqIndexBuilder,
|
||||
IvfPqIndexBuilder, IvfRqIndexBuilder, IvfSqIndexBuilder,
|
||||
@@ -12,7 +13,7 @@ use lancedb::index::{
|
||||
use pyo3::IntoPyObject;
|
||||
use pyo3::types::PyStringMethods;
|
||||
use pyo3::{
|
||||
Bound, FromPyObject, PyAny, PyResult, Python,
|
||||
Bound, FromPyObject, Py, PyAny, PyResult, Python,
|
||||
exceptions::{PyKeyError, PyValueError},
|
||||
intern, pyclass, pymethods,
|
||||
types::{PyAnyMethods, PyString},
|
||||
@@ -294,6 +295,26 @@ pub struct IndexConfig {
|
||||
pub columns: Vec<String>,
|
||||
/// Name of the index.
|
||||
pub name: String,
|
||||
/// The UUID of the first segment of the index.
|
||||
pub index_uuid: Option<String>,
|
||||
/// The protobuf type URL, a precise type identifier for the index.
|
||||
pub type_url: Option<String>,
|
||||
/// When the index was created.
|
||||
pub created_at: Option<DateTime<Utc>>,
|
||||
/// The number of rows indexed, across all segments.
|
||||
pub num_indexed_rows: Option<u64>,
|
||||
/// The number of rows not yet covered by this index.
|
||||
pub num_unindexed_rows: Option<u64>,
|
||||
/// The total size in bytes of all index files across all segments.
|
||||
pub size_bytes: Option<u64>,
|
||||
/// The number of segments that make up the index.
|
||||
pub num_segments: Option<u32>,
|
||||
/// The on-disk index format version.
|
||||
pub index_version: Option<i32>,
|
||||
/// Index-type-specific details parsed as a Python object (dict, list, etc.).
|
||||
///
|
||||
/// Falls back to a raw string if JSON parsing fails. `None` when unavailable.
|
||||
pub index_details: Option<Py<PyAny>>,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
@@ -312,18 +333,49 @@ impl IndexConfig {
|
||||
"index_type" => Ok(self.index_type.clone().into_pyobject(py)?.into_any()),
|
||||
"columns" => Ok(self.columns.clone().into_pyobject(py)?.into_any()),
|
||||
"name" | "index_name" => Ok(self.name.clone().into_pyobject(py)?.into_any()),
|
||||
"index_uuid" => Ok(self.index_uuid.clone().into_pyobject(py)?.into_any()),
|
||||
"type_url" => Ok(self.type_url.clone().into_pyobject(py)?.into_any()),
|
||||
"created_at" => Ok(self.created_at.into_pyobject(py)?.into_any()),
|
||||
"num_indexed_rows" => Ok(self.num_indexed_rows.into_pyobject(py)?.into_any()),
|
||||
"num_unindexed_rows" => Ok(self.num_unindexed_rows.into_pyobject(py)?.into_any()),
|
||||
"size_bytes" => Ok(self.size_bytes.into_pyobject(py)?.into_any()),
|
||||
"num_segments" => Ok(self.num_segments.into_pyobject(py)?.into_any()),
|
||||
"index_version" => Ok(self.index_version.into_pyobject(py)?.into_any()),
|
||||
"index_details" => Ok(self
|
||||
.index_details
|
||||
.as_ref()
|
||||
.map(|obj| obj.clone_ref(py))
|
||||
.into_pyobject(py)?
|
||||
.into_any()),
|
||||
_ => Err(PyKeyError::new_err(format!("Invalid key: {}", key))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<lancedb::index::IndexConfig> for IndexConfig {
|
||||
fn from(value: lancedb::index::IndexConfig) -> Self {
|
||||
fn parse_index_details(py: Python<'_>, s: String) -> Py<PyAny> {
|
||||
let json = py.import("json").expect("json module is always available");
|
||||
match json.call_method1("loads", (s.as_str(),)) {
|
||||
Ok(obj) => obj.into_any().unbind(),
|
||||
Err(_) => s.into_pyobject(py).unwrap().into_any().unbind(),
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexConfig {
|
||||
pub fn from_lancedb(py: Python<'_>, value: lancedb::index::IndexConfig) -> Self {
|
||||
let index_type = format!("{:?}", value.index_type);
|
||||
Self {
|
||||
index_type,
|
||||
columns: value.columns,
|
||||
name: value.name,
|
||||
index_uuid: value.index_uuid,
|
||||
type_url: value.type_url,
|
||||
created_at: value.created_at,
|
||||
num_indexed_rows: value.num_indexed_rows,
|
||||
num_unindexed_rows: value.num_unindexed_rows,
|
||||
size_bytes: value.size_bytes,
|
||||
num_segments: value.num_segments,
|
||||
index_version: value.index_version,
|
||||
index_details: value.index_details.map(|s| parse_index_details(py, s)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -694,13 +694,13 @@ impl Table {
|
||||
pub fn list_indices(self_: PyRef<'_, Self>) -> PyResult<Bound<'_, PyAny>> {
|
||||
let inner = self_.inner_ref()?.clone();
|
||||
future_into_py(self_.py(), async move {
|
||||
Ok(inner
|
||||
.list_indices()
|
||||
.await
|
||||
.infer_error()?
|
||||
.into_iter()
|
||||
.map(IndexConfig::from)
|
||||
.collect::<Vec<_>>())
|
||||
let indices = inner.list_indices().await.infer_error()?;
|
||||
Python::attach(|py| {
|
||||
Ok(indices
|
||||
.into_iter()
|
||||
.map(|idx| IndexConfig::from_lancedb(py, idx))
|
||||
.collect::<Vec<_>>())
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user