feat: expand IndexConfig with rich per-index metadata (#3497)

`IndexConfig` (returned by `Table::list_indices`) previously exposed
only `name`, `index_type`, and `columns`. Lance's `describe_indices`
provides richer per-index info cheaply (reads manifest metadata, often
cached), so this surfaces it.

Adds these `Option<T>` fields to `lancedb::index::IndexConfig`,
populated in `NativeTable::list_indices` from the `IndexDescription`:

- `index_uuid`: uuid of the first segment
- `type_url`: protobuf type URL (`IndexDescription::type_url`)
- `created_at`: minimum creation time across segments
- `num_indexed_rows`: approximate rows indexed across segments
- `num_unindexed_rows`: table row count minus `num_indexed_rows`,
  saturating at zero (costs one `count_rows` call per `list_indices`)
- `size_bytes`: total size of index files across segments
- `num_segments`: number of segments making up the index
- `index_version`: on-disk index format version (first segment)
- `index_details`: index-type-specific details as JSON

This field set mirrors the lance-namespace `IndexContent` contract
(lance-format/lance-namespace#348) so client and server agree on the
same shape. Note these are populated **locally** via `describe_indices`
— `NativeTable::list_indices` reads the dataset directly and does not
depend on the namespace spec change.

`RemoteTable` leaves the new fields `None` until a follow-up wires them
through the server response (#3494). Exposing the new fields in the
bindings is also left to a follow-up (#3495).

Existing `list_indices` tests in `rust/lancedb/src/table.rs` are
extended to assert the new fields.

Fixes #3492

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Will Jones
2026-06-10 16:14:33 -07:00
committed by GitHub
parent 85d9c1ce63
commit f03abc27e3
3 changed files with 148 additions and 47 deletions

View File

@@ -1,6 +1,7 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
use chrono::{DateTime, Utc};
use scalar::FtsIndexBuilder;
use serde::Deserialize;
use serde_with::skip_serializing_none;
@@ -373,6 +374,51 @@ pub struct IndexConfig {
/// Currently this is always a Vec of size 1. In the future there may
/// be more columns to represent composite indices.
pub columns: Vec<String>,
/// The UUID of the first segment of the index.
///
/// An index may be made up of multiple segments, each with their own UUID.
/// This is the UUID of the first segment. `None` if it could not be
/// determined (e.g. for remote tables, which do not yet surface this).
pub index_uuid: Option<String>,
/// The protobuf type URL, a precise type identifier for the index.
///
/// `None` if unavailable (e.g. for remote tables).
pub type_url: Option<String>,
/// When the index was created, taken as the minimum creation time across
/// all segments.
///
/// `None` if unavailable, such as for indices created before creation
/// timestamps were tracked, or for remote tables.
pub created_at: Option<DateTime<Utc>>,
/// The number of rows indexed, across all segments.
///
/// This is approximate and may include rows that have since been deleted.
/// `None` if unavailable (e.g. for remote tables).
pub num_indexed_rows: Option<u64>,
/// The number of rows in the table that are not yet covered by this index.
///
/// Computed as the table's total row count minus [`Self::num_indexed_rows`].
/// Optimizing the index will fold these rows into it. `None` if unavailable
/// (e.g. for remote tables).
pub num_unindexed_rows: Option<u64>,
/// The total size in bytes of all index files across all segments.
///
/// `None` if size information is unavailable, such as for indices created
/// before file sizes were tracked, or for remote tables.
pub size_bytes: Option<u64>,
/// The number of segments that make up the index.
///
/// `None` if unavailable (e.g. for remote tables).
pub num_segments: Option<u32>,
/// The on-disk index format version, taken from the first segment.
///
/// `None` if unavailable (e.g. for remote tables).
pub index_version: Option<i32>,
/// Index-type-specific details, serialized as JSON.
///
/// The shape of this JSON varies by index type. `None` if the details
/// could not be produced (e.g. no plugin available) or for remote tables.
pub index_details: Option<String>,
}
#[skip_serializing_none]

View File

@@ -2152,6 +2152,17 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
name: index.index_name,
index_type: stats.index_type,
columns,
// These are left None until the server response wires
// them through. See https://github.com/lancedb/lancedb/issues/3494
index_uuid: None,
type_url: None,
created_at: None,
num_indexed_rows: None,
num_unindexed_rows: None,
size_bytes: None,
num_segments: None,
index_version: None,
index_details: None,
})),
Ok(None) => Ok(None), // The index must have been deleted since we listed it.
Err(e) => Err(e),
@@ -4114,11 +4125,29 @@ mod tests {
name: "vector_idx".into(),
index_type: IndexType::IvfPq,
columns: vec!["vector".into()],
index_uuid: None,
type_url: None,
created_at: None,
num_indexed_rows: None,
num_unindexed_rows: None,
size_bytes: None,
num_segments: None,
index_version: None,
index_details: None,
},
IndexConfig {
name: "my_idx".into(),
index_type: IndexType::LabelList,
columns: vec!["metadata.`my.column`".into()],
index_uuid: None,
type_url: None,
created_at: None,
num_indexed_rows: None,
num_unindexed_rows: None,
size_bytes: None,
num_segments: None,
index_version: None,
index_details: None,
},
];
assert_eq!(indices, expected);
@@ -4234,53 +4263,43 @@ mod tests {
});
let indices = table.list_indices().await.unwrap();
let expected = vec![
IndexConfig {
name: "row_id_idx".into(),
index_type: IndexType::BTree,
columns: vec!["rowId".into()],
},
IndexConfig {
name: "row_dash_id_idx".into(),
index_type: IndexType::BTree,
columns: vec!["`row-id`".into()],
},
IndexConfig {
name: "user_id_idx".into(),
index_type: IndexType::BTree,
columns: vec!["userId".into()],
},
IndexConfig {
name: "mixed_case_metadata_user_id_idx".into(),
index_type: IndexType::BTree,
columns: vec!["MetaData.userId".into()],
},
IndexConfig {
name: "metadata_user_id_idx".into(),
index_type: IndexType::BTree,
columns: vec!["metadata.user_id".into()],
},
IndexConfig {
name: "image_embedding_idx".into(),
index_type: IndexType::IvfPq,
columns: vec!["image.embedding".into()],
},
IndexConfig {
name: "payload_text_idx".into(),
index_type: IndexType::FTS,
columns: vec!["payload.text".into()],
},
IndexConfig {
name: "meta_data_user_id_idx".into(),
index_type: IndexType::BTree,
columns: vec!["`meta-data`.`user-id`".into()],
},
IndexConfig {
name: "literal_dot_idx".into(),
index_type: IndexType::BTree,
columns: vec!["literal.`a.b`".into()],
},
];
// The remote path leaves the rich metadata fields None until the server
// wires them through. See https://github.com/lancedb/lancedb/issues/3494
let expected: Vec<IndexConfig> = [
("row_id_idx", IndexType::BTree, "rowId"),
("row_dash_id_idx", IndexType::BTree, "`row-id`"),
("user_id_idx", IndexType::BTree, "userId"),
(
"mixed_case_metadata_user_id_idx",
IndexType::BTree,
"MetaData.userId",
),
("metadata_user_id_idx", IndexType::BTree, "metadata.user_id"),
("image_embedding_idx", IndexType::IvfPq, "image.embedding"),
("payload_text_idx", IndexType::FTS, "payload.text"),
(
"meta_data_user_id_idx",
IndexType::BTree,
"`meta-data`.`user-id`",
),
("literal_dot_idx", IndexType::BTree, "literal.`a.b`"),
]
.into_iter()
.map(|(name, index_type, column)| IndexConfig {
name: name.into(),
index_type,
columns: vec![column.into()],
index_uuid: None,
type_url: None,
created_at: None,
num_indexed_rows: None,
num_unindexed_rows: None,
size_bytes: None,
num_segments: None,
index_version: None,
index_details: None,
})
.collect();
assert_eq!(indices, expected);
}

View File

@@ -3104,6 +3104,7 @@ impl BaseTable for NativeTable {
async fn list_indices(&self) -> Result<Vec<IndexConfig>> {
let dataset = self.dataset.get().await?;
let total_rows = dataset.count_rows(None).await? as u64;
let indices = dataset
.describe_indices(None)
.await?
@@ -3139,10 +3140,25 @@ impl BaseTable for NativeTable {
columns.push(field_path);
}
let segments = idx_desc.segments();
let index_uuid = segments.first().map(|seg| seg.uuid.to_string());
let created_at = segments.iter().filter_map(|seg| seg.created_at).min();
let index_version = segments.first().map(|seg| seg.index_version);
let num_indexed_rows = idx_desc.rows_indexed();
Some(IndexConfig {
name: idx_desc.name().to_string(),
index_type,
columns,
index_uuid,
type_url: Some(idx_desc.type_url().to_string()),
created_at,
num_indexed_rows: Some(num_indexed_rows),
num_unindexed_rows: Some(total_rows.saturating_sub(num_indexed_rows)),
size_bytes: idx_desc.total_size_bytes(),
num_segments: Some(segments.len() as u32),
index_version,
index_details: idx_desc.details().ok(),
})
})
.collect();
@@ -3942,6 +3958,15 @@ mod tests {
let index = index_configs.into_iter().next().unwrap();
assert_eq!(index.index_type, crate::index::IndexType::IvfPq);
assert_eq!(index.columns, vec!["embeddings".to_string()]);
assert!(index.index_uuid.is_some());
assert!(index.type_url.is_some());
assert_eq!(index.num_segments, Some(1));
assert_eq!(index.num_indexed_rows, Some(512));
assert_eq!(index.num_unindexed_rows, Some(0));
assert!(index.created_at.is_some());
assert!(index.size_bytes.is_some());
assert!(index.index_version.is_some());
assert!(index.index_details.is_some());
assert_eq!(table.count_rows(None).await.unwrap(), 512);
assert_eq!(table.name(), "test");
@@ -4304,6 +4329,17 @@ mod tests {
assert_eq!(index.index_type, crate::index::IndexType::BTree);
assert_eq!(index.columns, vec!["i".to_string()]);
// The richer metadata surfaced from describe_indices should be populated.
assert!(index.index_uuid.is_some());
assert!(index.type_url.is_some());
assert_eq!(index.num_segments, Some(1));
assert_eq!(index.num_indexed_rows, Some(1));
assert_eq!(index.num_unindexed_rows, Some(0));
assert!(index.created_at.is_some());
assert!(index.size_bytes.is_some());
assert!(index.index_version.is_some());
assert!(index.index_details.is_some());
let indices = table.as_native().unwrap().load_indices().await.unwrap();
let index_name = &indices[0].index_name;
let stats = table.index_stats(index_name).await.unwrap().unwrap();