From f03abc27e31f55b03cc09dec29abb41054dff751 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Wed, 10 Jun 2026 16:14:33 -0700 Subject: [PATCH] feat: expand IndexConfig with rich per-index metadata (#3497) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `IndexConfig` (returned by `Table::list_indices`) previously exposed only `name`, `index_type`, and `columns`. Lance's `describe_indices` provides richer per-index info cheaply (reads manifest metadata, often cached), so this change surfaces it. Adds these `Option` fields to `lancedb::index::IndexConfig`, populated in `NativeTable::list_indices` from the `IndexDescription`: - `index_uuid`: UUID of the first segment - `type_url`: protobuf type URL (`IndexDescription::type_url`) - `created_at`: minimum creation time across segments - `num_indexed_rows`: approximate rows indexed across segments - `num_unindexed_rows`: table row count minus `num_indexed_rows` - `size_bytes`: total size of index files across segments - `num_segments`: number of segments making up the index - `index_version`: on-disk index format version (first segment) - `index_details`: index-type-specific details as JSON This field set mirrors the lance-namespace `IndexContent` contract (lance-format/lance-namespace#348) so client and server agree on the same shape. Note that these are populated **locally** via `describe_indices` — `NativeTable::list_indices` reads the dataset directly and does not depend on the namespace spec change. `RemoteTable` leaves the new fields `None` until a follow-up wires them through the server response (#3494). Exposing the new fields through the bindings will also be a follow-up (#3495). Existing `list_indices` tests in `rust/lancedb/src/table.rs` are extended to assert the new fields. 
Fixes #3492 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Opus 4.8 --- rust/lancedb/src/index.rs | 46 +++++++++++++ rust/lancedb/src/remote/table.rs | 113 ++++++++++++++++++------------- rust/lancedb/src/table.rs | 36 ++++++++++ 3 files changed, 148 insertions(+), 47 deletions(-) diff --git a/rust/lancedb/src/index.rs b/rust/lancedb/src/index.rs index 69dbaf21b..d3f237308 100644 --- a/rust/lancedb/src/index.rs +++ b/rust/lancedb/src/index.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The LanceDB Authors +use chrono::{DateTime, Utc}; use scalar::FtsIndexBuilder; use serde::Deserialize; use serde_with::skip_serializing_none; @@ -373,6 +374,51 @@ pub struct IndexConfig { /// Currently this is always a Vec of size 1. In the future there may /// be more columns to represent composite indices. pub columns: Vec, + /// The UUID of the first segment of the index. + /// + /// An index may be made up of multiple segments, each with their own UUID. + /// This is the UUID of the first segment. `None` if it could not be + /// determined (e.g. for remote tables, which do not yet surface this). + pub index_uuid: Option, + /// The protobuf type URL, a precise type identifier for the index. + /// + /// `None` if unavailable (e.g. for remote tables). + pub type_url: Option, + /// When the index was created, taken as the minimum creation time across + /// all segments. + /// + /// `None` if unavailable, such as for indices created before creation + /// timestamps were tracked, or for remote tables. + pub created_at: Option>, + /// The number of rows indexed, across all segments. + /// + /// This is approximate and may include rows that have since been deleted. + /// `None` if unavailable (e.g. for remote tables). + pub num_indexed_rows: Option, + /// The number of rows in the table that are not yet covered by this index. 
+ /// + /// Computed as the table's total row count minus [`Self::num_indexed_rows`]. + /// Optimizing the index will fold these rows into it. `None` if unavailable + /// (e.g. for remote tables). + pub num_unindexed_rows: Option, + /// The total size in bytes of all index files across all segments. + /// + /// `None` if size information is unavailable, such as for indices created + /// before file sizes were tracked, or for remote tables. + pub size_bytes: Option, + /// The number of segments that make up the index. + /// + /// `None` if unavailable (e.g. for remote tables). + pub num_segments: Option, + /// The on-disk index format version, taken from the first segment. + /// + /// `None` if unavailable (e.g. for remote tables). + pub index_version: Option, + /// Index-type-specific details, serialized as JSON. + /// + /// The shape of this JSON varies by index type. `None` if the details + /// could not be produced (e.g. no plugin available) or for remote tables. + pub index_details: Option, } #[skip_serializing_none] diff --git a/rust/lancedb/src/remote/table.rs b/rust/lancedb/src/remote/table.rs index 6a7b5fe47..e399c2c4b 100644 --- a/rust/lancedb/src/remote/table.rs +++ b/rust/lancedb/src/remote/table.rs @@ -2152,6 +2152,17 @@ impl BaseTable for RemoteTable { name: index.index_name, index_type: stats.index_type, columns, + // These are left None until the server response wires + // them through. See https://github.com/lancedb/lancedb/issues/3494 + index_uuid: None, + type_url: None, + created_at: None, + num_indexed_rows: None, + num_unindexed_rows: None, + size_bytes: None, + num_segments: None, + index_version: None, + index_details: None, })), Ok(None) => Ok(None), // The index must have been deleted since we listed it. 
Err(e) => Err(e), @@ -4114,11 +4125,29 @@ mod tests { name: "vector_idx".into(), index_type: IndexType::IvfPq, columns: vec!["vector".into()], + index_uuid: None, + type_url: None, + created_at: None, + num_indexed_rows: None, + num_unindexed_rows: None, + size_bytes: None, + num_segments: None, + index_version: None, + index_details: None, }, IndexConfig { name: "my_idx".into(), index_type: IndexType::LabelList, columns: vec!["metadata.`my.column`".into()], + index_uuid: None, + type_url: None, + created_at: None, + num_indexed_rows: None, + num_unindexed_rows: None, + size_bytes: None, + num_segments: None, + index_version: None, + index_details: None, }, ]; assert_eq!(indices, expected); @@ -4234,53 +4263,43 @@ mod tests { }); let indices = table.list_indices().await.unwrap(); - let expected = vec![ - IndexConfig { - name: "row_id_idx".into(), - index_type: IndexType::BTree, - columns: vec!["rowId".into()], - }, - IndexConfig { - name: "row_dash_id_idx".into(), - index_type: IndexType::BTree, - columns: vec!["`row-id`".into()], - }, - IndexConfig { - name: "user_id_idx".into(), - index_type: IndexType::BTree, - columns: vec!["userId".into()], - }, - IndexConfig { - name: "mixed_case_metadata_user_id_idx".into(), - index_type: IndexType::BTree, - columns: vec!["MetaData.userId".into()], - }, - IndexConfig { - name: "metadata_user_id_idx".into(), - index_type: IndexType::BTree, - columns: vec!["metadata.user_id".into()], - }, - IndexConfig { - name: "image_embedding_idx".into(), - index_type: IndexType::IvfPq, - columns: vec!["image.embedding".into()], - }, - IndexConfig { - name: "payload_text_idx".into(), - index_type: IndexType::FTS, - columns: vec!["payload.text".into()], - }, - IndexConfig { - name: "meta_data_user_id_idx".into(), - index_type: IndexType::BTree, - columns: vec!["`meta-data`.`user-id`".into()], - }, - IndexConfig { - name: "literal_dot_idx".into(), - index_type: IndexType::BTree, - columns: vec!["literal.`a.b`".into()], - }, - ]; + // The 
remote path leaves the rich metadata fields None until the server + // wires them through. See https://github.com/lancedb/lancedb/issues/3494 + let expected: Vec = [ + ("row_id_idx", IndexType::BTree, "rowId"), + ("row_dash_id_idx", IndexType::BTree, "`row-id`"), + ("user_id_idx", IndexType::BTree, "userId"), + ( + "mixed_case_metadata_user_id_idx", + IndexType::BTree, + "MetaData.userId", + ), + ("metadata_user_id_idx", IndexType::BTree, "metadata.user_id"), + ("image_embedding_idx", IndexType::IvfPq, "image.embedding"), + ("payload_text_idx", IndexType::FTS, "payload.text"), + ( + "meta_data_user_id_idx", + IndexType::BTree, + "`meta-data`.`user-id`", + ), + ("literal_dot_idx", IndexType::BTree, "literal.`a.b`"), + ] + .into_iter() + .map(|(name, index_type, column)| IndexConfig { + name: name.into(), + index_type, + columns: vec![column.into()], + index_uuid: None, + type_url: None, + created_at: None, + num_indexed_rows: None, + num_unindexed_rows: None, + size_bytes: None, + num_segments: None, + index_version: None, + index_details: None, + }) + .collect(); assert_eq!(indices, expected); } diff --git a/rust/lancedb/src/table.rs b/rust/lancedb/src/table.rs index 397f754da..887487c34 100644 --- a/rust/lancedb/src/table.rs +++ b/rust/lancedb/src/table.rs @@ -3104,6 +3104,7 @@ impl BaseTable for NativeTable { async fn list_indices(&self) -> Result> { let dataset = self.dataset.get().await?; + let total_rows = dataset.count_rows(None).await? as u64; let indices = dataset .describe_indices(None) .await? 
@@ -3139,10 +3140,25 @@ impl BaseTable for NativeTable { columns.push(field_path); } + let segments = idx_desc.segments(); + let index_uuid = segments.first().map(|seg| seg.uuid.to_string()); + let created_at = segments.iter().filter_map(|seg| seg.created_at).min(); + let index_version = segments.first().map(|seg| seg.index_version); + let num_indexed_rows = idx_desc.rows_indexed(); + Some(IndexConfig { name: idx_desc.name().to_string(), index_type, columns, + index_uuid, + type_url: Some(idx_desc.type_url().to_string()), + created_at, + num_indexed_rows: Some(num_indexed_rows), + num_unindexed_rows: Some(total_rows.saturating_sub(num_indexed_rows)), + size_bytes: idx_desc.total_size_bytes(), + num_segments: Some(segments.len() as u32), + index_version, + index_details: idx_desc.details().ok(), }) }) .collect(); @@ -3942,6 +3958,15 @@ mod tests { let index = index_configs.into_iter().next().unwrap(); assert_eq!(index.index_type, crate::index::IndexType::IvfPq); assert_eq!(index.columns, vec!["embeddings".to_string()]); + assert!(index.index_uuid.is_some()); + assert!(index.type_url.is_some()); + assert_eq!(index.num_segments, Some(1)); + assert_eq!(index.num_indexed_rows, Some(512)); + assert_eq!(index.num_unindexed_rows, Some(0)); + assert!(index.created_at.is_some()); + assert!(index.size_bytes.is_some()); + assert!(index.index_version.is_some()); + assert!(index.index_details.is_some()); assert_eq!(table.count_rows(None).await.unwrap(), 512); assert_eq!(table.name(), "test"); @@ -4304,6 +4329,17 @@ mod tests { assert_eq!(index.index_type, crate::index::IndexType::BTree); assert_eq!(index.columns, vec!["i".to_string()]); + // The richer metadata surfaced from describe_indices should be populated. 
+ assert!(index.index_uuid.is_some()); + assert!(index.type_url.is_some()); + assert_eq!(index.num_segments, Some(1)); + assert_eq!(index.num_indexed_rows, Some(1)); + assert_eq!(index.num_unindexed_rows, Some(0)); + assert!(index.created_at.is_some()); + assert!(index.size_bytes.is_some()); + assert!(index.index_version.is_some()); + assert!(index.index_details.is_some()); + let indices = table.as_native().unwrap().load_indices().await.unwrap(); let index_name = &indices[0].index_name; let stats = table.index_stats(index_name).await.unwrap().unwrap();