diff --git a/rust/lancedb/src/index.rs b/rust/lancedb/src/index.rs index 3a55eeedf..2ed4ba274 100644 --- a/rust/lancedb/src/index.rs +++ b/rust/lancedb/src/index.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The LanceDB Authors +use chrono::{DateTime, Utc}; use scalar::FtsIndexBuilder; use serde::Deserialize; use serde_with::skip_serializing_none; @@ -364,6 +365,37 @@ pub struct IndexConfig { /// Currently this is always a Vec of size 1. In the future there may /// be more columns to represent composite indices. pub columns: Vec, + /// The UUID of the first segment of the index. + /// + /// An index may be made up of multiple segments, each with their own UUID. + /// This is the UUID of the first segment. `None` if it could not be + /// determined (e.g. for remote tables, which do not yet surface this). + pub index_uuid: Option, + /// When the index was created, taken as the minimum creation time across + /// all segments. + /// + /// `None` if unavailable, such as for indices created before creation + /// timestamps were tracked, or for remote tables. + pub created_at: Option>, + /// The number of rows indexed, across all segments. + /// + /// This is approximate and may include rows that have since been deleted. + /// `None` if unavailable (e.g. for remote tables). + pub num_indexed_rows: Option, + /// The total size in bytes of all index files across all segments. + /// + /// `None` if size information is unavailable, such as for indices created + /// before file sizes were tracked, or for remote tables. + pub size_bytes: Option, + /// Index-type-specific details, serialized as JSON. + /// + /// The shape of this JSON varies by index type. `None` if the details + /// could not be produced (e.g. no plugin available) or for remote tables. + pub index_details: Option, + /// The number of segments that make up the index. + /// + /// `None` if unavailable (e.g. for remote tables). + pub num_segments: Option, } #[skip_serializing_none] diff --git a/rust/lancedb/src/remote/table.rs b/rust/lancedb/src/remote/table.rs index dc16b61c6..fbce83a64 100644 --- a/rust/lancedb/src/remote/table.rs +++ b/rust/lancedb/src/remote/table.rs @@ -2048,6 +2048,14 @@ impl BaseTable for RemoteTable { name: index.index_name, index_type: stats.index_type, columns, + // These are left None until the server response wires + // them through. See https://github.com/lancedb/lancedb/issues/3492 + index_uuid: None, + created_at: None, + num_indexed_rows: None, + size_bytes: None, + index_details: None, + num_segments: None, })), Ok(None) => Ok(None), // The index must have been deleted since we listed it. Err(e) => Err(e), @@ -3944,11 +3952,23 @@ mod tests { name: "vector_idx".into(), index_type: IndexType::IvfPq, columns: vec!["vector".into()], + index_uuid: None, + created_at: None, + num_indexed_rows: None, + size_bytes: None, + index_details: None, + num_segments: None, }, IndexConfig { name: "my_idx".into(), index_type: IndexType::LabelList, columns: vec!["metadata.`my.column`".into()], + index_uuid: None, + created_at: None, + num_indexed_rows: None, + size_bytes: None, + index_details: None, + num_segments: None, }, ]; assert_eq!(indices, expected); diff --git a/rust/lancedb/src/table.rs b/rust/lancedb/src/table.rs index ca34bbdf3..893581c39 100644 --- a/rust/lancedb/src/table.rs +++ b/rust/lancedb/src/table.rs @@ -2927,10 +2927,20 @@ impl BaseTable for NativeTable { columns.push(field_path); } + let segments = idx_desc.segments(); + let index_uuid = segments.first().map(|seg| seg.uuid.to_string()); + let created_at = segments.iter().filter_map(|seg| seg.created_at).min(); + Some(IndexConfig { name: idx_desc.name().to_string(), index_type, columns, + index_uuid, + created_at, + num_indexed_rows: Some(idx_desc.rows_indexed()), + size_bytes: idx_desc.total_size_bytes(), + index_details: idx_desc.details().ok(), + num_segments: Some(segments.len() as u32), }) }) .collect(); @@ -3394,6 +3404,12 @@ mod tests { let index = index_configs.into_iter().next().unwrap(); assert_eq!(index.index_type, crate::index::IndexType::IvfPq); assert_eq!(index.columns, vec!["embeddings".to_string()]); + assert!(index.index_uuid.is_some()); + assert_eq!(index.num_segments, Some(1)); + assert_eq!(index.num_indexed_rows, Some(512)); + assert!(index.created_at.is_some()); + assert!(index.size_bytes.is_some()); + assert!(index.index_details.is_some()); assert_eq!(table.count_rows(None).await.unwrap(), 512); assert_eq!(table.name(), "test"); @@ -3744,6 +3760,14 @@ mod tests { assert_eq!(index.index_type, crate::index::IndexType::BTree); assert_eq!(index.columns, vec!["i".to_string()]); + // The richer metadata surfaced from describe_indices should be populated. + assert!(index.index_uuid.is_some()); + assert_eq!(index.num_segments, Some(1)); + assert_eq!(index.num_indexed_rows, Some(1)); + assert!(index.created_at.is_some()); + assert!(index.size_bytes.is_some()); + assert!(index.index_details.is_some()); + let indices = table.as_native().unwrap().load_indices().await.unwrap(); let index_name = &indices[0].index_name; let stats = table.index_stats(index_name).await.unwrap().unwrap();