mirror of
https://github.com/lancedb/lancedb.git
synced 2026-06-05 13:20:39 +00:00
Compare commits
2 Commits
xuanwo/tab
...
will/index
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fe22f8cf43 | ||
|
|
a6cfbc9606 |
@@ -1,6 +1,7 @@
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
// SPDX-FileCopyrightText: Copyright The LanceDB Authors
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use scalar::FtsIndexBuilder;
|
||||
use serde::Deserialize;
|
||||
use serde_with::skip_serializing_none;
|
||||
@@ -364,6 +365,45 @@ pub struct IndexConfig {
|
||||
/// Currently this is always a Vec of size 1. In the future there may
|
||||
/// be more columns to represent composite indices.
|
||||
pub columns: Vec<String>,
|
||||
/// The UUID of the first segment of the index.
|
||||
///
|
||||
/// An index may be made up of multiple segments, each with their own UUID.
|
||||
/// This is the UUID of the first segment. `None` if it could not be
|
||||
/// determined (e.g. for remote tables, which do not yet surface this).
|
||||
pub index_uuid: Option<String>,
|
||||
/// The protobuf type URL, a precise type identifier for the index.
|
||||
///
|
||||
/// `None` if unavailable (e.g. for remote tables).
|
||||
pub type_url: Option<String>,
|
||||
/// When the index was created, taken as the minimum creation time across
|
||||
/// all segments.
|
||||
///
|
||||
/// `None` if unavailable, such as for indices created before creation
|
||||
/// timestamps were tracked, or for remote tables.
|
||||
pub created_at: Option<DateTime<Utc>>,
|
||||
/// The number of rows indexed, across all segments.
|
||||
///
|
||||
/// This is approximate and may include rows that have since been deleted.
|
||||
/// `None` if unavailable (e.g. for remote tables).
|
||||
pub num_indexed_rows: Option<u64>,
|
||||
/// The total size in bytes of all index files across all segments.
|
||||
///
|
||||
/// `None` if size information is unavailable, such as for indices created
|
||||
/// before file sizes were tracked, or for remote tables.
|
||||
pub size_bytes: Option<u64>,
|
||||
/// The number of segments that make up the index.
|
||||
///
|
||||
/// `None` if unavailable (e.g. for remote tables).
|
||||
pub num_segments: Option<u32>,
|
||||
/// The on-disk index format version, taken from the first segment.
|
||||
///
|
||||
/// `None` if unavailable (e.g. for remote tables).
|
||||
pub index_version: Option<i32>,
|
||||
/// Index-type-specific details, serialized as JSON.
|
||||
///
|
||||
/// The shape of this JSON varies by index type. `None` if the details
|
||||
/// could not be produced (e.g. no plugin available) or for remote tables.
|
||||
pub index_details: Option<String>,
|
||||
}
|
||||
|
||||
#[skip_serializing_none]
|
||||
|
||||
@@ -2048,6 +2048,16 @@ impl<S: HttpSend> BaseTable for RemoteTable<S> {
|
||||
name: index.index_name,
|
||||
index_type: stats.index_type,
|
||||
columns,
|
||||
// These are left None until the server response wires
|
||||
// them through. See https://github.com/lancedb/lancedb/issues/3494
|
||||
index_uuid: None,
|
||||
type_url: None,
|
||||
created_at: None,
|
||||
num_indexed_rows: None,
|
||||
size_bytes: None,
|
||||
num_segments: None,
|
||||
index_version: None,
|
||||
index_details: None,
|
||||
})),
|
||||
Ok(None) => Ok(None), // The index must have been deleted since we listed it.
|
||||
Err(e) => Err(e),
|
||||
@@ -3944,11 +3954,27 @@ mod tests {
|
||||
name: "vector_idx".into(),
|
||||
index_type: IndexType::IvfPq,
|
||||
columns: vec!["vector".into()],
|
||||
index_uuid: None,
|
||||
type_url: None,
|
||||
created_at: None,
|
||||
num_indexed_rows: None,
|
||||
size_bytes: None,
|
||||
num_segments: None,
|
||||
index_version: None,
|
||||
index_details: None,
|
||||
},
|
||||
IndexConfig {
|
||||
name: "my_idx".into(),
|
||||
index_type: IndexType::LabelList,
|
||||
columns: vec!["metadata.`my.column`".into()],
|
||||
index_uuid: None,
|
||||
type_url: None,
|
||||
created_at: None,
|
||||
num_indexed_rows: None,
|
||||
size_bytes: None,
|
||||
num_segments: None,
|
||||
index_version: None,
|
||||
index_details: None,
|
||||
},
|
||||
];
|
||||
assert_eq!(indices, expected);
|
||||
|
||||
@@ -2927,10 +2927,23 @@ impl BaseTable for NativeTable {
|
||||
columns.push(field_path);
|
||||
}
|
||||
|
||||
let segments = idx_desc.segments();
|
||||
let index_uuid = segments.first().map(|seg| seg.uuid.to_string());
|
||||
let created_at = segments.iter().filter_map(|seg| seg.created_at).min();
|
||||
let index_version = segments.first().map(|seg| seg.index_version);
|
||||
|
||||
Some(IndexConfig {
|
||||
name: idx_desc.name().to_string(),
|
||||
index_type,
|
||||
columns,
|
||||
index_uuid,
|
||||
type_url: Some(idx_desc.type_url().to_string()),
|
||||
created_at,
|
||||
num_indexed_rows: Some(idx_desc.rows_indexed()),
|
||||
size_bytes: idx_desc.total_size_bytes(),
|
||||
num_segments: Some(segments.len() as u32),
|
||||
index_version,
|
||||
index_details: idx_desc.details().ok(),
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
@@ -3394,6 +3407,14 @@ mod tests {
|
||||
let index = index_configs.into_iter().next().unwrap();
|
||||
assert_eq!(index.index_type, crate::index::IndexType::IvfPq);
|
||||
assert_eq!(index.columns, vec!["embeddings".to_string()]);
|
||||
assert!(index.index_uuid.is_some());
|
||||
assert!(index.type_url.is_some());
|
||||
assert_eq!(index.num_segments, Some(1));
|
||||
assert_eq!(index.num_indexed_rows, Some(512));
|
||||
assert!(index.created_at.is_some());
|
||||
assert!(index.size_bytes.is_some());
|
||||
assert!(index.index_version.is_some());
|
||||
assert!(index.index_details.is_some());
|
||||
assert_eq!(table.count_rows(None).await.unwrap(), 512);
|
||||
assert_eq!(table.name(), "test");
|
||||
|
||||
@@ -3744,6 +3765,16 @@ mod tests {
|
||||
assert_eq!(index.index_type, crate::index::IndexType::BTree);
|
||||
assert_eq!(index.columns, vec!["i".to_string()]);
|
||||
|
||||
// The richer metadata surfaced from describe_indices should be populated.
|
||||
assert!(index.index_uuid.is_some());
|
||||
assert!(index.type_url.is_some());
|
||||
assert_eq!(index.num_segments, Some(1));
|
||||
assert_eq!(index.num_indexed_rows, Some(1));
|
||||
assert!(index.created_at.is_some());
|
||||
assert!(index.size_bytes.is_some());
|
||||
assert!(index.index_version.is_some());
|
||||
assert!(index.index_details.is_some());
|
||||
|
||||
let indices = table.as_native().unwrap().load_indices().await.unwrap();
|
||||
let index_name = &indices[0].index_name;
|
||||
let stats = table.index_stats(index_name).await.unwrap().unwrap();
|
||||
|
||||
Reference in New Issue
Block a user