From 593befbc0f130acc77f268df3c8fd9a59ffc02af Mon Sep 17 00:00:00 2001 From: dennis zhuang Date: Fri, 16 Jan 2026 11:45:42 +0800 Subject: [PATCH] fix: vector index metadata missing (#7575) * fix: vector index metadata missing Signed-off-by: Dennis Zhuang * refactor: constants and test Signed-off-by: Dennis Zhuang --------- Signed-off-by: Dennis Zhuang --- src/api/src/v1/column_def.rs | 49 +++++++++++++++++++++-- src/datatypes/src/schema.rs | 11 +++-- src/datatypes/src/schema/column_schema.rs | 7 ++++ src/query/src/error.rs | 8 ++++ src/query/src/sql/show_create_table.rs | 46 +++++++++++++++++++-- src/sql/src/parsers/utils.rs | 15 ++----- 6 files changed, 113 insertions(+), 23 deletions(-) diff --git a/src/api/src/v1/column_def.rs b/src/api/src/v1/column_def.rs index b02b06feaf..9b39d0f329 100644 --- a/src/api/src/v1/column_def.rs +++ b/src/api/src/v1/column_def.rs @@ -18,7 +18,7 @@ use arrow_schema::extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_K use datatypes::schema::{ COMMENT_KEY, ColumnDefaultConstraint, ColumnSchema, FULLTEXT_KEY, FulltextAnalyzer, FulltextBackend, FulltextOptions, INVERTED_INDEX_KEY, Metadata, SKIPPING_INDEX_KEY, - SkippingIndexOptions, SkippingIndexType, + SkippingIndexOptions, SkippingIndexType, VECTOR_INDEX_KEY, }; use greptime_proto::v1::{ Analyzer, FulltextBackend as PbFulltextBackend, SkippingIndexType as PbSkippingIndexType, @@ -35,11 +35,14 @@ const FULLTEXT_GRPC_KEY: &str = "fulltext"; const INVERTED_INDEX_GRPC_KEY: &str = "inverted_index"; /// Key used to store skip index options in gRPC column options. const SKIPPING_INDEX_GRPC_KEY: &str = "skipping_index"; +/// Key used to store vector index options in gRPC column options. +const VECTOR_INDEX_GRPC_KEY: &str = "vector_index"; -const COLUMN_OPTION_MAPPINGS: [(&str, &str); 5] = [ +const COLUMN_OPTION_MAPPINGS: [(&str, &str); 6] = [ (FULLTEXT_GRPC_KEY, FULLTEXT_KEY), (INVERTED_INDEX_GRPC_KEY, INVERTED_INDEX_KEY), (SKIPPING_INDEX_GRPC_KEY, SKIPPING_INDEX_KEY), + (VECTOR_INDEX_GRPC_KEY, VECTOR_INDEX_KEY), (EXTENSION_TYPE_NAME_KEY, EXTENSION_TYPE_NAME_KEY), (EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_METADATA_KEY), ]; @@ -77,6 +80,9 @@ pub fn try_as_column_schema(column_def: &ColumnDef) -> Result { if let Some(skipping_index) = options.options.get(SKIPPING_INDEX_GRPC_KEY) { metadata.insert(SKIPPING_INDEX_KEY.to_string(), skipping_index.to_owned()); } + if let Some(vector_index) = options.options.get(VECTOR_INDEX_GRPC_KEY) { + metadata.insert(VECTOR_INDEX_KEY.to_string(), vector_index.to_owned()); + } if let Some(extension_name) = options.options.get(EXTENSION_TYPE_NAME_KEY) { metadata.insert(EXTENSION_TYPE_NAME_KEY.to_string(), extension_name.clone()); } @@ -172,6 +178,11 @@ pub fn options_from_column_schema(column_schema: &ColumnSchema) -> Option Skipp mod tests { use datatypes::data_type::ConcreteDataType; - use datatypes::schema::{FulltextAnalyzer, FulltextBackend}; + use datatypes::schema::{ + FulltextAnalyzer, FulltextBackend, VectorDistanceMetric, VectorIndexOptions, + }; + use serde_json::json; use super::*; use crate::v1::ColumnDataType; @@ -283,6 +297,10 @@ mod tests { "{\"enable\":true}".to_string(), ), (INVERTED_INDEX_GRPC_KEY.to_string(), "true".to_string()), + ( + VECTOR_INDEX_GRPC_KEY.to_string(), + "{\"engine\":\"usearch\",\"metric\":\"l2sq\",\"connectivity\":16,\"expansion-add\":128,\"expansion-search\":64}".to_string(), + ), ]), }), }; @@ -305,6 +323,8 @@ mod tests { } ); assert!(schema.is_inverted_indexed()); + let vector_options = schema.vector_index_options().unwrap().unwrap(); + assert_eq!(vector_options.metric, VectorDistanceMetric::L2sq); } #[test] @@ -335,6 +355,29 @@ mod tests { ); } + #[test] + fn test_vector_index_options_roundtrip() { + let schema = ColumnSchema::new("test", ConcreteDataType::vector_datatype(4), true) + .with_vector_index_options(&VectorIndexOptions::default()) + .unwrap(); + let column_def = try_as_column_def(&schema, false).unwrap(); + let roundtrip = try_as_column_schema(&column_def).unwrap(); + let options = roundtrip.vector_index_options().unwrap().unwrap(); + assert_eq!(options.metric, VectorDistanceMetric::L2sq); + + let options = column_def.options.unwrap(); + let raw = options.options.get(VECTOR_INDEX_GRPC_KEY).unwrap(); + let json_value: serde_json::Value = serde_json::from_str(raw).unwrap(); + let expected = json!({ + "engine": "usearch", + "metric": "l2sq", + "connectivity": 16, + "expansion-add": 128, + "expansion-search": 64 + }); + assert_eq!(json_value, expected); + } + #[test] fn test_options_with_fulltext() { let fulltext = FulltextOptions::new_unchecked( diff --git a/src/datatypes/src/schema.rs b/src/datatypes/src/schema.rs index b5451617f8..46305d0717 100644 --- a/src/datatypes/src/schema.rs +++ b/src/datatypes/src/schema.rs @@ -31,10 +31,13 @@ pub use crate::schema::column_schema::{ COLUMN_FULLTEXT_OPT_KEY_BACKEND, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE, COLUMN_FULLTEXT_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, - COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, ColumnExtType, ColumnSchema, FULLTEXT_KEY, - FulltextAnalyzer, FulltextBackend, FulltextOptions, INVERTED_INDEX_KEY, Metadata, - SKIPPING_INDEX_KEY, SkippingIndexOptions, SkippingIndexType, TIME_INDEX_KEY, VECTOR_INDEX_KEY, - VectorDistanceMetric, VectorIndexEngineType, VectorIndexOptions, + COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COLUMN_VECTOR_INDEX_OPT_KEY_CONNECTIVITY, + COLUMN_VECTOR_INDEX_OPT_KEY_ENGINE, COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_ADD, + COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_SEARCH, COLUMN_VECTOR_INDEX_OPT_KEY_METRIC, COMMENT_KEY, + ColumnExtType, ColumnSchema, FULLTEXT_KEY, FulltextAnalyzer, FulltextBackend, FulltextOptions, + INVERTED_INDEX_KEY, Metadata, SKIPPING_INDEX_KEY, SkippingIndexOptions, SkippingIndexType, + TIME_INDEX_KEY, VECTOR_INDEX_KEY, VectorDistanceMetric, VectorIndexEngineType, + VectorIndexOptions, }; pub use crate::schema::constraint::ColumnDefaultConstraint; pub use crate::schema::raw::RawSchema; diff --git a/src/datatypes/src/schema/column_schema.rs b/src/datatypes/src/schema/column_schema.rs index 38cdd7bb06..183cf05da8 100644 --- a/src/datatypes/src/schema/column_schema.rs +++ b/src/datatypes/src/schema/column_schema.rs @@ -62,6 +62,13 @@ pub const COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY: &str = "granularity"; pub const COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE: &str = "false_positive_rate"; pub const COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE: &str = "type"; +/// Keys used in VECTOR index options +pub const COLUMN_VECTOR_INDEX_OPT_KEY_ENGINE: &str = "engine"; +pub const COLUMN_VECTOR_INDEX_OPT_KEY_METRIC: &str = "metric"; +pub const COLUMN_VECTOR_INDEX_OPT_KEY_CONNECTIVITY: &str = "connectivity"; +pub const COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_ADD: &str = "expansion_add"; +pub const COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_SEARCH: &str = "expansion_search"; + pub const DEFAULT_GRANULARITY: u32 = 10240; pub const DEFAULT_FALSE_POSITIVE_RATE: f64 = 0.01; diff --git a/src/query/src/error.rs b/src/query/src/error.rs index 3f8f9332c8..f863a26c4a 100644 --- a/src/query/src/error.rs +++ b/src/query/src/error.rs @@ -331,6 +331,13 @@ pub enum Error { location: Location, }, + #[snafu(display("Failed to get VECTOR index options"))] + GetVectorIndexOptions { + source: datatypes::error::Error, + #[snafu(implicit)] + location: Location, + }, + #[snafu(display( "Column schema mismatch in CTE {}, original: {:?}, expected: {:?}", cte_name, @@ -424,6 +431,7 @@ impl ErrorExt for Error { GetFulltextOptions { source, .. } | GetSkippingIndexOptions { source, .. } + | GetVectorIndexOptions { source, .. } | Datatypes { source, .. } => source.status_code(), } } diff --git a/src/query/src/sql/show_create_table.rs b/src/query/src/sql/show_create_table.rs index 95e8ae175b..ee3049c9f7 100644 --- a/src/query/src/sql/show_create_table.rs +++ b/src/query/src/sql/show_create_table.rs @@ -23,8 +23,11 @@ use datatypes::schema::{ COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_BACKEND, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE, COLUMN_FULLTEXT_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE, - COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, - ColumnDefaultConstraint, ColumnSchema, FulltextBackend, SchemaRef, + COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, + COLUMN_VECTOR_INDEX_OPT_KEY_CONNECTIVITY, COLUMN_VECTOR_INDEX_OPT_KEY_ENGINE, + COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_ADD, COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_SEARCH, + COLUMN_VECTOR_INDEX_OPT_KEY_METRIC, COMMENT_KEY, ColumnDefaultConstraint, ColumnSchema, + FulltextBackend, SchemaRef, }; use snafu::ResultExt; use sql::ast::{ColumnDef, ColumnOption, ColumnOptionDef, Expr, Ident, ObjectName}; @@ -40,7 +43,7 @@ use table::requests::{ use crate::error::{ ConvertSqlTypeSnafu, ConvertSqlValueSnafu, GetFulltextOptionsSnafu, - GetSkippingIndexOptionsSnafu, Result, SqlSnafu, + GetSkippingIndexOptionsSnafu, GetVectorIndexOptionsSnafu, Result, SqlSnafu, }; /// Generates CREATE TABLE options from given table metadata and schema-level options. @@ -161,6 +164,35 @@ fn create_column(column_schema: &ColumnSchema, quote_style: char) -> Result bool { .contains(&key) } -/// Valid options for VECTOR INDEX: -/// - engine: Vector index engine (usearch) -/// - metric: Distance metric (l2sq, cosine, inner_product) -/// - connectivity: HNSW M parameter -/// - expansion_add: ef_construction parameter -/// - expansion_search: ef_search parameter -pub const COLUMN_VECTOR_INDEX_OPT_KEY_ENGINE: &str = "engine"; -pub const COLUMN_VECTOR_INDEX_OPT_KEY_METRIC: &str = "metric"; -pub const COLUMN_VECTOR_INDEX_OPT_KEY_CONNECTIVITY: &str = "connectivity"; -pub const COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_ADD: &str = "expansion_add"; -pub const COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_SEARCH: &str = "expansion_search"; - pub fn validate_column_vector_index_create_option(key: &str) -> bool { [ COLUMN_VECTOR_INDEX_OPT_KEY_ENGINE,