fix: vector index metadata missing (#7575)

* fix: vector index metadata missing

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* refactor: constants and test

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

---------

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
This commit is contained in:
dennis zhuang
2026-01-16 11:45:42 +08:00
committed by GitHub
parent 007e6cc860
commit 593befbc0f
6 changed files with 113 additions and 23 deletions

View File

@@ -18,7 +18,7 @@ use arrow_schema::extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_K
use datatypes::schema::{
COMMENT_KEY, ColumnDefaultConstraint, ColumnSchema, FULLTEXT_KEY, FulltextAnalyzer,
FulltextBackend, FulltextOptions, INVERTED_INDEX_KEY, Metadata, SKIPPING_INDEX_KEY,
SkippingIndexOptions, SkippingIndexType,
SkippingIndexOptions, SkippingIndexType, VECTOR_INDEX_KEY,
};
use greptime_proto::v1::{
Analyzer, FulltextBackend as PbFulltextBackend, SkippingIndexType as PbSkippingIndexType,
@@ -35,11 +35,14 @@ const FULLTEXT_GRPC_KEY: &str = "fulltext";
const INVERTED_INDEX_GRPC_KEY: &str = "inverted_index";
/// Key used to store skip index options in gRPC column options.
const SKIPPING_INDEX_GRPC_KEY: &str = "skipping_index";
/// Key used to store vector index options in gRPC column options.
const VECTOR_INDEX_GRPC_KEY: &str = "vector_index";
const COLUMN_OPTION_MAPPINGS: [(&str, &str); 5] = [
const COLUMN_OPTION_MAPPINGS: [(&str, &str); 6] = [
(FULLTEXT_GRPC_KEY, FULLTEXT_KEY),
(INVERTED_INDEX_GRPC_KEY, INVERTED_INDEX_KEY),
(SKIPPING_INDEX_GRPC_KEY, SKIPPING_INDEX_KEY),
(VECTOR_INDEX_GRPC_KEY, VECTOR_INDEX_KEY),
(EXTENSION_TYPE_NAME_KEY, EXTENSION_TYPE_NAME_KEY),
(EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_METADATA_KEY),
];
@@ -77,6 +80,9 @@ pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
if let Some(skipping_index) = options.options.get(SKIPPING_INDEX_GRPC_KEY) {
metadata.insert(SKIPPING_INDEX_KEY.to_string(), skipping_index.to_owned());
}
if let Some(vector_index) = options.options.get(VECTOR_INDEX_GRPC_KEY) {
metadata.insert(VECTOR_INDEX_KEY.to_string(), vector_index.to_owned());
}
if let Some(extension_name) = options.options.get(EXTENSION_TYPE_NAME_KEY) {
metadata.insert(EXTENSION_TYPE_NAME_KEY.to_string(), extension_name.clone());
}
@@ -172,6 +178,11 @@ pub fn options_from_column_schema(column_schema: &ColumnSchema) -> Option<Column
.options
.insert(SKIPPING_INDEX_GRPC_KEY.to_string(), skipping_index.clone());
}
if let Some(vector_index) = column_schema.metadata().get(VECTOR_INDEX_KEY) {
options
.options
.insert(VECTOR_INDEX_GRPC_KEY.to_string(), vector_index.clone());
}
if let Some(extension_name) = column_schema.metadata().get(EXTENSION_TYPE_NAME_KEY) {
options
.options
@@ -259,7 +270,10 @@ pub fn as_skipping_index_type(skipping_index_type: PbSkippingIndexType) -> Skipp
mod tests {
use datatypes::data_type::ConcreteDataType;
use datatypes::schema::{FulltextAnalyzer, FulltextBackend};
use datatypes::schema::{
FulltextAnalyzer, FulltextBackend, VectorDistanceMetric, VectorIndexOptions,
};
use serde_json::json;
use super::*;
use crate::v1::ColumnDataType;
@@ -283,6 +297,10 @@ mod tests {
"{\"enable\":true}".to_string(),
),
(INVERTED_INDEX_GRPC_KEY.to_string(), "true".to_string()),
(
VECTOR_INDEX_GRPC_KEY.to_string(),
"{\"engine\":\"usearch\",\"metric\":\"l2sq\",\"connectivity\":16,\"expansion-add\":128,\"expansion-search\":64}".to_string(),
),
]),
}),
};
@@ -305,6 +323,8 @@ mod tests {
}
);
assert!(schema.is_inverted_indexed());
let vector_options = schema.vector_index_options().unwrap().unwrap();
assert_eq!(vector_options.metric, VectorDistanceMetric::L2sq);
}
#[test]
@@ -335,6 +355,29 @@ mod tests {
);
}
/// Verifies that vector index options survive a `ColumnSchema` -> `ColumnDef` ->
/// `ColumnSchema` round trip, and that the string stored under the gRPC option key
/// parses to the expected JSON document.
#[test]
fn test_vector_index_options_roundtrip() {
    let default_options = VectorIndexOptions::default();
    let original = ColumnSchema::new("test", ConcreteDataType::vector_datatype(4), true)
        .with_vector_index_options(&default_options)
        .unwrap();

    // Convert to the gRPC column definition and back again.
    let def = try_as_column_def(&original, false).unwrap();
    let restored = try_as_column_schema(&def).unwrap();
    let restored_options = restored.vector_index_options().unwrap().unwrap();
    assert_eq!(restored_options.metric, VectorDistanceMetric::L2sq);

    // Inspect the raw payload stored under the gRPC key of the column definition.
    let column_options = def.options.unwrap();
    let payload = column_options.options.get(VECTOR_INDEX_GRPC_KEY).unwrap();
    let actual: serde_json::Value = serde_json::from_str(payload).unwrap();
    assert_eq!(
        actual,
        json!({
            "engine": "usearch",
            "metric": "l2sq",
            "connectivity": 16,
            "expansion-add": 128,
            "expansion-search": 64
        })
    );
}
#[test]
fn test_options_with_fulltext() {
let fulltext = FulltextOptions::new_unchecked(

View File

@@ -31,10 +31,13 @@ pub use crate::schema::column_schema::{
COLUMN_FULLTEXT_OPT_KEY_BACKEND, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE,
COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE, COLUMN_FULLTEXT_OPT_KEY_GRANULARITY,
COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY,
COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, ColumnExtType, ColumnSchema, FULLTEXT_KEY,
FulltextAnalyzer, FulltextBackend, FulltextOptions, INVERTED_INDEX_KEY, Metadata,
SKIPPING_INDEX_KEY, SkippingIndexOptions, SkippingIndexType, TIME_INDEX_KEY, VECTOR_INDEX_KEY,
VectorDistanceMetric, VectorIndexEngineType, VectorIndexOptions,
COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COLUMN_VECTOR_INDEX_OPT_KEY_CONNECTIVITY,
COLUMN_VECTOR_INDEX_OPT_KEY_ENGINE, COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_ADD,
COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_SEARCH, COLUMN_VECTOR_INDEX_OPT_KEY_METRIC, COMMENT_KEY,
ColumnExtType, ColumnSchema, FULLTEXT_KEY, FulltextAnalyzer, FulltextBackend, FulltextOptions,
INVERTED_INDEX_KEY, Metadata, SKIPPING_INDEX_KEY, SkippingIndexOptions, SkippingIndexType,
TIME_INDEX_KEY, VECTOR_INDEX_KEY, VectorDistanceMetric, VectorIndexEngineType,
VectorIndexOptions,
};
pub use crate::schema::constraint::ColumnDefaultConstraint;
pub use crate::schema::raw::RawSchema;

View File

@@ -62,6 +62,13 @@ pub const COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY: &str = "granularity";
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE: &str = "false_positive_rate";
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE: &str = "type";
/// Keys used in VECTOR index options.
///
/// Key for the vector index engine (`usearch`).
pub const COLUMN_VECTOR_INDEX_OPT_KEY_ENGINE: &str = "engine";
/// Key for the distance metric (`l2sq`, `cosine`, `inner_product`).
pub const COLUMN_VECTOR_INDEX_OPT_KEY_METRIC: &str = "metric";
/// Key for the HNSW `M` parameter.
pub const COLUMN_VECTOR_INDEX_OPT_KEY_CONNECTIVITY: &str = "connectivity";
/// Key for the `ef_construction` parameter.
pub const COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_ADD: &str = "expansion_add";
/// Key for the `ef_search` parameter.
pub const COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_SEARCH: &str = "expansion_search";
pub const DEFAULT_GRANULARITY: u32 = 10240;
pub const DEFAULT_FALSE_POSITIVE_RATE: f64 = 0.01;

View File

@@ -331,6 +331,13 @@ pub enum Error {
location: Location,
},
#[snafu(display("Failed to get VECTOR index options"))]
GetVectorIndexOptions {
source: datatypes::error::Error,
#[snafu(implicit)]
location: Location,
},
#[snafu(display(
"Column schema mismatch in CTE {}, original: {:?}, expected: {:?}",
cte_name,
@@ -424,6 +431,7 @@ impl ErrorExt for Error {
GetFulltextOptions { source, .. }
| GetSkippingIndexOptions { source, .. }
| GetVectorIndexOptions { source, .. }
| Datatypes { source, .. } => source.status_code(),
}
}

View File

@@ -23,8 +23,11 @@ use datatypes::schema::{
COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_BACKEND,
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE,
COLUMN_FULLTEXT_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE,
COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY,
ColumnDefaultConstraint, ColumnSchema, FulltextBackend, SchemaRef,
COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE,
COLUMN_VECTOR_INDEX_OPT_KEY_CONNECTIVITY, COLUMN_VECTOR_INDEX_OPT_KEY_ENGINE,
COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_ADD, COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_SEARCH,
COLUMN_VECTOR_INDEX_OPT_KEY_METRIC, COMMENT_KEY, ColumnDefaultConstraint, ColumnSchema,
FulltextBackend, SchemaRef,
};
use snafu::ResultExt;
use sql::ast::{ColumnDef, ColumnOption, ColumnOptionDef, Expr, Ident, ObjectName};
@@ -40,7 +43,7 @@ use table::requests::{
use crate::error::{
ConvertSqlTypeSnafu, ConvertSqlValueSnafu, GetFulltextOptionsSnafu,
GetSkippingIndexOptionsSnafu, Result, SqlSnafu,
GetSkippingIndexOptionsSnafu, GetVectorIndexOptionsSnafu, Result, SqlSnafu,
};
/// Generates CREATE TABLE options from given table metadata and schema-level options.
@@ -161,6 +164,35 @@ fn create_column(column_schema: &ColumnSchema, quote_style: char) -> Result<Colu
extensions.skipping_index_options = Some(map.into());
}
if let Some(opt) = column_schema
.vector_index_options()
.context(GetVectorIndexOptionsSnafu)?
{
let map = HashMap::from([
(
COLUMN_VECTOR_INDEX_OPT_KEY_ENGINE.to_string(),
opt.engine.to_string(),
),
(
COLUMN_VECTOR_INDEX_OPT_KEY_METRIC.to_string(),
opt.metric.to_string(),
),
(
COLUMN_VECTOR_INDEX_OPT_KEY_CONNECTIVITY.to_string(),
opt.connectivity.to_string(),
),
(
COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_ADD.to_string(),
opt.expansion_add.to_string(),
),
(
COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_SEARCH.to_string(),
opt.expansion_search.to_string(),
),
]);
extensions.vector_index_options = Some(map.into());
}
if column_schema.is_inverted_indexed() {
extensions.inverted_index_options = Some(HashMap::new().into());
}
@@ -279,7 +311,9 @@ mod tests {
use common_time::timestamp::TimeUnit;
use datatypes::prelude::ConcreteDataType;
use datatypes::schema::{FulltextOptions, Schema, SchemaRef, SkippingIndexOptions};
use datatypes::schema::{
FulltextOptions, Schema, SchemaRef, SkippingIndexOptions, VectorIndexOptions,
};
use table::metadata::*;
use table::requests::{
FILE_TABLE_FORMAT_KEY, FILE_TABLE_LOCATION_KEY, FILE_TABLE_META_KEY, TableOptions,
@@ -306,6 +340,9 @@ mod tests {
..Default::default()
})
.unwrap(),
ColumnSchema::new("embedding", ConcreteDataType::vector_datatype(4), true)
.with_vector_index_options(&VectorIndexOptions::default())
.unwrap(),
ColumnSchema::new(
"ts",
ConcreteDataType::timestamp_datatype(TimeUnit::Millisecond),
@@ -368,6 +405,7 @@ CREATE TABLE IF NOT EXISTS "system_metrics" (
"cpu" DOUBLE NULL,
"disk" FLOAT NULL,
"msg" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', backend = 'bloom', case_sensitive = 'false', false_positive_rate = '0.01', granularity = '10240'),
"embedding" VECTOR(4) NULL VECTOR INDEX WITH(connectivity = '16', engine = 'usearch', expansion_add = '128', expansion_search = '64', metric = 'l2sq'),
"ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(),
TIME INDEX ("ts"),
PRIMARY KEY ("id", "host")

View File

@@ -33,6 +33,9 @@ use datatypes::schema::{
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE,
COLUMN_FULLTEXT_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE,
COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE,
COLUMN_VECTOR_INDEX_OPT_KEY_CONNECTIVITY, COLUMN_VECTOR_INDEX_OPT_KEY_ENGINE,
COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_ADD, COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_SEARCH,
COLUMN_VECTOR_INDEX_OPT_KEY_METRIC,
};
use snafu::{ResultExt, ensure};
use sqlparser::dialect::Dialect;
@@ -222,18 +225,6 @@ pub fn validate_column_skipping_index_create_option(key: &str) -> bool {
.contains(&key)
}
/// Valid options for VECTOR INDEX:
/// - engine: Vector index engine (usearch)
/// - metric: Distance metric (l2sq, cosine, inner_product)
/// - connectivity: HNSW M parameter
/// - expansion_add: ef_construction parameter
/// - expansion_search: ef_search parameter
pub const COLUMN_VECTOR_INDEX_OPT_KEY_ENGINE: &str = "engine";
pub const COLUMN_VECTOR_INDEX_OPT_KEY_METRIC: &str = "metric";
pub const COLUMN_VECTOR_INDEX_OPT_KEY_CONNECTIVITY: &str = "connectivity";
pub const COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_ADD: &str = "expansion_add";
pub const COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_SEARCH: &str = "expansion_search";
pub fn validate_column_vector_index_create_option(key: &str) -> bool {
[
COLUMN_VECTOR_INDEX_OPT_KEY_ENGINE,