mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-01-16 10:12:58 +00:00
fix: vector index metadata missing (#7575)
* fix: vector index metadata missing

  Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

* refactor: constants and test

  Signed-off-by: Dennis Zhuang <killme2008@gmail.com>

---------

Signed-off-by: Dennis Zhuang <killme2008@gmail.com>
This commit is contained in:
@@ -18,7 +18,7 @@ use arrow_schema::extension::{EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_NAME_K
|
||||
use datatypes::schema::{
|
||||
COMMENT_KEY, ColumnDefaultConstraint, ColumnSchema, FULLTEXT_KEY, FulltextAnalyzer,
|
||||
FulltextBackend, FulltextOptions, INVERTED_INDEX_KEY, Metadata, SKIPPING_INDEX_KEY,
|
||||
SkippingIndexOptions, SkippingIndexType,
|
||||
SkippingIndexOptions, SkippingIndexType, VECTOR_INDEX_KEY,
|
||||
};
|
||||
use greptime_proto::v1::{
|
||||
Analyzer, FulltextBackend as PbFulltextBackend, SkippingIndexType as PbSkippingIndexType,
|
||||
@@ -35,11 +35,14 @@ const FULLTEXT_GRPC_KEY: &str = "fulltext";
|
||||
const INVERTED_INDEX_GRPC_KEY: &str = "inverted_index";
|
||||
/// Key used to store skip index options in gRPC column options.
|
||||
const SKIPPING_INDEX_GRPC_KEY: &str = "skipping_index";
|
||||
/// Key used to store vector index options in gRPC column options.
|
||||
const VECTOR_INDEX_GRPC_KEY: &str = "vector_index";
|
||||
|
||||
const COLUMN_OPTION_MAPPINGS: [(&str, &str); 5] = [
|
||||
const COLUMN_OPTION_MAPPINGS: [(&str, &str); 6] = [
|
||||
(FULLTEXT_GRPC_KEY, FULLTEXT_KEY),
|
||||
(INVERTED_INDEX_GRPC_KEY, INVERTED_INDEX_KEY),
|
||||
(SKIPPING_INDEX_GRPC_KEY, SKIPPING_INDEX_KEY),
|
||||
(VECTOR_INDEX_GRPC_KEY, VECTOR_INDEX_KEY),
|
||||
(EXTENSION_TYPE_NAME_KEY, EXTENSION_TYPE_NAME_KEY),
|
||||
(EXTENSION_TYPE_METADATA_KEY, EXTENSION_TYPE_METADATA_KEY),
|
||||
];
|
||||
@@ -77,6 +80,9 @@ pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
|
||||
if let Some(skipping_index) = options.options.get(SKIPPING_INDEX_GRPC_KEY) {
|
||||
metadata.insert(SKIPPING_INDEX_KEY.to_string(), skipping_index.to_owned());
|
||||
}
|
||||
if let Some(vector_index) = options.options.get(VECTOR_INDEX_GRPC_KEY) {
|
||||
metadata.insert(VECTOR_INDEX_KEY.to_string(), vector_index.to_owned());
|
||||
}
|
||||
if let Some(extension_name) = options.options.get(EXTENSION_TYPE_NAME_KEY) {
|
||||
metadata.insert(EXTENSION_TYPE_NAME_KEY.to_string(), extension_name.clone());
|
||||
}
|
||||
@@ -172,6 +178,11 @@ pub fn options_from_column_schema(column_schema: &ColumnSchema) -> Option<Column
|
||||
.options
|
||||
.insert(SKIPPING_INDEX_GRPC_KEY.to_string(), skipping_index.clone());
|
||||
}
|
||||
if let Some(vector_index) = column_schema.metadata().get(VECTOR_INDEX_KEY) {
|
||||
options
|
||||
.options
|
||||
.insert(VECTOR_INDEX_GRPC_KEY.to_string(), vector_index.clone());
|
||||
}
|
||||
if let Some(extension_name) = column_schema.metadata().get(EXTENSION_TYPE_NAME_KEY) {
|
||||
options
|
||||
.options
|
||||
@@ -259,7 +270,10 @@ pub fn as_skipping_index_type(skipping_index_type: PbSkippingIndexType) -> Skipp
|
||||
mod tests {
|
||||
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use datatypes::schema::{FulltextAnalyzer, FulltextBackend};
|
||||
use datatypes::schema::{
|
||||
FulltextAnalyzer, FulltextBackend, VectorDistanceMetric, VectorIndexOptions,
|
||||
};
|
||||
use serde_json::json;
|
||||
|
||||
use super::*;
|
||||
use crate::v1::ColumnDataType;
|
||||
@@ -283,6 +297,10 @@ mod tests {
|
||||
"{\"enable\":true}".to_string(),
|
||||
),
|
||||
(INVERTED_INDEX_GRPC_KEY.to_string(), "true".to_string()),
|
||||
(
|
||||
VECTOR_INDEX_GRPC_KEY.to_string(),
|
||||
"{\"engine\":\"usearch\",\"metric\":\"l2sq\",\"connectivity\":16,\"expansion-add\":128,\"expansion-search\":64}".to_string(),
|
||||
),
|
||||
]),
|
||||
}),
|
||||
};
|
||||
@@ -305,6 +323,8 @@ mod tests {
|
||||
}
|
||||
);
|
||||
assert!(schema.is_inverted_indexed());
|
||||
let vector_options = schema.vector_index_options().unwrap().unwrap();
|
||||
assert_eq!(vector_options.metric, VectorDistanceMetric::L2sq);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -335,6 +355,29 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_vector_index_options_roundtrip() {
|
||||
let schema = ColumnSchema::new("test", ConcreteDataType::vector_datatype(4), true)
|
||||
.with_vector_index_options(&VectorIndexOptions::default())
|
||||
.unwrap();
|
||||
let column_def = try_as_column_def(&schema, false).unwrap();
|
||||
let roundtrip = try_as_column_schema(&column_def).unwrap();
|
||||
let options = roundtrip.vector_index_options().unwrap().unwrap();
|
||||
assert_eq!(options.metric, VectorDistanceMetric::L2sq);
|
||||
|
||||
let options = column_def.options.unwrap();
|
||||
let raw = options.options.get(VECTOR_INDEX_GRPC_KEY).unwrap();
|
||||
let json_value: serde_json::Value = serde_json::from_str(raw).unwrap();
|
||||
let expected = json!({
|
||||
"engine": "usearch",
|
||||
"metric": "l2sq",
|
||||
"connectivity": 16,
|
||||
"expansion-add": 128,
|
||||
"expansion-search": 64
|
||||
});
|
||||
assert_eq!(json_value, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_options_with_fulltext() {
|
||||
let fulltext = FulltextOptions::new_unchecked(
|
||||
|
||||
@@ -31,10 +31,13 @@ pub use crate::schema::column_schema::{
|
||||
COLUMN_FULLTEXT_OPT_KEY_BACKEND, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE,
|
||||
COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE, COLUMN_FULLTEXT_OPT_KEY_GRANULARITY,
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY,
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, ColumnExtType, ColumnSchema, FULLTEXT_KEY,
|
||||
FulltextAnalyzer, FulltextBackend, FulltextOptions, INVERTED_INDEX_KEY, Metadata,
|
||||
SKIPPING_INDEX_KEY, SkippingIndexOptions, SkippingIndexType, TIME_INDEX_KEY, VECTOR_INDEX_KEY,
|
||||
VectorDistanceMetric, VectorIndexEngineType, VectorIndexOptions,
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COLUMN_VECTOR_INDEX_OPT_KEY_CONNECTIVITY,
|
||||
COLUMN_VECTOR_INDEX_OPT_KEY_ENGINE, COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_ADD,
|
||||
COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_SEARCH, COLUMN_VECTOR_INDEX_OPT_KEY_METRIC, COMMENT_KEY,
|
||||
ColumnExtType, ColumnSchema, FULLTEXT_KEY, FulltextAnalyzer, FulltextBackend, FulltextOptions,
|
||||
INVERTED_INDEX_KEY, Metadata, SKIPPING_INDEX_KEY, SkippingIndexOptions, SkippingIndexType,
|
||||
TIME_INDEX_KEY, VECTOR_INDEX_KEY, VectorDistanceMetric, VectorIndexEngineType,
|
||||
VectorIndexOptions,
|
||||
};
|
||||
pub use crate::schema::constraint::ColumnDefaultConstraint;
|
||||
pub use crate::schema::raw::RawSchema;
|
||||
|
||||
@@ -62,6 +62,13 @@ pub const COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY: &str = "granularity";
|
||||
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE: &str = "false_positive_rate";
|
||||
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE: &str = "type";
|
||||
|
||||
/// Keys used in VECTOR index options
|
||||
pub const COLUMN_VECTOR_INDEX_OPT_KEY_ENGINE: &str = "engine";
|
||||
pub const COLUMN_VECTOR_INDEX_OPT_KEY_METRIC: &str = "metric";
|
||||
pub const COLUMN_VECTOR_INDEX_OPT_KEY_CONNECTIVITY: &str = "connectivity";
|
||||
pub const COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_ADD: &str = "expansion_add";
|
||||
pub const COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_SEARCH: &str = "expansion_search";
|
||||
|
||||
pub const DEFAULT_GRANULARITY: u32 = 10240;
|
||||
|
||||
pub const DEFAULT_FALSE_POSITIVE_RATE: f64 = 0.01;
|
||||
|
||||
@@ -331,6 +331,13 @@ pub enum Error {
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Failed to get VECTOR index options"))]
|
||||
GetVectorIndexOptions {
|
||||
source: datatypes::error::Error,
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display(
|
||||
"Column schema mismatch in CTE {}, original: {:?}, expected: {:?}",
|
||||
cte_name,
|
||||
@@ -424,6 +431,7 @@ impl ErrorExt for Error {
|
||||
|
||||
GetFulltextOptions { source, .. }
|
||||
| GetSkippingIndexOptions { source, .. }
|
||||
| GetVectorIndexOptions { source, .. }
|
||||
| Datatypes { source, .. } => source.status_code(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,8 +23,11 @@ use datatypes::schema::{
|
||||
COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_BACKEND,
|
||||
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE,
|
||||
COLUMN_FULLTEXT_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE,
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY,
|
||||
ColumnDefaultConstraint, ColumnSchema, FulltextBackend, SchemaRef,
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE,
|
||||
COLUMN_VECTOR_INDEX_OPT_KEY_CONNECTIVITY, COLUMN_VECTOR_INDEX_OPT_KEY_ENGINE,
|
||||
COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_ADD, COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_SEARCH,
|
||||
COLUMN_VECTOR_INDEX_OPT_KEY_METRIC, COMMENT_KEY, ColumnDefaultConstraint, ColumnSchema,
|
||||
FulltextBackend, SchemaRef,
|
||||
};
|
||||
use snafu::ResultExt;
|
||||
use sql::ast::{ColumnDef, ColumnOption, ColumnOptionDef, Expr, Ident, ObjectName};
|
||||
@@ -40,7 +43,7 @@ use table::requests::{
|
||||
|
||||
use crate::error::{
|
||||
ConvertSqlTypeSnafu, ConvertSqlValueSnafu, GetFulltextOptionsSnafu,
|
||||
GetSkippingIndexOptionsSnafu, Result, SqlSnafu,
|
||||
GetSkippingIndexOptionsSnafu, GetVectorIndexOptionsSnafu, Result, SqlSnafu,
|
||||
};
|
||||
|
||||
/// Generates CREATE TABLE options from given table metadata and schema-level options.
|
||||
@@ -161,6 +164,35 @@ fn create_column(column_schema: &ColumnSchema, quote_style: char) -> Result<Colu
|
||||
extensions.skipping_index_options = Some(map.into());
|
||||
}
|
||||
|
||||
if let Some(opt) = column_schema
|
||||
.vector_index_options()
|
||||
.context(GetVectorIndexOptionsSnafu)?
|
||||
{
|
||||
let map = HashMap::from([
|
||||
(
|
||||
COLUMN_VECTOR_INDEX_OPT_KEY_ENGINE.to_string(),
|
||||
opt.engine.to_string(),
|
||||
),
|
||||
(
|
||||
COLUMN_VECTOR_INDEX_OPT_KEY_METRIC.to_string(),
|
||||
opt.metric.to_string(),
|
||||
),
|
||||
(
|
||||
COLUMN_VECTOR_INDEX_OPT_KEY_CONNECTIVITY.to_string(),
|
||||
opt.connectivity.to_string(),
|
||||
),
|
||||
(
|
||||
COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_ADD.to_string(),
|
||||
opt.expansion_add.to_string(),
|
||||
),
|
||||
(
|
||||
COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_SEARCH.to_string(),
|
||||
opt.expansion_search.to_string(),
|
||||
),
|
||||
]);
|
||||
extensions.vector_index_options = Some(map.into());
|
||||
}
|
||||
|
||||
if column_schema.is_inverted_indexed() {
|
||||
extensions.inverted_index_options = Some(HashMap::new().into());
|
||||
}
|
||||
@@ -279,7 +311,9 @@ mod tests {
|
||||
|
||||
use common_time::timestamp::TimeUnit;
|
||||
use datatypes::prelude::ConcreteDataType;
|
||||
use datatypes::schema::{FulltextOptions, Schema, SchemaRef, SkippingIndexOptions};
|
||||
use datatypes::schema::{
|
||||
FulltextOptions, Schema, SchemaRef, SkippingIndexOptions, VectorIndexOptions,
|
||||
};
|
||||
use table::metadata::*;
|
||||
use table::requests::{
|
||||
FILE_TABLE_FORMAT_KEY, FILE_TABLE_LOCATION_KEY, FILE_TABLE_META_KEY, TableOptions,
|
||||
@@ -306,6 +340,9 @@ mod tests {
|
||||
..Default::default()
|
||||
})
|
||||
.unwrap(),
|
||||
ColumnSchema::new("embedding", ConcreteDataType::vector_datatype(4), true)
|
||||
.with_vector_index_options(&VectorIndexOptions::default())
|
||||
.unwrap(),
|
||||
ColumnSchema::new(
|
||||
"ts",
|
||||
ConcreteDataType::timestamp_datatype(TimeUnit::Millisecond),
|
||||
@@ -368,6 +405,7 @@ CREATE TABLE IF NOT EXISTS "system_metrics" (
|
||||
"cpu" DOUBLE NULL,
|
||||
"disk" FLOAT NULL,
|
||||
"msg" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', backend = 'bloom', case_sensitive = 'false', false_positive_rate = '0.01', granularity = '10240'),
|
||||
"embedding" VECTOR(4) NULL VECTOR INDEX WITH(connectivity = '16', engine = 'usearch', expansion_add = '128', expansion_search = '64', metric = 'l2sq'),
|
||||
"ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(),
|
||||
TIME INDEX ("ts"),
|
||||
PRIMARY KEY ("id", "host")
|
||||
|
||||
@@ -33,6 +33,9 @@ use datatypes::schema::{
|
||||
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE,
|
||||
COLUMN_FULLTEXT_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE,
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE,
|
||||
COLUMN_VECTOR_INDEX_OPT_KEY_CONNECTIVITY, COLUMN_VECTOR_INDEX_OPT_KEY_ENGINE,
|
||||
COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_ADD, COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_SEARCH,
|
||||
COLUMN_VECTOR_INDEX_OPT_KEY_METRIC,
|
||||
};
|
||||
use snafu::{ResultExt, ensure};
|
||||
use sqlparser::dialect::Dialect;
|
||||
@@ -222,18 +225,6 @@ pub fn validate_column_skipping_index_create_option(key: &str) -> bool {
|
||||
.contains(&key)
|
||||
}
|
||||
|
||||
/// Valid options for VECTOR INDEX:
|
||||
/// - engine: Vector index engine (usearch)
|
||||
/// - metric: Distance metric (l2sq, cosine, inner_product)
|
||||
/// - connectivity: HNSW M parameter
|
||||
/// - expansion_add: ef_construction parameter
|
||||
/// - expansion_search: ef_search parameter
|
||||
pub const COLUMN_VECTOR_INDEX_OPT_KEY_ENGINE: &str = "engine";
|
||||
pub const COLUMN_VECTOR_INDEX_OPT_KEY_METRIC: &str = "metric";
|
||||
pub const COLUMN_VECTOR_INDEX_OPT_KEY_CONNECTIVITY: &str = "connectivity";
|
||||
pub const COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_ADD: &str = "expansion_add";
|
||||
pub const COLUMN_VECTOR_INDEX_OPT_KEY_EXPANSION_SEARCH: &str = "expansion_search";
|
||||
|
||||
pub fn validate_column_vector_index_create_option(key: &str) -> bool {
|
||||
[
|
||||
COLUMN_VECTOR_INDEX_OPT_KEY_ENGINE,
|
||||
|
||||
Reference in New Issue
Block a user