feat: add granularity and false_positive_rate options for indexes (#6416)

* feat: add `granularity` and `false_positive_rate` options for indexes

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* address comments

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* upgrade proto

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
This commit is contained in:
Zhenchi
2025-07-02 15:33:39 +08:00
committed by GitHub
parent 385f12a62e
commit 599f289f59
40 changed files with 964 additions and 411 deletions

View File

@@ -18,8 +18,10 @@ use std::collections::HashMap;
use common_meta::SchemaOptions;
use datatypes::schema::{
ColumnDefaultConstraint, ColumnSchema, SchemaRef, COLUMN_FULLTEXT_OPT_KEY_ANALYZER,
COLUMN_FULLTEXT_OPT_KEY_BACKEND, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE,
ColumnDefaultConstraint, ColumnSchema, FulltextBackend, SchemaRef,
COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_BACKEND,
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE,
COLUMN_FULLTEXT_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE,
COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY,
};
use snafu::ResultExt;
@@ -104,7 +106,7 @@ fn create_column(column_schema: &ColumnSchema, quote_style: char) -> Result<Colu
.context(GetFulltextOptionsSnafu)?
&& opt.enable
{
let map = HashMap::from([
let mut map = HashMap::from([
(
COLUMN_FULLTEXT_OPT_KEY_ANALYZER.to_string(),
opt.analyzer.to_string(),
@@ -118,6 +120,16 @@ fn create_column(column_schema: &ColumnSchema, quote_style: char) -> Result<Colu
opt.backend.to_string(),
),
]);
if opt.backend == FulltextBackend::Bloom {
map.insert(
COLUMN_FULLTEXT_OPT_KEY_GRANULARITY.to_string(),
opt.granularity.to_string(),
);
map.insert(
COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE.to_string(),
opt.false_positive_rate().to_string(),
);
}
extensions.fulltext_index_options = Some(map.into());
}
@@ -130,6 +142,10 @@ fn create_column(column_schema: &ColumnSchema, quote_style: char) -> Result<Colu
COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY.to_string(),
opt.granularity.to_string(),
),
(
COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE.to_string(),
opt.false_positive_rate().to_string(),
),
(
COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE.to_string(),
opt.index_type.to_string(),
@@ -327,11 +343,11 @@ mod tests {
assert_eq!(
r#"
CREATE TABLE IF NOT EXISTS "system_metrics" (
"id" INT UNSIGNED NULL SKIPPING INDEX WITH(granularity = '4096', type = 'BLOOM'),
"id" INT UNSIGNED NULL SKIPPING INDEX WITH(false_positive_rate = '0.01', granularity = '4096', type = 'BLOOM'),
"host" STRING NULL INVERTED INDEX,
"cpu" DOUBLE NULL,
"disk" FLOAT NULL,
"msg" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', backend = 'bloom', case_sensitive = 'false'),
"msg" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', backend = 'bloom', case_sensitive = 'false', false_positive_rate = '0.01', granularity = '10240'),
"ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(),
TIME INDEX ("ts"),
PRIMARY KEY ("id", "host")