feat: support altering fulltext backend (#5896)

* feat: add `greptime_index_type` to `information_schema.key_column_usage`

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: show create

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
This commit is contained in:
Zhenchi
2025-04-15 14:36:06 +08:00
committed by GitHub
parent 6a50d71920
commit 8d485e9be0
14 changed files with 294 additions and 191 deletions

View File

@@ -24,7 +24,7 @@ use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatch
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
use datatypes::prelude::{ConcreteDataType, MutableVector, ScalarVectorBuilder, VectorRef};
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
use datatypes::schema::{ColumnSchema, FulltextBackend, Schema, SchemaRef};
use datatypes::value::Value;
use datatypes::vectors::{ConstantVector, StringVector, StringVectorBuilder, UInt32VectorBuilder};
use futures_util::TryStreamExt;
@@ -47,20 +47,38 @@ pub const TABLE_SCHEMA: &str = "table_schema";
pub const TABLE_NAME: &str = "table_name";
pub const COLUMN_NAME: &str = "column_name";
pub const ORDINAL_POSITION: &str = "ordinal_position";
/// The type of the index.
pub const GREPTIME_INDEX_TYPE: &str = "greptime_index_type";
const INIT_CAPACITY: usize = 42;
/// Primary key constraint name
pub(crate) const PRI_CONSTRAINT_NAME: &str = "PRIMARY";
/// Time index constraint name
pub(crate) const TIME_INDEX_CONSTRAINT_NAME: &str = "TIME INDEX";
pub(crate) const CONSTRAINT_NAME_TIME_INDEX: &str = "TIME INDEX";
/// Primary key constraint name
pub(crate) const CONSTRAINT_NAME_PRI: &str = "PRIMARY";
/// Primary key index type
pub(crate) const INDEX_TYPE_PRI: &str = "greptime-primary-key-v1";
/// Inverted index constraint name
pub(crate) const INVERTED_INDEX_CONSTRAINT_NAME: &str = "INVERTED INDEX";
pub(crate) const CONSTRAINT_NAME_INVERTED_INDEX: &str = "INVERTED INDEX";
/// Inverted index type
pub(crate) const INDEX_TYPE_INVERTED_INDEX: &str = "greptime-inverted-index-v1";
/// Fulltext index constraint name
pub(crate) const FULLTEXT_INDEX_CONSTRAINT_NAME: &str = "FULLTEXT INDEX";
pub(crate) const CONSTRAINT_NAME_FULLTEXT_INDEX: &str = "FULLTEXT INDEX";
/// Fulltext index v1 type
pub(crate) const INDEX_TYPE_FULLTEXT_TANTIVY: &str = "greptime-fulltext-index-v1";
/// Fulltext index bloom type
pub(crate) const INDEX_TYPE_FULLTEXT_BLOOM: &str = "greptime-fulltext-index-bloom";
/// Skipping index constraint name
pub(crate) const SKIPPING_INDEX_CONSTRAINT_NAME: &str = "SKIPPING INDEX";
pub(crate) const CONSTRAINT_NAME_SKIPPING_INDEX: &str = "SKIPPING INDEX";
/// Skipping index type
pub(crate) const INDEX_TYPE_SKIPPING_INDEX: &str = "greptime-bloom-filter-v1";
/// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`.
///
/// Provides an extra column `greptime_index_type` for the index type of the key column.
#[derive(Debug)]
pub(super) struct InformationSchemaKeyColumnUsage {
schema: SchemaRef,
@@ -120,6 +138,11 @@ impl InformationSchemaKeyColumnUsage {
ConcreteDataType::string_datatype(),
true,
),
ColumnSchema::new(
GREPTIME_INDEX_TYPE,
ConcreteDataType::string_datatype(),
true,
),
]))
}
@@ -184,6 +207,7 @@ struct InformationSchemaKeyColumnUsageBuilder {
column_name: StringVectorBuilder,
ordinal_position: UInt32VectorBuilder,
position_in_unique_constraint: UInt32VectorBuilder,
greptime_index_type: StringVectorBuilder,
}
impl InformationSchemaKeyColumnUsageBuilder {
@@ -206,6 +230,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
column_name: StringVectorBuilder::with_capacity(INIT_CAPACITY),
ordinal_position: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
position_in_unique_constraint: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
greptime_index_type: StringVectorBuilder::with_capacity(INIT_CAPACITY),
}
}
@@ -229,34 +254,47 @@ impl InformationSchemaKeyColumnUsageBuilder {
for (idx, column) in schema.column_schemas().iter().enumerate() {
let mut constraints = vec![];
let mut greptime_index_type = vec![];
if column.is_time_index() {
self.add_key_column_usage(
&predicates,
&schema_name,
TIME_INDEX_CONSTRAINT_NAME,
CONSTRAINT_NAME_TIME_INDEX,
&catalog_name,
&schema_name,
table_name,
&column.name,
1, //always 1 for time index
"",
);
}
// TODO(dimbtp): foreign key constraint not supported yet
if keys.contains(&idx) {
constraints.push(PRI_CONSTRAINT_NAME);
constraints.push(CONSTRAINT_NAME_PRI);
greptime_index_type.push(INDEX_TYPE_PRI);
}
if column.is_inverted_indexed() {
constraints.push(INVERTED_INDEX_CONSTRAINT_NAME);
constraints.push(CONSTRAINT_NAME_INVERTED_INDEX);
greptime_index_type.push(INDEX_TYPE_INVERTED_INDEX);
}
if column.is_fulltext_indexed() {
constraints.push(FULLTEXT_INDEX_CONSTRAINT_NAME);
if let Ok(Some(options)) = column.fulltext_options() {
if options.enable {
constraints.push(CONSTRAINT_NAME_FULLTEXT_INDEX);
let index_type = match options.backend {
FulltextBackend::Bloom => INDEX_TYPE_FULLTEXT_BLOOM,
FulltextBackend::Tantivy => INDEX_TYPE_FULLTEXT_TANTIVY,
};
greptime_index_type.push(index_type);
}
}
if column.is_skipping_indexed() {
constraints.push(SKIPPING_INDEX_CONSTRAINT_NAME);
constraints.push(CONSTRAINT_NAME_SKIPPING_INDEX);
greptime_index_type.push(INDEX_TYPE_SKIPPING_INDEX);
}
if !constraints.is_empty() {
let aggregated_constraints = constraints.join(", ");
let aggregated_index_types = greptime_index_type.join(", ");
self.add_key_column_usage(
&predicates,
&schema_name,
@@ -266,6 +304,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
table_name,
&column.name,
idx as u32 + 1,
&aggregated_index_types,
);
}
}
@@ -288,6 +327,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
table_name: &str,
column_name: &str,
ordinal_position: u32,
index_types: &str,
) {
let row = [
(CONSTRAINT_SCHEMA, &Value::from(constraint_schema)),
@@ -297,6 +337,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
(TABLE_NAME, &Value::from(table_name)),
(COLUMN_NAME, &Value::from(column_name)),
(ORDINAL_POSITION, &Value::from(ordinal_position)),
(GREPTIME_INDEX_TYPE, &Value::from(index_types)),
];
if !predicates.eval(&row) {
@@ -313,6 +354,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
self.column_name.push(Some(column_name));
self.ordinal_position.push(Some(ordinal_position));
self.position_in_unique_constraint.push(None);
self.greptime_index_type.push(Some(index_types));
}
fn finish(&mut self) -> Result<RecordBatch> {
@@ -336,6 +378,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
null_string_vector.clone(),
null_string_vector.clone(),
null_string_vector,
Arc::new(self.greptime_index_type.finish()),
];
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
}

View File

@@ -36,7 +36,7 @@ use crate::error::{
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
};
use crate::information_schema::key_column_usage::{
PRI_CONSTRAINT_NAME, TIME_INDEX_CONSTRAINT_NAME,
CONSTRAINT_NAME_PRI, CONSTRAINT_NAME_TIME_INDEX,
};
use crate::information_schema::Predicates;
use crate::system_schema::information_schema::{InformationTable, TABLE_CONSTRAINTS};
@@ -188,7 +188,7 @@ impl InformationSchemaTableConstraintsBuilder {
self.add_table_constraint(
&predicates,
&schema_name,
TIME_INDEX_CONSTRAINT_NAME,
CONSTRAINT_NAME_TIME_INDEX,
&schema_name,
&table.table_info().name,
TIME_INDEX_CONSTRAINT_TYPE,
@@ -199,7 +199,7 @@ impl InformationSchemaTableConstraintsBuilder {
self.add_table_constraint(
&predicates,
&schema_name,
PRI_CONSTRAINT_NAME,
CONSTRAINT_NAME_PRI,
&schema_name,
&table.table_info().name,
PRI_KEY_CONSTRAINT_TYPE,

View File

@@ -537,8 +537,8 @@ impl fmt::Display for FulltextOptions {
#[serde(rename_all = "kebab-case")]
pub enum FulltextBackend {
#[default]
Bloom,
Tantivy,
Bloom, // TODO(zhongzc): when bloom is ready, use it as default
}
impl fmt::Display for FulltextBackend {

View File

@@ -40,7 +40,7 @@ use common_recordbatch::RecordBatches;
use common_time::timezone::get_timezone;
use common_time::Timestamp;
use datafusion::common::ScalarValue;
use datafusion::prelude::{concat_ws, SessionContext};
use datafusion::prelude::SessionContext;
use datafusion_expr::expr::WildcardOptions;
use datafusion_expr::{case, col, lit, Expr, SortExpr};
use datatypes::prelude::*;
@@ -399,23 +399,6 @@ pub async fn show_index(
query_ctx.current_schema()
};
let primary_key_expr = case(col("constraint_name").like(lit("%PRIMARY%")))
.when(lit(true), lit("greptime-primary-key-v1"))
.otherwise(null())
.context(error::PlanSqlSnafu)?;
let inverted_index_expr = case(col("constraint_name").like(lit("%INVERTED INDEX%")))
.when(lit(true), lit("greptime-inverted-index-v1"))
.otherwise(null())
.context(error::PlanSqlSnafu)?;
let fulltext_index_expr = case(col("constraint_name").like(lit("%FULLTEXT INDEX%")))
.when(lit(true), lit("greptime-fulltext-index-v1"))
.otherwise(null())
.context(error::PlanSqlSnafu)?;
let skipping_index_expr = case(col("constraint_name").like(lit("%SKIPPING INDEX%")))
.when(lit(true), lit("greptime-bloom-filter-v1"))
.otherwise(null())
.context(error::PlanSqlSnafu)?;
let select = vec![
// 1 as `Non_unique`: contain duplicates
lit(1).alias(INDEX_NONT_UNIQUE_COLUMN),
@@ -433,16 +416,6 @@ pub async fn show_index(
.otherwise(lit(YES_STR))
.context(error::PlanSqlSnafu)?
.alias(COLUMN_NULLABLE_COLUMN),
concat_ws(
lit(", "),
vec![
primary_key_expr,
inverted_index_expr,
fulltext_index_expr,
skipping_index_expr,
],
)
.alias(INDEX_INDEX_TYPE_COLUMN),
lit("").alias(COLUMN_COMMENT_COLUMN),
lit("").alias(INDEX_COMMENT_COLUMN),
lit(YES_STR).alias(INDEX_VISIBLE_COLUMN),
@@ -467,7 +440,10 @@ pub async fn show_index(
(INDEX_SUB_PART_COLUMN, INDEX_SUB_PART_COLUMN),
(INDEX_PACKED_COLUMN, INDEX_PACKED_COLUMN),
(COLUMN_NULLABLE_COLUMN, COLUMN_NULLABLE_COLUMN),
(INDEX_INDEX_TYPE_COLUMN, INDEX_INDEX_TYPE_COLUMN),
(
key_column_usage::GREPTIME_INDEX_TYPE,
INDEX_INDEX_TYPE_COLUMN,
),
(COLUMN_COMMENT_COLUMN, COLUMN_COMMENT_COLUMN),
(INDEX_COMMENT_COLUMN, INDEX_COMMENT_COLUMN),
(INDEX_VISIBLE_COLUMN, INDEX_VISIBLE_COLUMN),

View File

@@ -19,8 +19,8 @@ use std::collections::HashMap;
use common_meta::SchemaOptions;
use datatypes::schema::{
ColumnDefaultConstraint, ColumnSchema, SchemaRef, COLUMN_FULLTEXT_OPT_KEY_ANALYZER,
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY,
COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY,
COLUMN_FULLTEXT_OPT_KEY_BACKEND, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE,
COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY,
};
use snafu::ResultExt;
use sql::ast::{ColumnDef, ColumnOption, ColumnOptionDef, Expr, Ident, ObjectName};
@@ -113,6 +113,10 @@ fn create_column(column_schema: &ColumnSchema, quote_style: char) -> Result<Colu
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE.to_string(),
opt.case_sensitive.to_string(),
),
(
COLUMN_FULLTEXT_OPT_KEY_BACKEND.to_string(),
opt.backend.to_string(),
),
]);
extensions.fulltext_index_options = Some(map.into());
}
@@ -327,7 +331,7 @@ CREATE TABLE IF NOT EXISTS "system_metrics" (
"host" STRING NULL INVERTED INDEX,
"cpu" DOUBLE NULL,
"disk" FLOAT NULL,
"msg" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', case_sensitive = 'false'),
"msg" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', backend = 'bloom', case_sensitive = 'false'),
"ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(),
TIME INDEX ("ts"),
PRIMARY KEY ("id", "host")

View File

@@ -969,6 +969,14 @@ impl ErrorExt for MetadataError {
}
}
/// Set column fulltext options if it passed the validation.
///
/// Options allowed to modify:
/// * backend
///
/// Options not allowed to modify:
/// * analyzer
/// * case_sensitive
fn set_column_fulltext_options(
column_meta: &mut ColumnMetadata,
column_name: String,
@@ -976,14 +984,6 @@ fn set_column_fulltext_options(
current_options: Option<FulltextOptions>,
) -> Result<()> {
if let Some(current_options) = current_options {
ensure!(
!current_options.enable,
InvalidColumnOptionSnafu {
column_name,
msg: "FULLTEXT index already enabled".to_string(),
}
);
ensure!(
current_options.analyzer == options.analyzer
&& current_options.case_sensitive == options.case_sensitive,

View File

@@ -1149,6 +1149,14 @@ impl TryFrom<RawTableInfo> for TableInfo {
}
}
/// Set column fulltext options if it passed the validation.
///
/// Options allowed to modify:
/// * backend
///
/// Options not allowed to modify:
/// * analyzer
/// * case_sensitive
fn set_column_fulltext_options(
column_schema: &mut ColumnSchema,
column_name: &str,
@@ -1156,14 +1164,6 @@ fn set_column_fulltext_options(
current_options: Option<FulltextOptions>,
) -> Result<()> {
if let Some(current_options) = current_options {
ensure!(
!current_options.enable,
error::InvalidColumnOptionSnafu {
column_name,
msg: "FULLTEXT index already enabled",
}
);
ensure!(
current_options.analyzer == options.analyzer
&& current_options.case_sensitive == options.case_sensitive,