mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-28 02:40:38 +00:00
feat: support altering fulltext backend (#5896)
* feat: add `greptime_index_type` to `information_schema.key_column_usage`

  Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: show create

  Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
This commit is contained in:
@@ -24,7 +24,7 @@ use datafusion::physical_plan::stream::RecordBatchStreamAdapter as DfRecordBatch
|
||||
use datafusion::physical_plan::streaming::PartitionStream as DfPartitionStream;
|
||||
use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream;
|
||||
use datatypes::prelude::{ConcreteDataType, MutableVector, ScalarVectorBuilder, VectorRef};
|
||||
use datatypes::schema::{ColumnSchema, Schema, SchemaRef};
|
||||
use datatypes::schema::{ColumnSchema, FulltextBackend, Schema, SchemaRef};
|
||||
use datatypes::value::Value;
|
||||
use datatypes::vectors::{ConstantVector, StringVector, StringVectorBuilder, UInt32VectorBuilder};
|
||||
use futures_util::TryStreamExt;
|
||||
@@ -47,20 +47,38 @@ pub const TABLE_SCHEMA: &str = "table_schema";
|
||||
pub const TABLE_NAME: &str = "table_name";
|
||||
pub const COLUMN_NAME: &str = "column_name";
|
||||
pub const ORDINAL_POSITION: &str = "ordinal_position";
|
||||
/// The type of the index.
|
||||
pub const GREPTIME_INDEX_TYPE: &str = "greptime_index_type";
|
||||
const INIT_CAPACITY: usize = 42;
|
||||
|
||||
/// Primary key constraint name
|
||||
pub(crate) const PRI_CONSTRAINT_NAME: &str = "PRIMARY";
|
||||
/// Time index constraint name
|
||||
pub(crate) const TIME_INDEX_CONSTRAINT_NAME: &str = "TIME INDEX";
|
||||
pub(crate) const CONSTRAINT_NAME_TIME_INDEX: &str = "TIME INDEX";
|
||||
|
||||
/// Primary key constraint name
|
||||
pub(crate) const CONSTRAINT_NAME_PRI: &str = "PRIMARY";
|
||||
/// Primary key index type
|
||||
pub(crate) const INDEX_TYPE_PRI: &str = "greptime-primary-key-v1";
|
||||
|
||||
/// Inverted index constraint name
|
||||
pub(crate) const INVERTED_INDEX_CONSTRAINT_NAME: &str = "INVERTED INDEX";
|
||||
pub(crate) const CONSTRAINT_NAME_INVERTED_INDEX: &str = "INVERTED INDEX";
|
||||
/// Inverted index type
|
||||
pub(crate) const INDEX_TYPE_INVERTED_INDEX: &str = "greptime-inverted-index-v1";
|
||||
|
||||
/// Fulltext index constraint name
|
||||
pub(crate) const FULLTEXT_INDEX_CONSTRAINT_NAME: &str = "FULLTEXT INDEX";
|
||||
pub(crate) const CONSTRAINT_NAME_FULLTEXT_INDEX: &str = "FULLTEXT INDEX";
|
||||
/// Fulltext index v1 type
|
||||
pub(crate) const INDEX_TYPE_FULLTEXT_TANTIVY: &str = "greptime-fulltext-index-v1";
|
||||
/// Fulltext index bloom type
|
||||
pub(crate) const INDEX_TYPE_FULLTEXT_BLOOM: &str = "greptime-fulltext-index-bloom";
|
||||
|
||||
/// Skipping index constraint name
|
||||
pub(crate) const SKIPPING_INDEX_CONSTRAINT_NAME: &str = "SKIPPING INDEX";
|
||||
pub(crate) const CONSTRAINT_NAME_SKIPPING_INDEX: &str = "SKIPPING INDEX";
|
||||
/// Skipping index type
|
||||
pub(crate) const INDEX_TYPE_SKIPPING_INDEX: &str = "greptime-bloom-filter-v1";
|
||||
|
||||
/// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`.
|
||||
///
|
||||
/// Provides an extra column `greptime_index_type` for the index type of the key column.
|
||||
#[derive(Debug)]
|
||||
pub(super) struct InformationSchemaKeyColumnUsage {
|
||||
schema: SchemaRef,
|
||||
@@ -120,6 +138,11 @@ impl InformationSchemaKeyColumnUsage {
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
ColumnSchema::new(
|
||||
GREPTIME_INDEX_TYPE,
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
),
|
||||
]))
|
||||
}
|
||||
|
||||
@@ -184,6 +207,7 @@ struct InformationSchemaKeyColumnUsageBuilder {
|
||||
column_name: StringVectorBuilder,
|
||||
ordinal_position: UInt32VectorBuilder,
|
||||
position_in_unique_constraint: UInt32VectorBuilder,
|
||||
greptime_index_type: StringVectorBuilder,
|
||||
}
|
||||
|
||||
impl InformationSchemaKeyColumnUsageBuilder {
|
||||
@@ -206,6 +230,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
column_name: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
ordinal_position: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
position_in_unique_constraint: UInt32VectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
greptime_index_type: StringVectorBuilder::with_capacity(INIT_CAPACITY),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -229,34 +254,47 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
|
||||
for (idx, column) in schema.column_schemas().iter().enumerate() {
|
||||
let mut constraints = vec![];
|
||||
let mut greptime_index_type = vec![];
|
||||
if column.is_time_index() {
|
||||
self.add_key_column_usage(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
TIME_INDEX_CONSTRAINT_NAME,
|
||||
CONSTRAINT_NAME_TIME_INDEX,
|
||||
&catalog_name,
|
||||
&schema_name,
|
||||
table_name,
|
||||
&column.name,
|
||||
1, //always 1 for time index
|
||||
"",
|
||||
);
|
||||
}
|
||||
// TODO(dimbtp): foreign key constraint not supported yet
|
||||
if keys.contains(&idx) {
|
||||
constraints.push(PRI_CONSTRAINT_NAME);
|
||||
constraints.push(CONSTRAINT_NAME_PRI);
|
||||
greptime_index_type.push(INDEX_TYPE_PRI);
|
||||
}
|
||||
if column.is_inverted_indexed() {
|
||||
constraints.push(INVERTED_INDEX_CONSTRAINT_NAME);
|
||||
constraints.push(CONSTRAINT_NAME_INVERTED_INDEX);
|
||||
greptime_index_type.push(INDEX_TYPE_INVERTED_INDEX);
|
||||
}
|
||||
if column.is_fulltext_indexed() {
|
||||
constraints.push(FULLTEXT_INDEX_CONSTRAINT_NAME);
|
||||
if let Ok(Some(options)) = column.fulltext_options() {
|
||||
if options.enable {
|
||||
constraints.push(CONSTRAINT_NAME_FULLTEXT_INDEX);
|
||||
let index_type = match options.backend {
|
||||
FulltextBackend::Bloom => INDEX_TYPE_FULLTEXT_BLOOM,
|
||||
FulltextBackend::Tantivy => INDEX_TYPE_FULLTEXT_TANTIVY,
|
||||
};
|
||||
greptime_index_type.push(index_type);
|
||||
}
|
||||
}
|
||||
if column.is_skipping_indexed() {
|
||||
constraints.push(SKIPPING_INDEX_CONSTRAINT_NAME);
|
||||
constraints.push(CONSTRAINT_NAME_SKIPPING_INDEX);
|
||||
greptime_index_type.push(INDEX_TYPE_SKIPPING_INDEX);
|
||||
}
|
||||
|
||||
if !constraints.is_empty() {
|
||||
let aggregated_constraints = constraints.join(", ");
|
||||
let aggregated_index_types = greptime_index_type.join(", ");
|
||||
self.add_key_column_usage(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
@@ -266,6 +304,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
table_name,
|
||||
&column.name,
|
||||
idx as u32 + 1,
|
||||
&aggregated_index_types,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -288,6 +327,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
table_name: &str,
|
||||
column_name: &str,
|
||||
ordinal_position: u32,
|
||||
index_types: &str,
|
||||
) {
|
||||
let row = [
|
||||
(CONSTRAINT_SCHEMA, &Value::from(constraint_schema)),
|
||||
@@ -297,6 +337,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
(TABLE_NAME, &Value::from(table_name)),
|
||||
(COLUMN_NAME, &Value::from(column_name)),
|
||||
(ORDINAL_POSITION, &Value::from(ordinal_position)),
|
||||
(GREPTIME_INDEX_TYPE, &Value::from(index_types)),
|
||||
];
|
||||
|
||||
if !predicates.eval(&row) {
|
||||
@@ -313,6 +354,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
self.column_name.push(Some(column_name));
|
||||
self.ordinal_position.push(Some(ordinal_position));
|
||||
self.position_in_unique_constraint.push(None);
|
||||
self.greptime_index_type.push(Some(index_types));
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Result<RecordBatch> {
|
||||
@@ -336,6 +378,7 @@ impl InformationSchemaKeyColumnUsageBuilder {
|
||||
null_string_vector.clone(),
|
||||
null_string_vector.clone(),
|
||||
null_string_vector,
|
||||
Arc::new(self.greptime_index_type.finish()),
|
||||
];
|
||||
RecordBatch::new(self.schema.clone(), columns).context(CreateRecordBatchSnafu)
|
||||
}
|
||||
|
||||
@@ -36,7 +36,7 @@ use crate::error::{
|
||||
CreateRecordBatchSnafu, InternalSnafu, Result, UpgradeWeakCatalogManagerRefSnafu,
|
||||
};
|
||||
use crate::information_schema::key_column_usage::{
|
||||
PRI_CONSTRAINT_NAME, TIME_INDEX_CONSTRAINT_NAME,
|
||||
CONSTRAINT_NAME_PRI, CONSTRAINT_NAME_TIME_INDEX,
|
||||
};
|
||||
use crate::information_schema::Predicates;
|
||||
use crate::system_schema::information_schema::{InformationTable, TABLE_CONSTRAINTS};
|
||||
@@ -188,7 +188,7 @@ impl InformationSchemaTableConstraintsBuilder {
|
||||
self.add_table_constraint(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
TIME_INDEX_CONSTRAINT_NAME,
|
||||
CONSTRAINT_NAME_TIME_INDEX,
|
||||
&schema_name,
|
||||
&table.table_info().name,
|
||||
TIME_INDEX_CONSTRAINT_TYPE,
|
||||
@@ -199,7 +199,7 @@ impl InformationSchemaTableConstraintsBuilder {
|
||||
self.add_table_constraint(
|
||||
&predicates,
|
||||
&schema_name,
|
||||
PRI_CONSTRAINT_NAME,
|
||||
CONSTRAINT_NAME_PRI,
|
||||
&schema_name,
|
||||
&table.table_info().name,
|
||||
PRI_KEY_CONSTRAINT_TYPE,
|
||||
|
||||
@@ -537,8 +537,8 @@ impl fmt::Display for FulltextOptions {
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub enum FulltextBackend {
|
||||
#[default]
|
||||
Bloom,
|
||||
Tantivy,
|
||||
Bloom, // TODO(zhongzc): when bloom is ready, use it as default
|
||||
}
|
||||
|
||||
impl fmt::Display for FulltextBackend {
|
||||
|
||||
@@ -40,7 +40,7 @@ use common_recordbatch::RecordBatches;
|
||||
use common_time::timezone::get_timezone;
|
||||
use common_time::Timestamp;
|
||||
use datafusion::common::ScalarValue;
|
||||
use datafusion::prelude::{concat_ws, SessionContext};
|
||||
use datafusion::prelude::SessionContext;
|
||||
use datafusion_expr::expr::WildcardOptions;
|
||||
use datafusion_expr::{case, col, lit, Expr, SortExpr};
|
||||
use datatypes::prelude::*;
|
||||
@@ -399,23 +399,6 @@ pub async fn show_index(
|
||||
query_ctx.current_schema()
|
||||
};
|
||||
|
||||
let primary_key_expr = case(col("constraint_name").like(lit("%PRIMARY%")))
|
||||
.when(lit(true), lit("greptime-primary-key-v1"))
|
||||
.otherwise(null())
|
||||
.context(error::PlanSqlSnafu)?;
|
||||
let inverted_index_expr = case(col("constraint_name").like(lit("%INVERTED INDEX%")))
|
||||
.when(lit(true), lit("greptime-inverted-index-v1"))
|
||||
.otherwise(null())
|
||||
.context(error::PlanSqlSnafu)?;
|
||||
let fulltext_index_expr = case(col("constraint_name").like(lit("%FULLTEXT INDEX%")))
|
||||
.when(lit(true), lit("greptime-fulltext-index-v1"))
|
||||
.otherwise(null())
|
||||
.context(error::PlanSqlSnafu)?;
|
||||
let skipping_index_expr = case(col("constraint_name").like(lit("%SKIPPING INDEX%")))
|
||||
.when(lit(true), lit("greptime-bloom-filter-v1"))
|
||||
.otherwise(null())
|
||||
.context(error::PlanSqlSnafu)?;
|
||||
|
||||
let select = vec![
|
||||
// 1 as `Non_unique`: contain duplicates
|
||||
lit(1).alias(INDEX_NONT_UNIQUE_COLUMN),
|
||||
@@ -433,16 +416,6 @@ pub async fn show_index(
|
||||
.otherwise(lit(YES_STR))
|
||||
.context(error::PlanSqlSnafu)?
|
||||
.alias(COLUMN_NULLABLE_COLUMN),
|
||||
concat_ws(
|
||||
lit(", "),
|
||||
vec![
|
||||
primary_key_expr,
|
||||
inverted_index_expr,
|
||||
fulltext_index_expr,
|
||||
skipping_index_expr,
|
||||
],
|
||||
)
|
||||
.alias(INDEX_INDEX_TYPE_COLUMN),
|
||||
lit("").alias(COLUMN_COMMENT_COLUMN),
|
||||
lit("").alias(INDEX_COMMENT_COLUMN),
|
||||
lit(YES_STR).alias(INDEX_VISIBLE_COLUMN),
|
||||
@@ -467,7 +440,10 @@ pub async fn show_index(
|
||||
(INDEX_SUB_PART_COLUMN, INDEX_SUB_PART_COLUMN),
|
||||
(INDEX_PACKED_COLUMN, INDEX_PACKED_COLUMN),
|
||||
(COLUMN_NULLABLE_COLUMN, COLUMN_NULLABLE_COLUMN),
|
||||
(INDEX_INDEX_TYPE_COLUMN, INDEX_INDEX_TYPE_COLUMN),
|
||||
(
|
||||
key_column_usage::GREPTIME_INDEX_TYPE,
|
||||
INDEX_INDEX_TYPE_COLUMN,
|
||||
),
|
||||
(COLUMN_COMMENT_COLUMN, COLUMN_COMMENT_COLUMN),
|
||||
(INDEX_COMMENT_COLUMN, INDEX_COMMENT_COLUMN),
|
||||
(INDEX_VISIBLE_COLUMN, INDEX_VISIBLE_COLUMN),
|
||||
|
||||
@@ -19,8 +19,8 @@ use std::collections::HashMap;
|
||||
use common_meta::SchemaOptions;
|
||||
use datatypes::schema::{
|
||||
ColumnDefaultConstraint, ColumnSchema, SchemaRef, COLUMN_FULLTEXT_OPT_KEY_ANALYZER,
|
||||
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY,
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY,
|
||||
COLUMN_FULLTEXT_OPT_KEY_BACKEND, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE,
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY,
|
||||
};
|
||||
use snafu::ResultExt;
|
||||
use sql::ast::{ColumnDef, ColumnOption, ColumnOptionDef, Expr, Ident, ObjectName};
|
||||
@@ -113,6 +113,10 @@ fn create_column(column_schema: &ColumnSchema, quote_style: char) -> Result<Colu
|
||||
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE.to_string(),
|
||||
opt.case_sensitive.to_string(),
|
||||
),
|
||||
(
|
||||
COLUMN_FULLTEXT_OPT_KEY_BACKEND.to_string(),
|
||||
opt.backend.to_string(),
|
||||
),
|
||||
]);
|
||||
extensions.fulltext_index_options = Some(map.into());
|
||||
}
|
||||
@@ -327,7 +331,7 @@ CREATE TABLE IF NOT EXISTS "system_metrics" (
|
||||
"host" STRING NULL INVERTED INDEX,
|
||||
"cpu" DOUBLE NULL,
|
||||
"disk" FLOAT NULL,
|
||||
"msg" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', case_sensitive = 'false'),
|
||||
"msg" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', backend = 'bloom', case_sensitive = 'false'),
|
||||
"ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(),
|
||||
TIME INDEX ("ts"),
|
||||
PRIMARY KEY ("id", "host")
|
||||
|
||||
@@ -969,6 +969,14 @@ impl ErrorExt for MetadataError {
|
||||
}
|
||||
}
|
||||
|
||||
/// Set column fulltext options if it passed the validation.
|
||||
///
|
||||
/// Options allowed to modify:
|
||||
/// * backend
|
||||
///
|
||||
/// Options not allowed to modify:
|
||||
/// * analyzer
|
||||
/// * case_sensitive
|
||||
fn set_column_fulltext_options(
|
||||
column_meta: &mut ColumnMetadata,
|
||||
column_name: String,
|
||||
@@ -976,14 +984,6 @@ fn set_column_fulltext_options(
|
||||
current_options: Option<FulltextOptions>,
|
||||
) -> Result<()> {
|
||||
if let Some(current_options) = current_options {
|
||||
ensure!(
|
||||
!current_options.enable,
|
||||
InvalidColumnOptionSnafu {
|
||||
column_name,
|
||||
msg: "FULLTEXT index already enabled".to_string(),
|
||||
}
|
||||
);
|
||||
|
||||
ensure!(
|
||||
current_options.analyzer == options.analyzer
|
||||
&& current_options.case_sensitive == options.case_sensitive,
|
||||
|
||||
@@ -1149,6 +1149,14 @@ impl TryFrom<RawTableInfo> for TableInfo {
|
||||
}
|
||||
}
|
||||
|
||||
/// Set column fulltext options if it passed the validation.
|
||||
///
|
||||
/// Options allowed to modify:
|
||||
/// * backend
|
||||
///
|
||||
/// Options not allowed to modify:
|
||||
/// * analyzer
|
||||
/// * case_sensitive
|
||||
fn set_column_fulltext_options(
|
||||
column_schema: &mut ColumnSchema,
|
||||
column_name: &str,
|
||||
@@ -1156,14 +1164,6 @@ fn set_column_fulltext_options(
|
||||
current_options: Option<FulltextOptions>,
|
||||
) -> Result<()> {
|
||||
if let Some(current_options) = current_options {
|
||||
ensure!(
|
||||
!current_options.enable,
|
||||
error::InvalidColumnOptionSnafu {
|
||||
column_name,
|
||||
msg: "FULLTEXT index already enabled",
|
||||
}
|
||||
);
|
||||
|
||||
ensure!(
|
||||
current_options.analyzer == options.analyzer
|
||||
&& current_options.case_sensitive == options.case_sensitive,
|
||||
|
||||
Reference in New Issue
Block a user