feat(index): support SQL to specify inverted index columns (#4929)

* feat(index): support building inverted index for the field column

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* feat(index): support SQL to specify inverted index columns

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* test: fix sqlness

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: consider compatibility

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* polish

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* compatibility

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: ignore case

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* refactor: reduce dup

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: clippy

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
This commit is contained in:
Zhenchi
2024-11-11 16:06:23 +08:00
committed by GitHub
parent 0e0c4faf0d
commit 6248a6ccf5
14 changed files with 442 additions and 212 deletions

View File

@@ -16,6 +16,7 @@ use std::collections::HashMap;
use datatypes::schema::{
ColumnDefaultConstraint, ColumnSchema, FulltextOptions, COMMENT_KEY, FULLTEXT_KEY,
INVERTED_INDEX_KEY,
};
use snafu::ResultExt;
@@ -25,6 +26,8 @@ use crate::v1::{ColumnDef, ColumnOptions, SemanticType};
/// Key used to store fulltext options in gRPC column options.
const FULLTEXT_GRPC_KEY: &str = "fulltext";
/// Key used to store inverted index options in gRPC column options.
const INVERTED_INDEX_GRPC_KEY: &str = "inverted_index";
/// Tries to construct a `ColumnSchema` from the given `ColumnDef`.
pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
@@ -49,10 +52,13 @@ pub fn try_as_column_schema(column_def: &ColumnDef) -> Result<ColumnSchema> {
if !column_def.comment.is_empty() {
metadata.insert(COMMENT_KEY.to_string(), column_def.comment.clone());
}
if let Some(options) = column_def.options.as_ref()
&& let Some(fulltext) = options.options.get(FULLTEXT_GRPC_KEY)
{
metadata.insert(FULLTEXT_KEY.to_string(), fulltext.to_string());
if let Some(options) = column_def.options.as_ref() {
if let Some(fulltext) = options.options.get(FULLTEXT_GRPC_KEY) {
metadata.insert(FULLTEXT_KEY.to_string(), fulltext.clone());
}
if let Some(inverted_index) = options.options.get(INVERTED_INDEX_GRPC_KEY) {
metadata.insert(INVERTED_INDEX_KEY.to_string(), inverted_index.clone());
}
}
ColumnSchema::new(&column_def.name, data_type.into(), column_def.is_nullable)
@@ -70,7 +76,12 @@ pub fn options_from_column_schema(column_schema: &ColumnSchema) -> Option<Column
if let Some(fulltext) = column_schema.metadata().get(FULLTEXT_KEY) {
options
.options
.insert(FULLTEXT_GRPC_KEY.to_string(), fulltext.to_string());
.insert(FULLTEXT_GRPC_KEY.to_string(), fulltext.clone());
}
if let Some(inverted_index) = column_schema.metadata().get(INVERTED_INDEX_KEY) {
options
.options
.insert(INVERTED_INDEX_GRPC_KEY.to_string(), inverted_index.clone());
}
(!options.options.is_empty()).then_some(options)
@@ -115,10 +126,13 @@ mod tests {
comment: "test_comment".to_string(),
datatype_extension: None,
options: Some(ColumnOptions {
options: HashMap::from([(
FULLTEXT_GRPC_KEY.to_string(),
"{\"enable\":true}".to_string(),
)]),
options: HashMap::from([
(
FULLTEXT_GRPC_KEY.to_string(),
"{\"enable\":true}".to_string(),
),
(INVERTED_INDEX_GRPC_KEY.to_string(), "true".to_string()),
]),
}),
};
@@ -139,6 +153,7 @@ mod tests {
..Default::default()
}
);
assert!(schema.is_inverted_indexed());
}
#[test]
@@ -153,12 +168,17 @@ mod tests {
analyzer: FulltextAnalyzer::English,
case_sensitive: false,
})
.unwrap();
.unwrap()
.set_inverted_index(true);
let options = options_from_column_schema(&schema).unwrap();
assert_eq!(
options.options.get(FULLTEXT_GRPC_KEY).unwrap(),
"{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false}"
);
assert_eq!(
options.options.get(INVERTED_INDEX_GRPC_KEY).unwrap(),
"true"
);
}
#[test]

View File

@@ -28,7 +28,7 @@ use crate::error::{self, DuplicateColumnSnafu, Error, ProjectArrowSchemaSnafu, R
use crate::prelude::DataType;
pub use crate::schema::column_schema::{
ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, COMMENT_KEY, FULLTEXT_KEY,
TIME_INDEX_KEY,
INVERTED_INDEX_KEY, TIME_INDEX_KEY,
};
pub use crate::schema::constraint::ColumnDefaultConstraint;
pub use crate::schema::raw::RawSchema;

View File

@@ -36,7 +36,8 @@ pub const COMMENT_KEY: &str = "greptime:storage:comment";
const DEFAULT_CONSTRAINT_KEY: &str = "greptime:default_constraint";
/// Key used to store fulltext options in arrow field's metadata.
pub const FULLTEXT_KEY: &str = "greptime:fulltext";
/// Key used to store whether the column has inverted index in arrow field's metadata.
pub const INVERTED_INDEX_KEY: &str = "greptime:inverted_index";
/// Schema of a column, used as an immutable struct.
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ColumnSchema {
@@ -134,6 +135,24 @@ impl ColumnSchema {
self
}
pub fn set_inverted_index(mut self, value: bool) -> Self {
let _ = self
.metadata
.insert(INVERTED_INDEX_KEY.to_string(), value.to_string());
self
}
pub fn is_inverted_indexed(&self) -> bool {
self.metadata
.get(INVERTED_INDEX_KEY)
.map(|v| v.eq_ignore_ascii_case("true"))
.unwrap_or(false)
}
pub fn has_inverted_index_key(&self) -> bool {
self.metadata.contains_key(INVERTED_INDEX_KEY)
}
/// Set default constraint.
///
/// If a default constraint exists for the column, this method will

View File

@@ -393,29 +393,20 @@ impl ScanRegion {
.and_then(|c| c.index_cache())
.cloned();
// TODO(zhongzc): currently we only index tag columns, need to support field columns.
let ignore_column_ids = &self
.version
.options
.index_options
.inverted_index
.ignore_column_ids;
let indexed_column_ids = self
.version
.metadata
.primary_key
.iter()
.filter(|id| !ignore_column_ids.contains(id))
.copied()
.collect::<HashSet<_>>();
InvertedIndexApplierBuilder::new(
self.access_layer.region_dir().to_string(),
self.access_layer.object_store().clone(),
file_cache,
index_cache,
self.version.metadata.as_ref(),
indexed_column_ids,
self.version.metadata.inverted_indexed_column_ids(
self.version
.options
.index_options
.inverted_index
.ignore_column_ids
.iter(),
),
self.access_layer.puffin_manager_factory().clone(),
)
.build(&self.request.filters)

View File

@@ -20,7 +20,6 @@ pub(crate) mod puffin_manager;
mod statistics;
mod store;
use std::collections::HashSet;
use std::num::NonZeroUsize;
use common_telemetry::{debug, warn};
@@ -213,28 +212,15 @@ impl<'a> IndexerBuilder<'a> {
segment_row_count = row_group_size;
}
// TODO(zhongzc): currently we only index tag columns, need to support field columns.
let indexed_column_ids = self
.metadata
.primary_key
.iter()
.filter(|id| {
!self
.index_options
.inverted_index
.ignore_column_ids
.contains(id)
})
.copied()
.collect::<HashSet<_>>();
let indexer = InvertedIndexer::new(
self.file_id,
self.metadata,
self.intermediate_manager.clone(),
self.inverted_index_config.mem_threshold_on_create(),
segment_row_count,
indexed_column_ids,
self.metadata.inverted_indexed_column_ids(
self.index_options.inverted_index.ignore_column_ids.iter(),
),
);
Some(indexer)

View File

@@ -35,10 +35,10 @@ use query::sql::{
use session::context::QueryContextRef;
use session::table_name::table_idents_to_full_name;
use snafu::{ensure, OptionExt, ResultExt};
use sql::ast::{ColumnOption, TableConstraint};
use sql::ast::ColumnOption;
use sql::statements::alter::{AlterTable, AlterTableOperation};
use sql::statements::create::{
Column as SqlColumn, CreateExternalTable, CreateFlow, CreateTable, CreateView, TIME_INDEX,
Column as SqlColumn, CreateExternalTable, CreateFlow, CreateTable, CreateView, TableConstraint,
};
use sql::statements::{
column_to_schema, sql_column_def_to_grpc_column_def, sql_data_type_to_concrete_data_type,
@@ -130,8 +130,14 @@ pub(crate) async fn create_external_expr(
// expanded form
let time_index = find_time_index(&create.constraints)?;
let primary_keys = find_primary_keys(&create.columns, &create.constraints)?;
let column_schemas =
columns_to_column_schemas(&create.columns, &time_index, Some(&query_ctx.timezone()))?;
let inverted_index_cols = find_inverted_index_cols(&create.columns, &create.constraints)?;
let column_schemas = columns_to_column_schemas(
&create.columns,
&time_index,
&inverted_index_cols,
&primary_keys,
Some(&query_ctx.timezone()),
)?;
(time_index, primary_keys, column_schemas)
} else {
// inferred form
@@ -186,6 +192,7 @@ pub fn create_to_expr(
);
let primary_keys = find_primary_keys(&create.columns, &create.constraints)?;
let inverted_index_cols = find_inverted_index_cols(&create.columns, &create.constraints)?;
let expr = CreateTableExpr {
catalog_name,
@@ -196,6 +203,7 @@ pub fn create_to_expr(
&create.columns,
&time_index,
&primary_keys,
&inverted_index_cols,
Some(&query_ctx.timezone()),
)?,
time_index,
@@ -304,9 +312,9 @@ fn find_primary_keys(
let constraints_pk = constraints
.iter()
.filter_map(|constraint| match constraint {
TableConstraint::PrimaryKey {
name: _, columns, ..
} => Some(columns.iter().map(|ident| ident.value.clone())),
TableConstraint::PrimaryKey { columns, .. } => {
Some(columns.iter().map(|ident| ident.value.clone()))
}
_ => None,
})
.flatten()
@@ -329,20 +337,9 @@ pub fn find_time_index(constraints: &[TableConstraint]) -> Result<String> {
let time_index = constraints
.iter()
.filter_map(|constraint| match constraint {
TableConstraint::Unique {
name: Some(name),
columns,
..
} => {
if name.value == TIME_INDEX {
Some(columns.iter().map(|ident| &ident.value))
} else {
None
}
}
TableConstraint::TimeIndex { column, .. } => Some(&column.value),
_ => None,
})
.flatten()
.collect::<Vec<&String>>();
ensure!(
time_index.len() == 1,
@@ -353,25 +350,70 @@ pub fn find_time_index(constraints: &[TableConstraint]) -> Result<String> {
Ok(time_index.first().unwrap().to_string())
}
/// Finds the inverted index columns from the constraints. If no inverted index
/// columns are provided in the constraints, return `None`.
fn find_inverted_index_cols(
columns: &[SqlColumn],
constraints: &[TableConstraint],
) -> Result<Option<Vec<String>>> {
let inverted_index_cols = constraints.iter().find_map(|constraint| {
if let TableConstraint::InvertedIndex { columns } = constraint {
Some(
columns
.iter()
.map(|ident| ident.value.clone())
.collect::<Vec<_>>(),
)
} else {
None
}
});
let Some(inverted_index_cols) = inverted_index_cols else {
return Ok(None);
};
for col in &inverted_index_cols {
if !columns.iter().any(|c| c.name().value == *col) {
return InvalidSqlSnafu {
err_msg: format!("inverted index column `{}` not found in column list", col),
}
.fail();
}
}
Ok(Some(inverted_index_cols))
}
fn columns_to_expr(
column_defs: &[SqlColumn],
time_index: &str,
primary_keys: &[String],
invereted_index_cols: &Option<Vec<String>>,
timezone: Option<&Timezone>,
) -> Result<Vec<api::v1::ColumnDef>> {
let column_schemas = columns_to_column_schemas(column_defs, time_index, timezone)?;
let column_schemas = columns_to_column_schemas(
column_defs,
time_index,
invereted_index_cols,
primary_keys,
timezone,
)?;
column_schemas_to_defs(column_schemas, primary_keys)
}
fn columns_to_column_schemas(
columns: &[SqlColumn],
time_index: &str,
invereted_index_cols: &Option<Vec<String>>,
primary_keys: &[String],
timezone: Option<&Timezone>,
) -> Result<Vec<ColumnSchema>> {
columns
.iter()
.map(|c| {
column_to_schema(c, c.name().to_string() == time_index, timezone).context(ParseSqlSnafu)
column_to_schema(c, time_index, invereted_index_cols, primary_keys, timezone)
.context(ParseSqlSnafu)
})
.collect::<Result<Vec<ColumnSchema>>>()
}

View File

@@ -20,15 +20,12 @@ use common_meta::SchemaOptions;
use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, SchemaRef, COMMENT_KEY};
use humantime::format_duration;
use snafu::ResultExt;
use sql::ast::{
ColumnDef, ColumnOption, ColumnOptionDef, Expr, Ident, ObjectName, TableConstraint,
};
use sql::ast::{ColumnDef, ColumnOption, ColumnOptionDef, Expr, Ident, ObjectName};
use sql::dialect::GreptimeDbDialect;
use sql::parser::ParserContext;
use sql::statements::create::{Column, ColumnExtensions, CreateTable, TIME_INDEX};
use sql::statements::create::{Column, ColumnExtensions, CreateTable, TableConstraint};
use sql::statements::{self, OptionMap};
use sql::{COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE};
use sqlparser::ast::KeyOrIndexDisplay;
use store_api::metric_engine_consts::{is_metric_engine, is_metric_engine_internal_column};
use table::metadata::{TableInfoRef, TableMeta};
use table::requests::{FILE_TABLE_META_KEY, TTL_KEY, WRITE_BUFFER_SIZE_KEY};
@@ -140,14 +137,8 @@ fn create_table_constraints(
let mut constraints = Vec::with_capacity(2);
if let Some(timestamp_column) = schema.timestamp_column() {
let column_name = &timestamp_column.name;
constraints.push(TableConstraint::Unique {
name: Some(TIME_INDEX.into()),
columns: vec![Ident::with_quote(quote_style, column_name)],
characteristics: None,
index_name: None,
index_type_display: KeyOrIndexDisplay::None,
index_type: None,
index_options: vec![],
constraints.push(TableConstraint::TimeIndex {
column: Ident::with_quote(quote_style, column_name),
});
}
if !table_meta.primary_key_indices.is_empty() {
@@ -162,13 +153,22 @@ fn create_table_constraints(
}
})
.collect();
constraints.push(TableConstraint::PrimaryKey {
name: None,
columns,
characteristics: None,
index_name: None,
index_type: None,
index_options: vec![],
constraints.push(TableConstraint::PrimaryKey { columns });
}
let inverted_index_set = schema
.column_schemas()
.iter()
.any(|c| c.has_inverted_index_key());
if inverted_index_set {
let inverted_index_cols = schema
.column_schemas()
.iter()
.filter(|c| c.is_inverted_indexed())
.map(|c| Ident::with_quote(quote_style, &c.name))
.collect::<Vec<_>>();
constraints.push(TableConstraint::InvertedIndex {
columns: inverted_index_cols,
});
}
@@ -229,7 +229,8 @@ mod tests {
fn test_show_create_table_sql() {
let schema = vec![
ColumnSchema::new("id", ConcreteDataType::uint32_datatype(), true),
ColumnSchema::new("host", ConcreteDataType::string_datatype(), true),
ColumnSchema::new("host", ConcreteDataType::string_datatype(), true)
.set_inverted_index(true),
ColumnSchema::new("cpu", ConcreteDataType::float64_datatype(), true),
ColumnSchema::new("disk", ConcreteDataType::float32_datatype(), true),
ColumnSchema::new("msg", ConcreteDataType::string_datatype(), true)
@@ -295,7 +296,8 @@ CREATE TABLE IF NOT EXISTS "system_metrics" (
"msg" STRING NULL FULLTEXT WITH(analyzer = 'English', case_sensitive = 'false'),
"ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(),
TIME INDEX ("ts"),
PRIMARY KEY ("id", "host")
PRIMARY KEY ("id", "host"),
INVERTED INDEX ("host")
)
ENGINE=mito
"#,

View File

@@ -28,4 +28,3 @@ pub use parsers::create_parser::{
COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, ENGINE, MAXVALUE,
};
pub use parsers::tql_parser::TQL;
pub use statements::create::TIME_INDEX;

View File

@@ -20,7 +20,7 @@ use datatypes::arrow::datatypes::{DataType as ArrowDataType, IntervalUnit};
use datatypes::data_type::ConcreteDataType;
use itertools::Itertools;
use snafu::{ensure, OptionExt, ResultExt};
use sqlparser::ast::{ColumnOption, ColumnOptionDef, DataType, Expr, KeyOrIndexDisplay};
use sqlparser::ast::{ColumnOption, ColumnOptionDef, DataType, Expr};
use sqlparser::dialect::keywords::Keyword;
use sqlparser::keywords::ALL_KEYWORDS;
use sqlparser::parser::IsOptional::Mandatory;
@@ -29,7 +29,7 @@ use sqlparser::tokenizer::{Token, TokenWithLocation, Word};
use table::requests::validate_table_option;
use super::utils;
use crate::ast::{ColumnDef, Ident, TableConstraint};
use crate::ast::{ColumnDef, Ident};
use crate::error::{
self, InvalidColumnOptionSnafu, InvalidDatabaseOptionSnafu, InvalidIntervalSnafu,
InvalidSqlSnafu, InvalidTableOptionSnafu, InvalidTimeIndexSnafu, MissingTimeIndexSnafu, Result,
@@ -38,7 +38,7 @@ use crate::error::{
use crate::parser::{ParserContext, FLOW};
use crate::statements::create::{
Column, ColumnExtensions, CreateDatabase, CreateExternalTable, CreateFlow, CreateTable,
CreateTableLike, CreateView, Partitions, TIME_INDEX,
CreateTableLike, CreateView, Partitions, TableConstraint,
};
use crate::statements::statement::Statement;
use crate::statements::{
@@ -51,6 +51,7 @@ pub const MAXVALUE: &str = "MAXVALUE";
pub const SINK: &str = "SINK";
pub const EXPIRE: &str = "EXPIRE";
pub const AFTER: &str = "AFTER";
pub const INVERTED: &str = "INVERTED";
const DB_OPT_KEY_TTL: &str = "ttl";
@@ -500,20 +501,11 @@ impl<'a> ParserContext<'a> {
);
time_index_opt_idx = Some(index);
let constraint = TableConstraint::Unique {
name: Some(Ident {
value: TIME_INDEX.to_owned(),
quote_style: None,
}),
columns: vec![Ident {
let constraint = TableConstraint::TimeIndex {
column: Ident {
value: column.name().value.clone(),
quote_style: None,
}],
characteristics: None,
index_name: None,
index_type_display: KeyOrIndexDisplay::None,
index_type: None,
index_options: vec![],
},
};
constraints.push(constraint);
}
@@ -730,12 +722,6 @@ impl<'a> ParserContext<'a> {
}
fn parse_optional_table_constraint(&mut self) -> Result<Option<TableConstraint>> {
let name = if self.parser.parse_keyword(Keyword::CONSTRAINT) {
let raw_name = self.parse_identifier().context(SyntaxSnafu)?;
Some(Self::canonicalize_identifier(raw_name))
} else {
None
};
match self.parser.next_token() {
TokenWithLocation {
token: Token::Word(w),
@@ -755,14 +741,7 @@ impl<'a> ParserContext<'a> {
.into_iter()
.map(Self::canonicalize_identifier)
.collect();
Ok(Some(TableConstraint::PrimaryKey {
name,
index_name: None,
index_type: None,
columns,
index_options: vec![],
characteristics: None,
}))
Ok(Some(TableConstraint::PrimaryKey { columns }))
}
TokenWithLocation {
token: Token::Word(w),
@@ -779,7 +758,7 @@ impl<'a> ParserContext<'a> {
.parser
.parse_parenthesized_column_list(Mandatory, false)
.context(error::SyntaxSnafu)?;
let columns = raw_columns
let mut columns = raw_columns
.into_iter()
.map(Self::canonicalize_identifier)
.collect::<Vec<_>>();
@@ -791,28 +770,35 @@ impl<'a> ParserContext<'a> {
}
);
// TODO(dennis): TableConstraint doesn't support dialect right now,
// so we use unique constraint with special key to represent TIME INDEX.
Ok(Some(TableConstraint::Unique {
name: Some(Ident {
value: TIME_INDEX.to_owned(),
quote_style: None,
}),
columns,
characteristics: None,
index_name: None,
index_type_display: KeyOrIndexDisplay::None,
index_type: None,
index_options: vec![],
Ok(Some(TableConstraint::TimeIndex {
column: columns.pop().unwrap(),
}))
}
unexpected => {
if name.is_some() {
self.expected("PRIMARY, TIME", unexpected)
} else {
self.parser.prev_token();
Ok(None)
}
TokenWithLocation {
token: Token::Word(w),
..
} if w.value == INVERTED => {
self.parser
.expect_keyword(Keyword::INDEX)
.context(error::UnexpectedSnafu {
expected: "INDEX",
actual: self.peek_token_as_string(),
})?;
let raw_columns = self
.parser
// allow empty list to unset inverted index
.parse_parenthesized_column_list(Mandatory, true)
.context(error::SyntaxSnafu)?;
let columns = raw_columns
.into_iter()
.map(Self::canonicalize_identifier)
.collect::<Vec<_>>();
Ok(Some(TableConstraint::InvertedIndex { columns }))
}
_ => {
self.parser.prev_token();
Ok(None)
}
}
}
@@ -842,21 +828,9 @@ impl<'a> ParserContext<'a> {
fn validate_time_index(columns: &[Column], constraints: &[TableConstraint]) -> Result<()> {
let time_index_constraints: Vec<_> = constraints
.iter()
.filter_map(|c| {
if let TableConstraint::Unique {
name: Some(ident),
columns,
..
} = c
{
if ident.value == TIME_INDEX {
Some(columns)
} else {
None
}
} else {
None
}
.filter_map(|c| match c {
TableConstraint::TimeIndex { column } => Some(column),
_ => None,
})
.unique()
.collect();
@@ -871,16 +845,10 @@ fn validate_time_index(columns: &[Column], constraints: &[TableConstraint]) -> R
),
}
);
ensure!(
time_index_constraints[0].len() == 1,
InvalidTimeIndexSnafu {
msg: "it should contain only one column in time index",
}
);
// It's safe to use time_index_constraints[0][0],
// we already check the bound above.
let time_index_column_ident = &time_index_constraints[0][0];
let time_index_column_ident = &time_index_constraints[0];
let time_index_column = columns
.iter()
.find(|c| c.name().value == *time_index_column_ident.value)
@@ -1120,7 +1088,8 @@ mod tests {
cpu float32 default 0,
memory float64,
TIME INDEX (ts),
PRIMARY KEY(ts, host)
PRIMARY KEY(ts, host),
INVERTED INDEX(host)
) with(location='/var/data/city.csv',format='csv');";
let options = HashMap::from([
@@ -1144,11 +1113,24 @@ mod tests {
assert_column_def(&columns[3].column_def, "memory", "FLOAT64");
let constraints = &c.constraints;
assert!(matches!(&constraints[0], TableConstraint::Unique {
name: Some(name),
..
} if name.value == TIME_INDEX));
assert_matches!(&constraints[1], TableConstraint::PrimaryKey { .. });
assert_eq!(
&constraints[0],
&TableConstraint::TimeIndex {
column: Ident::new("ts"),
}
);
assert_eq!(
&constraints[1],
&TableConstraint::PrimaryKey {
columns: vec![Ident::new("ts"), Ident::new("host")]
}
);
assert_eq!(
&constraints[2],
&TableConstraint::InvertedIndex {
columns: vec![Ident::new("host")]
}
);
}
_ => unreachable!(),
}
@@ -1478,10 +1460,8 @@ ENGINE=mito";
assert_eq!(c.constraints.len(), 2);
let tc = c.constraints[0].clone();
match tc {
TableConstraint::Unique { name, columns, .. } => {
assert_eq!(name.unwrap().to_string(), "__time_index");
assert_eq!(columns.len(), 1);
assert_eq!(&columns[0].value, "ts");
TableConstraint::TimeIndex { column } => {
assert_eq!(&column.value, "ts");
}
_ => panic!("should be time index constraint"),
};
@@ -1679,10 +1659,8 @@ ENGINE=mito";
if let Statement::CreateTable(c) = &result[0] {
let tc = c.constraints[0].clone();
match tc {
TableConstraint::Unique { name, columns, .. } => {
assert_eq!(name.unwrap().to_string(), "__time_index");
assert_eq!(columns.len(), 1);
assert_eq!(&columns[0].value, "ts");
TableConstraint::TimeIndex { column } => {
assert_eq!(&column.value, "ts");
}
_ => panic!("should be time index constraint"),
}
@@ -1769,7 +1747,9 @@ ENGINE=mito";
cpu float32 default 0,
memory float64,
TIME INDEX (ts),
PRIMARY KEY(ts, host)) engine=mito
PRIMARY KEY(ts, host),
INVERTED INDEX(host)
) engine=mito
with(ttl='10s');
";
let result =
@@ -1789,11 +1769,24 @@ ENGINE=mito";
assert_column_def(&columns[3].column_def, "memory", "FLOAT64");
let constraints = &c.constraints;
assert!(matches!(&constraints[0], TableConstraint::Unique {
name: Some(name),
..
} if name.value == TIME_INDEX));
assert_matches!(&constraints[1], TableConstraint::PrimaryKey { .. });
assert_eq!(
&constraints[0],
&TableConstraint::TimeIndex {
column: Ident::new("ts"),
}
);
assert_eq!(
&constraints[1],
&TableConstraint::PrimaryKey {
columns: vec![Ident::new("ts"), Ident::new("host")]
}
);
assert_eq!(
&constraints[2],
&TableConstraint::InvertedIndex {
columns: vec![Ident::new("host")]
}
);
assert_eq!(1, c.options.len());
assert_eq!(
[("ttl", "10s")].into_iter().collect::<HashMap<_, _>>(),
@@ -1851,6 +1844,33 @@ ENGINE=mito";
assert_matches!(result, Err(crate::error::Error::InvalidTimeIndex { .. }));
}
#[test]
fn test_inverted_index_empty_list() {
let sql = r"create table demo(
host string,
ts timestamp time index,
cpu float64 default 0,
memory float64,
TIME INDEX (ts),
INVERTED INDEX()
) engine=mito;
";
let result =
ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default())
.unwrap();
if let Statement::CreateTable(c) = &result[0] {
let tc = &c
.constraints
.iter()
.find(|c| matches!(c, TableConstraint::InvertedIndex { .. }))
.unwrap();
assert_eq!(*tc, &TableConstraint::InvertedIndex { columns: vec![] });
} else {
unreachable!("should be create table statement");
}
}
#[test]
fn test_invalid_column_name() {
let sql = "create table foo(user string, i timestamp time index)";

View File

@@ -453,9 +453,13 @@ pub fn has_primary_key_option(column_def: &ColumnDef) -> bool {
/// Create a `ColumnSchema` from `Column`.
pub fn column_to_schema(
column: &Column,
is_time_index: bool,
time_index: &str,
invereted_index_cols: &Option<Vec<String>>,
primary_keys: &[String],
timezone: Option<&Timezone>,
) -> Result<ColumnSchema> {
let is_time_index = column.name().value == time_index;
let is_nullable = column
.options()
.iter()
@@ -474,6 +478,20 @@ pub fn column_to_schema(
column: &column.name().value,
})?;
// To keep compatibility,
// 1. if inverted index columns is not set, leave it empty meaning primary key columns will be used
// 2. if inverted index columns is set and non-empty, set selected columns to be inverted indexed
// 3. if inverted index columns is set and empty, set primary key columns to be non-inverted indexed explicitly
if let Some(inverted_index_cols) = invereted_index_cols {
if inverted_index_cols.is_empty() {
if primary_keys.contains(&column.name().value) {
column_schema = column_schema.set_inverted_index(false);
}
} else if inverted_index_cols.contains(&column.name().value) {
column_schema = column_schema.set_inverted_index(true);
}
}
if let Some(ColumnOption::Comment(c)) = column.options().iter().find_map(|o| {
if matches!(o.option, ColumnOption::Comment(_)) {
Some(&o.option)
@@ -1337,7 +1355,7 @@ mod tests {
extensions: ColumnExtensions::default(),
};
let column_schema = column_to_schema(&column_def, false, None).unwrap();
let column_schema = column_to_schema(&column_def, "ts", &None, &[], None).unwrap();
assert_eq!("col", column_schema.name);
assert_eq!(
@@ -1347,7 +1365,7 @@ mod tests {
assert!(column_schema.is_nullable());
assert!(!column_schema.is_time_index());
let column_schema = column_to_schema(&column_def, true, None).unwrap();
let column_schema = column_to_schema(&column_def, "col", &None, &[], None).unwrap();
assert_eq!("col", column_schema.name);
assert_eq!(
@@ -1376,7 +1394,7 @@ mod tests {
extensions: ColumnExtensions::default(),
};
let column_schema = column_to_schema(&column_def, false, None).unwrap();
let column_schema = column_to_schema(&column_def, "ts", &None, &[], None).unwrap();
assert_eq!("col2", column_schema.name);
assert_eq!(ConcreteDataType::string_datatype(), column_schema.data_type);
@@ -1410,7 +1428,9 @@ mod tests {
let column_schema = column_to_schema(
&column,
false,
"ts",
&None,
&[],
Some(&Timezone::from_tz_string("Asia/Shanghai").unwrap()),
)
.unwrap();
@@ -1429,7 +1449,7 @@ mod tests {
);
// without timezone
let column_schema = column_to_schema(&column, false, None).unwrap();
let column_schema = column_to_schema(&column, "ts", &None, &[], None).unwrap();
assert_eq!("col", column_schema.name);
assert_eq!(
@@ -1471,7 +1491,7 @@ mod tests {
},
};
let column_schema = column_to_schema(&column, false, None).unwrap();
let column_schema = column_to_schema(&column, "ts", &None, &[], None).unwrap();
assert_eq!("col", column_schema.name);
assert_eq!(ConcreteDataType::string_datatype(), column_schema.data_type);
let fulltext_options = column_schema.fulltext_options().unwrap().unwrap();

View File

@@ -20,7 +20,7 @@ use itertools::Itertools;
use sqlparser::ast::{ColumnOptionDef, DataType, Expr, Query};
use sqlparser_derive::{Visit, VisitMut};
use crate::ast::{ColumnDef, Ident, ObjectName, TableConstraint, Value as SqlValue};
use crate::ast::{ColumnDef, Ident, ObjectName, Value as SqlValue};
use crate::error::{FulltextInvalidOptionSnafu, Result};
use crate::statements::statement::Statement;
use crate::statements::OptionMap;
@@ -52,31 +52,34 @@ macro_rules! format_list_comma {
}
fn format_table_constraint(constraints: &[TableConstraint]) -> String {
constraints
.iter()
.map(|c| {
if is_time_index(c) {
let TableConstraint::Unique { columns, .. } = c else {
unreachable!()
};
format_indent!("{}TIME INDEX ({})", format_list_comma!(columns))
} else {
format_indent!(c)
}
})
.join(LINE_SEP)
constraints.iter().map(|c| format_indent!(c)).join(LINE_SEP)
}
/// Time index name, used in table constraints.
pub const TIME_INDEX: &str = "__time_index";
/// Table constraint for create table statement.
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
pub enum TableConstraint {
/// Primary key constraint.
PrimaryKey { columns: Vec<Ident> },
/// Time index constraint.
TimeIndex { column: Ident },
/// Inverted index constraint.
InvertedIndex { columns: Vec<Ident> },
}
#[inline]
pub fn is_time_index(constraint: &TableConstraint) -> bool {
matches!(constraint, TableConstraint::Unique {
name: Some(name),
..
} if name.value == TIME_INDEX)
impl Display for TableConstraint {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
TableConstraint::PrimaryKey { columns } => {
write!(f, "PRIMARY KEY ({})", format_list_comma!(columns))
}
TableConstraint::TimeIndex { column } => {
write!(f, "TIME INDEX ({})", column)
}
TableConstraint::InvertedIndex { columns } => {
write!(f, "INVERTED INDEX ({})", format_list_comma!(columns))
}
}
}
}
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]

View File

@@ -323,6 +323,36 @@ impl RegionMetadata {
})
}
/// Gets the column ids to be indexed by inverted index.
///
/// If there is no column with inverted index key, it will use primary key columns.
pub fn inverted_indexed_column_ids<'a>(
&self,
ignore_column_ids: impl Iterator<Item = &'a ColumnId>,
) -> HashSet<ColumnId> {
// Default to use primary key columns as inverted index columns.
let pk_as_inverted_index = !self
.column_metadatas
.iter()
.any(|c| c.column_schema.has_inverted_index_key());
let mut inverted_index: HashSet<_> = if pk_as_inverted_index {
self.primary_key_columns().map(|c| c.column_id).collect()
} else {
self.column_metadatas
.iter()
.filter(|column| column.column_schema.is_inverted_indexed())
.map(|column| column.column_id)
.collect()
};
for ignored in ignore_column_ids {
inverted_index.remove(ignored);
}
inverted_index
}
/// Checks whether the metadata is valid.
fn validate(&self) -> Result<()> {
// Id to name.

View File

@@ -200,3 +200,73 @@ show create table information_schema.columns;
Error: 1001(Unsupported), Show create table only for base table. greptime.information_schema.columns is TEMPORARY
CREATE TABLE "specify_invereted_index_cols" (
"ts" TIMESTAMP(3) NOT NULL,
"val" DOUBLE NULL,
"host" STRING NULL,
"job" STRING NULL,
TIME INDEX ("ts"),
PRIMARY KEY ("host", "job"),
INVERTED INDEX ("job")
);
Affected Rows: 0
show create table specify_invereted_index_cols;
+------------------------------+-------------------------------------------------------------+
| Table | Create Table |
+------------------------------+-------------------------------------------------------------+
| specify_invereted_index_cols | CREATE TABLE IF NOT EXISTS "specify_invereted_index_cols" ( |
| | "ts" TIMESTAMP(3) NOT NULL, |
| | "val" DOUBLE NULL, |
| | "host" STRING NULL, |
| | "job" STRING NULL, |
| | TIME INDEX ("ts"), |
| | PRIMARY KEY ("host", "job"), |
| | INVERTED INDEX ("job") |
| | ) |
| | |
| | ENGINE=mito |
| | |
+------------------------------+-------------------------------------------------------------+
drop table specify_invereted_index_cols;
Affected Rows: 0
CREATE TABLE "specify_empty_invereted_index_cols" (
"ts" TIMESTAMP(3) NOT NULL,
"val" DOUBLE NULL,
"host" STRING NULL,
"job" STRING NULL,
TIME INDEX ("ts"),
PRIMARY KEY ("host", "job"),
INVERTED INDEX ()
);
Affected Rows: 0
show create table specify_empty_invereted_index_cols;
+------------------------------------+-------------------------------------------------------------------+
| Table | Create Table |
+------------------------------------+-------------------------------------------------------------------+
| specify_empty_invereted_index_cols | CREATE TABLE IF NOT EXISTS "specify_empty_invereted_index_cols" ( |
| | "ts" TIMESTAMP(3) NOT NULL, |
| | "val" DOUBLE NULL, |
| | "host" STRING NULL, |
| | "job" STRING NULL, |
| | TIME INDEX ("ts"), |
| | PRIMARY KEY ("host", "job"), |
| | INVERTED INDEX () |
| | ) |
| | |
| | ENGINE=mito |
| | |
+------------------------------------+-------------------------------------------------------------------+
drop table specify_empty_invereted_index_cols;
Affected Rows: 0

View File

@@ -83,3 +83,31 @@ drop table phy;
show create table numbers;
show create table information_schema.columns;
CREATE TABLE "specify_invereted_index_cols" (
"ts" TIMESTAMP(3) NOT NULL,
"val" DOUBLE NULL,
"host" STRING NULL,
"job" STRING NULL,
TIME INDEX ("ts"),
PRIMARY KEY ("host", "job"),
INVERTED INDEX ("job")
);
show create table specify_invereted_index_cols;
drop table specify_invereted_index_cols;
CREATE TABLE "specify_empty_invereted_index_cols" (
"ts" TIMESTAMP(3) NOT NULL,
"val" DOUBLE NULL,
"host" STRING NULL,
"job" STRING NULL,
TIME INDEX ("ts"),
PRIMARY KEY ("host", "job"),
INVERTED INDEX ()
);
show create table specify_empty_invereted_index_cols;
drop table specify_empty_invereted_index_cols;