mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-30 20:00:36 +00:00
feat(index): support SQL to specify inverted index columns (#4929)
* feat(index): support building inverted index for the field column Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * feat(index): support SQL to specify inverted index columns Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * test: fix sqlness Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * fix: consider compatibility Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * polish Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * compatibility Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * fix Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * fix: ignore case Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * refactor: reduce dup Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * fix: clippy Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> --------- Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
This commit is contained in:
@@ -28,4 +28,3 @@ pub use parsers::create_parser::{
|
||||
COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, ENGINE, MAXVALUE,
|
||||
};
|
||||
pub use parsers::tql_parser::TQL;
|
||||
pub use statements::create::TIME_INDEX;
|
||||
|
||||
@@ -20,7 +20,7 @@ use datatypes::arrow::datatypes::{DataType as ArrowDataType, IntervalUnit};
|
||||
use datatypes::data_type::ConcreteDataType;
|
||||
use itertools::Itertools;
|
||||
use snafu::{ensure, OptionExt, ResultExt};
|
||||
use sqlparser::ast::{ColumnOption, ColumnOptionDef, DataType, Expr, KeyOrIndexDisplay};
|
||||
use sqlparser::ast::{ColumnOption, ColumnOptionDef, DataType, Expr};
|
||||
use sqlparser::dialect::keywords::Keyword;
|
||||
use sqlparser::keywords::ALL_KEYWORDS;
|
||||
use sqlparser::parser::IsOptional::Mandatory;
|
||||
@@ -29,7 +29,7 @@ use sqlparser::tokenizer::{Token, TokenWithLocation, Word};
|
||||
use table::requests::validate_table_option;
|
||||
|
||||
use super::utils;
|
||||
use crate::ast::{ColumnDef, Ident, TableConstraint};
|
||||
use crate::ast::{ColumnDef, Ident};
|
||||
use crate::error::{
|
||||
self, InvalidColumnOptionSnafu, InvalidDatabaseOptionSnafu, InvalidIntervalSnafu,
|
||||
InvalidSqlSnafu, InvalidTableOptionSnafu, InvalidTimeIndexSnafu, MissingTimeIndexSnafu, Result,
|
||||
@@ -38,7 +38,7 @@ use crate::error::{
|
||||
use crate::parser::{ParserContext, FLOW};
|
||||
use crate::statements::create::{
|
||||
Column, ColumnExtensions, CreateDatabase, CreateExternalTable, CreateFlow, CreateTable,
|
||||
CreateTableLike, CreateView, Partitions, TIME_INDEX,
|
||||
CreateTableLike, CreateView, Partitions, TableConstraint,
|
||||
};
|
||||
use crate::statements::statement::Statement;
|
||||
use crate::statements::{
|
||||
@@ -51,6 +51,7 @@ pub const MAXVALUE: &str = "MAXVALUE";
|
||||
pub const SINK: &str = "SINK";
|
||||
pub const EXPIRE: &str = "EXPIRE";
|
||||
pub const AFTER: &str = "AFTER";
|
||||
/// Word that, when followed by the `INDEX` keyword, introduces an
/// `INVERTED INDEX (...)` table constraint in `CREATE TABLE`.
pub const INVERTED: &str = "INVERTED";
|
||||
|
||||
const DB_OPT_KEY_TTL: &str = "ttl";
|
||||
|
||||
@@ -500,20 +501,11 @@ impl<'a> ParserContext<'a> {
|
||||
);
|
||||
time_index_opt_idx = Some(index);
|
||||
|
||||
let constraint = TableConstraint::Unique {
|
||||
name: Some(Ident {
|
||||
value: TIME_INDEX.to_owned(),
|
||||
quote_style: None,
|
||||
}),
|
||||
columns: vec![Ident {
|
||||
let constraint = TableConstraint::TimeIndex {
|
||||
column: Ident {
|
||||
value: column.name().value.clone(),
|
||||
quote_style: None,
|
||||
}],
|
||||
characteristics: None,
|
||||
index_name: None,
|
||||
index_type_display: KeyOrIndexDisplay::None,
|
||||
index_type: None,
|
||||
index_options: vec![],
|
||||
},
|
||||
};
|
||||
constraints.push(constraint);
|
||||
}
|
||||
@@ -730,12 +722,6 @@ impl<'a> ParserContext<'a> {
|
||||
}
|
||||
|
||||
fn parse_optional_table_constraint(&mut self) -> Result<Option<TableConstraint>> {
|
||||
let name = if self.parser.parse_keyword(Keyword::CONSTRAINT) {
|
||||
let raw_name = self.parse_identifier().context(SyntaxSnafu)?;
|
||||
Some(Self::canonicalize_identifier(raw_name))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
match self.parser.next_token() {
|
||||
TokenWithLocation {
|
||||
token: Token::Word(w),
|
||||
@@ -755,14 +741,7 @@ impl<'a> ParserContext<'a> {
|
||||
.into_iter()
|
||||
.map(Self::canonicalize_identifier)
|
||||
.collect();
|
||||
Ok(Some(TableConstraint::PrimaryKey {
|
||||
name,
|
||||
index_name: None,
|
||||
index_type: None,
|
||||
columns,
|
||||
index_options: vec![],
|
||||
characteristics: None,
|
||||
}))
|
||||
Ok(Some(TableConstraint::PrimaryKey { columns }))
|
||||
}
|
||||
TokenWithLocation {
|
||||
token: Token::Word(w),
|
||||
@@ -779,7 +758,7 @@ impl<'a> ParserContext<'a> {
|
||||
.parser
|
||||
.parse_parenthesized_column_list(Mandatory, false)
|
||||
.context(error::SyntaxSnafu)?;
|
||||
let columns = raw_columns
|
||||
let mut columns = raw_columns
|
||||
.into_iter()
|
||||
.map(Self::canonicalize_identifier)
|
||||
.collect::<Vec<_>>();
|
||||
@@ -791,28 +770,35 @@ impl<'a> ParserContext<'a> {
|
||||
}
|
||||
);
|
||||
|
||||
// TODO(dennis): TableConstraint doesn't support dialect right now,
|
||||
// so we use unique constraint with special key to represent TIME INDEX.
|
||||
Ok(Some(TableConstraint::Unique {
|
||||
name: Some(Ident {
|
||||
value: TIME_INDEX.to_owned(),
|
||||
quote_style: None,
|
||||
}),
|
||||
columns,
|
||||
characteristics: None,
|
||||
index_name: None,
|
||||
index_type_display: KeyOrIndexDisplay::None,
|
||||
index_type: None,
|
||||
index_options: vec![],
|
||||
Ok(Some(TableConstraint::TimeIndex {
|
||||
column: columns.pop().unwrap(),
|
||||
}))
|
||||
}
|
||||
unexpected => {
|
||||
if name.is_some() {
|
||||
self.expected("PRIMARY, TIME", unexpected)
|
||||
} else {
|
||||
self.parser.prev_token();
|
||||
Ok(None)
|
||||
}
|
||||
TokenWithLocation {
|
||||
token: Token::Word(w),
|
||||
..
|
||||
} if w.value == INVERTED => {
|
||||
self.parser
|
||||
.expect_keyword(Keyword::INDEX)
|
||||
.context(error::UnexpectedSnafu {
|
||||
expected: "INDEX",
|
||||
actual: self.peek_token_as_string(),
|
||||
})?;
|
||||
|
||||
let raw_columns = self
|
||||
.parser
|
||||
// allow empty list to unset inverted index
|
||||
.parse_parenthesized_column_list(Mandatory, true)
|
||||
.context(error::SyntaxSnafu)?;
|
||||
let columns = raw_columns
|
||||
.into_iter()
|
||||
.map(Self::canonicalize_identifier)
|
||||
.collect::<Vec<_>>();
|
||||
Ok(Some(TableConstraint::InvertedIndex { columns }))
|
||||
}
|
||||
_ => {
|
||||
self.parser.prev_token();
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -842,21 +828,9 @@ impl<'a> ParserContext<'a> {
|
||||
fn validate_time_index(columns: &[Column], constraints: &[TableConstraint]) -> Result<()> {
|
||||
let time_index_constraints: Vec<_> = constraints
|
||||
.iter()
|
||||
.filter_map(|c| {
|
||||
if let TableConstraint::Unique {
|
||||
name: Some(ident),
|
||||
columns,
|
||||
..
|
||||
} = c
|
||||
{
|
||||
if ident.value == TIME_INDEX {
|
||||
Some(columns)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
.filter_map(|c| match c {
|
||||
TableConstraint::TimeIndex { column } => Some(column),
|
||||
_ => None,
|
||||
})
|
||||
.unique()
|
||||
.collect();
|
||||
@@ -871,16 +845,10 @@ fn validate_time_index(columns: &[Column], constraints: &[TableConstraint]) -> R
|
||||
),
|
||||
}
|
||||
);
|
||||
ensure!(
|
||||
time_index_constraints[0].len() == 1,
|
||||
InvalidTimeIndexSnafu {
|
||||
msg: "it should contain only one column in time index",
|
||||
}
|
||||
);
|
||||
|
||||
// It's safe to use time_index_constraints[0][0],
|
||||
// we already check the bound above.
|
||||
let time_index_column_ident = &time_index_constraints[0][0];
|
||||
let time_index_column_ident = &time_index_constraints[0];
|
||||
let time_index_column = columns
|
||||
.iter()
|
||||
.find(|c| c.name().value == *time_index_column_ident.value)
|
||||
@@ -1120,7 +1088,8 @@ mod tests {
|
||||
cpu float32 default 0,
|
||||
memory float64,
|
||||
TIME INDEX (ts),
|
||||
PRIMARY KEY(ts, host)
|
||||
PRIMARY KEY(ts, host),
|
||||
INVERTED INDEX(host)
|
||||
) with(location='/var/data/city.csv',format='csv');";
|
||||
|
||||
let options = HashMap::from([
|
||||
@@ -1144,11 +1113,24 @@ mod tests {
|
||||
assert_column_def(&columns[3].column_def, "memory", "FLOAT64");
|
||||
|
||||
let constraints = &c.constraints;
|
||||
assert!(matches!(&constraints[0], TableConstraint::Unique {
|
||||
name: Some(name),
|
||||
..
|
||||
} if name.value == TIME_INDEX));
|
||||
assert_matches!(&constraints[1], TableConstraint::PrimaryKey { .. });
|
||||
assert_eq!(
|
||||
&constraints[0],
|
||||
&TableConstraint::TimeIndex {
|
||||
column: Ident::new("ts"),
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
&constraints[1],
|
||||
&TableConstraint::PrimaryKey {
|
||||
columns: vec![Ident::new("ts"), Ident::new("host")]
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
&constraints[2],
|
||||
&TableConstraint::InvertedIndex {
|
||||
columns: vec![Ident::new("host")]
|
||||
}
|
||||
);
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
@@ -1478,10 +1460,8 @@ ENGINE=mito";
|
||||
assert_eq!(c.constraints.len(), 2);
|
||||
let tc = c.constraints[0].clone();
|
||||
match tc {
|
||||
TableConstraint::Unique { name, columns, .. } => {
|
||||
assert_eq!(name.unwrap().to_string(), "__time_index");
|
||||
assert_eq!(columns.len(), 1);
|
||||
assert_eq!(&columns[0].value, "ts");
|
||||
TableConstraint::TimeIndex { column } => {
|
||||
assert_eq!(&column.value, "ts");
|
||||
}
|
||||
_ => panic!("should be time index constraint"),
|
||||
};
|
||||
@@ -1679,10 +1659,8 @@ ENGINE=mito";
|
||||
if let Statement::CreateTable(c) = &result[0] {
|
||||
let tc = c.constraints[0].clone();
|
||||
match tc {
|
||||
TableConstraint::Unique { name, columns, .. } => {
|
||||
assert_eq!(name.unwrap().to_string(), "__time_index");
|
||||
assert_eq!(columns.len(), 1);
|
||||
assert_eq!(&columns[0].value, "ts");
|
||||
TableConstraint::TimeIndex { column } => {
|
||||
assert_eq!(&column.value, "ts");
|
||||
}
|
||||
_ => panic!("should be time index constraint"),
|
||||
}
|
||||
@@ -1769,7 +1747,9 @@ ENGINE=mito";
|
||||
cpu float32 default 0,
|
||||
memory float64,
|
||||
TIME INDEX (ts),
|
||||
PRIMARY KEY(ts, host)) engine=mito
|
||||
PRIMARY KEY(ts, host),
|
||||
INVERTED INDEX(host)
|
||||
) engine=mito
|
||||
with(ttl='10s');
|
||||
";
|
||||
let result =
|
||||
@@ -1789,11 +1769,24 @@ ENGINE=mito";
|
||||
assert_column_def(&columns[3].column_def, "memory", "FLOAT64");
|
||||
|
||||
let constraints = &c.constraints;
|
||||
assert!(matches!(&constraints[0], TableConstraint::Unique {
|
||||
name: Some(name),
|
||||
..
|
||||
} if name.value == TIME_INDEX));
|
||||
assert_matches!(&constraints[1], TableConstraint::PrimaryKey { .. });
|
||||
assert_eq!(
|
||||
&constraints[0],
|
||||
&TableConstraint::TimeIndex {
|
||||
column: Ident::new("ts"),
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
&constraints[1],
|
||||
&TableConstraint::PrimaryKey {
|
||||
columns: vec![Ident::new("ts"), Ident::new("host")]
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
&constraints[2],
|
||||
&TableConstraint::InvertedIndex {
|
||||
columns: vec![Ident::new("host")]
|
||||
}
|
||||
);
|
||||
assert_eq!(1, c.options.len());
|
||||
assert_eq!(
|
||||
[("ttl", "10s")].into_iter().collect::<HashMap<_, _>>(),
|
||||
@@ -1851,6 +1844,33 @@ ENGINE=mito";
|
||||
assert_matches!(result, Err(crate::error::Error::InvalidTimeIndex { .. }));
|
||||
}
|
||||
|
||||
// Checks that `INVERTED INDEX()` with an empty column list is accepted by the
// parser and is preserved as a `TableConstraint::InvertedIndex` whose `columns`
// vector is empty.
#[test]
|
||||
fn test_inverted_index_empty_list() {
|
||||
let sql = r"create table demo(
|
||||
host string,
|
||||
ts timestamp time index,
|
||||
cpu float64 default 0,
|
||||
memory float64,
|
||||
TIME INDEX (ts),
|
||||
INVERTED INDEX()
|
||||
) engine=mito;
|
||||
";
|
||||
// Parsing is expected to succeed; `unwrap` fails the test on any parser error.
let result =
|
||||
ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default())
|
||||
.unwrap();
|
||||
|
||||
if let Statement::CreateTable(c) = &result[0] {
|
||||
// The inverted index constraint is looked up by variant rather than by
// position in the constraint list.
let tc = &c
|
||||
.constraints
|
||||
.iter()
|
||||
.find(|c| matches!(c, TableConstraint::InvertedIndex { .. }))
|
||||
.unwrap();
|
||||
// An empty parenthesized list must yield an empty `columns` vector.
assert_eq!(*tc, &TableConstraint::InvertedIndex { columns: vec![] });
|
||||
} else {
|
||||
unreachable!("should be create table statement");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_invalid_column_name() {
|
||||
let sql = "create table foo(user string, i timestamp time index)";
|
||||
|
||||
@@ -453,9 +453,13 @@ pub fn has_primary_key_option(column_def: &ColumnDef) -> bool {
|
||||
/// Create a `ColumnSchema` from `Column`.
|
||||
pub fn column_to_schema(
|
||||
column: &Column,
|
||||
is_time_index: bool,
|
||||
time_index: &str,
|
||||
invereted_index_cols: &Option<Vec<String>>,
|
||||
primary_keys: &[String],
|
||||
timezone: Option<&Timezone>,
|
||||
) -> Result<ColumnSchema> {
|
||||
let is_time_index = column.name().value == time_index;
|
||||
|
||||
let is_nullable = column
|
||||
.options()
|
||||
.iter()
|
||||
@@ -474,6 +478,20 @@ pub fn column_to_schema(
|
||||
column: &column.name().value,
|
||||
})?;
|
||||
|
||||
// To keep compatibility:
|
||||
// 1. if inverted index columns are not set, leave it empty, meaning primary key columns will be used
|
||||
// 2. if inverted index columns are set and non-empty, mark the selected columns as inverted indexed
|
||||
// 3. if inverted index columns are set and empty, explicitly mark primary key columns as not inverted indexed
|
||||
if let Some(inverted_index_cols) = invereted_index_cols {
|
||||
if inverted_index_cols.is_empty() {
|
||||
if primary_keys.contains(&column.name().value) {
|
||||
column_schema = column_schema.set_inverted_index(false);
|
||||
}
|
||||
} else if inverted_index_cols.contains(&column.name().value) {
|
||||
column_schema = column_schema.set_inverted_index(true);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(ColumnOption::Comment(c)) = column.options().iter().find_map(|o| {
|
||||
if matches!(o.option, ColumnOption::Comment(_)) {
|
||||
Some(&o.option)
|
||||
@@ -1337,7 +1355,7 @@ mod tests {
|
||||
extensions: ColumnExtensions::default(),
|
||||
};
|
||||
|
||||
let column_schema = column_to_schema(&column_def, false, None).unwrap();
|
||||
let column_schema = column_to_schema(&column_def, "ts", &None, &[], None).unwrap();
|
||||
|
||||
assert_eq!("col", column_schema.name);
|
||||
assert_eq!(
|
||||
@@ -1347,7 +1365,7 @@ mod tests {
|
||||
assert!(column_schema.is_nullable());
|
||||
assert!(!column_schema.is_time_index());
|
||||
|
||||
let column_schema = column_to_schema(&column_def, true, None).unwrap();
|
||||
let column_schema = column_to_schema(&column_def, "col", &None, &[], None).unwrap();
|
||||
|
||||
assert_eq!("col", column_schema.name);
|
||||
assert_eq!(
|
||||
@@ -1376,7 +1394,7 @@ mod tests {
|
||||
extensions: ColumnExtensions::default(),
|
||||
};
|
||||
|
||||
let column_schema = column_to_schema(&column_def, false, None).unwrap();
|
||||
let column_schema = column_to_schema(&column_def, "ts", &None, &[], None).unwrap();
|
||||
|
||||
assert_eq!("col2", column_schema.name);
|
||||
assert_eq!(ConcreteDataType::string_datatype(), column_schema.data_type);
|
||||
@@ -1410,7 +1428,9 @@ mod tests {
|
||||
|
||||
let column_schema = column_to_schema(
|
||||
&column,
|
||||
false,
|
||||
"ts",
|
||||
&None,
|
||||
&[],
|
||||
Some(&Timezone::from_tz_string("Asia/Shanghai").unwrap()),
|
||||
)
|
||||
.unwrap();
|
||||
@@ -1429,7 +1449,7 @@ mod tests {
|
||||
);
|
||||
|
||||
// without timezone
|
||||
let column_schema = column_to_schema(&column, false, None).unwrap();
|
||||
let column_schema = column_to_schema(&column, "ts", &None, &[], None).unwrap();
|
||||
|
||||
assert_eq!("col", column_schema.name);
|
||||
assert_eq!(
|
||||
@@ -1471,7 +1491,7 @@ mod tests {
|
||||
},
|
||||
};
|
||||
|
||||
let column_schema = column_to_schema(&column, false, None).unwrap();
|
||||
let column_schema = column_to_schema(&column, "ts", &None, &[], None).unwrap();
|
||||
assert_eq!("col", column_schema.name);
|
||||
assert_eq!(ConcreteDataType::string_datatype(), column_schema.data_type);
|
||||
let fulltext_options = column_schema.fulltext_options().unwrap().unwrap();
|
||||
|
||||
@@ -20,7 +20,7 @@ use itertools::Itertools;
|
||||
use sqlparser::ast::{ColumnOptionDef, DataType, Expr, Query};
|
||||
use sqlparser_derive::{Visit, VisitMut};
|
||||
|
||||
use crate::ast::{ColumnDef, Ident, ObjectName, TableConstraint, Value as SqlValue};
|
||||
use crate::ast::{ColumnDef, Ident, ObjectName, Value as SqlValue};
|
||||
use crate::error::{FulltextInvalidOptionSnafu, Result};
|
||||
use crate::statements::statement::Statement;
|
||||
use crate::statements::OptionMap;
|
||||
@@ -52,31 +52,34 @@ macro_rules! format_list_comma {
|
||||
}
|
||||
|
||||
fn format_table_constraint(constraints: &[TableConstraint]) -> String {
|
||||
constraints
|
||||
.iter()
|
||||
.map(|c| {
|
||||
if is_time_index(c) {
|
||||
let TableConstraint::Unique { columns, .. } = c else {
|
||||
unreachable!()
|
||||
};
|
||||
|
||||
format_indent!("{}TIME INDEX ({})", format_list_comma!(columns))
|
||||
} else {
|
||||
format_indent!(c)
|
||||
}
|
||||
})
|
||||
.join(LINE_SEP)
|
||||
constraints.iter().map(|c| format_indent!(c)).join(LINE_SEP)
|
||||
}
|
||||
|
||||
/// Time index name, used in table constraints.
|
||||
pub const TIME_INDEX: &str = "__time_index";
|
||||
/// Table constraint for create table statement.
///
/// Each variant is rendered back to its SQL clause by the `Display`
/// implementation of this type.
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
|
||||
pub enum TableConstraint {
|
||||
/// Primary key constraint.
///
/// Written as `PRIMARY KEY (col, ...)`; `columns` holds the listed columns.
|
||||
PrimaryKey { columns: Vec<Ident> },
|
||||
/// Time index constraint.
///
/// Written as `TIME INDEX (col)`; it names exactly one column.
|
||||
TimeIndex { column: Ident },
|
||||
/// Inverted index constraint.
///
/// Written as `INVERTED INDEX (col, ...)`; `columns` may be empty, which is
/// what `INVERTED INDEX()` parses to.
|
||||
InvertedIndex { columns: Vec<Ident> },
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn is_time_index(constraint: &TableConstraint) -> bool {
|
||||
matches!(constraint, TableConstraint::Unique {
|
||||
name: Some(name),
|
||||
..
|
||||
} if name.value == TIME_INDEX)
|
||||
/// Renders a table constraint as the SQL clause used inside `CREATE TABLE`.
impl Display for TableConstraint {
|
||||
/// Writes `PRIMARY KEY (...)`, `TIME INDEX (...)` or `INVERTED INDEX (...)`
/// depending on the variant.
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
// Multi-column variants format their column list with `format_list_comma!`.
TableConstraint::PrimaryKey { columns } => {
|
||||
write!(f, "PRIMARY KEY ({})", format_list_comma!(columns))
|
||||
}
|
||||
// The time index holds a single column, so it is written directly.
TableConstraint::TimeIndex { column } => {
|
||||
write!(f, "TIME INDEX ({})", column)
|
||||
}
|
||||
TableConstraint::InvertedIndex { columns } => {
|
||||
write!(f, "INVERTED INDEX ({})", format_list_comma!(columns))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)]
|
||||
|
||||
Reference in New Issue
Block a user