From f797de34979a5bd442d0fe6a699623e9b100a210 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Wed, 2 Apr 2025 17:15:54 +0800 Subject: [PATCH] feat: add backend field to fulltext options (#5806) * feat: add backend field to fulltext options Signed-off-by: Zhenchi * update proto Signed-off-by: Zhenchi * fix option conv Signed-off-by: Zhenchi * fix display Signed-off-by: Zhenchi * polish Signed-off-by: Zhenchi --------- Signed-off-by: Zhenchi --- Cargo.lock | 12 +++--- Cargo.toml | 3 +- src/api/src/v1/column_def.rs | 27 ++++++++---- src/common/grpc-expr/src/alter.rs | 14 +++++-- src/datatypes/src/schema.rs | 5 ++- src/datatypes/src/schema/column_schema.rs | 36 ++++++++++++++++ .../src/fulltext_index/create/bloom_filter.rs | 17 +++++++- src/index/src/fulltext_index/error.rs | 18 ++++++++ src/mito2/src/engine/alter_test.rs | 4 +- src/mito2/src/sst/index.rs | 3 ++ .../src/sst/index/fulltext_index/creator.rs | 41 +++++++++++++++---- src/operator/src/expr_helper.rs | 17 +++++--- src/sql/src/parsers/alter_parser.rs | 7 ++-- src/sql/src/parsers/utils.rs | 6 ++- src/sql/src/statements/alter.rs | 6 +-- src/store-api/src/metadata.rs | 5 ++- src/store-api/src/region_request.rs | 16 +++++--- src/table/src/metadata.rs | 7 +++- 18 files changed, 192 insertions(+), 52 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b36093fb60..54be2d472e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1053,7 +1053,7 @@ dependencies = [ "bitflags 2.9.0", "cexpr", "clang-sys", - "itertools 0.13.0", + "itertools 0.11.0", "proc-macro2", "quote", "regex", @@ -4689,7 +4689,7 @@ dependencies = [ [[package]] name = "greptime-proto" version = "0.1.0" -source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=103948cbce833e1a17ee7083f5ba79564d08d6ec#103948cbce833e1a17ee7083f5ba79564d08d6ec" +source = "git+https://github.com/GreptimeTeam/greptime-proto.git?branch=zhongzc%2Falter-fulltext-backend#b794184a4ce71e7fb7e1dfe17821c5a472a79588" dependencies = [ "prost 0.13.3", "serde", @@ -6252,7 +6252,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" dependencies = [ "cfg-if", - "windows-targets 0.52.6", + "windows-targets 0.48.5", ] [[package]] @@ -8885,7 +8885,7 @@ checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15" dependencies = [ "bytes", "heck 0.5.0", - "itertools 0.13.0", + "itertools 0.11.0", "log", "multimap", "once_cell", @@ -8931,7 +8931,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" dependencies = [ "anyhow", - "itertools 0.13.0", + "itertools 0.11.0", "proc-macro2", "quote", "syn 2.0.96", @@ -13554,7 +13554,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.48.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 62454b5987..6cce8055ca 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -130,7 +130,8 @@ etcd-client = "0.14" fst = "0.4.7" futures = "0.3" futures-util = "0.3" -greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "103948cbce833e1a17ee7083f5ba79564d08d6ec" } +greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", branch = "zhongzc/alter-fulltext-backend" } +# greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "103948cbce833e1a17ee7083f5ba79564d08d6ec" } hex = "0.4" http = "1" humantime = "2.1" diff --git a/src/api/src/v1/column_def.rs b/src/api/src/v1/column_def.rs index a119bbb3e1..619d6d6e64 100644 --- a/src/api/src/v1/column_def.rs +++ b/src/api/src/v1/column_def.rs @@ -15,10 +15,13 @@ use std::collections::HashMap; use datatypes::schema::{ - ColumnDefaultConstraint, ColumnSchema, FulltextAnalyzer, FulltextOptions, SkippingIndexOptions, - SkippingIndexType, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY, + ColumnDefaultConstraint, ColumnSchema, FulltextAnalyzer, FulltextBackend, FulltextOptions, + SkippingIndexOptions, SkippingIndexType, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY, + SKIPPING_INDEX_KEY, +}; +use greptime_proto::v1::{ + Analyzer, FulltextBackend as PbFulltextBackend, SkippingIndexType as PbSkippingIndexType, }; -use greptime_proto::v1::{Analyzer, SkippingIndexType as PbSkippingIndexType}; use snafu::ResultExt; use crate::error::{self, Result}; @@ -142,13 +145,21 @@ pub fn options_from_inverted() -> ColumnOptions { } /// Tries to construct a `FulltextAnalyzer` from the given analyzer. -pub fn as_fulltext_option(analyzer: Analyzer) -> FulltextAnalyzer { +pub fn as_fulltext_option_analyzer(analyzer: Analyzer) -> FulltextAnalyzer { match analyzer { Analyzer::English => FulltextAnalyzer::English, Analyzer::Chinese => FulltextAnalyzer::Chinese, } } +/// Tries to construct a `FulltextBackend` from the given backend. +pub fn as_fulltext_option_backend(backend: PbFulltextBackend) -> FulltextBackend { + match backend { + PbFulltextBackend::Bloom => FulltextBackend::Bloom, + PbFulltextBackend::Tantivy => FulltextBackend::Tantivy, + } +} + /// Tries to construct a `SkippingIndexType` from the given skipping index type. pub fn as_skipping_index_type(skipping_index_type: PbSkippingIndexType) -> SkippingIndexType { match skipping_index_type { @@ -160,7 +171,7 @@ pub fn as_skipping_index_type(skipping_index_type: PbSkippingIndexType) -> Skipp mod tests { use datatypes::data_type::ConcreteDataType; - use datatypes::schema::FulltextAnalyzer; + use datatypes::schema::{FulltextAnalyzer, FulltextBackend}; use super::*; use crate::v1::ColumnDataType; @@ -219,13 +230,14 @@ mod tests { enable: true, analyzer: FulltextAnalyzer::English, case_sensitive: false, + backend: FulltextBackend::Bloom, }) .unwrap(); schema.set_inverted_index(true); let options = options_from_column_schema(&schema).unwrap(); assert_eq!( options.options.get(FULLTEXT_GRPC_KEY).unwrap(), - "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false}" + "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\"}" ); assert_eq!( options.options.get(INVERTED_INDEX_GRPC_KEY).unwrap(), @@ -239,11 +251,12 @@ mod tests { enable: true, analyzer: FulltextAnalyzer::English, case_sensitive: false, + backend: FulltextBackend::Bloom, }; let options = options_from_fulltext(&fulltext).unwrap().unwrap(); assert_eq!( options.options.get(FULLTEXT_GRPC_KEY).unwrap(), - "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false}" + "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\"}" ); } diff --git a/src/common/grpc-expr/src/alter.rs b/src/common/grpc-expr/src/alter.rs index 85f8e51d37..5afa8e3487 100644 --- a/src/common/grpc-expr/src/alter.rs +++ b/src/common/grpc-expr/src/alter.rs @@ -15,11 +15,13 @@ use api::helper::ColumnDataTypeWrapper; use api::v1::add_column_location::LocationType; use api::v1::alter_table_expr::Kind; -use api::v1::column_def::{as_fulltext_option, as_skipping_index_type}; +use api::v1::column_def::{ + as_fulltext_option_analyzer, as_fulltext_option_backend, as_skipping_index_type, +}; use api::v1::{ column_def, AddColumnLocation as Location, AlterTableExpr, Analyzer, CreateTableExpr, - DropColumns, ModifyColumnTypes, RenameTable, SemanticType, - SkippingIndexType as PbSkippingIndexType, + DropColumns, FulltextBackend as PbFulltextBackend, ModifyColumnTypes, RenameTable, + SemanticType, SkippingIndexType as PbSkippingIndexType, }; use common_query::AddColumnLocation; use datatypes::schema::{ColumnSchema, FulltextOptions, RawSchema, SkippingIndexOptions}; @@ -126,11 +128,15 @@ pub fn alter_expr_to_request(table_id: TableId, expr: AlterTableExpr) -> Result< column_name: f.column_name.clone(), options: FulltextOptions { enable: f.enable, - analyzer: as_fulltext_option( + analyzer: as_fulltext_option_analyzer( Analyzer::try_from(f.analyzer) .context(InvalidSetFulltextOptionRequestSnafu)?, ), case_sensitive: f.case_sensitive, + backend: as_fulltext_option_backend( + PbFulltextBackend::try_from(f.backend) + .context(InvalidSetFulltextOptionRequestSnafu)?, + ), }, }, }, diff --git a/src/datatypes/src/schema.rs b/src/datatypes/src/schema.rs index 19f3c6e55f..fbafdcca7b 100644 --- a/src/datatypes/src/schema.rs +++ b/src/datatypes/src/schema.rs @@ -28,8 +28,9 @@ use snafu::{ensure, ResultExt}; use crate::error::{self, DuplicateColumnSnafu, Error, ProjectArrowSchemaSnafu, Result}; use crate::prelude::ConcreteDataType; pub use crate::schema::column_schema::{ - ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, SkippingIndexOptions, - SkippingIndexType, COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE, COLUMN_FULLTEXT_OPT_KEY_ANALYZER, + ColumnSchema, FulltextAnalyzer, FulltextBackend, FulltextOptions, Metadata, + SkippingIndexOptions, SkippingIndexType, COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE, + COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_BACKEND, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY, TIME_INDEX_KEY, diff --git a/src/datatypes/src/schema/column_schema.rs b/src/datatypes/src/schema/column_schema.rs index 24f9b8ac3f..9a975c4008 100644 --- a/src/datatypes/src/schema/column_schema.rs +++ b/src/datatypes/src/schema/column_schema.rs @@ -46,6 +46,7 @@ pub const SKIPPING_INDEX_KEY: &str = "greptime:skipping_index"; pub const COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE: &str = "enable"; pub const COLUMN_FULLTEXT_OPT_KEY_ANALYZER: &str = "analyzer"; pub const COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE: &str = "case_sensitive"; +pub const COLUMN_FULLTEXT_OPT_KEY_BACKEND: &str = "backend"; /// Keys used in SKIPPING index options pub const COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY: &str = "granularity"; @@ -514,6 +515,9 @@ pub struct FulltextOptions { /// Whether the fulltext index is case-sensitive. #[serde(default)] pub case_sensitive: bool, + /// The fulltext backend to use. + #[serde(default)] + pub backend: FulltextBackend, } impl fmt::Display for FulltextOptions { @@ -522,11 +526,30 @@ impl fmt::Display for FulltextOptions { if self.enable { write!(f, ", analyzer={}", self.analyzer)?; write!(f, ", case_sensitive={}", self.case_sensitive)?; + write!(f, ", backend={}", self.backend)?; } Ok(()) } } +/// The backend of the fulltext index. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)] +#[serde(rename_all = "kebab-case")] +pub enum FulltextBackend { + #[default] + Tantivy, + Bloom, // TODO(zhongzc): when bloom is ready, use it as default +} + +impl fmt::Display for FulltextBackend { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + FulltextBackend::Tantivy => write!(f, "tantivy"), + FulltextBackend::Bloom => write!(f, "bloom"), + } + } +} + impl TryFrom> for FulltextOptions { type Error = Error; @@ -575,6 +598,19 @@ impl TryFrom> for FulltextOptions { } } + if let Some(backend) = options.get(COLUMN_FULLTEXT_OPT_KEY_BACKEND) { + match backend.to_ascii_lowercase().as_str() { + "bloom" => fulltext_options.backend = FulltextBackend::Bloom, + "tantivy" => fulltext_options.backend = FulltextBackend::Tantivy, + _ => { + return InvalidFulltextOptionSnafu { + msg: format!("{backend}, expected: 'bloom' | 'tantivy'"), + } + .fail(); + } + } + } + Ok(fulltext_options) } } diff --git a/src/index/src/fulltext_index/create/bloom_filter.rs b/src/index/src/fulltext_index/create/bloom_filter.rs index 1e734be9e2..970f89d65d 100644 --- a/src/index/src/fulltext_index/create/bloom_filter.rs +++ b/src/index/src/fulltext_index/create/bloom_filter.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashMap; use std::sync::atomic::AtomicUsize; use std::sync::Arc; @@ -26,16 +27,20 @@ use crate::external_provider::ExternalTempFileProvider; use crate::fulltext_index::create::FulltextIndexCreator; use crate::fulltext_index::error::{ AbortedSnafu, BiErrorsSnafu, BloomFilterFinishSnafu, ExternalSnafu, PuffinAddBlobSnafu, Result, + SerializeToJsonSnafu, }; use crate::fulltext_index::tokenizer::{Analyzer, ChineseTokenizer, EnglishTokenizer}; use crate::fulltext_index::Config; const PIPE_BUFFER_SIZE_FOR_SENDING_BLOB: usize = 8192; +pub const KEY_FULLTEXT_CONFIG: &str = "fulltext_config"; + /// `BloomFilterFulltextIndexCreator` is for creating a fulltext index using a bloom filter. pub struct BloomFilterFulltextIndexCreator { inner: Option, analyzer: Analyzer, + config: Config, } impl BloomFilterFulltextIndexCreator { @@ -61,6 +66,7 @@ impl BloomFilterFulltextIndexCreator { Self { inner: Some(inner), analyzer, + config, } } } @@ -89,10 +95,17 @@ impl FulltextIndexCreator for BloomFilterFulltextIndexCreator { let (tx, rx) = tokio::io::duplex(PIPE_BUFFER_SIZE_FOR_SENDING_BLOB); + let property_key = KEY_FULLTEXT_CONFIG.to_string(); + let property_value = serde_json::to_string(&self.config).context(SerializeToJsonSnafu)?; + let (index_finish, puffin_add_blob) = futures::join!( creator.finish(tx.compat_write()), - // TODO(zhongzc): add fulltext config properties - puffin_writer.put_blob(blob_key, rx.compat(), put_options, Default::default()) + puffin_writer.put_blob( + blob_key, + rx.compat(), + put_options, + HashMap::from([(property_key, property_value)]), + ) ); match ( diff --git a/src/index/src/fulltext_index/error.rs b/src/index/src/fulltext_index/error.rs index 6cf7f74943..710905c818 100644 --- a/src/index/src/fulltext_index/error.rs +++ b/src/index/src/fulltext_index/error.rs @@ -104,6 +104,22 @@ pub enum Error { #[snafu(implicit)] location: Location, }, + + #[snafu(display("Failed to serialize to json"))] + SerializeToJson { + #[snafu(source)] + error: serde_json::error::Error, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Failed to deserialize from json"))] + DeserializeFromJson { + #[snafu(source)] + error: serde_json::error::Error, + #[snafu(implicit)] + location: Location, + }, } impl ErrorExt for Error { @@ -122,6 +138,8 @@ impl ErrorExt for Error { PuffinAddBlob { source, .. } => source.status_code(), External { source, .. } => source.status_code(), + + SerializeToJson { .. } | DeserializeFromJson { .. } => StatusCode::Internal, } } diff --git a/src/mito2/src/engine/alter_test.rs b/src/mito2/src/engine/alter_test.rs index cb6d690a0b..1f0de1734a 100644 --- a/src/mito2/src/engine/alter_test.rs +++ b/src/mito2/src/engine/alter_test.rs @@ -22,7 +22,7 @@ use common_error::ext::ErrorExt; use common_error::status_code::StatusCode; use common_recordbatch::RecordBatches; use datatypes::prelude::ConcreteDataType; -use datatypes::schema::{ColumnSchema, FulltextAnalyzer, FulltextOptions}; +use datatypes::schema::{ColumnSchema, FulltextAnalyzer, FulltextBackend, FulltextOptions}; use store_api::metadata::ColumnMetadata; use store_api::region_engine::{RegionEngine, RegionRole}; use store_api::region_request::{ @@ -90,6 +90,7 @@ fn alter_column_fulltext_options() -> RegionAlterRequest { enable: true, analyzer: FulltextAnalyzer::English, case_sensitive: false, + backend: FulltextBackend::Bloom, }, }, }, @@ -557,6 +558,7 @@ async fn test_alter_column_fulltext_options() { enable: true, analyzer: FulltextAnalyzer::English, case_sensitive: false, + backend: FulltextBackend::Bloom, }; let check_fulltext_options = |engine: &MitoEngine, expected: &FulltextOptions| { let current_fulltext_options = engine diff --git a/src/mito2/src/sst/index.rs b/src/mito2/src/sst/index.rs index 77a7345317..3abeee26ff 100644 --- a/src/mito2/src/sst/index.rs +++ b/src/mito2/src/sst/index.rs @@ -46,6 +46,8 @@ pub(crate) const TYPE_INVERTED_INDEX: &str = "inverted_index"; pub(crate) const TYPE_FULLTEXT_INDEX: &str = "fulltext_index"; pub(crate) const TYPE_BLOOM_FILTER_INDEX: &str = "bloom_filter_index"; +const DEFAULT_FULLTEXT_BLOOM_ROW_GRANULARITY: usize = 8096; + /// Output of the index creation. #[derive(Debug, Clone, Default)] pub struct IndexOutput { @@ -292,6 +294,7 @@ impl IndexerBuilderImpl { &self.intermediate_manager, &self.metadata, self.fulltext_index_config.compress, + DEFAULT_FULLTEXT_BLOOM_ROW_GRANULARITY, mem_limit, ) .await; diff --git a/src/mito2/src/sst/index/fulltext_index/creator.rs b/src/mito2/src/sst/index/fulltext_index/creator.rs index 1a88c1eafa..bd35770eef 100644 --- a/src/mito2/src/sst/index/fulltext_index/creator.rs +++ b/src/mito2/src/sst/index/fulltext_index/creator.rs @@ -13,9 +13,11 @@ // limitations under the License. use std::collections::HashMap; +use std::sync::atomic::AtomicUsize; +use std::sync::Arc; use common_telemetry::warn; -use datatypes::schema::FulltextAnalyzer; +use datatypes::schema::{FulltextAnalyzer, FulltextBackend}; use index::fulltext_index::create::{ BloomFilterFulltextIndexCreator, FulltextIndexCreator, TantivyFulltextIndexCreator, }; @@ -33,7 +35,9 @@ use crate::error::{ use crate::read::Batch; use crate::sst::file::FileId; use crate::sst::index::fulltext_index::{INDEX_BLOB_TYPE_BLOOM, INDEX_BLOB_TYPE_TANTIVY}; -use crate::sst::index::intermediate::IntermediateManager; +use crate::sst::index::intermediate::{ + IntermediateLocation, IntermediateManager, TempFileProvider, +}; use crate::sst::index::puffin_manager::SstPuffinWriter; use crate::sst::index::statistics::{ByteCount, RowCount, Statistics}; use crate::sst::index::TYPE_FULLTEXT_INDEX; @@ -56,6 +60,7 @@ impl FulltextIndexer { intermediate_manager: &IntermediateManager, metadata: &RegionMetadataRef, compress: bool, + bloom_row_granularity: usize, mem_limit: usize, ) -> Result> { let mut creators = HashMap::new(); @@ -86,11 +91,29 @@ impl FulltextIndexer { case_sensitive: options.case_sensitive, }; - // TODO(zhongzc): according to fulltext options, choose in the Tantivy flavor or Bloom Filter flavor. - let creator = TantivyFulltextIndexCreator::new(&intm_path, config, mem_limit) - .await - .context(CreateFulltextCreatorSnafu)?; - let inner = AltFulltextCreator::Tantivy(creator); + let inner = match options.backend { + FulltextBackend::Tantivy => { + let creator = TantivyFulltextIndexCreator::new(&intm_path, config, mem_limit) + .await + .context(CreateFulltextCreatorSnafu)?; + AltFulltextCreator::Tantivy(creator) + } + FulltextBackend::Bloom => { + let temp_file_provider = Arc::new(TempFileProvider::new( + IntermediateLocation::new(&metadata.region_id, sst_file_id), + intermediate_manager.clone(), + )); + let global_memory_usage = Arc::new(AtomicUsize::new(0)); + let creator = BloomFilterFulltextIndexCreator::new( + config, + bloom_row_granularity, + temp_file_provider, + global_memory_usage, + Some(mem_limit), + ); + AltFulltextCreator::Bloom(creator) + } + }; creators.insert( column_id, @@ -377,6 +400,7 @@ mod tests { enable: true, analyzer: FulltextAnalyzer::English, case_sensitive: true, + backend: FulltextBackend::Tantivy, }) .unwrap(), semantic_type: SemanticType::Field, @@ -392,6 +416,7 @@ mod tests { enable: true, analyzer: FulltextAnalyzer::English, case_sensitive: false, + backend: FulltextBackend::Tantivy, }) .unwrap(), semantic_type: SemanticType::Field, @@ -407,6 +432,7 @@ mod tests { enable: true, analyzer: FulltextAnalyzer::Chinese, case_sensitive: false, + backend: FulltextBackend::Tantivy, }) .unwrap(), semantic_type: SemanticType::Field, @@ -504,6 +530,7 @@ mod tests { &intm_mgr, ®ion_metadata, true, + 8096, 1024, ) .await diff --git a/src/operator/src/expr_helper.rs b/src/operator/src/expr_helper.rs index 3c90f4d275..7910405f77 100644 --- a/src/operator/src/expr_helper.rs +++ b/src/operator/src/expr_helper.rs @@ -21,16 +21,19 @@ use api::v1::column_def::options_from_column_schema; use api::v1::{ set_index, unset_index, AddColumn, AddColumns, AlterDatabaseExpr, AlterTableExpr, Analyzer, ColumnDataType, ColumnDataTypeExtension, CreateFlowExpr, CreateTableExpr, CreateViewExpr, - DropColumn, DropColumns, ExpireAfter, ModifyColumnType, ModifyColumnTypes, RenameTable, - SemanticType, SetDatabaseOptions, SetFulltext, SetIndex, SetInverted, SetSkipping, - SetTableOptions, SkippingIndexType as PbSkippingIndexType, TableName, UnsetDatabaseOptions, - UnsetFulltext, UnsetIndex, UnsetInverted, UnsetSkipping, UnsetTableOptions, + DropColumn, DropColumns, ExpireAfter, FulltextBackend as PbFulltextBackend, ModifyColumnType, + ModifyColumnTypes, RenameTable, SemanticType, SetDatabaseOptions, SetFulltext, SetIndex, + SetInverted, SetSkipping, SetTableOptions, SkippingIndexType as PbSkippingIndexType, TableName, + UnsetDatabaseOptions, UnsetFulltext, UnsetIndex, UnsetInverted, UnsetSkipping, + UnsetTableOptions, }; use common_error::ext::BoxedError; use common_grpc_expr::util::ColumnExpr; use common_time::Timezone; use datafusion::sql::planner::object_name_to_table_reference; -use datatypes::schema::{ColumnSchema, FulltextAnalyzer, Schema, SkippingIndexType, COMMENT_KEY}; +use datatypes::schema::{ + ColumnSchema, FulltextAnalyzer, FulltextBackend, Schema, SkippingIndexType, COMMENT_KEY, +}; use file_engine::FileOptions; use query::sql::{ check_file_to_table_schema_compatibility, file_column_schemas_to_table, @@ -581,6 +584,10 @@ pub(crate) fn to_alter_table_expr( FulltextAnalyzer::Chinese => Analyzer::Chinese.into(), }, case_sensitive: options.case_sensitive, + backend: match options.backend { + FulltextBackend::Bloom => PbFulltextBackend::Bloom.into(), + FulltextBackend::Tantivy => PbFulltextBackend::Tantivy.into(), + }, })), }, sql::statements::alter::SetIndexOperation::Inverted { column_name } => SetIndex { diff --git a/src/sql/src/parsers/alter_parser.rs b/src/sql/src/parsers/alter_parser.rs index 0411292dae..1569ede6ed 100644 --- a/src/sql/src/parsers/alter_parser.rs +++ b/src/sql/src/parsers/alter_parser.rs @@ -444,7 +444,7 @@ mod tests { use std::assert_matches::assert_matches; use common_error::ext::ErrorExt; - use datatypes::schema::{FulltextAnalyzer, FulltextOptions}; + use datatypes::schema::{FulltextAnalyzer, FulltextBackend, FulltextOptions}; use sqlparser::ast::{ColumnDef, ColumnOption, ColumnOptionDef, DataType}; use super::*; @@ -958,7 +958,7 @@ mod tests { #[test] fn test_parse_alter_column_fulltext() { - let sql = "ALTER TABLE test_table MODIFY COLUMN a SET FULLTEXT INDEX WITH(analyzer='English',case_sensitive='false')"; + let sql = "ALTER TABLE test_table MODIFY COLUMN a SET FULLTEXT INDEX WITH(analyzer='English',case_sensitive='false',backend='bloom')"; let mut result = ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) .unwrap(); @@ -984,7 +984,8 @@ mod tests { FulltextOptions { enable: true, analyzer: FulltextAnalyzer::English, - case_sensitive: false + case_sensitive: false, + backend: FulltextBackend::Bloom, }, *options ); diff --git a/src/sql/src/parsers/utils.rs b/src/sql/src/parsers/utils.rs index ef6a3283eb..c9c0e6fa0a 100644 --- a/src/sql/src/parsers/utils.rs +++ b/src/sql/src/parsers/utils.rs @@ -28,8 +28,9 @@ use datafusion_sql::planner::{ContextProvider, SqlToRel}; use datafusion_sql::TableReference; use datatypes::arrow::datatypes::DataType; use datatypes::schema::{ - COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, - COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, + COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_BACKEND, + COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, + COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, }; use snafu::ResultExt; @@ -124,6 +125,7 @@ pub fn validate_column_fulltext_create_option(key: &str) -> bool { [ COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, + COLUMN_FULLTEXT_OPT_KEY_BACKEND, ] .contains(&key) } diff --git a/src/sql/src/statements/alter.rs b/src/sql/src/statements/alter.rs index 4f271efc40..5aec1cfb47 100644 --- a/src/sql/src/statements/alter.rs +++ b/src/sql/src/statements/alter.rs @@ -181,7 +181,7 @@ impl Display for AlterTableOperation { column_name, options, } => { - write!(f, "MODIFY COLUMN {column_name} SET FULLTEXT INDEX WITH(analyzer={0}, case_sensitive={1})", options.analyzer, options.case_sensitive) + write!(f, "MODIFY COLUMN {column_name} SET FULLTEXT INDEX WITH(analyzer={0}, case_sensitive={1}, backend={2})", options.analyzer, options.case_sensitive, options.backend) } SetIndexOperation::Inverted { column_name } => { write!(f, "MODIFY COLUMN {column_name} SET INVERTED INDEX") @@ -425,7 +425,7 @@ ALTER TABLE monitor RENAME monitor_new"#, } } - let sql = "ALTER TABLE monitor MODIFY COLUMN a SET FULLTEXT INDEX WITH(analyzer='English',case_sensitive='false')"; + let sql = "ALTER TABLE monitor MODIFY COLUMN a SET FULLTEXT INDEX WITH(analyzer='English',case_sensitive='false',backend='bloom')"; let stmts = ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) .unwrap(); @@ -437,7 +437,7 @@ ALTER TABLE monitor RENAME monitor_new"#, let new_sql = format!("\n{}", set); assert_eq!( r#" -ALTER TABLE monitor MODIFY COLUMN a SET FULLTEXT INDEX WITH(analyzer=English, case_sensitive=false)"#, +ALTER TABLE monitor MODIFY COLUMN a SET FULLTEXT INDEX WITH(analyzer=English, case_sensitive=false, backend=bloom)"#, &new_sql ); } diff --git a/src/store-api/src/metadata.rs b/src/store-api/src/metadata.rs index 41876d0705..de2a31feab 100644 --- a/src/store-api/src/metadata.rs +++ b/src/store-api/src/metadata.rs @@ -1030,7 +1030,7 @@ fn unset_column_fulltext_options( #[cfg(test)] mod test { use datatypes::prelude::ConcreteDataType; - use datatypes::schema::ColumnSchema; + use datatypes::schema::{ColumnSchema, FulltextAnalyzer, FulltextBackend}; use super::*; @@ -1455,8 +1455,9 @@ mod test { column_name: "b".to_string(), options: FulltextOptions { enable: true, - analyzer: datatypes::schema::FulltextAnalyzer::Chinese, + analyzer: FulltextAnalyzer::Chinese, case_sensitive: true, + backend: FulltextBackend::Bloom, }, }, }) diff --git a/src/store-api/src/region_request.rs b/src/store-api/src/region_request.rs index b7f81c28c0..1df363da0d 100644 --- a/src/store-api/src/region_request.rs +++ b/src/store-api/src/region_request.rs @@ -17,15 +17,17 @@ use std::fmt::{self, Display}; use api::helper::ColumnDataTypeWrapper; use api::v1::add_column_location::LocationType; -use api::v1::column_def::{as_fulltext_option, as_skipping_index_type}; +use api::v1::column_def::{ + as_fulltext_option_analyzer, as_fulltext_option_backend, as_skipping_index_type, +}; use api::v1::region::{ alter_request, compact_request, region_request, AlterRequest, AlterRequests, CloseRequest, CompactRequest, CreateRequest, CreateRequests, DeleteRequests, DropRequest, DropRequests, FlushRequest, InsertRequests, OpenRequest, TruncateRequest, }; use api::v1::{ - self, set_index, Analyzer, Option as PbOption, Rows, SemanticType, - SkippingIndexType as PbSkippingIndexType, WriteHint, + self, set_index, Analyzer, FulltextBackend as PbFulltextBackend, Option as PbOption, Rows, + SemanticType, SkippingIndexType as PbSkippingIndexType, WriteHint, }; pub use common_base::AffectedRows; use common_time::TimeToLive; @@ -729,10 +731,13 @@ impl TryFrom for AlterKind { column_name: x.column_name.clone(), options: FulltextOptions { enable: x.enable, - analyzer: as_fulltext_option( + analyzer: as_fulltext_option_analyzer( Analyzer::try_from(x.analyzer).context(DecodeProtoSnafu)?, ), case_sensitive: x.case_sensitive, + backend: as_fulltext_option_backend( + PbFulltextBackend::try_from(x.backend).context(DecodeProtoSnafu)?, + ), }, }, }, @@ -1149,7 +1154,7 @@ mod tests { use api::v1::region::RegionColumnDef; use api::v1::{ColumnDataType, ColumnDef}; use datatypes::prelude::ConcreteDataType; - use datatypes::schema::{ColumnSchema, FulltextAnalyzer}; + use datatypes::schema::{ColumnSchema, FulltextAnalyzer, FulltextBackend}; use super::*; use crate::metadata::RegionMetadataBuilder; @@ -1631,6 +1636,7 @@ mod tests { enable: true, analyzer: FulltextAnalyzer::Chinese, case_sensitive: false, + backend: FulltextBackend::Bloom, }, }, }; diff --git a/src/table/src/metadata.rs b/src/table/src/metadata.rs index 4725652f68..a457afe107 100644 --- a/src/table/src/metadata.rs +++ b/src/table/src/metadata.rs @@ -1233,7 +1233,9 @@ mod tests { use common_error::ext::ErrorExt; use common_error::status_code::StatusCode; use datatypes::data_type::ConcreteDataType; - use datatypes::schema::{ColumnSchema, Schema, SchemaBuilder}; + use datatypes::schema::{ + ColumnSchema, FulltextAnalyzer, FulltextBackend, Schema, SchemaBuilder, + }; use super::*; @@ -1806,8 +1808,9 @@ mod tests { column_name: "my_tag_first".to_string(), options: FulltextOptions { enable: true, - analyzer: datatypes::schema::FulltextAnalyzer::Chinese, + analyzer: FulltextAnalyzer::Chinese, case_sensitive: true, + backend: FulltextBackend::Bloom, }, }, };