diff --git a/Cargo.lock b/Cargo.lock index 3769e1913a..6ad48467cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5144,7 +5144,7 @@ dependencies = [ [[package]] name = "greptime-proto" version = "0.1.0" -source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=82fe5c6282f623c185b86f03e898ee8952e50cf9#82fe5c6282f623c185b86f03e898ee8952e50cf9" +source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=96c733f8472284d3c83a4c011dc6de9cf830c353#96c733f8472284d3c83a4c011dc6de9cf830c353" dependencies = [ "prost 0.13.5", "serde", @@ -9568,7 +9568,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" dependencies = [ "heck 0.5.0", - "itertools 0.14.0", + "itertools 0.11.0", "log", "multimap", "once_cell", @@ -9614,7 +9614,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools 0.11.0", "proc-macro2", "quote", "syn 2.0.100", @@ -14183,7 +14183,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.48.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 5f17e7f722..3a25536e82 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -134,7 +134,7 @@ etcd-client = "0.14" fst = "0.4.7" futures = "0.3" futures-util = "0.3" -greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "82fe5c6282f623c185b86f03e898ee8952e50cf9" } +greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "96c733f8472284d3c83a4c011dc6de9cf830c353" } hex = "0.4" http = "1" humantime = "2.1" diff --git a/src/api/src/v1/column_def.rs b/src/api/src/v1/column_def.rs index 619d6d6e64..316d5342db 100644 --- 
a/src/api/src/v1/column_def.rs +++ b/src/api/src/v1/column_def.rs @@ -226,18 +226,20 @@ mod tests { assert!(options.is_none()); let mut schema = ColumnSchema::new("test", ConcreteDataType::string_datatype(), true) - .with_fulltext_options(FulltextOptions { - enable: true, - analyzer: FulltextAnalyzer::English, - case_sensitive: false, - backend: FulltextBackend::Bloom, - }) + .with_fulltext_options(FulltextOptions::new_unchecked( + true, + FulltextAnalyzer::English, + false, + FulltextBackend::Bloom, + 10240, + 0.01, + )) .unwrap(); schema.set_inverted_index(true); let options = options_from_column_schema(&schema).unwrap(); assert_eq!( options.options.get(FULLTEXT_GRPC_KEY).unwrap(), - "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\"}" + "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\",\"granularity\":10240,\"false-positive-rate-in-10000\":100}" ); assert_eq!( options.options.get(INVERTED_INDEX_GRPC_KEY).unwrap(), @@ -247,16 +249,18 @@ mod tests { #[test] fn test_options_with_fulltext() { - let fulltext = FulltextOptions { - enable: true, - analyzer: FulltextAnalyzer::English, - case_sensitive: false, - backend: FulltextBackend::Bloom, - }; + let fulltext = FulltextOptions::new_unchecked( + true, + FulltextAnalyzer::English, + false, + FulltextBackend::Bloom, + 10240, + 0.01, + ); let options = options_from_fulltext(&fulltext).unwrap().unwrap(); assert_eq!( options.options.get(FULLTEXT_GRPC_KEY).unwrap(), - "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\"}" + "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\",\"granularity\":10240,\"false-positive-rate-in-10000\":100}" ); } diff --git a/src/common/grpc-expr/src/alter.rs b/src/common/grpc-expr/src/alter.rs index 5688f3c989..ee80d9551e 100644 --- a/src/common/grpc-expr/src/alter.rs +++ b/src/common/grpc-expr/src/alter.rs @@ -34,7 +34,7 @@ use 
table::requests::{ }; use crate::error::{ - InvalidColumnDefSnafu, InvalidSetFulltextOptionRequestSnafu, + InvalidColumnDefSnafu, InvalidIndexOptionSnafu, InvalidSetFulltextOptionRequestSnafu, InvalidSetSkippingIndexOptionRequestSnafu, InvalidSetTableOptionRequestSnafu, InvalidUnsetTableOptionRequestSnafu, MissingAlterIndexOptionSnafu, MissingFieldSnafu, MissingTimestampColumnSnafu, Result, UnknownLocationTypeSnafu, @@ -126,18 +126,21 @@ pub fn alter_expr_to_request(table_id: TableId, expr: AlterTableExpr) -> Result< api::v1::set_index::Options::Fulltext(f) => AlterKind::SetIndex { options: SetIndexOptions::Fulltext { column_name: f.column_name.clone(), - options: FulltextOptions { - enable: f.enable, - analyzer: as_fulltext_option_analyzer( + options: FulltextOptions::new( + f.enable, + as_fulltext_option_analyzer( Analyzer::try_from(f.analyzer) .context(InvalidSetFulltextOptionRequestSnafu)?, ), - case_sensitive: f.case_sensitive, - backend: as_fulltext_option_backend( + f.case_sensitive, + as_fulltext_option_backend( PbFulltextBackend::try_from(f.backend) .context(InvalidSetFulltextOptionRequestSnafu)?, ), - }, + f.granularity as u32, + f.false_positive_rate, + ) + .context(InvalidIndexOptionSnafu)?, }, }, api::v1::set_index::Options::Inverted(i) => AlterKind::SetIndex { @@ -148,13 +151,15 @@ pub fn alter_expr_to_request(table_id: TableId, expr: AlterTableExpr) -> Result< api::v1::set_index::Options::Skipping(s) => AlterKind::SetIndex { options: SetIndexOptions::Skipping { column_name: s.column_name, - options: SkippingIndexOptions { - granularity: s.granularity as u32, - index_type: as_skipping_index_type( + options: SkippingIndexOptions::new( + s.granularity as u32, + s.false_positive_rate, + as_skipping_index_type( PbSkippingIndexType::try_from(s.skipping_index_type) .context(InvalidSetSkippingIndexOptionRequestSnafu)?, ), - }, + ) + .context(InvalidIndexOptionSnafu)?, }, }, }, diff --git a/src/common/grpc-expr/src/error.rs b/src/common/grpc-expr/src/error.rs 
index 02ce391d97..cd3fe5af30 100644 --- a/src/common/grpc-expr/src/error.rs +++ b/src/common/grpc-expr/src/error.rs @@ -153,6 +153,14 @@ pub enum Error { #[snafu(implicit)] location: Location, }, + + #[snafu(display("Invalid index option"))] + InvalidIndexOption { + #[snafu(implicit)] + location: Location, + #[snafu(source)] + error: datatypes::error::Error, + }, } pub type Result = std::result::Result; @@ -180,7 +188,8 @@ impl ErrorExt for Error { | Error::InvalidUnsetTableOptionRequest { .. } | Error::InvalidSetFulltextOptionRequest { .. } | Error::InvalidSetSkippingIndexOptionRequest { .. } - | Error::MissingAlterIndexOption { .. } => StatusCode::InvalidArguments, + | Error::MissingAlterIndexOption { .. } + | Error::InvalidIndexOption { .. } => StatusCode::InvalidArguments, } } diff --git a/src/datatypes/src/schema.rs b/src/datatypes/src/schema.rs index fbafdcca7b..1f27eb01c2 100644 --- a/src/datatypes/src/schema.rs +++ b/src/datatypes/src/schema.rs @@ -31,9 +31,10 @@ pub use crate::schema::column_schema::{ ColumnSchema, FulltextAnalyzer, FulltextBackend, FulltextOptions, Metadata, SkippingIndexOptions, SkippingIndexType, COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE, COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_BACKEND, - COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, - COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY, - SKIPPING_INDEX_KEY, TIME_INDEX_KEY, + COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE, + COLUMN_FULLTEXT_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE, + COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, + FULLTEXT_KEY, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY, TIME_INDEX_KEY, }; pub use crate::schema::constraint::ColumnDefaultConstraint; pub use crate::schema::raw::RawSchema; diff --git a/src/datatypes/src/schema/column_schema.rs 
b/src/datatypes/src/schema/column_schema.rs index 376c9e6de0..cd6a05a83f 100644 --- a/src/datatypes/src/schema/column_schema.rs +++ b/src/datatypes/src/schema/column_schema.rs @@ -47,13 +47,18 @@ pub const COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE: &str = "enable"; pub const COLUMN_FULLTEXT_OPT_KEY_ANALYZER: &str = "analyzer"; pub const COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE: &str = "case_sensitive"; pub const COLUMN_FULLTEXT_OPT_KEY_BACKEND: &str = "backend"; +pub const COLUMN_FULLTEXT_OPT_KEY_GRANULARITY: &str = "granularity"; +pub const COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE: &str = "false_positive_rate"; /// Keys used in SKIPPING index options pub const COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY: &str = "granularity"; +pub const COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE: &str = "false_positive_rate"; pub const COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE: &str = "type"; pub const DEFAULT_GRANULARITY: u32 = 10240; +pub const DEFAULT_FALSE_POSITIVE_RATE: f64 = 0.01; + /// Schema of a column, used as an immutable struct. #[derive(Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct ColumnSchema { @@ -504,7 +509,7 @@ impl TryFrom<&ColumnSchema> for Field { } /// Fulltext options for a column. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)] #[serde(rename_all = "kebab-case")] pub struct FulltextOptions { /// Whether the fulltext index is enabled. @@ -518,6 +523,92 @@ pub struct FulltextOptions { /// The fulltext backend to use. 
#[serde(default)] pub backend: FulltextBackend, + /// The granularity of the fulltext index (for bloom backend only) + #[serde(default = "fulltext_options_default_granularity")] + pub granularity: u32, + /// The false positive rate of the fulltext index (for bloom backend only) + #[serde(default = "fulltext_options_default_false_positive_rate_in_10000")] + pub false_positive_rate_in_10000: u32, +} + +fn fulltext_options_default_granularity() -> u32 { + DEFAULT_GRANULARITY +} + +fn fulltext_options_default_false_positive_rate_in_10000() -> u32 { + (DEFAULT_FALSE_POSITIVE_RATE * 10000.0) as u32 +} + +impl FulltextOptions { + /// Creates a new fulltext options. + pub fn new( + enable: bool, + analyzer: FulltextAnalyzer, + case_sensitive: bool, + backend: FulltextBackend, + granularity: u32, + false_positive_rate: f64, + ) -> Result { + ensure!( + 0.0 < false_positive_rate && false_positive_rate <= 1.0, + error::InvalidFulltextOptionSnafu { + msg: format!( + "Invalid false positive rate: {false_positive_rate}, expected: 0.0 < rate <= 1.0" + ), + } + ); + ensure!( + granularity > 0, + error::InvalidFulltextOptionSnafu { + msg: format!("Invalid granularity: {granularity}, expected: positive integer"), + } + ); + Ok(Self::new_unchecked( + enable, + analyzer, + case_sensitive, + backend, + granularity, + false_positive_rate, + )) + } + + /// Creates a new fulltext options without checking `false_positive_rate` and `granularity`. + pub fn new_unchecked( + enable: bool, + analyzer: FulltextAnalyzer, + case_sensitive: bool, + backend: FulltextBackend, + granularity: u32, + false_positive_rate: f64, + ) -> Self { + Self { + enable, + analyzer, + case_sensitive, + backend, + granularity, + false_positive_rate_in_10000: (false_positive_rate * 10000.0) as u32, + } + } + + /// Gets the false positive rate. 
+ pub fn false_positive_rate(&self) -> f64 { + self.false_positive_rate_in_10000 as f64 / 10000.0 + } +} + +impl Default for FulltextOptions { + fn default() -> Self { + Self::new_unchecked( + false, + FulltextAnalyzer::default(), + false, + FulltextBackend::default(), + DEFAULT_GRANULARITY, + DEFAULT_FALSE_POSITIVE_RATE, + ) + } } impl fmt::Display for FulltextOptions { @@ -527,6 +618,10 @@ impl fmt::Display for FulltextOptions { write!(f, ", analyzer={}", self.analyzer)?; write!(f, ", case_sensitive={}", self.case_sensitive)?; write!(f, ", backend={}", self.backend)?; + if self.backend == FulltextBackend::Bloom { + write!(f, ", granularity={}", self.granularity)?; + write!(f, ", false_positive_rate={}", self.false_positive_rate())?; + } } Ok(()) } @@ -611,6 +706,45 @@ impl TryFrom> for FulltextOptions { } } + if fulltext_options.backend == FulltextBackend::Bloom { + // Parse granularity with default value 10240 + let granularity = match options.get(COLUMN_FULLTEXT_OPT_KEY_GRANULARITY) { + Some(value) => value + .parse::() + .ok() + .filter(|&v| v > 0) + .ok_or_else(|| { + error::InvalidFulltextOptionSnafu { + msg: format!( + "Invalid granularity: {value}, expected: positive integer" + ), + } + .build() + })?, + None => DEFAULT_GRANULARITY, + }; + fulltext_options.granularity = granularity; + + // Parse false positive rate with default value 0.01 + let false_positive_rate = match options.get(COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE) + { + Some(value) => value + .parse::() + .ok() + .filter(|&v| v > 0.0 && v <= 1.0) + .ok_or_else(|| { + error::InvalidFulltextOptionSnafu { + msg: format!( + "Invalid false positive rate: {value}, expected: 0.0 < rate <= 1.0" + ), + } + .build() + })?, + None => DEFAULT_FALSE_POSITIVE_RATE, + }; + fulltext_options.false_positive_rate_in_10000 = (false_positive_rate * 10000.0) as u32; + } + Ok(fulltext_options) } } @@ -638,23 +772,72 @@ impl fmt::Display for FulltextAnalyzer { pub struct SkippingIndexOptions { /// The granularity of 
the skip index. pub granularity: u32, + /// The false positive rate of the skip index (in ten-thousandths, e.g., 100 = 1%). + pub false_positive_rate_in_10000: u32, /// The type of the skip index. #[serde(default)] pub index_type: SkippingIndexType, } +impl SkippingIndexOptions { + /// Creates a new skipping index options without checking `false_positive_rate` and `granularity`. + pub fn new_unchecked( + granularity: u32, + false_positive_rate: f64, + index_type: SkippingIndexType, + ) -> Self { + Self { + granularity, + false_positive_rate_in_10000: (false_positive_rate * 10000.0) as u32, + index_type, + } + } + + /// Creates a new skipping index options. + pub fn new( + granularity: u32, + false_positive_rate: f64, + index_type: SkippingIndexType, + ) -> Result { + ensure!( + 0.0 < false_positive_rate && false_positive_rate <= 1.0, + error::InvalidSkippingIndexOptionSnafu { + msg: format!("Invalid false positive rate: {false_positive_rate}, expected: 0.0 < rate <= 1.0"), + } + ); + ensure!( + granularity > 0, + error::InvalidSkippingIndexOptionSnafu { + msg: format!("Invalid granularity: {granularity}, expected: positive integer"), + } + ); + Ok(Self::new_unchecked( + granularity, + false_positive_rate, + index_type, + )) + } + + /// Gets the false positive rate. 
+ pub fn false_positive_rate(&self) -> f64 { + self.false_positive_rate_in_10000 as f64 / 10000.0 + } +} + impl Default for SkippingIndexOptions { fn default() -> Self { - Self { - granularity: DEFAULT_GRANULARITY, - index_type: SkippingIndexType::default(), - } + Self::new_unchecked( + DEFAULT_GRANULARITY, + DEFAULT_FALSE_POSITIVE_RATE, + SkippingIndexType::default(), + ) } } impl fmt::Display for SkippingIndexOptions { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "granularity={}", self.granularity)?; + write!(f, ", false_positive_rate={}", self.false_positive_rate())?; write!(f, ", index_type={}", self.index_type)?; Ok(()) } @@ -681,15 +864,37 @@ impl TryFrom> for SkippingIndexOptions { fn try_from(options: HashMap) -> Result { // Parse granularity with default value 1 let granularity = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY) { - Some(value) => value.parse::().map_err(|_| { - error::InvalidSkippingIndexOptionSnafu { - msg: format!("Invalid granularity: {value}, expected: positive integer"), - } - .build() - })?, + Some(value) => value + .parse::() + .ok() + .filter(|&v| v > 0) + .ok_or_else(|| { + error::InvalidSkippingIndexOptionSnafu { + msg: format!("Invalid granularity: {value}, expected: positive integer"), + } + .build() + })?, None => DEFAULT_GRANULARITY, }; + // Parse false positive rate with default value 100 + let false_positive_rate = + match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE) { + Some(value) => value + .parse::() + .ok() + .filter(|&v| v > 0.0 && v <= 1.0) + .ok_or_else(|| { + error::InvalidSkippingIndexOptionSnafu { + msg: format!( + "Invalid false positive rate: {value}, expected: 0.0 < rate <= 1.0" + ), + } + .build() + })?, + None => DEFAULT_FALSE_POSITIVE_RATE, + }; + // Parse index type with default value BloomFilter let index_type = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE) { Some(typ) => match typ.to_ascii_uppercase().as_str() { @@ -704,10 +909,11 @@ impl 
TryFrom> for SkippingIndexOptions { None => SkippingIndexType::default(), }; - Ok(SkippingIndexOptions { + Ok(SkippingIndexOptions::new_unchecked( granularity, + false_positive_rate, index_type, - }) + )) } } diff --git a/src/flow/src/adapter.rs b/src/flow/src/adapter.rs index 6a1697a389..1f2a14d3b9 100644 --- a/src/flow/src/adapter.rs +++ b/src/flow/src/adapter.rs @@ -899,7 +899,7 @@ impl StreamingEngine { let rows_send = self.run_available(true).await?; let row = self.send_writeback_requests().await?; debug!( - "Done to flush flow_id={:?} with {} input rows flushed, {} rows sended and {} output rows flushed", + "Done to flush flow_id={:?} with {} input rows flushed, {} rows sent and {} output rows flushed", flow_id, flushed_input_rows, rows_send, row ); Ok(row) diff --git a/src/index/src/bloom_filter/applier.rs b/src/index/src/bloom_filter/applier.rs index 4e29eb538b..1bd58a08f4 100644 --- a/src/index/src/bloom_filter/applier.rs +++ b/src/index/src/bloom_filter/applier.rs @@ -218,6 +218,7 @@ mod tests { let mut writer = Cursor::new(Vec::new()); let mut creator = BloomFilterCreator::new( 4, + 0.01, Arc::new(MockExternalTempFileProvider::new()), Arc::new(AtomicUsize::new(0)), None, diff --git a/src/index/src/bloom_filter/creator.rs b/src/index/src/bloom_filter/creator.rs index 66e892e29f..77a56c6b3d 100644 --- a/src/index/src/bloom_filter/creator.rs +++ b/src/index/src/bloom_filter/creator.rs @@ -30,9 +30,6 @@ use crate::bloom_filter::SEED; use crate::external_provider::ExternalTempFileProvider; use crate::Bytes; -/// The false positive rate of the Bloom filter. -pub const FALSE_POSITIVE_RATE: f64 = 0.01; - /// `BloomFilterCreator` is responsible for creating and managing bloom filters /// for a set of elements. It divides the rows into segments and creates /// bloom filters for each segment. 
@@ -79,6 +76,7 @@ impl BloomFilterCreator { /// `rows_per_segment` <= 0 pub fn new( rows_per_segment: usize, + false_positive_rate: f64, intermediate_provider: Arc, global_memory_usage: Arc, global_memory_usage_threshold: Option, @@ -95,6 +93,7 @@ impl BloomFilterCreator { cur_seg_distinct_elems_mem_usage: 0, global_memory_usage: global_memory_usage.clone(), finalized_bloom_filters: FinalizedBloomFilterStorage::new( + false_positive_rate, intermediate_provider, global_memory_usage, global_memory_usage_threshold, @@ -263,6 +262,7 @@ mod tests { let mut writer = Cursor::new(Vec::new()); let mut creator = BloomFilterCreator::new( 2, + 0.01, Arc::new(MockExternalTempFileProvider::new()), Arc::new(AtomicUsize::new(0)), None, @@ -337,6 +337,7 @@ mod tests { let mut writer = Cursor::new(Vec::new()); let mut creator: BloomFilterCreator = BloomFilterCreator::new( 2, + 0.01, Arc::new(MockExternalTempFileProvider::new()), Arc::new(AtomicUsize::new(0)), None, @@ -418,6 +419,7 @@ mod tests { let mut writer = Cursor::new(Vec::new()); let mut creator = BloomFilterCreator::new( 2, + 0.01, Arc::new(MockExternalTempFileProvider::new()), Arc::new(AtomicUsize::new(0)), None, diff --git a/src/index/src/bloom_filter/creator/finalize_segment.rs b/src/index/src/bloom_filter/creator/finalize_segment.rs index 1ba848a74d..4bf72895b0 100644 --- a/src/index/src/bloom_filter/creator/finalize_segment.rs +++ b/src/index/src/bloom_filter/creator/finalize_segment.rs @@ -23,7 +23,7 @@ use futures::{stream, AsyncWriteExt, Stream}; use snafu::ResultExt; use crate::bloom_filter::creator::intermediate_codec::IntermediateBloomFilterCodecV1; -use crate::bloom_filter::creator::{FALSE_POSITIVE_RATE, SEED}; +use crate::bloom_filter::creator::SEED; use crate::bloom_filter::error::{IntermediateSnafu, IoSnafu, Result}; use crate::external_provider::ExternalTempFileProvider; use crate::Bytes; @@ -33,6 +33,9 @@ const MIN_MEMORY_USAGE_THRESHOLD: usize = 1024 * 1024; // 1MB /// Storage for finalized Bloom filters. 
pub struct FinalizedBloomFilterStorage { + /// The false positive rate of the Bloom filter. + false_positive_rate: f64, + /// Indices of the segments in the sequence of finalized Bloom filters. segment_indices: Vec, @@ -65,12 +68,14 @@ pub struct FinalizedBloomFilterStorage { impl FinalizedBloomFilterStorage { /// Creates a new `FinalizedBloomFilterStorage`. pub fn new( + false_positive_rate: f64, intermediate_provider: Arc, global_memory_usage: Arc, global_memory_usage_threshold: Option, ) -> Self { let external_prefix = format!("intm-bloom-filters-{}", uuid::Uuid::new_v4()); Self { + false_positive_rate, segment_indices: Vec::new(), in_memory: Vec::new(), intermediate_file_id_counter: 0, @@ -96,7 +101,7 @@ impl FinalizedBloomFilterStorage { elems: impl IntoIterator, element_count: usize, ) -> Result<()> { - let mut bf = BloomFilter::with_false_pos(FALSE_POSITIVE_RATE) + let mut bf = BloomFilter::with_false_pos(self.false_positive_rate) .seed(&SEED) .expected_items(element_count); for elem in elems.into_iter() { @@ -284,6 +289,7 @@ mod tests { let global_memory_usage_threshold = Some(1024 * 1024); // 1MB let provider = Arc::new(mock_provider); let mut storage = FinalizedBloomFilterStorage::new( + 0.01, provider, global_memory_usage.clone(), global_memory_usage_threshold, @@ -340,6 +346,7 @@ mod tests { let global_memory_usage_threshold = Some(1024 * 1024); // 1MB let provider = Arc::new(mock_provider); let mut storage = FinalizedBloomFilterStorage::new( + 0.01, provider, global_memory_usage.clone(), global_memory_usage_threshold, diff --git a/src/index/src/bloom_filter/reader.rs b/src/index/src/bloom_filter/reader.rs index 037eb6b3db..a65eef1971 100644 --- a/src/index/src/bloom_filter/reader.rs +++ b/src/index/src/bloom_filter/reader.rs @@ -222,6 +222,7 @@ mod tests { let mut writer = Cursor::new(vec![]); let mut creator = BloomFilterCreator::new( 2, + 0.01, Arc::new(MockExternalTempFileProvider::new()), Arc::new(AtomicUsize::new(0)), None, diff --git 
a/src/index/src/fulltext_index/create/bloom_filter.rs b/src/index/src/fulltext_index/create/bloom_filter.rs index 127464db71..4fd2593e8e 100644 --- a/src/index/src/fulltext_index/create/bloom_filter.rs +++ b/src/index/src/fulltext_index/create/bloom_filter.rs @@ -45,6 +45,7 @@ impl BloomFilterFulltextIndexCreator { pub fn new( config: Config, rows_per_segment: usize, + false_positive_rate: f64, intermediate_provider: Arc, global_memory_usage: Arc, global_memory_usage_threshold: Option, @@ -57,6 +58,7 @@ impl BloomFilterFulltextIndexCreator { let inner = BloomFilterCreator::new( rows_per_segment, + false_positive_rate, intermediate_provider, global_memory_usage, global_memory_usage_threshold, diff --git a/src/metric-engine/src/data_region.rs b/src/metric-engine/src/data_region.rs index c07cbd88ce..80aacc2848 100644 --- a/src/metric-engine/src/data_region.rs +++ b/src/metric-engine/src/data_region.rs @@ -145,12 +145,19 @@ impl DataRegion { IndexOptions::Inverted => { c.column_schema.set_inverted_index(true); } - IndexOptions::Skipping { granularity } => { + IndexOptions::Skipping { + granularity, + false_positive_rate, + } => { c.column_schema - .set_skipping_options(&SkippingIndexOptions { - granularity, - index_type: SkippingIndexType::BloomFilter, - }) + .set_skipping_options( + &SkippingIndexOptions::new( + granularity, + false_positive_rate, + SkippingIndexType::BloomFilter, + ) + .context(SetSkippingIndexOptionSnafu)?, + ) .context(SetSkippingIndexOptionSnafu)?; } } diff --git a/src/metric-engine/src/engine/create.rs b/src/metric-engine/src/engine/create.rs index 1b85527785..64c7cd7e14 100644 --- a/src/metric-engine/src/engine/create.rs +++ b/src/metric-engine/src/engine/create.rs @@ -55,6 +55,7 @@ use crate::utils::{ }; const DEFAULT_TABLE_ID_SKIPPING_INDEX_GRANULARITY: u32 = 1024; +const DEFAULT_TABLE_ID_SKIPPING_INDEX_FALSE_POSITIVE_RATE: f64 = 0.01; impl MetricEngineInner { pub async fn create_regions( @@ -542,10 +543,11 @@ impl MetricEngineInner { 
ConcreteDataType::uint32_datatype(), false, ) - .with_skipping_options(SkippingIndexOptions { - granularity: DEFAULT_TABLE_ID_SKIPPING_INDEX_GRANULARITY, - index_type: datatypes::schema::SkippingIndexType::BloomFilter, - }) + .with_skipping_options(SkippingIndexOptions::new_unchecked( + DEFAULT_TABLE_ID_SKIPPING_INDEX_GRANULARITY, + DEFAULT_TABLE_ID_SKIPPING_INDEX_FALSE_POSITIVE_RATE, + datatypes::schema::SkippingIndexType::BloomFilter, + )) .unwrap(), }; let tsid_col = ColumnMetadata { diff --git a/src/metric-engine/src/engine/options.rs b/src/metric-engine/src/engine/options.rs index baeea621a5..e8ff117c2e 100644 --- a/src/metric-engine/src/engine/options.rs +++ b/src/metric-engine/src/engine/options.rs @@ -17,6 +17,8 @@ use std::collections::HashMap; use store_api::metric_engine_consts::{ + METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION, + METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION_DEFAULT, METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION, METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION_DEFAULT, METRIC_ENGINE_INDEX_TYPE_OPTION, }; @@ -31,19 +33,20 @@ use crate::error::{Error, ParseRegionOptionsSnafu, Result}; const SEG_ROW_COUNT_FOR_DATA_REGION: u32 = 256; /// Physical region options. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq)] pub struct PhysicalRegionOptions { pub index: IndexOptions, } /// Index options for auto created columns -#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, Default, PartialEq)] pub enum IndexOptions { #[default] None, Inverted, Skipping { granularity: u32, + false_positive_rate: f64, }, } @@ -54,6 +57,7 @@ pub fn set_data_region_options( ) { options.remove(METRIC_ENGINE_INDEX_TYPE_OPTION); options.remove(METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION); + options.remove(METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION); options.insert( "index.inverted_index.segment_row_count".to_string(), SEG_ROW_COUNT_FOR_DATA_REGION.to_string(), @@ -93,7 +97,23 @@ impl TryFrom<&HashMap> for PhysicalRegionOptions { }) }, )?; - Ok(IndexOptions::Skipping { granularity }) + let false_positive_rate = value + .get(METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION) + .map_or( + Ok(METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION_DEFAULT), + |f| { + f.parse().ok().filter(|f| *f > 0.0 && *f <= 1.0).ok_or( + ParseRegionOptionsSnafu { + reason: format!("Invalid false positive rate: {}", f), + } + .build(), + ) + }, + )?; + Ok(IndexOptions::Skipping { + granularity, + false_positive_rate, + }) } Some(index_type) => ParseRegionOptionsSnafu { reason: format!("Invalid index type: {}", index_type), @@ -121,11 +141,16 @@ mod tests { METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION.to_string(), "102400".to_string(), ); + options.insert( + METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION.to_string(), + "0.01".to_string(), + ); set_data_region_options(&mut options, false); for key in [ METRIC_ENGINE_INDEX_TYPE_OPTION, METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION, + METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION, ] { assert_eq!(options.get(key), None); } @@ -154,11 +179,16 @@ mod tests { 
METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION.to_string(), "102400".to_string(), ); + options.insert( + METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION.to_string(), + "0.01".to_string(), + ); let physical_region_options = PhysicalRegionOptions::try_from(&options).unwrap(); assert_eq!( physical_region_options.index, IndexOptions::Skipping { - granularity: 102400 + granularity: 102400, + false_positive_rate: 0.01, } ); } diff --git a/src/mito2/src/engine/alter_test.rs b/src/mito2/src/engine/alter_test.rs index 0863d3a7bd..365306c77d 100644 --- a/src/mito2/src/engine/alter_test.rs +++ b/src/mito2/src/engine/alter_test.rs @@ -84,12 +84,14 @@ fn alter_column_fulltext_options() -> RegionAlterRequest { kind: AlterKind::SetIndex { options: ApiSetIndexOptions::Fulltext { column_name: "tag_0".to_string(), - options: FulltextOptions { - enable: true, - analyzer: FulltextAnalyzer::English, - case_sensitive: false, - backend: FulltextBackend::Bloom, - }, + options: FulltextOptions::new_unchecked( + true, + FulltextAnalyzer::English, + false, + FulltextBackend::Bloom, + 1000, + 0.01, + ), }, }, } @@ -553,12 +555,14 @@ async fn test_alter_column_fulltext_options() { // Wait for the write job. 
alter_job.await.unwrap(); - let expect_fulltext_options = FulltextOptions { - enable: true, - analyzer: FulltextAnalyzer::English, - case_sensitive: false, - backend: FulltextBackend::Bloom, - }; + let expect_fulltext_options = FulltextOptions::new_unchecked( + true, + FulltextAnalyzer::English, + false, + FulltextBackend::Bloom, + 1000, + 0.01, + ); let check_fulltext_options = |engine: &MitoEngine, expected: &FulltextOptions| { let current_fulltext_options = engine .get_region(region_id) diff --git a/src/mito2/src/sst/index.rs b/src/mito2/src/sst/index.rs index 65bc02c7c1..3490107f9b 100644 --- a/src/mito2/src/sst/index.rs +++ b/src/mito2/src/sst/index.rs @@ -45,8 +45,6 @@ pub(crate) const TYPE_INVERTED_INDEX: &str = "inverted_index"; pub(crate) const TYPE_FULLTEXT_INDEX: &str = "fulltext_index"; pub(crate) const TYPE_BLOOM_FILTER_INDEX: &str = "bloom_filter_index"; -const DEFAULT_FULLTEXT_BLOOM_ROW_GRANULARITY: usize = 8096; - /// Output of the index creation. #[derive(Debug, Clone, Default)] pub struct IndexOutput { @@ -293,7 +291,6 @@ impl IndexerBuilderImpl { &self.intermediate_manager, &self.metadata, self.fulltext_index_config.compress, - DEFAULT_FULLTEXT_BLOOM_ROW_GRANULARITY, mem_limit, ) .await; @@ -455,10 +452,11 @@ mod tests { if with_skipping_bloom { let column_schema = ColumnSchema::new("bloom", ConcreteDataType::string_datatype(), false) - .with_skipping_options(SkippingIndexOptions { - granularity: 42, - index_type: SkippingIndexType::BloomFilter, - }) + .with_skipping_options(SkippingIndexOptions::new_unchecked( + 42, + 0.01, + SkippingIndexType::BloomFilter, + )) .unwrap(); let column = ColumnMetadata { diff --git a/src/mito2/src/sst/index/bloom_filter/creator.rs b/src/mito2/src/sst/index/bloom_filter/creator.rs index 4c1dbd13e3..d68a68f480 100644 --- a/src/mito2/src/sst/index/bloom_filter/creator.rs +++ b/src/mito2/src/sst/index/bloom_filter/creator.rs @@ -97,6 +97,7 @@ impl BloomFilterIndexer { let creator = BloomFilterCreator::new( 
options.granularity as _, + options.false_positive_rate(), temp_file_provider.clone(), global_memory_usage.clone(), memory_usage_threshold, @@ -408,10 +409,11 @@ pub(crate) mod tests { ConcreteDataType::string_datatype(), false, ) - .with_skipping_options(SkippingIndexOptions { - index_type: SkippingIndexType::BloomFilter, - granularity: 2, - }) + .with_skipping_options(SkippingIndexOptions::new_unchecked( + 2, + 0.01, + SkippingIndexType::BloomFilter, + )) .unwrap(), semantic_type: SemanticType::Tag, column_id: 1, @@ -431,10 +433,11 @@ pub(crate) mod tests { ConcreteDataType::uint64_datatype(), false, ) - .with_skipping_options(SkippingIndexOptions { - index_type: SkippingIndexType::BloomFilter, - granularity: 4, - }) + .with_skipping_options(SkippingIndexOptions::new_unchecked( + 4, + 0.01, + SkippingIndexType::BloomFilter, + )) .unwrap(), semantic_type: SemanticType::Field, column_id: 3, diff --git a/src/mito2/src/sst/index/fulltext_index/applier.rs b/src/mito2/src/sst/index/fulltext_index/applier.rs index 03235f3e7e..ac14579de6 100644 --- a/src/mito2/src/sst/index/fulltext_index/applier.rs +++ b/src/mito2/src/sst/index/fulltext_index/applier.rs @@ -465,7 +465,6 @@ impl IndexSource { /// Returns the blob with the specified key from local cache or remote store. /// /// Returns `None` if the blob is not found. 
- #[allow(unused)] async fn blob( &self, file_id: FileId, diff --git a/src/mito2/src/sst/index/fulltext_index/creator.rs b/src/mito2/src/sst/index/fulltext_index/creator.rs index 52bc34bb3b..bede13e7c2 100644 --- a/src/mito2/src/sst/index/fulltext_index/creator.rs +++ b/src/mito2/src/sst/index/fulltext_index/creator.rs @@ -60,7 +60,6 @@ impl FulltextIndexer { intermediate_manager: &IntermediateManager, metadata: &RegionMetadataRef, compress: bool, - bloom_row_granularity: usize, mem_limit: usize, ) -> Result> { let mut creators = HashMap::new(); @@ -106,7 +105,8 @@ impl FulltextIndexer { let global_memory_usage = Arc::new(AtomicUsize::new(0)); let creator = BloomFilterFulltextIndexCreator::new( config, - bloom_row_granularity, + options.granularity as _, + options.false_positive_rate(), temp_file_provider, global_memory_usage, Some(mem_limit), @@ -400,12 +400,14 @@ mod tests { ConcreteDataType::string_datatype(), true, ) - .with_fulltext_options(FulltextOptions { - enable: true, - analyzer: FulltextAnalyzer::English, - case_sensitive: true, - backend: backend.clone(), - }) + .with_fulltext_options(FulltextOptions::new_unchecked( + true, + FulltextAnalyzer::English, + true, + backend.clone(), + 1, + 0.01, + )) .unwrap(), semantic_type: SemanticType::Field, column_id: 1, @@ -416,12 +418,14 @@ mod tests { ConcreteDataType::string_datatype(), true, ) - .with_fulltext_options(FulltextOptions { - enable: true, - analyzer: FulltextAnalyzer::English, - case_sensitive: false, - backend: backend.clone(), - }) + .with_fulltext_options(FulltextOptions::new_unchecked( + true, + FulltextAnalyzer::English, + false, + backend.clone(), + 1, + 0.01, + )) .unwrap(), semantic_type: SemanticType::Field, column_id: 2, @@ -432,12 +436,14 @@ mod tests { ConcreteDataType::string_datatype(), true, ) - .with_fulltext_options(FulltextOptions { - enable: true, - analyzer: FulltextAnalyzer::Chinese, - case_sensitive: false, - backend: backend.clone(), - }) + 
.with_fulltext_options(FulltextOptions::new_unchecked( + true, + FulltextAnalyzer::Chinese, + false, + backend.clone(), + 1, + 0.01, + )) .unwrap(), semantic_type: SemanticType::Field, column_id: 3, @@ -547,7 +553,6 @@ mod tests { &intm_mgr, &region_metadata, true, - 1, 1024, ) .await diff --git a/src/operator/src/expr_helper.rs b/src/operator/src/expr_helper.rs index 6b07e0e40f..2b2cf7a63e 100644 --- a/src/operator/src/expr_helper.rs +++ b/src/operator/src/expr_helper.rs @@ -593,6 +593,8 @@ pub(crate) fn to_alter_table_expr( FulltextBackend::Bloom => PbFulltextBackend::Bloom.into(), FulltextBackend::Tantivy => PbFulltextBackend::Tantivy.into(), }, + granularity: options.granularity as u64, + false_positive_rate: options.false_positive_rate(), })), }, sql::statements::alter::SetIndexOperation::Inverted { column_name } => SetIndex { @@ -608,6 +610,7 @@ pub(crate) fn to_alter_table_expr( column_name: column_name.value, enable: true, granularity: options.granularity as u64, + false_positive_rate: options.false_positive_rate(), skipping_index_type: match options.index_type { SkippingIndexType::BloomFilter => PbSkippingIndexType::BloomFilter.into(), }, diff --git a/src/query/src/sql/show_create_table.rs b/src/query/src/sql/show_create_table.rs index bc004f514e..f3b46f2867 100644 --- a/src/query/src/sql/show_create_table.rs +++ b/src/query/src/sql/show_create_table.rs @@ -18,8 +18,10 @@ use std::collections::HashMap; use common_meta::SchemaOptions; use datatypes::schema::{ - ColumnDefaultConstraint, ColumnSchema, SchemaRef, COLUMN_FULLTEXT_OPT_KEY_ANALYZER, - COLUMN_FULLTEXT_OPT_KEY_BACKEND, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, + ColumnDefaultConstraint, ColumnSchema, FulltextBackend, SchemaRef, + COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_BACKEND, + COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE, + COLUMN_FULLTEXT_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE,
COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, }; use snafu::ResultExt; @@ -104,7 +106,7 @@ fn create_column(column_schema: &ColumnSchema, quote_style: char) -> Result Result Result { assert_eq!("a", column_name.value); assert_eq!( - FulltextOptions { - enable: true, - analyzer: FulltextAnalyzer::English, - case_sensitive: false, - backend: FulltextBackend::Bloom, - }, + FulltextOptions::new_unchecked( + true, + FulltextAnalyzer::English, + false, + FulltextBackend::Bloom, + 1000, + 0.01, + ), *options ); } diff --git a/src/sql/src/parsers/utils.rs b/src/sql/src/parsers/utils.rs index 6f6a54ccee..ea07992d70 100644 --- a/src/sql/src/parsers/utils.rs +++ b/src/sql/src/parsers/utils.rs @@ -29,8 +29,9 @@ use datafusion_sql::TableReference; use datatypes::arrow::datatypes::DataType; use datatypes::schema::{ COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_BACKEND, - COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, - COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, + COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE, + COLUMN_FULLTEXT_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE, + COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, }; use snafu::ResultExt; @@ -126,6 +127,8 @@ pub fn validate_column_fulltext_create_option(key: &str) -> bool { COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_FULLTEXT_OPT_KEY_BACKEND, + COLUMN_FULLTEXT_OPT_KEY_GRANULARITY, + COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE, ] .contains(&key) } @@ -134,6 +137,7 @@ pub fn validate_column_skipping_index_create_option(key: &str) -> bool { [ COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, + COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE, ] .contains(&key) } diff --git a/src/store-api/src/metadata.rs b/src/store-api/src/metadata.rs index 62cb1b7e60..c279f92448 100644 --- 
a/src/store-api/src/metadata.rs +++ b/src/store-api/src/metadata.rs @@ -1064,6 +1064,14 @@ pub enum MetadataError { #[snafu(implicit)] location: Location, }, + + #[snafu(display("Invalid index option"))] + InvalidIndexOption { + #[snafu(implicit)] + location: Location, + #[snafu(source)] + error: datatypes::error::Error, + }, } impl ErrorExt for MetadataError { @@ -1620,12 +1628,14 @@ mod test { .alter(AlterKind::SetIndex { options: ApiSetIndexOptions::Fulltext { column_name: "b".to_string(), - options: FulltextOptions { - enable: true, - analyzer: FulltextAnalyzer::Chinese, - case_sensitive: true, - backend: FulltextBackend::Bloom, - }, + options: FulltextOptions::new_unchecked( + true, + FulltextAnalyzer::Chinese, + true, + FulltextBackend::Bloom, + 1000, + 0.01, + ), }, }) .unwrap(); diff --git a/src/store-api/src/metric_engine_consts.rs b/src/store-api/src/metric_engine_consts.rs index b2994b0b00..bf0f405812 100644 --- a/src/store-api/src/metric_engine_consts.rs +++ b/src/store-api/src/metric_engine_consts.rs @@ -113,13 +113,19 @@ pub const METRIC_ENGINE_INDEX_TYPE_OPTION: &str = "index.type"; /// physical_metric_table = "", /// index.type = "skipping", /// index.granularity = "102400", +/// index.false_positive_rate = "0.01", /// ); /// ``` pub const METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION: &str = "index.granularity"; +pub const METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION: &str = + "index.false_positive_rate"; /// Default granularity for the skipping index in the metric engine. pub const METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION_DEFAULT: u32 = 102400; +/// Default false positive rate for the skipping index in the metric engine. +pub const METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION_DEFAULT: f64 = 0.01; + /// Returns true if the `key` is a valid option key for the metric engine. 
pub fn is_metric_engine_option_key(key: &str) -> bool { [ @@ -127,6 +133,7 @@ pub fn is_metric_engine_option_key(key: &str) -> bool { LOGICAL_TABLE_METADATA_KEY, METRIC_ENGINE_INDEX_TYPE_OPTION, METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION, + METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION, ] .contains(&key) } diff --git a/src/store-api/src/region_request.rs b/src/store-api/src/region_request.rs index f9b71c4bc6..32f5d151b7 100644 --- a/src/store-api/src/region_request.rs +++ b/src/store-api/src/region_request.rs @@ -42,8 +42,8 @@ use strum::{AsRefStr, IntoStaticStr}; use crate::logstore::entry; use crate::metadata::{ - ColumnMetadata, DecodeProtoSnafu, FlightCodecSnafu, InvalidRawRegionRequestSnafu, - InvalidRegionRequestSnafu, InvalidSetRegionOptionRequestSnafu, + ColumnMetadata, DecodeProtoSnafu, FlightCodecSnafu, InvalidIndexOptionSnafu, + InvalidRawRegionRequestSnafu, InvalidRegionRequestSnafu, InvalidSetRegionOptionRequestSnafu, InvalidUnsetRegionOptionRequestSnafu, MetadataError, RegionMetadata, Result, UnexpectedSnafu, }; use crate::metric_engine_consts::PHYSICAL_TABLE_METADATA_KEY; @@ -760,16 +760,19 @@ impl TryFrom for AlterKind { set_index::Options::Fulltext(x) => AlterKind::SetIndex { options: ApiSetIndexOptions::Fulltext { column_name: x.column_name.clone(), - options: FulltextOptions { - enable: x.enable, - analyzer: as_fulltext_option_analyzer( + options: FulltextOptions::new( + x.enable, + as_fulltext_option_analyzer( Analyzer::try_from(x.analyzer).context(DecodeProtoSnafu)?, ), - case_sensitive: x.case_sensitive, - backend: as_fulltext_option_backend( + x.case_sensitive, + as_fulltext_option_backend( PbFulltextBackend::try_from(x.backend).context(DecodeProtoSnafu)?, ), - }, + x.granularity as u32, + x.false_positive_rate, + ) + .context(InvalidIndexOptionSnafu)?, }, }, set_index::Options::Inverted(i) => AlterKind::SetIndex { @@ -780,13 +783,15 @@ impl TryFrom for AlterKind { set_index::Options::Skipping(s) => 
AlterKind::SetIndex { options: ApiSetIndexOptions::Skipping { column_name: s.column_name, - options: SkippingIndexOptions { - index_type: as_skipping_index_type( + options: SkippingIndexOptions::new( + s.granularity as u32, + s.false_positive_rate, + as_skipping_index_type( PbSkippingIndexType::try_from(s.skipping_index_type) .context(DecodeProtoSnafu)?, ), - granularity: s.granularity as u32, - }, + ) + .context(InvalidIndexOptionSnafu)?, }, }, }, @@ -1644,12 +1649,14 @@ mod tests { let kind = AlterKind::SetIndex { options: ApiSetIndexOptions::Fulltext { column_name: "tag_0".to_string(), - options: FulltextOptions { - enable: true, - analyzer: FulltextAnalyzer::Chinese, - case_sensitive: false, - backend: FulltextBackend::Bloom, - }, + options: FulltextOptions::new_unchecked( + true, + FulltextAnalyzer::Chinese, + false, + FulltextBackend::Bloom, + 1000, + 0.01, + ), }, }; let request = RegionAlterRequest { kind }; diff --git a/src/table/src/metadata.rs b/src/table/src/metadata.rs index 5f644cbc10..cb9c2674e9 100644 --- a/src/table/src/metadata.rs +++ b/src/table/src/metadata.rs @@ -1876,12 +1876,14 @@ mod tests { let alter_kind = AlterKind::SetIndex { options: SetIndexOptions::Fulltext { column_name: "my_tag_first".to_string(), - options: FulltextOptions { - enable: true, - analyzer: FulltextAnalyzer::Chinese, - case_sensitive: true, - backend: FulltextBackend::Bloom, - }, + options: FulltextOptions::new_unchecked( + true, + FulltextAnalyzer::Chinese, + true, + FulltextBackend::Bloom, + 1000, + 0.01, + ), }, }; let new_meta = new_meta diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs index 77df110d06..eea1065835 100644 --- a/tests-integration/tests/http.rs +++ b/tests-integration/tests/http.rs @@ -1674,7 +1674,7 @@ transform: assert_eq!(res.status(), StatusCode::OK); // 3. 
check schema - let expected_schema = "[[\"logs1\",\"CREATE TABLE IF NOT EXISTS \\\"logs1\\\" (\\n \\\"id1\\\" INT NULL INVERTED INDEX,\\n \\\"id2\\\" INT NULL INVERTED INDEX,\\n \\\"logger\\\" STRING NULL,\\n \\\"type\\\" STRING NULL SKIPPING INDEX WITH(granularity = '10240', type = 'BLOOM'),\\n \\\"log\\\" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', backend = 'bloom', case_sensitive = 'false'),\\n \\\"time\\\" TIMESTAMP(9) NOT NULL,\\n TIME INDEX (\\\"time\\\"),\\n PRIMARY KEY (\\\"type\\\", \\\"log\\\")\\n)\\n\\nENGINE=mito\\nWITH(\\n append_mode = 'true'\\n)\"]]"; + let expected_schema = "[[\"logs1\",\"CREATE TABLE IF NOT EXISTS \\\"logs1\\\" (\\n \\\"id1\\\" INT NULL INVERTED INDEX,\\n \\\"id2\\\" INT NULL INVERTED INDEX,\\n \\\"logger\\\" STRING NULL,\\n \\\"type\\\" STRING NULL SKIPPING INDEX WITH(false_positive_rate = '0.01', granularity = '10240', type = 'BLOOM'),\\n \\\"log\\\" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', backend = 'bloom', case_sensitive = 'false', false_positive_rate = '0.01', granularity = '10240'),\\n \\\"time\\\" TIMESTAMP(9) NOT NULL,\\n TIME INDEX (\\\"time\\\"),\\n PRIMARY KEY (\\\"type\\\", \\\"log\\\")\\n)\\n\\nENGINE=mito\\nWITH(\\n append_mode = 'true'\\n)\"]]"; validate_data( "pipeline_schema", &client, @@ -3702,7 +3702,7 @@ pub async fn test_otlp_traces_v1(store_type: StorageType) { let expected = 
r#"[[1736480942444376000,1736480942444499000,123000,null,"c05d7a4ec8e1f231f02ed6e8da8655b4","d24f921c75f68e23","SPAN_KIND_CLIENT","lets-go","STATUS_CODE_UNSET","","","telemetrygen","","telemetrygen","1.2.3.4","telemetrygen-server",[],[]],[1736480942444376000,1736480942444499000,123000,"d24f921c75f68e23","c05d7a4ec8e1f231f02ed6e8da8655b4","9630f2916e2f7909","SPAN_KIND_SERVER","okey-dokey-0","STATUS_CODE_UNSET","","","telemetrygen","","telemetrygen","1.2.3.4","telemetrygen-client",[],[]],[1736480942444589000,1736480942444712000,123000,null,"cc9e0991a2e63d274984bd44ee669203","eba7be77e3558179","SPAN_KIND_CLIENT","lets-go","STATUS_CODE_UNSET","","","telemetrygen","","telemetrygen","1.2.3.4","telemetrygen-server",[],[]],[1736480942444589000,1736480942444712000,123000,"eba7be77e3558179","cc9e0991a2e63d274984bd44ee669203","8f847259b0f6e1ab","SPAN_KIND_SERVER","okey-dokey-0","STATUS_CODE_UNSET","","","telemetrygen","","telemetrygen","1.2.3.4","telemetrygen-client",[],[]]]"#; validate_data("otlp_traces", &client, "select * from mytable;", expected).await; - let expected_ddl = r#"[["mytable","CREATE TABLE IF NOT EXISTS \"mytable\" (\n \"timestamp\" TIMESTAMP(9) NOT NULL,\n \"timestamp_end\" TIMESTAMP(9) NULL,\n \"duration_nano\" BIGINT UNSIGNED NULL,\n \"parent_span_id\" STRING NULL SKIPPING INDEX WITH(granularity = '10240', type = 'BLOOM'),\n \"trace_id\" STRING NULL SKIPPING INDEX WITH(granularity = '10240', type = 'BLOOM'),\n \"span_id\" STRING NULL,\n \"span_kind\" STRING NULL,\n \"span_name\" STRING NULL,\n \"span_status_code\" STRING NULL,\n \"span_status_message\" STRING NULL,\n \"trace_state\" STRING NULL,\n \"scope_name\" STRING NULL,\n \"scope_version\" STRING NULL,\n \"service_name\" STRING NULL SKIPPING INDEX WITH(granularity = '10240', type = 'BLOOM'),\n \"span_attributes.net.peer.ip\" STRING NULL,\n \"span_attributes.peer.service\" STRING NULL,\n \"span_events\" JSON NULL,\n \"span_links\" JSON NULL,\n TIME INDEX (\"timestamp\"),\n PRIMARY KEY 
(\"service_name\")\n)\nPARTITION ON COLUMNS (\"trace_id\") (\n trace_id < '1',\n trace_id >= 'f',\n trace_id >= '1' AND trace_id < '2',\n trace_id >= '2' AND trace_id < '3',\n trace_id >= '3' AND trace_id < '4',\n trace_id >= '4' AND trace_id < '5',\n trace_id >= '5' AND trace_id < '6',\n trace_id >= '6' AND trace_id < '7',\n trace_id >= '7' AND trace_id < '8',\n trace_id >= '8' AND trace_id < '9',\n trace_id >= '9' AND trace_id < 'a',\n trace_id >= 'a' AND trace_id < 'b',\n trace_id >= 'b' AND trace_id < 'c',\n trace_id >= 'c' AND trace_id < 'd',\n trace_id >= 'd' AND trace_id < 'e',\n trace_id >= 'e' AND trace_id < 'f'\n)\nENGINE=mito\nWITH(\n append_mode = 'true',\n table_data_model = 'greptime_trace_v1'\n)"]]"#; + let expected_ddl = r#"[["mytable","CREATE TABLE IF NOT EXISTS \"mytable\" (\n \"timestamp\" TIMESTAMP(9) NOT NULL,\n \"timestamp_end\" TIMESTAMP(9) NULL,\n \"duration_nano\" BIGINT UNSIGNED NULL,\n \"parent_span_id\" STRING NULL SKIPPING INDEX WITH(false_positive_rate = '0.01', granularity = '10240', type = 'BLOOM'),\n \"trace_id\" STRING NULL SKIPPING INDEX WITH(false_positive_rate = '0.01', granularity = '10240', type = 'BLOOM'),\n \"span_id\" STRING NULL,\n \"span_kind\" STRING NULL,\n \"span_name\" STRING NULL,\n \"span_status_code\" STRING NULL,\n \"span_status_message\" STRING NULL,\n \"trace_state\" STRING NULL,\n \"scope_name\" STRING NULL,\n \"scope_version\" STRING NULL,\n \"service_name\" STRING NULL SKIPPING INDEX WITH(false_positive_rate = '0.01', granularity = '10240', type = 'BLOOM'),\n \"span_attributes.net.peer.ip\" STRING NULL,\n \"span_attributes.peer.service\" STRING NULL,\n \"span_events\" JSON NULL,\n \"span_links\" JSON NULL,\n TIME INDEX (\"timestamp\"),\n PRIMARY KEY (\"service_name\")\n)\nPARTITION ON COLUMNS (\"trace_id\") (\n trace_id < '1',\n trace_id >= 'f',\n trace_id >= '1' AND trace_id < '2',\n trace_id >= '2' AND trace_id < '3',\n trace_id >= '3' AND trace_id < '4',\n trace_id >= '4' AND trace_id < '5',\n trace_id >= 
'5' AND trace_id < '6',\n trace_id >= '6' AND trace_id < '7',\n trace_id >= '7' AND trace_id < '8',\n trace_id >= '8' AND trace_id < '9',\n trace_id >= '9' AND trace_id < 'a',\n trace_id >= 'a' AND trace_id < 'b',\n trace_id >= 'b' AND trace_id < 'c',\n trace_id >= 'c' AND trace_id < 'd',\n trace_id >= 'd' AND trace_id < 'e',\n trace_id >= 'e' AND trace_id < 'f'\n)\nENGINE=mito\nWITH(\n append_mode = 'true',\n table_data_model = 'greptime_trace_v1'\n)"]]"#; validate_data( "otlp_traces", &client, diff --git a/tests/cases/standalone/common/alter/change_col_fulltext_options.result b/tests/cases/standalone/common/alter/change_col_fulltext_options.result index 13202ae12c..70a093711e 100644 --- a/tests/cases/standalone/common/alter/change_col_fulltext_options.result +++ b/tests/cases/standalone/common/alter/change_col_fulltext_options.result @@ -79,20 +79,20 @@ SELECT * FROM test WHERE MATCHES(message, 'hello') ORDER BY message; -- SQLNESS ARG restart=true SHOW CREATE TABLE test; -+-------+----------------------------------------------------------------------------------------------------------------+ -| Table | Create Table | -+-------+----------------------------------------------------------------------------------------------------------------+ -| test | CREATE TABLE IF NOT EXISTS "test" ( | -| | "message" STRING NULL FULLTEXT INDEX WITH(analyzer = 'Chinese', backend = 'bloom', case_sensitive = 'true'), | -| | "time" TIMESTAMP(3) NOT NULL, | -| | TIME INDEX ("time") | -| | ) | -| | | -| | ENGINE=mito | -| | WITH( | -| | append_mode = 'true' | -| | ) | -+-------+----------------------------------------------------------------------------------------------------------------+ ++-------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | 
++-------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| test | CREATE TABLE IF NOT EXISTS "test" ( | +| | "message" STRING NULL FULLTEXT INDEX WITH(analyzer = 'Chinese', backend = 'bloom', case_sensitive = 'true', false_positive_rate = '0.01', granularity = '10240'), | +| | "time" TIMESTAMP(3) NOT NULL, | +| | TIME INDEX ("time") | +| | ) | +| | | +| | ENGINE=mito | +| | WITH( | +| | append_mode = 'true' | +| | ) | ++-------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ SHOW INDEX FROM test; @@ -138,20 +138,20 @@ Affected Rows: 0 SHOW CREATE TABLE test; -+-------+----------------------------------------------------------------------------------------------------------------+ -| Table | Create Table | -+-------+----------------------------------------------------------------------------------------------------------------+ -| test | CREATE TABLE IF NOT EXISTS "test" ( | -| | "message" STRING NULL FULLTEXT INDEX WITH(analyzer = 'Chinese', backend = 'bloom', case_sensitive = 'true'), | -| | "time" TIMESTAMP(3) NOT NULL, | -| | TIME INDEX ("time") | -| | ) | -| | | -| | ENGINE=mito | -| | WITH( | -| | append_mode = 'true' | -| | ) | -+-------+----------------------------------------------------------------------------------------------------------------+ ++-------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | ++-------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| test | CREATE TABLE IF NOT EXISTS "test" ( | +| | "message" STRING NULL FULLTEXT INDEX WITH(analyzer 
= 'Chinese', backend = 'bloom', case_sensitive = 'true', false_positive_rate = '0.01', granularity = '10240'), | +| | "time" TIMESTAMP(3) NOT NULL, | +| | TIME INDEX ("time") | +| | ) | +| | | +| | ENGINE=mito | +| | WITH( | +| | append_mode = 'true' | +| | ) | ++-------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ SHOW INDEX FROM test; @@ -201,20 +201,80 @@ Affected Rows: 0 SHOW CREATE TABLE test; -+-------+----------------------------------------------------------------------------------------------------------------+ -| Table | Create Table | -+-------+----------------------------------------------------------------------------------------------------------------+ -| test | CREATE TABLE IF NOT EXISTS "test" ( | -| | "message" STRING NULL FULLTEXT INDEX WITH(analyzer = 'Chinese', backend = 'bloom', case_sensitive = 'true'), | -| | "time" TIMESTAMP(3) NOT NULL, | -| | TIME INDEX ("time") | -| | ) | -| | | -| | ENGINE=mito | -| | WITH( | -| | append_mode = 'true' | -| | ) | -+-------+----------------------------------------------------------------------------------------------------------------+ ++-------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | ++-------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| test | CREATE TABLE IF NOT EXISTS "test" ( | +| | "message" STRING NULL FULLTEXT INDEX WITH(analyzer = 'Chinese', backend = 'bloom', case_sensitive = 'true', false_positive_rate = '0.01', granularity = '10240'), | +| | "time" TIMESTAMP(3) NOT NULL, | +| | TIME INDEX ("time") | +| | ) | +| | | +| | ENGINE=mito | +| | WITH( | +| | append_mode = 'true' | +| | ) | 
++-------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +SHOW INDEX FROM test; + ++-------+------------+----------------+--------------+-------------+-----------+-------------+----------+--------+------+-------------------------------+---------+---------------+---------+------------+ +| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression | ++-------+------------+----------------+--------------+-------------+-----------+-------------+----------+--------+------+-------------------------------+---------+---------------+---------+------------+ +| test | 1 | FULLTEXT INDEX | 1 | message | A | | | | YES | greptime-fulltext-index-bloom | | | YES | | +| test | 1 | TIME INDEX | 1 | time | A | | | | NO | | | | YES | | ++-------+------------+----------------+--------------+-------------+-----------+-------------+----------+--------+------+-------------------------------+---------+---------------+---------+------------+ + +ALTER TABLE test MODIFY COLUMN message SET FULLTEXT INDEX WITH(analyzer = 'Chinese', case_sensitive = 'true', backend = 'bloom', granularity = 1000); + +Affected Rows: 0 + +SHOW CREATE TABLE test; + ++-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | ++-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| test | CREATE TABLE IF NOT EXISTS "test" ( | +| | "message" STRING NULL FULLTEXT INDEX WITH(analyzer = 'Chinese', backend = 'bloom', case_sensitive = 'true', false_positive_rate = '0.01', granularity = '1000'), | +| | "time" TIMESTAMP(3) NOT NULL, | 
+| | TIME INDEX ("time") | +| | ) | +| | | +| | ENGINE=mito | +| | WITH( | +| | append_mode = 'true' | +| | ) | ++-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +SHOW INDEX FROM test; + ++-------+------------+----------------+--------------+-------------+-----------+-------------+----------+--------+------+-------------------------------+---------+---------------+---------+------------+ +| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression | ++-------+------------+----------------+--------------+-------------+-----------+-------------+----------+--------+------+-------------------------------+---------+---------------+---------+------------+ +| test | 1 | FULLTEXT INDEX | 1 | message | A | | | | YES | greptime-fulltext-index-bloom | | | YES | | +| test | 1 | TIME INDEX | 1 | time | A | | | | NO | | | | YES | | ++-------+------------+----------------+--------------+-------------+-----------+-------------+----------+--------+------+-------------------------------+---------+---------------+---------+------------+ + +ALTER TABLE test MODIFY COLUMN message SET FULLTEXT INDEX WITH(analyzer = 'Chinese', case_sensitive = 'true', backend = 'bloom', granularity = 1000, false_positive_rate = 0.05); + +Affected Rows: 0 + +SHOW CREATE TABLE test; + ++-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | ++-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| test | CREATE TABLE IF NOT EXISTS "test" ( | +| | "message" STRING NULL FULLTEXT INDEX WITH(analyzer = 
'Chinese', backend = 'bloom', case_sensitive = 'true', false_positive_rate = '0.05', granularity = '1000'), | +| | "time" TIMESTAMP(3) NOT NULL, | +| | TIME INDEX ("time") | +| | ) | +| | | +| | ENGINE=mito | +| | WITH( | +| | append_mode = 'true' | +| | ) | ++-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------+ SHOW INDEX FROM test; @@ -255,6 +315,7 @@ SHOW INDEX FROM test; | test | 1 | TIME INDEX | 1 | time | A | | | | NO | | | | YES | | +-------+------------+----------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ +-- Invalid options ALTER TABLE test MODIFY COLUMN message SET FULLTEXT INDEX WITH(analyzer = 'Chinglish', case_sensitive = 'false'); Error: 1002(Unexpected), Invalid fulltext option: Chinglish, expected: 'English' | 'Chinese' @@ -275,6 +336,14 @@ ALTER TABLE test MODIFY COLUMN message SET FULLTEXT INDEX WITH(backend = 'xor'); Error: 1002(Unexpected), Invalid fulltext option: xor, expected: 'bloom' | 'tantivy' +ALTER TABLE test MODIFY COLUMN message SET FULLTEXT INDEX WITH(analyzer = 'Chinese', case_sensitive = 'true', backend = 'bloom', granularity = 0); + +Error: 1002(Unexpected), Invalid fulltext option: Invalid granularity: 0, expected: positive integer + +ALTER TABLE test MODIFY COLUMN message SET FULLTEXT INDEX WITH(false_positive_rate = '0'); + +Error: 1002(Unexpected), Invalid fulltext option: Invalid false positive rate: 0, expected: 0.0 < rate <= 1.0 + DROP TABLE test; Affected Rows: 0 diff --git a/tests/cases/standalone/common/alter/change_col_fulltext_options.sql b/tests/cases/standalone/common/alter/change_col_fulltext_options.sql index df56e8179e..2dd1d69f0f 100644 --- a/tests/cases/standalone/common/alter/change_col_fulltext_options.sql +++ 
b/tests/cases/standalone/common/alter/change_col_fulltext_options.sql @@ -57,20 +57,32 @@ SHOW CREATE TABLE test; SHOW INDEX FROM test; +ALTER TABLE test MODIFY COLUMN message SET FULLTEXT INDEX WITH(analyzer = 'Chinese', case_sensitive = 'true', backend = 'bloom', granularity = 1000); + +SHOW CREATE TABLE test; + +SHOW INDEX FROM test; + +ALTER TABLE test MODIFY COLUMN message SET FULLTEXT INDEX WITH(analyzer = 'Chinese', case_sensitive = 'true', backend = 'bloom', granularity = 1000, false_positive_rate = 0.05); + +SHOW CREATE TABLE test; + +SHOW INDEX FROM test; + ALTER TABLE test MODIFY COLUMN message SET FULLTEXT INDEX WITH(analyzer = 'Chinese', case_sensitive = 'true', backend = 'tantivy'); SHOW CREATE TABLE test; SHOW INDEX FROM test; +-- Invalid options + ALTER TABLE test MODIFY COLUMN message SET FULLTEXT INDEX WITH(analyzer = 'Chinglish', case_sensitive = 'false'); - ALTER TABLE test MODIFY COLUMN message SET FULLTEXT INDEX WITH(analyzer = 'Chinese', case_sensitive = 'no'); - ALTER TABLE test MODIFY COLUMN time SET FULLTEXT INDEX WITH(analyzer = 'Chinese', case_sensitive = 'false'); - ALTER TABLE test MODIFY COLUMN message SET FULLTEXT INDEX WITH(analyzer = 'English', case_sensitive = 'true'); - ALTER TABLE test MODIFY COLUMN message SET FULLTEXT INDEX WITH(backend = 'xor'); +ALTER TABLE test MODIFY COLUMN message SET FULLTEXT INDEX WITH(analyzer = 'Chinese', case_sensitive = 'true', backend = 'bloom', granularity = 0); +ALTER TABLE test MODIFY COLUMN message SET FULLTEXT INDEX WITH(false_positive_rate = '0'); DROP TABLE test; diff --git a/tests/cases/standalone/common/alter/change_col_skipping_options.result b/tests/cases/standalone/common/alter/change_col_skipping_options.result index 4b6b7705ee..b3b90e2359 100644 --- a/tests/cases/standalone/common/alter/change_col_skipping_options.result +++ b/tests/cases/standalone/common/alter/change_col_skipping_options.result @@ -54,7 +54,7 @@ SELECT * FROM test WHERE metric > 200 ORDER BY time; 
+-------+----------+--------+---------------------+ -- Add skipping index -ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(granularity = 1024, type = 'BLOOM'); +ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(granularity = 1024, type = 'BLOOM', false_positive_rate = 0.01); Affected Rows: 0 @@ -107,7 +107,7 @@ SELECT * FROM test WHERE value < 3.0 ORDER BY time; +-------+----------+--------+---------------------+ -- Test multiple columns with skipping indexes -ALTER TABLE test MODIFY COLUMN metric SET SKIPPING INDEX WITH(granularity = 1024, type = 'BLOOM'); +ALTER TABLE test MODIFY COLUMN metric SET SKIPPING INDEX WITH(granularity = 1024, type = 'BLOOM', false_positive_rate = 0.01); Affected Rows: 0 @@ -124,20 +124,20 @@ SELECT * FROM test WHERE value > 5.0 AND metric < 700 ORDER BY time; -- Verify persistence after restart SHOW CREATE TABLE test; -+-------+-----------------------------------------------------------------------------------+ -| Table | Create Table | -+-------+-----------------------------------------------------------------------------------+ -| test | CREATE TABLE IF NOT EXISTS "test" ( | -| | "value" DOUBLE NULL SKIPPING INDEX WITH(granularity = '1024', type = 'BLOOM'), | -| | "category" STRING NULL, | -| | "metric" BIGINT NULL SKIPPING INDEX WITH(granularity = '1024', type = 'BLOOM'), | -| | "time" TIMESTAMP(3) NOT NULL, | -| | TIME INDEX ("time") | -| | ) | -| | | -| | ENGINE=mito | -| | | -+-------+-----------------------------------------------------------------------------------+ ++-------+-----------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | ++-------+-----------------------------------------------------------------------------------------------------------------+ +| test | CREATE TABLE IF NOT EXISTS "test" ( | +| | "value" DOUBLE NULL SKIPPING INDEX WITH(false_positive_rate = '0.01', granularity = '1024', type = 'BLOOM'), | +| | 
"category" STRING NULL, | +| | "metric" BIGINT NULL SKIPPING INDEX WITH(false_positive_rate = '0.01', granularity = '1024', type = 'BLOOM'), | +| | "time" TIMESTAMP(3) NOT NULL, | +| | TIME INDEX ("time") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++-------+-----------------------------------------------------------------------------------------------------------------+ SHOW INDEX FROM test; @@ -150,32 +150,91 @@ SHOW INDEX FROM test; +-------+------------+----------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------+---------+---------------+---------+------------+ -- Test modifying existing skipping index options -ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(granularity = 8192, type = 'BLOOM'); +ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(granularity = 8192, type = 'BLOOM', false_positive_rate = 0.01); Affected Rows: 0 SHOW CREATE TABLE test; -+-------+-----------------------------------------------------------------------------------+ -| Table | Create Table | -+-------+-----------------------------------------------------------------------------------+ -| test | CREATE TABLE IF NOT EXISTS "test" ( | -| | "value" DOUBLE NULL SKIPPING INDEX WITH(granularity = '8192', type = 'BLOOM'), | -| | "category" STRING NULL, | -| | "metric" BIGINT NULL SKIPPING INDEX WITH(granularity = '1024', type = 'BLOOM'), | -| | "time" TIMESTAMP(3) NOT NULL, | -| | TIME INDEX ("time") | -| | ) | -| | | -| | ENGINE=mito | -| | | -+-------+-----------------------------------------------------------------------------------+ ++-------+-----------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | ++-------+-----------------------------------------------------------------------------------------------------------------+ +| test | CREATE TABLE IF NOT EXISTS "test" ( | +| | "value" DOUBLE NULL SKIPPING INDEX 
WITH(false_positive_rate = '0.01', granularity = '8192', type = 'BLOOM'), | +| | "category" STRING NULL, | +| | "metric" BIGINT NULL SKIPPING INDEX WITH(false_positive_rate = '0.01', granularity = '1024', type = 'BLOOM'), | +| | "time" TIMESTAMP(3) NOT NULL, | +| | TIME INDEX ("time") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++-------+-----------------------------------------------------------------------------------------------------------------+ + +SHOW INDEX FROM test; + ++-------+------------+----------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------+---------+---------------+---------+------------+ +| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression | ++-------+------------+----------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------+---------+---------------+---------+------------+ +| test | 1 | SKIPPING INDEX | 3 | metric | A | | | | YES | greptime-bloom-filter-v1 | | | YES | | +| test | 1 | TIME INDEX | 1 | time | A | | | | NO | | | | YES | | +| test | 1 | SKIPPING INDEX | 1 | value | A | | | | YES | greptime-bloom-filter-v1 | | | YES | | ++-------+------------+----------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------+---------+---------------+---------+------------+ + +-- Test modifying existing skipping index options +ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(granularity = 8192, type = 'BLOOM', false_positive_rate = 0.0001); + +Affected Rows: 0 + +SHOW CREATE TABLE test; + ++-------+------------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | 
++-------+------------------------------------------------------------------------------------------------------------------+ +| test | CREATE TABLE IF NOT EXISTS "test" ( | +| | "value" DOUBLE NULL SKIPPING INDEX WITH(false_positive_rate = '0.0001', granularity = '8192', type = 'BLOOM'), | +| | "category" STRING NULL, | +| | "metric" BIGINT NULL SKIPPING INDEX WITH(false_positive_rate = '0.01', granularity = '1024', type = 'BLOOM'), | +| | "time" TIMESTAMP(3) NOT NULL, | +| | TIME INDEX ("time") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++-------+------------------------------------------------------------------------------------------------------------------+ + +SHOW INDEX FROM test; + ++-------+------------+----------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------+---------+---------------+---------+------------+ +| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression | ++-------+------------+----------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------+---------+---------------+---------+------------+ +| test | 1 | SKIPPING INDEX | 3 | metric | A | | | | YES | greptime-bloom-filter-v1 | | | YES | | +| test | 1 | TIME INDEX | 1 | time | A | | | | NO | | | | YES | | +| test | 1 | SKIPPING INDEX | 1 | value | A | | | | YES | greptime-bloom-filter-v1 | | | YES | | ++-------+------------+----------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------+---------+---------------+---------+------------+ -- Test removing skipping index ALTER TABLE test MODIFY COLUMN value UNSET SKIPPING INDEX; Affected Rows: 0 +SHOW CREATE TABLE test; + ++-------+-----------------------------------------------------------------------------------------------------------------+ 
+| Table | Create Table | ++-------+-----------------------------------------------------------------------------------------------------------------+ +| test | CREATE TABLE IF NOT EXISTS "test" ( | +| | "value" DOUBLE NULL, | +| | "category" STRING NULL, | +| | "metric" BIGINT NULL SKIPPING INDEX WITH(false_positive_rate = '0.01', granularity = '1024', type = 'BLOOM'), | +| | "time" TIMESTAMP(3) NOT NULL, | +| | TIME INDEX ("time") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++-------+-----------------------------------------------------------------------------------------------------------------+ + SHOW INDEX FROM test; +-------+------------+----------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------+---------+---------------+---------+------------+ @@ -186,26 +245,36 @@ SHOW INDEX FROM test; +-------+------------+----------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------+---------+---------------+---------+------------+ -- Test adding back with different options -ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(granularity = 2048, type = 'BLOOM'); +ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(granularity = 2048, type = 'BLOOM', false_positive_rate = 0.01); Affected Rows: 0 SHOW CREATE TABLE test; -+-------+-----------------------------------------------------------------------------------+ -| Table | Create Table | -+-------+-----------------------------------------------------------------------------------+ -| test | CREATE TABLE IF NOT EXISTS "test" ( | -| | "value" DOUBLE NULL SKIPPING INDEX WITH(granularity = '2048', type = 'BLOOM'), | -| | "category" STRING NULL, | -| | "metric" BIGINT NULL SKIPPING INDEX WITH(granularity = '1024', type = 'BLOOM'), | -| | "time" TIMESTAMP(3) NOT NULL, | -| | TIME INDEX ("time") | -| | ) | -| | | -| | ENGINE=mito | -| | | 
-+-------+-----------------------------------------------------------------------------------+ ++-------+-----------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | ++-------+-----------------------------------------------------------------------------------------------------------------+ +| test | CREATE TABLE IF NOT EXISTS "test" ( | +| | "value" DOUBLE NULL SKIPPING INDEX WITH(false_positive_rate = '0.01', granularity = '2048', type = 'BLOOM'), | +| | "category" STRING NULL, | +| | "metric" BIGINT NULL SKIPPING INDEX WITH(false_positive_rate = '0.01', granularity = '1024', type = 'BLOOM'), | +| | "time" TIMESTAMP(3) NOT NULL, | +| | TIME INDEX ("time") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++-------+-----------------------------------------------------------------------------------------------------------------+ + +SHOW INDEX FROM test; + ++-------+------------+----------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------+---------+---------------+---------+------------+ +| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression | ++-------+------------+----------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------+---------+---------------+---------+------------+ +| test | 1 | SKIPPING INDEX | 3 | metric | A | | | | YES | greptime-bloom-filter-v1 | | | YES | | +| test | 1 | TIME INDEX | 1 | time | A | | | | NO | | | | YES | | +| test | 1 | SKIPPING INDEX | 1 | value | A | | | | YES | greptime-bloom-filter-v1 | | | YES | | ++-------+------------+----------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------+---------+---------------+---------+------------+ -- Test removing 
all skipping indexes ALTER TABLE test MODIFY COLUMN value UNSET SKIPPING INDEX; @@ -216,6 +285,23 @@ ALTER TABLE test MODIFY COLUMN metric UNSET SKIPPING INDEX; Affected Rows: 0 +SHOW CREATE TABLE test; + ++-------+-------------------------------------+ +| Table | Create Table | ++-------+-------------------------------------+ +| test | CREATE TABLE IF NOT EXISTS "test" ( | +| | "value" DOUBLE NULL, | +| | "category" STRING NULL, | +| | "metric" BIGINT NULL, | +| | "time" TIMESTAMP(3) NOT NULL, | +| | TIME INDEX ("time") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++-------+-------------------------------------+ + SHOW INDEX FROM test; +-------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+ @@ -225,26 +311,54 @@ SHOW INDEX FROM test; +-------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+ -- Test invalid operations and error cases --- Try to set skipping index on string column (should fail) -ALTER TABLE test MODIFY COLUMN category SET SKIPPING INDEX WITH(granularity = 1024, type = 'BLOOM'); - -Affected Rows: 0 - --- Try to set skipping index on timestamp column (should fail) -ALTER TABLE test MODIFY COLUMN time SET SKIPPING INDEX WITH(granularity = 1024, type = 'BLOOM'); - -Affected Rows: 0 - --- Test invalid option values +-- Test invalid option values (should fail) ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(blabla = 1024, type = 'BLOOM'); Error: 1004(InvalidArguments), Invalid column option, column name: value, error: invalid SKIPPING INDEX option: blabla +-- Test invalid false_positive_rate values (should fail) +ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(granularity = 1024, type = 'BLOOM', false_positive_rate = 0); + +Error: 1002(Unexpected), Invalid skipping index option: Invalid 
false positive rate: 0, expected: 0.0 < rate <= 1.0 + +ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(granularity = 1024, type = 'BLOOM', false_positive_rate = -0.01); + +Error: 1004(InvalidArguments), Unrecognized table option key: false_positive_rate, value: -0.01 + +ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(granularity = 1024, type = 'BLOOM', false_positive_rate = 2); + +Error: 1002(Unexpected), Invalid skipping index option: Invalid false positive rate: 2, expected: 0.0 < rate <= 1.0 + -- Test partial options +ALTER TABLE test MODIFY COLUMN category SET SKIPPING INDEX WITH(granularity = 1024); + +Affected Rows: 0 + +ALTER TABLE test MODIFY COLUMN time SET SKIPPING INDEX WITH(granularity = 1024); + +Affected Rows: 0 + ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(granularity = 4096); Affected Rows: 0 +SHOW CREATE TABLE test; + ++-------+-------------------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | ++-------+-------------------------------------------------------------------------------------------------------------------------+ +| test | CREATE TABLE IF NOT EXISTS "test" ( | +| | "value" DOUBLE NULL SKIPPING INDEX WITH(false_positive_rate = '0.01', granularity = '4096', type = 'BLOOM'), | +| | "category" STRING NULL SKIPPING INDEX WITH(false_positive_rate = '0.01', granularity = '1024', type = 'BLOOM'), | +| | "metric" BIGINT NULL, | +| | "time" TIMESTAMP(3) NOT NULL SKIPPING INDEX WITH(false_positive_rate = '0.01', granularity = '1024', type = 'BLOOM'), | +| | TIME INDEX ("time") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++-------+-------------------------------------------------------------------------------------------------------------------------+ + SHOW INDEX FROM test; 
+-------+------------+----------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------+---------+---------------+---------+------------+ diff --git a/tests/cases/standalone/common/alter/change_col_skipping_options.sql b/tests/cases/standalone/common/alter/change_col_skipping_options.sql index 905a615983..25af217160 100644 --- a/tests/cases/standalone/common/alter/change_col_skipping_options.sql +++ b/tests/cases/standalone/common/alter/change_col_skipping_options.sql @@ -20,7 +20,7 @@ SELECT * FROM test WHERE value > 2.0 ORDER BY time; SELECT * FROM test WHERE metric > 200 ORDER BY time; -- Add skipping index -ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(granularity = 1024, type = 'BLOOM'); +ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(granularity = 1024, type = 'BLOOM', false_positive_rate = 0.01); -- Test queries after adding skipping index SELECT * FROM test WHERE value > 2.0 ORDER BY time; @@ -38,7 +38,7 @@ SELECT * FROM test WHERE value > 6.0 ORDER BY time; SELECT * FROM test WHERE value < 3.0 ORDER BY time; -- Test multiple columns with skipping indexes -ALTER TABLE test MODIFY COLUMN metric SET SKIPPING INDEX WITH(granularity = 1024, type = 'BLOOM'); +ALTER TABLE test MODIFY COLUMN metric SET SKIPPING INDEX WITH(granularity = 1024, type = 'BLOOM', false_positive_rate = 0.01); -- Test queries with multiple skipping indexes SELECT * FROM test WHERE value > 5.0 AND metric < 700 ORDER BY time; @@ -49,34 +49,45 @@ SHOW CREATE TABLE test; SHOW INDEX FROM test; -- Test modifying existing skipping index options -ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(granularity = 8192, type = 'BLOOM'); +ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(granularity = 8192, type = 'BLOOM', false_positive_rate = 0.01); SHOW CREATE TABLE test; +SHOW INDEX FROM test; + +-- Test modifying existing skipping index options +ALTER TABLE test MODIFY COLUMN value SET 
SKIPPING INDEX WITH(granularity = 8192, type = 'BLOOM', false_positive_rate = 0.0001); +SHOW CREATE TABLE test; +SHOW INDEX FROM test; -- Test removing skipping index ALTER TABLE test MODIFY COLUMN value UNSET SKIPPING INDEX; +SHOW CREATE TABLE test; SHOW INDEX FROM test; -- Test adding back with different options -ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(granularity = 2048, type = 'BLOOM'); +ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(granularity = 2048, type = 'BLOOM', false_positive_rate = 0.01); SHOW CREATE TABLE test; +SHOW INDEX FROM test; -- Test removing all skipping indexes ALTER TABLE test MODIFY COLUMN value UNSET SKIPPING INDEX; ALTER TABLE test MODIFY COLUMN metric UNSET SKIPPING INDEX; +SHOW CREATE TABLE test; SHOW INDEX FROM test; -- Test invalid operations and error cases --- Try to set skipping index on string column (should fail) -ALTER TABLE test MODIFY COLUMN category SET SKIPPING INDEX WITH(granularity = 1024, type = 'BLOOM'); - --- Try to set skipping index on timestamp column (should fail) -ALTER TABLE test MODIFY COLUMN time SET SKIPPING INDEX WITH(granularity = 1024, type = 'BLOOM'); - --- Test invalid option values +-- Test invalid option values (should fail) ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(blabla = 1024, type = 'BLOOM'); +-- Test invalid false_positive_rate values (should fail) +ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(granularity = 1024, type = 'BLOOM', false_positive_rate = 0); +ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(granularity = 1024, type = 'BLOOM', false_positive_rate = -0.01); +ALTER TABLE test MODIFY COLUMN value SET SKIPPING INDEX WITH(granularity = 1024, type = 'BLOOM', false_positive_rate = 2); + -- Test partial options +ALTER TABLE test MODIFY COLUMN category SET SKIPPING INDEX WITH(granularity = 1024); +ALTER TABLE test MODIFY COLUMN time SET SKIPPING INDEX WITH(granularity = 1024); ALTER TABLE test MODIFY COLUMN value 
SET SKIPPING INDEX WITH(granularity = 4096); +SHOW CREATE TABLE test; SHOW INDEX FROM test; -- Clean up diff --git a/tests/cases/standalone/common/create/create_metric_table.result b/tests/cases/standalone/common/create/create_metric_table.result index 2c4c863a3b..2895105a9e 100644 --- a/tests/cases/standalone/common/create/create_metric_table.result +++ b/tests/cases/standalone/common/create/create_metric_table.result @@ -244,28 +244,29 @@ DROP TABLE phy; Affected Rows: 0 -CREATE TABLE phy (ts timestamp time index, val double) engine=metric with ("physical_metric_table" = "", "index.type" = "skipping", "index.granularity" = "8192"); +CREATE TABLE phy (ts timestamp time index, val double) engine=metric with ("physical_metric_table" = "", "index.type" = "skipping", "index.granularity" = "8192", "index.false_positive_rate" = "0.05"); Affected Rows: 0 SHOW CREATE TABLE phy; -+-------+------------------------------------+ -| Table | Create Table | -+-------+------------------------------------+ -| phy | CREATE TABLE IF NOT EXISTS "phy" ( | -| | "ts" TIMESTAMP(3) NOT NULL, | -| | "val" DOUBLE NULL, | -| | TIME INDEX ("ts") | -| | ) | -| | | -| | ENGINE=metric | -| | WITH( | -| | 'index.granularity' = '8192', | -| | 'index.type' = 'skipping', | -| | physical_metric_table = '' | -| | ) | -+-------+------------------------------------+ ++-------+-----------------------------------------+ +| Table | Create Table | ++-------+-----------------------------------------+ +| phy | CREATE TABLE IF NOT EXISTS "phy" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "val" DOUBLE NULL, | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=metric | +| | WITH( | +| | 'index.false_positive_rate' = '0.05', | +| | 'index.granularity' = '8192', | +| | 'index.type' = 'skipping', | +| | physical_metric_table = '' | +| | ) | ++-------+-----------------------------------------+ CREATE TABLE t1 (ts timestamp time index, val double, host string primary key) engine=metric with ("on_physical_table" = 
"phy"); @@ -273,24 +274,25 @@ Affected Rows: 0 SHOW CREATE TABLE phy; -+-------+---------------------------------------------------------------------------------+ -| Table | Create Table | -+-------+---------------------------------------------------------------------------------+ -| phy | CREATE TABLE IF NOT EXISTS "phy" ( | -| | "ts" TIMESTAMP(3) NOT NULL, | -| | "val" DOUBLE NULL, | -| | "host" STRING NULL SKIPPING INDEX WITH(granularity = '8192', type = 'BLOOM'), | -| | TIME INDEX ("ts"), | -| | PRIMARY KEY ("host") | -| | ) | -| | | -| | ENGINE=metric | -| | WITH( | -| | 'index.granularity' = '8192', | -| | 'index.type' = 'skipping', | -| | physical_metric_table = '' | -| | ) | -+-------+---------------------------------------------------------------------------------+ ++-------+---------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | ++-------+---------------------------------------------------------------------------------------------------------------+ +| phy | CREATE TABLE IF NOT EXISTS "phy" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "val" DOUBLE NULL, | +| | "host" STRING NULL SKIPPING INDEX WITH(false_positive_rate = '0.05', granularity = '8192', type = 'BLOOM'), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host") | +| | ) | +| | | +| | ENGINE=metric | +| | WITH( | +| | 'index.false_positive_rate' = '0.05', | +| | 'index.granularity' = '8192', | +| | 'index.type' = 'skipping', | +| | physical_metric_table = '' | +| | ) | ++-------+---------------------------------------------------------------------------------------------------------------+ SHOW INDEX FROM phy; @@ -311,7 +313,7 @@ DROP TABLE phy; Affected Rows: 0 -CREATE TABLE phy (ts timestamp time index, val double) engine=metric with ("physical_metric_table" = "", "index.type" = "hihi", "index.granularity" = "8192"); +CREATE TABLE phy (ts timestamp time index, val double) engine=metric with ("physical_metric_table" 
= "", "index.type" = "hihi", "index.granularity" = "8192", "index.false_positive_rate" = "0.01"); Error: 1004(InvalidArguments), Failed to parse region options: Invalid index type: hihi diff --git a/tests/cases/standalone/common/create/create_metric_table.sql b/tests/cases/standalone/common/create/create_metric_table.sql index 8a4aa2b7cd..1839bd017f 100644 --- a/tests/cases/standalone/common/create/create_metric_table.sql +++ b/tests/cases/standalone/common/create/create_metric_table.sql @@ -92,7 +92,7 @@ DESC TABLE t1; DROP TABLE phy; -CREATE TABLE phy (ts timestamp time index, val double) engine=metric with ("physical_metric_table" = "", "index.type" = "skipping", "index.granularity" = "8192"); +CREATE TABLE phy (ts timestamp time index, val double) engine=metric with ("physical_metric_table" = "", "index.type" = "skipping", "index.granularity" = "8192", "index.false_positive_rate" = "0.05"); SHOW CREATE TABLE phy; @@ -106,4 +106,4 @@ DROP TABLE t1; DROP TABLE phy; -CREATE TABLE phy (ts timestamp time index, val double) engine=metric with ("physical_metric_table" = "", "index.type" = "hihi", "index.granularity" = "8192"); +CREATE TABLE phy (ts timestamp time index, val double) engine=metric with ("physical_metric_table" = "", "index.type" = "hihi", "index.granularity" = "8192", "index.false_positive_rate" = "0.01"); diff --git a/tests/cases/standalone/common/create/create_with_fulltext.result b/tests/cases/standalone/common/create/create_with_fulltext.result index d5ae9ee2dd..ecb6cd93f3 100644 --- a/tests/cases/standalone/common/create/create_with_fulltext.result +++ b/tests/cases/standalone/common/create/create_with_fulltext.result @@ -7,18 +7,18 @@ Affected Rows: 0 SHOW CREATE TABLE log; -+-------+-------------------------------------------------------------------------------------------------------------+ -| Table | Create Table | -+-------+-------------------------------------------------------------------------------------------------------------+ -| log | 
CREATE TABLE IF NOT EXISTS "log" ( | -| | "ts" TIMESTAMP(3) NOT NULL, | -| | "msg" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', backend = 'bloom', case_sensitive = 'false'), | -| | TIME INDEX ("ts") | -| | ) | -| | | -| | ENGINE=mito | -| | | -+-------+-------------------------------------------------------------------------------------------------------------+ ++-------+------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | ++-------+------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| log | CREATE TABLE IF NOT EXISTS "log" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "msg" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', backend = 'bloom', case_sensitive = 'false', false_positive_rate = '0.01', granularity = '10240'), | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++-------+------------------------------------------------------------------------------------------------------------------------------------------------------------------+ DROP TABLE log; @@ -33,18 +33,18 @@ Affected Rows: 0 SHOW CREATE TABLE log_with_opts; -+---------------+------------------------------------------------------------------------------------------------------------+ -| Table | Create Table | -+---------------+------------------------------------------------------------------------------------------------------------+ -| log_with_opts | CREATE TABLE IF NOT EXISTS "log_with_opts" ( | -| | "ts" TIMESTAMP(3) NOT NULL, | -| | "msg" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', backend = 'bloom', case_sensitive = 'true'), | -| | TIME INDEX ("ts") | -| | ) | -| | | -| | ENGINE=mito | -| | | 
-+---------------+------------------------------------------------------------------------------------------------------------+ ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| log_with_opts | CREATE TABLE IF NOT EXISTS "log_with_opts" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "msg" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', backend = 'bloom', case_sensitive = 'true', false_positive_rate = '0.01', granularity = '10240'), | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------+ DROP TABLE log_with_opts; @@ -60,19 +60,19 @@ Affected Rows: 0 SHOW CREATE TABLE log_multi_fulltext_cols; -+-------------------------+--------------------------------------------------------------------------------------------------------------+ -| Table | Create Table | -+-------------------------+--------------------------------------------------------------------------------------------------------------+ -| log_multi_fulltext_cols | CREATE TABLE IF NOT EXISTS "log_multi_fulltext_cols" ( | -| | "ts" TIMESTAMP(3) NOT NULL, | -| | "msg" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', backend = 'bloom', case_sensitive = 'false'), | -| | "msg2" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', backend = 'bloom', case_sensitive = 'false'), | -| | TIME INDEX ("ts") | -| | ) | -| | | -| | ENGINE=mito | -| | | -+-------------------------+--------------------------------------------------------------------------------------------------------------+ 
++-------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | ++-------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| log_multi_fulltext_cols | CREATE TABLE IF NOT EXISTS "log_multi_fulltext_cols" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "msg" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', backend = 'bloom', case_sensitive = 'false', false_positive_rate = '0.01', granularity = '10240'), | +| | "msg2" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', backend = 'bloom', case_sensitive = 'false', false_positive_rate = '0.01', granularity = '10240'), | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++-------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------+ DROP TABLE log_multi_fulltext_cols; diff --git a/tests/cases/standalone/common/create/create_with_skipping_index.result b/tests/cases/standalone/common/create/create_with_skipping_index.result index 8cbe4e53f0..ce791c0caa 100644 --- a/tests/cases/standalone/common/create/create_with_skipping_index.result +++ b/tests/cases/standalone/common/create/create_with_skipping_index.result @@ -4,7 +4,11 @@ create table `id` string skipping index, `name` string skipping index with - (granularity = 8192), + ( + granularity = 8192, + false_positive_rate = 0.05, + type = 'BLOOM', + ), ); Affected Rows: 0 @@ -13,19 +17,19 @@ show create table skipping_table; -+----------------+---------------------------------------------------------------------------------+ -| Table | Create Table | 
-+----------------+---------------------------------------------------------------------------------+ -| skipping_table | CREATE TABLE IF NOT EXISTS "skipping_table" ( | -| | "ts" TIMESTAMP(3) NOT NULL, | -| | "id" STRING NULL SKIPPING INDEX WITH(granularity = '10240', type = 'BLOOM'), | -| | "name" STRING NULL SKIPPING INDEX WITH(granularity = '8192', type = 'BLOOM'), | -| | TIME INDEX ("ts") | -| | ) | -| | | -| | ENGINE=mito | -| | | -+----------------+---------------------------------------------------------------------------------+ ++----------------+---------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | ++----------------+---------------------------------------------------------------------------------------------------------------+ +| skipping_table | CREATE TABLE IF NOT EXISTS "skipping_table" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "id" STRING NULL SKIPPING INDEX WITH(false_positive_rate = '0.01', granularity = '10240', type = 'BLOOM'), | +| | "name" STRING NULL SKIPPING INDEX WITH(false_positive_rate = '0.05', granularity = '8192', type = 'BLOOM'), | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++----------------+---------------------------------------------------------------------------------------------------------------+ drop table skipping_table; diff --git a/tests/cases/standalone/common/create/create_with_skipping_index.sql b/tests/cases/standalone/common/create/create_with_skipping_index.sql index 00330519de..06c2b5bb6f 100644 --- a/tests/cases/standalone/common/create/create_with_skipping_index.sql +++ b/tests/cases/standalone/common/create/create_with_skipping_index.sql @@ -4,7 +4,11 @@ create table `id` string skipping index, `name` string skipping index with - (granularity = 8192), + ( + granularity = 8192, + false_positive_rate = 0.05, + type = 'BLOOM', + ), ); show diff --git a/tests/cases/standalone/common/show/show_create.result 
b/tests/cases/standalone/common/show/show_create.result index 5d7019265a..d11aedf99c 100644 --- a/tests/cases/standalone/common/show/show_create.result +++ b/tests/cases/standalone/common/show/show_create.result @@ -240,24 +240,24 @@ Affected Rows: 0 show create table phy; -+-------+---------------------------------------------------------------------------------+ -| Table | Create Table | -+-------+---------------------------------------------------------------------------------+ -| phy | CREATE TABLE IF NOT EXISTS "phy" ( | -| | "ts" TIMESTAMP(3) NOT NULL, | -| | "val" DOUBLE NULL, | -| | "host" STRING NULL SKIPPING INDEX WITH(granularity = '8192', type = 'BLOOM'), | -| | TIME INDEX ("ts"), | -| | PRIMARY KEY ("host") | -| | ) | -| | | -| | ENGINE=metric | -| | WITH( | -| | 'index.granularity' = '8192', | -| | 'index.type' = 'skipping', | -| | physical_metric_table = '' | -| | ) | -+-------+---------------------------------------------------------------------------------+ ++-------+---------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | ++-------+---------------------------------------------------------------------------------------------------------------+ +| phy | CREATE TABLE IF NOT EXISTS "phy" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "val" DOUBLE NULL, | +| | "host" STRING NULL SKIPPING INDEX WITH(false_positive_rate = '0.01', granularity = '8192', type = 'BLOOM'), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host") | +| | ) | +| | | +| | ENGINE=metric | +| | WITH( | +| | 'index.granularity' = '8192', | +| | 'index.type' = 'skipping', | +| | physical_metric_table = '' | +| | ) | ++-------+---------------------------------------------------------------------------------------------------------------+ CREATE TABLE t1 ( ts TIMESTAMP TIME INDEX, @@ -373,20 +373,20 @@ Affected Rows: 0 show create table test_column_constrain_composite_indexes; 
-+-----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Table | Create Table | -+-----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| test_column_constrain_composite_indexes | CREATE TABLE IF NOT EXISTS "test_column_constrain_composite_indexes" ( | -| | "id" INT NULL SKIPPING INDEX WITH(granularity = '10240', type = 'BLOOM') INVERTED INDEX, | -| | "host" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', backend = 'bloom', case_sensitive = 'false') SKIPPING INDEX WITH(granularity = '10240', type = 'BLOOM') INVERTED INDEX, | -| | "ts" TIMESTAMP(3) NOT NULL, | -| | TIME INDEX ("ts"), | -| | PRIMARY KEY ("host") | -| | ) | -| | | -| | ENGINE=mito | -| | | -+-----------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++-----------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | ++-----------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| test_column_constrain_composite_indexes | CREATE TABLE IF NOT EXISTS 
"test_column_constrain_composite_indexes" ( | +| | "id" INT NULL SKIPPING INDEX WITH(false_positive_rate = '0.01', granularity = '10240', type = 'BLOOM') INVERTED INDEX, | +| | "host" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', backend = 'bloom', case_sensitive = 'false', false_positive_rate = '0.01', granularity = '10240') SKIPPING INDEX WITH(false_positive_rate = '0.01', granularity = '10240', type = 'BLOOM') INVERTED INDEX, | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++-----------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ drop table test_column_constrain_composite_indexes; diff --git a/tests/cases/standalone/common/show/show_index.result b/tests/cases/standalone/common/show/show_index.result index 80010b5331..a035ca34bf 100644 --- a/tests/cases/standalone/common/show/show_index.result +++ b/tests/cases/standalone/common/show/show_index.result @@ -38,21 +38,21 @@ Affected Rows: 0 show create table test_no_inverted_index; -+------------------------+-------------------------------------------------------------------------------+ -| Table | Create Table | -+------------------------+-------------------------------------------------------------------------------+ -| test_no_inverted_index | CREATE TABLE IF NOT EXISTS "test_no_inverted_index" ( | -| | "a" STRING NULL, | -| | "b" STRING NULL SKIPPING INDEX WITH(granularity = '10240', type = 'BLOOM'), | -| | "c" DOUBLE NULL, | -| | "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), | -| | TIME INDEX ("ts"), | -| | PRIMARY KEY ("a", "b") | -| | ) | -| | | -| | ENGINE=mito | -| | | 
-+------------------------+-------------------------------------------------------------------------------+ ++------------------------+-------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | ++------------------------+-------------------------------------------------------------------------------------------------------------+ +| test_no_inverted_index | CREATE TABLE IF NOT EXISTS "test_no_inverted_index" ( | +| | "a" STRING NULL, | +| | "b" STRING NULL SKIPPING INDEX WITH(false_positive_rate = '0.01', granularity = '10240', type = 'BLOOM'), | +| | "c" DOUBLE NULL, | +| | "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("a", "b") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++------------------------+-------------------------------------------------------------------------------------------------------------+ SHOW INDEX;