mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-06-01 12:50:40 +00:00
feat: add granularity and false_positive_rate options for indexes (#6416)
* feat: add `granularity` and `false_positive_rate` options for indexes Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * address comments Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * upgrade proto Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> --------- Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
This commit is contained in:
@@ -226,18 +226,20 @@ mod tests {
|
||||
assert!(options.is_none());
|
||||
|
||||
let mut schema = ColumnSchema::new("test", ConcreteDataType::string_datatype(), true)
|
||||
.with_fulltext_options(FulltextOptions {
|
||||
enable: true,
|
||||
analyzer: FulltextAnalyzer::English,
|
||||
case_sensitive: false,
|
||||
backend: FulltextBackend::Bloom,
|
||||
})
|
||||
.with_fulltext_options(FulltextOptions::new_unchecked(
|
||||
true,
|
||||
FulltextAnalyzer::English,
|
||||
false,
|
||||
FulltextBackend::Bloom,
|
||||
10240,
|
||||
0.01,
|
||||
))
|
||||
.unwrap();
|
||||
schema.set_inverted_index(true);
|
||||
let options = options_from_column_schema(&schema).unwrap();
|
||||
assert_eq!(
|
||||
options.options.get(FULLTEXT_GRPC_KEY).unwrap(),
|
||||
"{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\"}"
|
||||
"{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\",\"granularity\":10240,\"false-positive-rate-in-10000\":100}"
|
||||
);
|
||||
assert_eq!(
|
||||
options.options.get(INVERTED_INDEX_GRPC_KEY).unwrap(),
|
||||
@@ -247,16 +249,18 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_options_with_fulltext() {
|
||||
let fulltext = FulltextOptions {
|
||||
enable: true,
|
||||
analyzer: FulltextAnalyzer::English,
|
||||
case_sensitive: false,
|
||||
backend: FulltextBackend::Bloom,
|
||||
};
|
||||
let fulltext = FulltextOptions::new_unchecked(
|
||||
true,
|
||||
FulltextAnalyzer::English,
|
||||
false,
|
||||
FulltextBackend::Bloom,
|
||||
10240,
|
||||
0.01,
|
||||
);
|
||||
let options = options_from_fulltext(&fulltext).unwrap().unwrap();
|
||||
assert_eq!(
|
||||
options.options.get(FULLTEXT_GRPC_KEY).unwrap(),
|
||||
"{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\"}"
|
||||
"{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false,\"backend\":\"bloom\",\"granularity\":10240,\"false-positive-rate-in-10000\":100}"
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ use table::requests::{
|
||||
};
|
||||
|
||||
use crate::error::{
|
||||
InvalidColumnDefSnafu, InvalidSetFulltextOptionRequestSnafu,
|
||||
InvalidColumnDefSnafu, InvalidIndexOptionSnafu, InvalidSetFulltextOptionRequestSnafu,
|
||||
InvalidSetSkippingIndexOptionRequestSnafu, InvalidSetTableOptionRequestSnafu,
|
||||
InvalidUnsetTableOptionRequestSnafu, MissingAlterIndexOptionSnafu, MissingFieldSnafu,
|
||||
MissingTimestampColumnSnafu, Result, UnknownLocationTypeSnafu,
|
||||
@@ -126,18 +126,21 @@ pub fn alter_expr_to_request(table_id: TableId, expr: AlterTableExpr) -> Result<
|
||||
api::v1::set_index::Options::Fulltext(f) => AlterKind::SetIndex {
|
||||
options: SetIndexOptions::Fulltext {
|
||||
column_name: f.column_name.clone(),
|
||||
options: FulltextOptions {
|
||||
enable: f.enable,
|
||||
analyzer: as_fulltext_option_analyzer(
|
||||
options: FulltextOptions::new(
|
||||
f.enable,
|
||||
as_fulltext_option_analyzer(
|
||||
Analyzer::try_from(f.analyzer)
|
||||
.context(InvalidSetFulltextOptionRequestSnafu)?,
|
||||
),
|
||||
case_sensitive: f.case_sensitive,
|
||||
backend: as_fulltext_option_backend(
|
||||
f.case_sensitive,
|
||||
as_fulltext_option_backend(
|
||||
PbFulltextBackend::try_from(f.backend)
|
||||
.context(InvalidSetFulltextOptionRequestSnafu)?,
|
||||
),
|
||||
},
|
||||
f.granularity as u32,
|
||||
f.false_positive_rate,
|
||||
)
|
||||
.context(InvalidIndexOptionSnafu)?,
|
||||
},
|
||||
},
|
||||
api::v1::set_index::Options::Inverted(i) => AlterKind::SetIndex {
|
||||
@@ -148,13 +151,15 @@ pub fn alter_expr_to_request(table_id: TableId, expr: AlterTableExpr) -> Result<
|
||||
api::v1::set_index::Options::Skipping(s) => AlterKind::SetIndex {
|
||||
options: SetIndexOptions::Skipping {
|
||||
column_name: s.column_name,
|
||||
options: SkippingIndexOptions {
|
||||
granularity: s.granularity as u32,
|
||||
index_type: as_skipping_index_type(
|
||||
options: SkippingIndexOptions::new(
|
||||
s.granularity as u32,
|
||||
s.false_positive_rate,
|
||||
as_skipping_index_type(
|
||||
PbSkippingIndexType::try_from(s.skipping_index_type)
|
||||
.context(InvalidSetSkippingIndexOptionRequestSnafu)?,
|
||||
),
|
||||
},
|
||||
)
|
||||
.context(InvalidIndexOptionSnafu)?,
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
@@ -153,6 +153,14 @@ pub enum Error {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid index option"))]
|
||||
InvalidIndexOption {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
#[snafu(source)]
|
||||
error: datatypes::error::Error,
|
||||
},
|
||||
}
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
@@ -180,7 +188,8 @@ impl ErrorExt for Error {
|
||||
| Error::InvalidUnsetTableOptionRequest { .. }
|
||||
| Error::InvalidSetFulltextOptionRequest { .. }
|
||||
| Error::InvalidSetSkippingIndexOptionRequest { .. }
|
||||
| Error::MissingAlterIndexOption { .. } => StatusCode::InvalidArguments,
|
||||
| Error::MissingAlterIndexOption { .. }
|
||||
| Error::InvalidIndexOption { .. } => StatusCode::InvalidArguments,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -31,9 +31,10 @@ pub use crate::schema::column_schema::{
|
||||
ColumnSchema, FulltextAnalyzer, FulltextBackend, FulltextOptions, Metadata,
|
||||
SkippingIndexOptions, SkippingIndexType, COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE,
|
||||
COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_BACKEND,
|
||||
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY,
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY,
|
||||
SKIPPING_INDEX_KEY, TIME_INDEX_KEY,
|
||||
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE,
|
||||
COLUMN_FULLTEXT_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE,
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY,
|
||||
FULLTEXT_KEY, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY, TIME_INDEX_KEY,
|
||||
};
|
||||
pub use crate::schema::constraint::ColumnDefaultConstraint;
|
||||
pub use crate::schema::raw::RawSchema;
|
||||
|
||||
@@ -47,13 +47,18 @@ pub const COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE: &str = "enable";
|
||||
pub const COLUMN_FULLTEXT_OPT_KEY_ANALYZER: &str = "analyzer";
|
||||
pub const COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE: &str = "case_sensitive";
|
||||
pub const COLUMN_FULLTEXT_OPT_KEY_BACKEND: &str = "backend";
|
||||
pub const COLUMN_FULLTEXT_OPT_KEY_GRANULARITY: &str = "granularity";
|
||||
pub const COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE: &str = "false_positive_rate";
|
||||
|
||||
/// Keys used in SKIPPING index options
|
||||
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY: &str = "granularity";
|
||||
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE: &str = "false_positive_rate";
|
||||
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE: &str = "type";
|
||||
|
||||
pub const DEFAULT_GRANULARITY: u32 = 10240;
|
||||
|
||||
pub const DEFAULT_FALSE_POSITIVE_RATE: f64 = 0.01;
|
||||
|
||||
/// Schema of a column, used as an immutable struct.
|
||||
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct ColumnSchema {
|
||||
@@ -504,7 +509,7 @@ impl TryFrom<&ColumnSchema> for Field {
|
||||
}
|
||||
|
||||
/// Fulltext options for a column.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub struct FulltextOptions {
|
||||
/// Whether the fulltext index is enabled.
|
||||
@@ -518,6 +523,92 @@ pub struct FulltextOptions {
|
||||
/// The fulltext backend to use.
|
||||
#[serde(default)]
|
||||
pub backend: FulltextBackend,
|
||||
/// The granularity of the fulltext index (for bloom backend only)
|
||||
#[serde(default = "fulltext_options_default_granularity")]
|
||||
pub granularity: u32,
|
||||
/// The false positive rate of the fulltext index (for bloom backend only)
|
||||
#[serde(default = "fulltext_options_default_false_positive_rate_in_10000")]
|
||||
pub false_positive_rate_in_10000: u32,
|
||||
}
|
||||
|
||||
fn fulltext_options_default_granularity() -> u32 {
|
||||
DEFAULT_GRANULARITY
|
||||
}
|
||||
|
||||
fn fulltext_options_default_false_positive_rate_in_10000() -> u32 {
|
||||
(DEFAULT_FALSE_POSITIVE_RATE * 10000.0) as u32
|
||||
}
|
||||
|
||||
impl FulltextOptions {
|
||||
/// Creates a new fulltext options.
|
||||
pub fn new(
|
||||
enable: bool,
|
||||
analyzer: FulltextAnalyzer,
|
||||
case_sensitive: bool,
|
||||
backend: FulltextBackend,
|
||||
granularity: u32,
|
||||
false_positive_rate: f64,
|
||||
) -> Result<Self> {
|
||||
ensure!(
|
||||
0.0 < false_positive_rate && false_positive_rate <= 1.0,
|
||||
error::InvalidFulltextOptionSnafu {
|
||||
msg: format!(
|
||||
"Invalid false positive rate: {false_positive_rate}, expected: 0.0 < rate <= 1.0"
|
||||
),
|
||||
}
|
||||
);
|
||||
ensure!(
|
||||
granularity > 0,
|
||||
error::InvalidFulltextOptionSnafu {
|
||||
msg: format!("Invalid granularity: {granularity}, expected: positive integer"),
|
||||
}
|
||||
);
|
||||
Ok(Self::new_unchecked(
|
||||
enable,
|
||||
analyzer,
|
||||
case_sensitive,
|
||||
backend,
|
||||
granularity,
|
||||
false_positive_rate,
|
||||
))
|
||||
}
|
||||
|
||||
/// Creates a new fulltext options without checking `false_positive_rate` and `granularity`.
|
||||
pub fn new_unchecked(
|
||||
enable: bool,
|
||||
analyzer: FulltextAnalyzer,
|
||||
case_sensitive: bool,
|
||||
backend: FulltextBackend,
|
||||
granularity: u32,
|
||||
false_positive_rate: f64,
|
||||
) -> Self {
|
||||
Self {
|
||||
enable,
|
||||
analyzer,
|
||||
case_sensitive,
|
||||
backend,
|
||||
granularity,
|
||||
false_positive_rate_in_10000: (false_positive_rate * 10000.0) as u32,
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets the false positive rate.
|
||||
pub fn false_positive_rate(&self) -> f64 {
|
||||
self.false_positive_rate_in_10000 as f64 / 10000.0
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for FulltextOptions {
|
||||
fn default() -> Self {
|
||||
Self::new_unchecked(
|
||||
false,
|
||||
FulltextAnalyzer::default(),
|
||||
false,
|
||||
FulltextBackend::default(),
|
||||
DEFAULT_GRANULARITY,
|
||||
DEFAULT_FALSE_POSITIVE_RATE,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for FulltextOptions {
|
||||
@@ -527,6 +618,10 @@ impl fmt::Display for FulltextOptions {
|
||||
write!(f, ", analyzer={}", self.analyzer)?;
|
||||
write!(f, ", case_sensitive={}", self.case_sensitive)?;
|
||||
write!(f, ", backend={}", self.backend)?;
|
||||
if self.backend == FulltextBackend::Bloom {
|
||||
write!(f, ", granularity={}", self.granularity)?;
|
||||
write!(f, ", false_positive_rate={}", self.false_positive_rate())?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -611,6 +706,45 @@ impl TryFrom<HashMap<String, String>> for FulltextOptions {
|
||||
}
|
||||
}
|
||||
|
||||
if fulltext_options.backend == FulltextBackend::Bloom {
|
||||
// Parse granularity with default value 10240
|
||||
let granularity = match options.get(COLUMN_FULLTEXT_OPT_KEY_GRANULARITY) {
|
||||
Some(value) => value
|
||||
.parse::<u32>()
|
||||
.ok()
|
||||
.filter(|&v| v > 0)
|
||||
.ok_or_else(|| {
|
||||
error::InvalidFulltextOptionSnafu {
|
||||
msg: format!(
|
||||
"Invalid granularity: {value}, expected: positive integer"
|
||||
),
|
||||
}
|
||||
.build()
|
||||
})?,
|
||||
None => DEFAULT_GRANULARITY,
|
||||
};
|
||||
fulltext_options.granularity = granularity;
|
||||
|
||||
// Parse false positive rate with default value 0.01
|
||||
let false_positive_rate = match options.get(COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE)
|
||||
{
|
||||
Some(value) => value
|
||||
.parse::<f64>()
|
||||
.ok()
|
||||
.filter(|&v| v > 0.0 && v <= 1.0)
|
||||
.ok_or_else(|| {
|
||||
error::InvalidFulltextOptionSnafu {
|
||||
msg: format!(
|
||||
"Invalid false positive rate: {value}, expected: 0.0 < rate <= 1.0"
|
||||
),
|
||||
}
|
||||
.build()
|
||||
})?,
|
||||
None => DEFAULT_FALSE_POSITIVE_RATE,
|
||||
};
|
||||
fulltext_options.false_positive_rate_in_10000 = (false_positive_rate * 10000.0) as u32;
|
||||
}
|
||||
|
||||
Ok(fulltext_options)
|
||||
}
|
||||
}
|
||||
@@ -638,23 +772,72 @@ impl fmt::Display for FulltextAnalyzer {
|
||||
pub struct SkippingIndexOptions {
|
||||
/// The granularity of the skip index.
|
||||
pub granularity: u32,
|
||||
/// The false positive rate of the skip index (in ten-thousandths, e.g., 100 = 1%).
|
||||
pub false_positive_rate_in_10000: u32,
|
||||
/// The type of the skip index.
|
||||
#[serde(default)]
|
||||
pub index_type: SkippingIndexType,
|
||||
}
|
||||
|
||||
impl SkippingIndexOptions {
|
||||
/// Creates a new skipping index options without checking `false_positive_rate` and `granularity`.
|
||||
pub fn new_unchecked(
|
||||
granularity: u32,
|
||||
false_positive_rate: f64,
|
||||
index_type: SkippingIndexType,
|
||||
) -> Self {
|
||||
Self {
|
||||
granularity,
|
||||
false_positive_rate_in_10000: (false_positive_rate * 10000.0) as u32,
|
||||
index_type,
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new skipping index options.
|
||||
pub fn new(
|
||||
granularity: u32,
|
||||
false_positive_rate: f64,
|
||||
index_type: SkippingIndexType,
|
||||
) -> Result<Self> {
|
||||
ensure!(
|
||||
0.0 < false_positive_rate && false_positive_rate <= 1.0,
|
||||
error::InvalidSkippingIndexOptionSnafu {
|
||||
msg: format!("Invalid false positive rate: {false_positive_rate}, expected: 0.0 < rate <= 1.0"),
|
||||
}
|
||||
);
|
||||
ensure!(
|
||||
granularity > 0,
|
||||
error::InvalidSkippingIndexOptionSnafu {
|
||||
msg: format!("Invalid granularity: {granularity}, expected: positive integer"),
|
||||
}
|
||||
);
|
||||
Ok(Self::new_unchecked(
|
||||
granularity,
|
||||
false_positive_rate,
|
||||
index_type,
|
||||
))
|
||||
}
|
||||
|
||||
/// Gets the false positive rate.
|
||||
pub fn false_positive_rate(&self) -> f64 {
|
||||
self.false_positive_rate_in_10000 as f64 / 10000.0
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for SkippingIndexOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
granularity: DEFAULT_GRANULARITY,
|
||||
index_type: SkippingIndexType::default(),
|
||||
}
|
||||
Self::new_unchecked(
|
||||
DEFAULT_GRANULARITY,
|
||||
DEFAULT_FALSE_POSITIVE_RATE,
|
||||
SkippingIndexType::default(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for SkippingIndexOptions {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "granularity={}", self.granularity)?;
|
||||
write!(f, ", false_positive_rate={}", self.false_positive_rate())?;
|
||||
write!(f, ", index_type={}", self.index_type)?;
|
||||
Ok(())
|
||||
}
|
||||
@@ -681,15 +864,37 @@ impl TryFrom<HashMap<String, String>> for SkippingIndexOptions {
|
||||
fn try_from(options: HashMap<String, String>) -> Result<Self> {
|
||||
// Parse granularity with default value 1
|
||||
let granularity = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY) {
|
||||
Some(value) => value.parse::<u32>().map_err(|_| {
|
||||
error::InvalidSkippingIndexOptionSnafu {
|
||||
msg: format!("Invalid granularity: {value}, expected: positive integer"),
|
||||
}
|
||||
.build()
|
||||
})?,
|
||||
Some(value) => value
|
||||
.parse::<u32>()
|
||||
.ok()
|
||||
.filter(|&v| v > 0)
|
||||
.ok_or_else(|| {
|
||||
error::InvalidSkippingIndexOptionSnafu {
|
||||
msg: format!("Invalid granularity: {value}, expected: positive integer"),
|
||||
}
|
||||
.build()
|
||||
})?,
|
||||
None => DEFAULT_GRANULARITY,
|
||||
};
|
||||
|
||||
// Parse false positive rate with default value 100
|
||||
let false_positive_rate =
|
||||
match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE) {
|
||||
Some(value) => value
|
||||
.parse::<f64>()
|
||||
.ok()
|
||||
.filter(|&v| v > 0.0 && v <= 1.0)
|
||||
.ok_or_else(|| {
|
||||
error::InvalidSkippingIndexOptionSnafu {
|
||||
msg: format!(
|
||||
"Invalid false positive rate: {value}, expected: 0.0 < rate <= 1.0"
|
||||
),
|
||||
}
|
||||
.build()
|
||||
})?,
|
||||
None => DEFAULT_FALSE_POSITIVE_RATE,
|
||||
};
|
||||
|
||||
// Parse index type with default value BloomFilter
|
||||
let index_type = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE) {
|
||||
Some(typ) => match typ.to_ascii_uppercase().as_str() {
|
||||
@@ -704,10 +909,11 @@ impl TryFrom<HashMap<String, String>> for SkippingIndexOptions {
|
||||
None => SkippingIndexType::default(),
|
||||
};
|
||||
|
||||
Ok(SkippingIndexOptions {
|
||||
Ok(SkippingIndexOptions::new_unchecked(
|
||||
granularity,
|
||||
false_positive_rate,
|
||||
index_type,
|
||||
})
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -218,6 +218,7 @@ mod tests {
|
||||
let mut writer = Cursor::new(Vec::new());
|
||||
let mut creator = BloomFilterCreator::new(
|
||||
4,
|
||||
0.01,
|
||||
Arc::new(MockExternalTempFileProvider::new()),
|
||||
Arc::new(AtomicUsize::new(0)),
|
||||
None,
|
||||
|
||||
@@ -30,9 +30,6 @@ use crate::bloom_filter::SEED;
|
||||
use crate::external_provider::ExternalTempFileProvider;
|
||||
use crate::Bytes;
|
||||
|
||||
/// The false positive rate of the Bloom filter.
|
||||
pub const FALSE_POSITIVE_RATE: f64 = 0.01;
|
||||
|
||||
/// `BloomFilterCreator` is responsible for creating and managing bloom filters
|
||||
/// for a set of elements. It divides the rows into segments and creates
|
||||
/// bloom filters for each segment.
|
||||
@@ -79,6 +76,7 @@ impl BloomFilterCreator {
|
||||
/// `rows_per_segment` <= 0
|
||||
pub fn new(
|
||||
rows_per_segment: usize,
|
||||
false_positive_rate: f64,
|
||||
intermediate_provider: Arc<dyn ExternalTempFileProvider>,
|
||||
global_memory_usage: Arc<AtomicUsize>,
|
||||
global_memory_usage_threshold: Option<usize>,
|
||||
@@ -95,6 +93,7 @@ impl BloomFilterCreator {
|
||||
cur_seg_distinct_elems_mem_usage: 0,
|
||||
global_memory_usage: global_memory_usage.clone(),
|
||||
finalized_bloom_filters: FinalizedBloomFilterStorage::new(
|
||||
false_positive_rate,
|
||||
intermediate_provider,
|
||||
global_memory_usage,
|
||||
global_memory_usage_threshold,
|
||||
@@ -263,6 +262,7 @@ mod tests {
|
||||
let mut writer = Cursor::new(Vec::new());
|
||||
let mut creator = BloomFilterCreator::new(
|
||||
2,
|
||||
0.01,
|
||||
Arc::new(MockExternalTempFileProvider::new()),
|
||||
Arc::new(AtomicUsize::new(0)),
|
||||
None,
|
||||
@@ -337,6 +337,7 @@ mod tests {
|
||||
let mut writer = Cursor::new(Vec::new());
|
||||
let mut creator: BloomFilterCreator = BloomFilterCreator::new(
|
||||
2,
|
||||
0.01,
|
||||
Arc::new(MockExternalTempFileProvider::new()),
|
||||
Arc::new(AtomicUsize::new(0)),
|
||||
None,
|
||||
@@ -418,6 +419,7 @@ mod tests {
|
||||
let mut writer = Cursor::new(Vec::new());
|
||||
let mut creator = BloomFilterCreator::new(
|
||||
2,
|
||||
0.01,
|
||||
Arc::new(MockExternalTempFileProvider::new()),
|
||||
Arc::new(AtomicUsize::new(0)),
|
||||
None,
|
||||
|
||||
@@ -23,7 +23,7 @@ use futures::{stream, AsyncWriteExt, Stream};
|
||||
use snafu::ResultExt;
|
||||
|
||||
use crate::bloom_filter::creator::intermediate_codec::IntermediateBloomFilterCodecV1;
|
||||
use crate::bloom_filter::creator::{FALSE_POSITIVE_RATE, SEED};
|
||||
use crate::bloom_filter::creator::SEED;
|
||||
use crate::bloom_filter::error::{IntermediateSnafu, IoSnafu, Result};
|
||||
use crate::external_provider::ExternalTempFileProvider;
|
||||
use crate::Bytes;
|
||||
@@ -33,6 +33,9 @@ const MIN_MEMORY_USAGE_THRESHOLD: usize = 1024 * 1024; // 1MB
|
||||
|
||||
/// Storage for finalized Bloom filters.
|
||||
pub struct FinalizedBloomFilterStorage {
|
||||
/// The false positive rate of the Bloom filter.
|
||||
false_positive_rate: f64,
|
||||
|
||||
/// Indices of the segments in the sequence of finalized Bloom filters.
|
||||
segment_indices: Vec<usize>,
|
||||
|
||||
@@ -65,12 +68,14 @@ pub struct FinalizedBloomFilterStorage {
|
||||
impl FinalizedBloomFilterStorage {
|
||||
/// Creates a new `FinalizedBloomFilterStorage`.
|
||||
pub fn new(
|
||||
false_positive_rate: f64,
|
||||
intermediate_provider: Arc<dyn ExternalTempFileProvider>,
|
||||
global_memory_usage: Arc<AtomicUsize>,
|
||||
global_memory_usage_threshold: Option<usize>,
|
||||
) -> Self {
|
||||
let external_prefix = format!("intm-bloom-filters-{}", uuid::Uuid::new_v4());
|
||||
Self {
|
||||
false_positive_rate,
|
||||
segment_indices: Vec::new(),
|
||||
in_memory: Vec::new(),
|
||||
intermediate_file_id_counter: 0,
|
||||
@@ -96,7 +101,7 @@ impl FinalizedBloomFilterStorage {
|
||||
elems: impl IntoIterator<Item = Bytes>,
|
||||
element_count: usize,
|
||||
) -> Result<()> {
|
||||
let mut bf = BloomFilter::with_false_pos(FALSE_POSITIVE_RATE)
|
||||
let mut bf = BloomFilter::with_false_pos(self.false_positive_rate)
|
||||
.seed(&SEED)
|
||||
.expected_items(element_count);
|
||||
for elem in elems.into_iter() {
|
||||
@@ -284,6 +289,7 @@ mod tests {
|
||||
let global_memory_usage_threshold = Some(1024 * 1024); // 1MB
|
||||
let provider = Arc::new(mock_provider);
|
||||
let mut storage = FinalizedBloomFilterStorage::new(
|
||||
0.01,
|
||||
provider,
|
||||
global_memory_usage.clone(),
|
||||
global_memory_usage_threshold,
|
||||
@@ -340,6 +346,7 @@ mod tests {
|
||||
let global_memory_usage_threshold = Some(1024 * 1024); // 1MB
|
||||
let provider = Arc::new(mock_provider);
|
||||
let mut storage = FinalizedBloomFilterStorage::new(
|
||||
0.01,
|
||||
provider,
|
||||
global_memory_usage.clone(),
|
||||
global_memory_usage_threshold,
|
||||
|
||||
@@ -222,6 +222,7 @@ mod tests {
|
||||
let mut writer = Cursor::new(vec![]);
|
||||
let mut creator = BloomFilterCreator::new(
|
||||
2,
|
||||
0.01,
|
||||
Arc::new(MockExternalTempFileProvider::new()),
|
||||
Arc::new(AtomicUsize::new(0)),
|
||||
None,
|
||||
|
||||
@@ -45,6 +45,7 @@ impl BloomFilterFulltextIndexCreator {
|
||||
pub fn new(
|
||||
config: Config,
|
||||
rows_per_segment: usize,
|
||||
false_positive_rate: f64,
|
||||
intermediate_provider: Arc<dyn ExternalTempFileProvider>,
|
||||
global_memory_usage: Arc<AtomicUsize>,
|
||||
global_memory_usage_threshold: Option<usize>,
|
||||
@@ -57,6 +58,7 @@ impl BloomFilterFulltextIndexCreator {
|
||||
|
||||
let inner = BloomFilterCreator::new(
|
||||
rows_per_segment,
|
||||
false_positive_rate,
|
||||
intermediate_provider,
|
||||
global_memory_usage,
|
||||
global_memory_usage_threshold,
|
||||
|
||||
@@ -145,12 +145,19 @@ impl DataRegion {
|
||||
IndexOptions::Inverted => {
|
||||
c.column_schema.set_inverted_index(true);
|
||||
}
|
||||
IndexOptions::Skipping { granularity } => {
|
||||
IndexOptions::Skipping {
|
||||
granularity,
|
||||
false_positive_rate,
|
||||
} => {
|
||||
c.column_schema
|
||||
.set_skipping_options(&SkippingIndexOptions {
|
||||
granularity,
|
||||
index_type: SkippingIndexType::BloomFilter,
|
||||
})
|
||||
.set_skipping_options(
|
||||
&SkippingIndexOptions::new(
|
||||
granularity,
|
||||
false_positive_rate,
|
||||
SkippingIndexType::BloomFilter,
|
||||
)
|
||||
.context(SetSkippingIndexOptionSnafu)?,
|
||||
)
|
||||
.context(SetSkippingIndexOptionSnafu)?;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -55,6 +55,7 @@ use crate::utils::{
|
||||
};
|
||||
|
||||
const DEFAULT_TABLE_ID_SKIPPING_INDEX_GRANULARITY: u32 = 1024;
|
||||
const DEFAULT_TABLE_ID_SKIPPING_INDEX_FALSE_POSITIVE_RATE: f64 = 0.01;
|
||||
|
||||
impl MetricEngineInner {
|
||||
pub async fn create_regions(
|
||||
@@ -542,10 +543,11 @@ impl MetricEngineInner {
|
||||
ConcreteDataType::uint32_datatype(),
|
||||
false,
|
||||
)
|
||||
.with_skipping_options(SkippingIndexOptions {
|
||||
granularity: DEFAULT_TABLE_ID_SKIPPING_INDEX_GRANULARITY,
|
||||
index_type: datatypes::schema::SkippingIndexType::BloomFilter,
|
||||
})
|
||||
.with_skipping_options(SkippingIndexOptions::new_unchecked(
|
||||
DEFAULT_TABLE_ID_SKIPPING_INDEX_GRANULARITY,
|
||||
DEFAULT_TABLE_ID_SKIPPING_INDEX_FALSE_POSITIVE_RATE,
|
||||
datatypes::schema::SkippingIndexType::BloomFilter,
|
||||
))
|
||||
.unwrap(),
|
||||
};
|
||||
let tsid_col = ColumnMetadata {
|
||||
|
||||
@@ -17,6 +17,8 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use store_api::metric_engine_consts::{
|
||||
METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION,
|
||||
METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION_DEFAULT,
|
||||
METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION,
|
||||
METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION_DEFAULT, METRIC_ENGINE_INDEX_TYPE_OPTION,
|
||||
};
|
||||
@@ -31,19 +33,20 @@ use crate::error::{Error, ParseRegionOptionsSnafu, Result};
|
||||
const SEG_ROW_COUNT_FOR_DATA_REGION: u32 = 256;
|
||||
|
||||
/// Physical region options.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||
pub struct PhysicalRegionOptions {
|
||||
pub index: IndexOptions,
|
||||
}
|
||||
|
||||
/// Index options for auto created columns
|
||||
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
|
||||
#[derive(Debug, Clone, Copy, Default, PartialEq)]
|
||||
pub enum IndexOptions {
|
||||
#[default]
|
||||
None,
|
||||
Inverted,
|
||||
Skipping {
|
||||
granularity: u32,
|
||||
false_positive_rate: f64,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -54,6 +57,7 @@ pub fn set_data_region_options(
|
||||
) {
|
||||
options.remove(METRIC_ENGINE_INDEX_TYPE_OPTION);
|
||||
options.remove(METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION);
|
||||
options.remove(METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION);
|
||||
options.insert(
|
||||
"index.inverted_index.segment_row_count".to_string(),
|
||||
SEG_ROW_COUNT_FOR_DATA_REGION.to_string(),
|
||||
@@ -93,7 +97,23 @@ impl TryFrom<&HashMap<String, String>> for PhysicalRegionOptions {
|
||||
})
|
||||
},
|
||||
)?;
|
||||
Ok(IndexOptions::Skipping { granularity })
|
||||
let false_positive_rate = value
|
||||
.get(METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION)
|
||||
.map_or(
|
||||
Ok(METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION_DEFAULT),
|
||||
|f| {
|
||||
f.parse().ok().filter(|f| *f > 0.0 && *f <= 1.0).ok_or(
|
||||
ParseRegionOptionsSnafu {
|
||||
reason: format!("Invalid false positive rate: {}", f),
|
||||
}
|
||||
.build(),
|
||||
)
|
||||
},
|
||||
)?;
|
||||
Ok(IndexOptions::Skipping {
|
||||
granularity,
|
||||
false_positive_rate,
|
||||
})
|
||||
}
|
||||
Some(index_type) => ParseRegionOptionsSnafu {
|
||||
reason: format!("Invalid index type: {}", index_type),
|
||||
@@ -121,11 +141,16 @@ mod tests {
|
||||
METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION.to_string(),
|
||||
"102400".to_string(),
|
||||
);
|
||||
options.insert(
|
||||
METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION.to_string(),
|
||||
"0.01".to_string(),
|
||||
);
|
||||
set_data_region_options(&mut options, false);
|
||||
|
||||
for key in [
|
||||
METRIC_ENGINE_INDEX_TYPE_OPTION,
|
||||
METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION,
|
||||
METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION,
|
||||
] {
|
||||
assert_eq!(options.get(key), None);
|
||||
}
|
||||
@@ -154,11 +179,16 @@ mod tests {
|
||||
METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION.to_string(),
|
||||
"102400".to_string(),
|
||||
);
|
||||
options.insert(
|
||||
METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION.to_string(),
|
||||
"0.01".to_string(),
|
||||
);
|
||||
let physical_region_options = PhysicalRegionOptions::try_from(&options).unwrap();
|
||||
assert_eq!(
|
||||
physical_region_options.index,
|
||||
IndexOptions::Skipping {
|
||||
granularity: 102400
|
||||
granularity: 102400,
|
||||
false_positive_rate: 0.01,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
@@ -84,12 +84,14 @@ fn alter_column_fulltext_options() -> RegionAlterRequest {
|
||||
kind: AlterKind::SetIndex {
|
||||
options: ApiSetIndexOptions::Fulltext {
|
||||
column_name: "tag_0".to_string(),
|
||||
options: FulltextOptions {
|
||||
enable: true,
|
||||
analyzer: FulltextAnalyzer::English,
|
||||
case_sensitive: false,
|
||||
backend: FulltextBackend::Bloom,
|
||||
},
|
||||
options: FulltextOptions::new_unchecked(
|
||||
true,
|
||||
FulltextAnalyzer::English,
|
||||
false,
|
||||
FulltextBackend::Bloom,
|
||||
1000,
|
||||
0.01,
|
||||
),
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -553,12 +555,14 @@ async fn test_alter_column_fulltext_options() {
|
||||
// Wait for the write job.
|
||||
alter_job.await.unwrap();
|
||||
|
||||
let expect_fulltext_options = FulltextOptions {
|
||||
enable: true,
|
||||
analyzer: FulltextAnalyzer::English,
|
||||
case_sensitive: false,
|
||||
backend: FulltextBackend::Bloom,
|
||||
};
|
||||
let expect_fulltext_options = FulltextOptions::new_unchecked(
|
||||
true,
|
||||
FulltextAnalyzer::English,
|
||||
false,
|
||||
FulltextBackend::Bloom,
|
||||
1000,
|
||||
0.01,
|
||||
);
|
||||
let check_fulltext_options = |engine: &MitoEngine, expected: &FulltextOptions| {
|
||||
let current_fulltext_options = engine
|
||||
.get_region(region_id)
|
||||
|
||||
@@ -45,8 +45,6 @@ pub(crate) const TYPE_INVERTED_INDEX: &str = "inverted_index";
|
||||
pub(crate) const TYPE_FULLTEXT_INDEX: &str = "fulltext_index";
|
||||
pub(crate) const TYPE_BLOOM_FILTER_INDEX: &str = "bloom_filter_index";
|
||||
|
||||
const DEFAULT_FULLTEXT_BLOOM_ROW_GRANULARITY: usize = 8096;
|
||||
|
||||
/// Output of the index creation.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct IndexOutput {
|
||||
@@ -293,7 +291,6 @@ impl IndexerBuilderImpl {
|
||||
&self.intermediate_manager,
|
||||
&self.metadata,
|
||||
self.fulltext_index_config.compress,
|
||||
DEFAULT_FULLTEXT_BLOOM_ROW_GRANULARITY,
|
||||
mem_limit,
|
||||
)
|
||||
.await;
|
||||
@@ -455,10 +452,11 @@ mod tests {
|
||||
if with_skipping_bloom {
|
||||
let column_schema =
|
||||
ColumnSchema::new("bloom", ConcreteDataType::string_datatype(), false)
|
||||
.with_skipping_options(SkippingIndexOptions {
|
||||
granularity: 42,
|
||||
index_type: SkippingIndexType::BloomFilter,
|
||||
})
|
||||
.with_skipping_options(SkippingIndexOptions::new_unchecked(
|
||||
42,
|
||||
0.01,
|
||||
SkippingIndexType::BloomFilter,
|
||||
))
|
||||
.unwrap();
|
||||
|
||||
let column = ColumnMetadata {
|
||||
|
||||
@@ -97,6 +97,7 @@ impl BloomFilterIndexer {
|
||||
|
||||
let creator = BloomFilterCreator::new(
|
||||
options.granularity as _,
|
||||
options.false_positive_rate(),
|
||||
temp_file_provider.clone(),
|
||||
global_memory_usage.clone(),
|
||||
memory_usage_threshold,
|
||||
@@ -408,10 +409,11 @@ pub(crate) mod tests {
|
||||
ConcreteDataType::string_datatype(),
|
||||
false,
|
||||
)
|
||||
.with_skipping_options(SkippingIndexOptions {
|
||||
index_type: SkippingIndexType::BloomFilter,
|
||||
granularity: 2,
|
||||
})
|
||||
.with_skipping_options(SkippingIndexOptions::new_unchecked(
|
||||
2,
|
||||
0.01,
|
||||
SkippingIndexType::BloomFilter,
|
||||
))
|
||||
.unwrap(),
|
||||
semantic_type: SemanticType::Tag,
|
||||
column_id: 1,
|
||||
@@ -431,10 +433,11 @@ pub(crate) mod tests {
|
||||
ConcreteDataType::uint64_datatype(),
|
||||
false,
|
||||
)
|
||||
.with_skipping_options(SkippingIndexOptions {
|
||||
index_type: SkippingIndexType::BloomFilter,
|
||||
granularity: 4,
|
||||
})
|
||||
.with_skipping_options(SkippingIndexOptions::new_unchecked(
|
||||
4,
|
||||
0.01,
|
||||
SkippingIndexType::BloomFilter,
|
||||
))
|
||||
.unwrap(),
|
||||
semantic_type: SemanticType::Field,
|
||||
column_id: 3,
|
||||
|
||||
@@ -465,7 +465,6 @@ impl IndexSource {
|
||||
/// Returns the blob with the specified key from local cache or remote store.
|
||||
///
|
||||
/// Returns `None` if the blob is not found.
|
||||
#[allow(unused)]
|
||||
async fn blob(
|
||||
&self,
|
||||
file_id: FileId,
|
||||
|
||||
@@ -60,7 +60,6 @@ impl FulltextIndexer {
|
||||
intermediate_manager: &IntermediateManager,
|
||||
metadata: &RegionMetadataRef,
|
||||
compress: bool,
|
||||
bloom_row_granularity: usize,
|
||||
mem_limit: usize,
|
||||
) -> Result<Option<Self>> {
|
||||
let mut creators = HashMap::new();
|
||||
@@ -106,7 +105,8 @@ impl FulltextIndexer {
|
||||
let global_memory_usage = Arc::new(AtomicUsize::new(0));
|
||||
let creator = BloomFilterFulltextIndexCreator::new(
|
||||
config,
|
||||
bloom_row_granularity,
|
||||
options.granularity as _,
|
||||
options.false_positive_rate(),
|
||||
temp_file_provider,
|
||||
global_memory_usage,
|
||||
Some(mem_limit),
|
||||
@@ -400,12 +400,14 @@ mod tests {
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
)
|
||||
.with_fulltext_options(FulltextOptions {
|
||||
enable: true,
|
||||
analyzer: FulltextAnalyzer::English,
|
||||
case_sensitive: true,
|
||||
backend: backend.clone(),
|
||||
})
|
||||
.with_fulltext_options(FulltextOptions::new_unchecked(
|
||||
true,
|
||||
FulltextAnalyzer::English,
|
||||
true,
|
||||
backend.clone(),
|
||||
1,
|
||||
0.01,
|
||||
))
|
||||
.unwrap(),
|
||||
semantic_type: SemanticType::Field,
|
||||
column_id: 1,
|
||||
@@ -416,12 +418,14 @@ mod tests {
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
)
|
||||
.with_fulltext_options(FulltextOptions {
|
||||
enable: true,
|
||||
analyzer: FulltextAnalyzer::English,
|
||||
case_sensitive: false,
|
||||
backend: backend.clone(),
|
||||
})
|
||||
.with_fulltext_options(FulltextOptions::new_unchecked(
|
||||
true,
|
||||
FulltextAnalyzer::English,
|
||||
false,
|
||||
backend.clone(),
|
||||
1,
|
||||
0.01,
|
||||
))
|
||||
.unwrap(),
|
||||
semantic_type: SemanticType::Field,
|
||||
column_id: 2,
|
||||
@@ -432,12 +436,14 @@ mod tests {
|
||||
ConcreteDataType::string_datatype(),
|
||||
true,
|
||||
)
|
||||
.with_fulltext_options(FulltextOptions {
|
||||
enable: true,
|
||||
analyzer: FulltextAnalyzer::Chinese,
|
||||
case_sensitive: false,
|
||||
backend: backend.clone(),
|
||||
})
|
||||
.with_fulltext_options(FulltextOptions::new_unchecked(
|
||||
true,
|
||||
FulltextAnalyzer::Chinese,
|
||||
false,
|
||||
backend.clone(),
|
||||
1,
|
||||
0.01,
|
||||
))
|
||||
.unwrap(),
|
||||
semantic_type: SemanticType::Field,
|
||||
column_id: 3,
|
||||
@@ -547,7 +553,6 @@ mod tests {
|
||||
&intm_mgr,
|
||||
®ion_metadata,
|
||||
true,
|
||||
1,
|
||||
1024,
|
||||
)
|
||||
.await
|
||||
|
||||
@@ -593,6 +593,8 @@ pub(crate) fn to_alter_table_expr(
|
||||
FulltextBackend::Bloom => PbFulltextBackend::Bloom.into(),
|
||||
FulltextBackend::Tantivy => PbFulltextBackend::Tantivy.into(),
|
||||
},
|
||||
granularity: options.granularity as u64,
|
||||
false_positive_rate: options.false_positive_rate(),
|
||||
})),
|
||||
},
|
||||
sql::statements::alter::SetIndexOperation::Inverted { column_name } => SetIndex {
|
||||
@@ -608,6 +610,7 @@ pub(crate) fn to_alter_table_expr(
|
||||
column_name: column_name.value,
|
||||
enable: true,
|
||||
granularity: options.granularity as u64,
|
||||
false_positive_rate: options.false_positive_rate(),
|
||||
skipping_index_type: match options.index_type {
|
||||
SkippingIndexType::BloomFilter => PbSkippingIndexType::BloomFilter.into(),
|
||||
},
|
||||
|
||||
@@ -18,8 +18,10 @@ use std::collections::HashMap;
|
||||
|
||||
use common_meta::SchemaOptions;
|
||||
use datatypes::schema::{
|
||||
ColumnDefaultConstraint, ColumnSchema, SchemaRef, COLUMN_FULLTEXT_OPT_KEY_ANALYZER,
|
||||
COLUMN_FULLTEXT_OPT_KEY_BACKEND, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE,
|
||||
ColumnDefaultConstraint, ColumnSchema, FulltextBackend, SchemaRef,
|
||||
COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_BACKEND,
|
||||
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE,
|
||||
COLUMN_FULLTEXT_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE,
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY,
|
||||
};
|
||||
use snafu::ResultExt;
|
||||
@@ -104,7 +106,7 @@ fn create_column(column_schema: &ColumnSchema, quote_style: char) -> Result<Colu
|
||||
.context(GetFulltextOptionsSnafu)?
|
||||
&& opt.enable
|
||||
{
|
||||
let map = HashMap::from([
|
||||
let mut map = HashMap::from([
|
||||
(
|
||||
COLUMN_FULLTEXT_OPT_KEY_ANALYZER.to_string(),
|
||||
opt.analyzer.to_string(),
|
||||
@@ -118,6 +120,16 @@ fn create_column(column_schema: &ColumnSchema, quote_style: char) -> Result<Colu
|
||||
opt.backend.to_string(),
|
||||
),
|
||||
]);
|
||||
if opt.backend == FulltextBackend::Bloom {
|
||||
map.insert(
|
||||
COLUMN_FULLTEXT_OPT_KEY_GRANULARITY.to_string(),
|
||||
opt.granularity.to_string(),
|
||||
);
|
||||
map.insert(
|
||||
COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE.to_string(),
|
||||
opt.false_positive_rate().to_string(),
|
||||
);
|
||||
}
|
||||
extensions.fulltext_index_options = Some(map.into());
|
||||
}
|
||||
|
||||
@@ -130,6 +142,10 @@ fn create_column(column_schema: &ColumnSchema, quote_style: char) -> Result<Colu
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY.to_string(),
|
||||
opt.granularity.to_string(),
|
||||
),
|
||||
(
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE.to_string(),
|
||||
opt.false_positive_rate().to_string(),
|
||||
),
|
||||
(
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE.to_string(),
|
||||
opt.index_type.to_string(),
|
||||
@@ -327,11 +343,11 @@ mod tests {
|
||||
assert_eq!(
|
||||
r#"
|
||||
CREATE TABLE IF NOT EXISTS "system_metrics" (
|
||||
"id" INT UNSIGNED NULL SKIPPING INDEX WITH(granularity = '4096', type = 'BLOOM'),
|
||||
"id" INT UNSIGNED NULL SKIPPING INDEX WITH(false_positive_rate = '0.01', granularity = '4096', type = 'BLOOM'),
|
||||
"host" STRING NULL INVERTED INDEX,
|
||||
"cpu" DOUBLE NULL,
|
||||
"disk" FLOAT NULL,
|
||||
"msg" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', backend = 'bloom', case_sensitive = 'false'),
|
||||
"msg" STRING NULL FULLTEXT INDEX WITH(analyzer = 'English', backend = 'bloom', case_sensitive = 'false', false_positive_rate = '0.01', granularity = '10240'),
|
||||
"ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(),
|
||||
TIME INDEX ("ts"),
|
||||
PRIMARY KEY ("id", "host")
|
||||
|
||||
@@ -1006,7 +1006,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_parse_alter_column_fulltext() {
|
||||
let sql = "ALTER TABLE test_table MODIFY COLUMN a SET FULLTEXT INDEX WITH(analyzer='English',case_sensitive='false',backend='bloom')";
|
||||
let sql = "ALTER TABLE test_table MODIFY COLUMN a SET FULLTEXT INDEX WITH(analyzer='English',case_sensitive='false',backend='bloom',granularity=1000,false_positive_rate=0.01)";
|
||||
let mut result =
|
||||
ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default())
|
||||
.unwrap();
|
||||
@@ -1029,12 +1029,14 @@ mod tests {
|
||||
} => {
|
||||
assert_eq!("a", column_name.value);
|
||||
assert_eq!(
|
||||
FulltextOptions {
|
||||
enable: true,
|
||||
analyzer: FulltextAnalyzer::English,
|
||||
case_sensitive: false,
|
||||
backend: FulltextBackend::Bloom,
|
||||
},
|
||||
FulltextOptions::new_unchecked(
|
||||
true,
|
||||
FulltextAnalyzer::English,
|
||||
false,
|
||||
FulltextBackend::Bloom,
|
||||
1000,
|
||||
0.01,
|
||||
),
|
||||
*options
|
||||
);
|
||||
}
|
||||
|
||||
@@ -29,8 +29,9 @@ use datafusion_sql::TableReference;
|
||||
use datatypes::arrow::datatypes::DataType;
|
||||
use datatypes::schema::{
|
||||
COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_BACKEND,
|
||||
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY,
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE,
|
||||
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE,
|
||||
COLUMN_FULLTEXT_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE,
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE,
|
||||
};
|
||||
use snafu::ResultExt;
|
||||
|
||||
@@ -126,6 +127,8 @@ pub fn validate_column_fulltext_create_option(key: &str) -> bool {
|
||||
COLUMN_FULLTEXT_OPT_KEY_ANALYZER,
|
||||
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE,
|
||||
COLUMN_FULLTEXT_OPT_KEY_BACKEND,
|
||||
COLUMN_FULLTEXT_OPT_KEY_GRANULARITY,
|
||||
COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE,
|
||||
]
|
||||
.contains(&key)
|
||||
}
|
||||
@@ -134,6 +137,7 @@ pub fn validate_column_skipping_index_create_option(key: &str) -> bool {
|
||||
[
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY,
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE,
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE,
|
||||
]
|
||||
.contains(&key)
|
||||
}
|
||||
|
||||
@@ -1064,6 +1064,14 @@ pub enum MetadataError {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
},
|
||||
|
||||
#[snafu(display("Invalid index option"))]
|
||||
InvalidIndexOption {
|
||||
#[snafu(implicit)]
|
||||
location: Location,
|
||||
#[snafu(source)]
|
||||
error: datatypes::error::Error,
|
||||
},
|
||||
}
|
||||
|
||||
impl ErrorExt for MetadataError {
|
||||
@@ -1620,12 +1628,14 @@ mod test {
|
||||
.alter(AlterKind::SetIndex {
|
||||
options: ApiSetIndexOptions::Fulltext {
|
||||
column_name: "b".to_string(),
|
||||
options: FulltextOptions {
|
||||
enable: true,
|
||||
analyzer: FulltextAnalyzer::Chinese,
|
||||
case_sensitive: true,
|
||||
backend: FulltextBackend::Bloom,
|
||||
},
|
||||
options: FulltextOptions::new_unchecked(
|
||||
true,
|
||||
FulltextAnalyzer::Chinese,
|
||||
true,
|
||||
FulltextBackend::Bloom,
|
||||
1000,
|
||||
0.01,
|
||||
),
|
||||
},
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@@ -113,13 +113,19 @@ pub const METRIC_ENGINE_INDEX_TYPE_OPTION: &str = "index.type";
|
||||
/// physical_metric_table = "",
|
||||
/// index.type = "skipping",
|
||||
/// index.granularity = "102400",
|
||||
/// index.false_positive_rate = "0.01",
|
||||
/// );
|
||||
/// ```
|
||||
pub const METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION: &str = "index.granularity";
|
||||
pub const METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION: &str =
|
||||
"index.false_positive_rate";
|
||||
|
||||
/// Default granularity for the skipping index in the metric engine.
|
||||
pub const METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION_DEFAULT: u32 = 102400;
|
||||
|
||||
/// Default false positive rate for the skipping index in the metric engine.
|
||||
pub const METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION_DEFAULT: f64 = 0.01;
|
||||
|
||||
/// Returns true if the `key` is a valid option key for the metric engine.
|
||||
pub fn is_metric_engine_option_key(key: &str) -> bool {
|
||||
[
|
||||
@@ -127,6 +133,7 @@ pub fn is_metric_engine_option_key(key: &str) -> bool {
|
||||
LOGICAL_TABLE_METADATA_KEY,
|
||||
METRIC_ENGINE_INDEX_TYPE_OPTION,
|
||||
METRIC_ENGINE_INDEX_SKIPPING_INDEX_GRANULARITY_OPTION,
|
||||
METRIC_ENGINE_INDEX_SKIPPING_INDEX_FALSE_POSITIVE_RATE_OPTION,
|
||||
]
|
||||
.contains(&key)
|
||||
}
|
||||
|
||||
@@ -42,8 +42,8 @@ use strum::{AsRefStr, IntoStaticStr};
|
||||
|
||||
use crate::logstore::entry;
|
||||
use crate::metadata::{
|
||||
ColumnMetadata, DecodeProtoSnafu, FlightCodecSnafu, InvalidRawRegionRequestSnafu,
|
||||
InvalidRegionRequestSnafu, InvalidSetRegionOptionRequestSnafu,
|
||||
ColumnMetadata, DecodeProtoSnafu, FlightCodecSnafu, InvalidIndexOptionSnafu,
|
||||
InvalidRawRegionRequestSnafu, InvalidRegionRequestSnafu, InvalidSetRegionOptionRequestSnafu,
|
||||
InvalidUnsetRegionOptionRequestSnafu, MetadataError, RegionMetadata, Result, UnexpectedSnafu,
|
||||
};
|
||||
use crate::metric_engine_consts::PHYSICAL_TABLE_METADATA_KEY;
|
||||
@@ -764,16 +764,19 @@ impl TryFrom<alter_request::Kind> for AlterKind {
|
||||
set_index::Options::Fulltext(x) => AlterKind::SetIndex {
|
||||
options: ApiSetIndexOptions::Fulltext {
|
||||
column_name: x.column_name.clone(),
|
||||
options: FulltextOptions {
|
||||
enable: x.enable,
|
||||
analyzer: as_fulltext_option_analyzer(
|
||||
options: FulltextOptions::new(
|
||||
x.enable,
|
||||
as_fulltext_option_analyzer(
|
||||
Analyzer::try_from(x.analyzer).context(DecodeProtoSnafu)?,
|
||||
),
|
||||
case_sensitive: x.case_sensitive,
|
||||
backend: as_fulltext_option_backend(
|
||||
x.case_sensitive,
|
||||
as_fulltext_option_backend(
|
||||
PbFulltextBackend::try_from(x.backend).context(DecodeProtoSnafu)?,
|
||||
),
|
||||
},
|
||||
x.granularity as u32,
|
||||
x.false_positive_rate,
|
||||
)
|
||||
.context(InvalidIndexOptionSnafu)?,
|
||||
},
|
||||
},
|
||||
set_index::Options::Inverted(i) => AlterKind::SetIndex {
|
||||
@@ -784,13 +787,15 @@ impl TryFrom<alter_request::Kind> for AlterKind {
|
||||
set_index::Options::Skipping(s) => AlterKind::SetIndex {
|
||||
options: ApiSetIndexOptions::Skipping {
|
||||
column_name: s.column_name,
|
||||
options: SkippingIndexOptions {
|
||||
index_type: as_skipping_index_type(
|
||||
options: SkippingIndexOptions::new(
|
||||
s.granularity as u32,
|
||||
s.false_positive_rate,
|
||||
as_skipping_index_type(
|
||||
PbSkippingIndexType::try_from(s.skipping_index_type)
|
||||
.context(DecodeProtoSnafu)?,
|
||||
),
|
||||
granularity: s.granularity as u32,
|
||||
},
|
||||
)
|
||||
.context(InvalidIndexOptionSnafu)?,
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -1648,12 +1653,14 @@ mod tests {
|
||||
let kind = AlterKind::SetIndex {
|
||||
options: ApiSetIndexOptions::Fulltext {
|
||||
column_name: "tag_0".to_string(),
|
||||
options: FulltextOptions {
|
||||
enable: true,
|
||||
analyzer: FulltextAnalyzer::Chinese,
|
||||
case_sensitive: false,
|
||||
backend: FulltextBackend::Bloom,
|
||||
},
|
||||
options: FulltextOptions::new_unchecked(
|
||||
true,
|
||||
FulltextAnalyzer::Chinese,
|
||||
false,
|
||||
FulltextBackend::Bloom,
|
||||
1000,
|
||||
0.01,
|
||||
),
|
||||
},
|
||||
};
|
||||
let request = RegionAlterRequest { kind };
|
||||
|
||||
@@ -1876,12 +1876,14 @@ mod tests {
|
||||
let alter_kind = AlterKind::SetIndex {
|
||||
options: SetIndexOptions::Fulltext {
|
||||
column_name: "my_tag_first".to_string(),
|
||||
options: FulltextOptions {
|
||||
enable: true,
|
||||
analyzer: FulltextAnalyzer::Chinese,
|
||||
case_sensitive: true,
|
||||
backend: FulltextBackend::Bloom,
|
||||
},
|
||||
options: FulltextOptions::new_unchecked(
|
||||
true,
|
||||
FulltextAnalyzer::Chinese,
|
||||
true,
|
||||
FulltextBackend::Bloom,
|
||||
1000,
|
||||
0.01,
|
||||
),
|
||||
},
|
||||
};
|
||||
let new_meta = new_meta
|
||||
|
||||
Reference in New Issue
Block a user