mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-29 19:30:37 +00:00
feat: add granularity and false_positive_rate options for indexes (#6416)
* feat: add `granularity` and `false_positive_rate` options for indexes Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * address comments Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> * upgrade proto Signed-off-by: Zhenchi <zhongzc_arch@outlook.com> --------- Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
This commit is contained in:
@@ -31,9 +31,10 @@ pub use crate::schema::column_schema::{
|
||||
ColumnSchema, FulltextAnalyzer, FulltextBackend, FulltextOptions, Metadata,
|
||||
SkippingIndexOptions, SkippingIndexType, COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE,
|
||||
COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_BACKEND,
|
||||
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY,
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY,
|
||||
SKIPPING_INDEX_KEY, TIME_INDEX_KEY,
|
||||
COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE,
|
||||
COLUMN_FULLTEXT_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE,
|
||||
COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY,
|
||||
FULLTEXT_KEY, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY, TIME_INDEX_KEY,
|
||||
};
|
||||
pub use crate::schema::constraint::ColumnDefaultConstraint;
|
||||
pub use crate::schema::raw::RawSchema;
|
||||
|
||||
@@ -47,13 +47,18 @@ pub const COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE: &str = "enable";
|
||||
pub const COLUMN_FULLTEXT_OPT_KEY_ANALYZER: &str = "analyzer";
|
||||
pub const COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE: &str = "case_sensitive";
|
||||
pub const COLUMN_FULLTEXT_OPT_KEY_BACKEND: &str = "backend";
|
||||
pub const COLUMN_FULLTEXT_OPT_KEY_GRANULARITY: &str = "granularity";
|
||||
pub const COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE: &str = "false_positive_rate";
|
||||
|
||||
/// Keys used in SKIPPING index options
|
||||
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY: &str = "granularity";
|
||||
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE: &str = "false_positive_rate";
|
||||
pub const COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE: &str = "type";
|
||||
|
||||
pub const DEFAULT_GRANULARITY: u32 = 10240;
|
||||
|
||||
pub const DEFAULT_FALSE_POSITIVE_RATE: f64 = 0.01;
|
||||
|
||||
/// Schema of a column, used as an immutable struct.
|
||||
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct ColumnSchema {
|
||||
@@ -504,7 +509,7 @@ impl TryFrom<&ColumnSchema> for Field {
|
||||
}
|
||||
|
||||
/// Fulltext options for a column.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub struct FulltextOptions {
|
||||
/// Whether the fulltext index is enabled.
|
||||
@@ -518,6 +523,92 @@ pub struct FulltextOptions {
|
||||
/// The fulltext backend to use.
|
||||
#[serde(default)]
|
||||
pub backend: FulltextBackend,
|
||||
/// The granularity of the fulltext index (for bloom backend only)
|
||||
#[serde(default = "fulltext_options_default_granularity")]
|
||||
pub granularity: u32,
|
||||
/// The false positive rate of the fulltext index (for bloom backend only)
|
||||
#[serde(default = "fulltext_options_default_false_positive_rate_in_10000")]
|
||||
pub false_positive_rate_in_10000: u32,
|
||||
}
|
||||
|
||||
fn fulltext_options_default_granularity() -> u32 {
|
||||
DEFAULT_GRANULARITY
|
||||
}
|
||||
|
||||
fn fulltext_options_default_false_positive_rate_in_10000() -> u32 {
|
||||
(DEFAULT_FALSE_POSITIVE_RATE * 10000.0) as u32
|
||||
}
|
||||
|
||||
impl FulltextOptions {
|
||||
/// Creates a new fulltext options.
|
||||
pub fn new(
|
||||
enable: bool,
|
||||
analyzer: FulltextAnalyzer,
|
||||
case_sensitive: bool,
|
||||
backend: FulltextBackend,
|
||||
granularity: u32,
|
||||
false_positive_rate: f64,
|
||||
) -> Result<Self> {
|
||||
ensure!(
|
||||
0.0 < false_positive_rate && false_positive_rate <= 1.0,
|
||||
error::InvalidFulltextOptionSnafu {
|
||||
msg: format!(
|
||||
"Invalid false positive rate: {false_positive_rate}, expected: 0.0 < rate <= 1.0"
|
||||
),
|
||||
}
|
||||
);
|
||||
ensure!(
|
||||
granularity > 0,
|
||||
error::InvalidFulltextOptionSnafu {
|
||||
msg: format!("Invalid granularity: {granularity}, expected: positive integer"),
|
||||
}
|
||||
);
|
||||
Ok(Self::new_unchecked(
|
||||
enable,
|
||||
analyzer,
|
||||
case_sensitive,
|
||||
backend,
|
||||
granularity,
|
||||
false_positive_rate,
|
||||
))
|
||||
}
|
||||
|
||||
/// Creates a new fulltext options without checking `false_positive_rate` and `granularity`.
|
||||
pub fn new_unchecked(
|
||||
enable: bool,
|
||||
analyzer: FulltextAnalyzer,
|
||||
case_sensitive: bool,
|
||||
backend: FulltextBackend,
|
||||
granularity: u32,
|
||||
false_positive_rate: f64,
|
||||
) -> Self {
|
||||
Self {
|
||||
enable,
|
||||
analyzer,
|
||||
case_sensitive,
|
||||
backend,
|
||||
granularity,
|
||||
false_positive_rate_in_10000: (false_positive_rate * 10000.0) as u32,
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets the false positive rate.
|
||||
pub fn false_positive_rate(&self) -> f64 {
|
||||
self.false_positive_rate_in_10000 as f64 / 10000.0
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for FulltextOptions {
|
||||
fn default() -> Self {
|
||||
Self::new_unchecked(
|
||||
false,
|
||||
FulltextAnalyzer::default(),
|
||||
false,
|
||||
FulltextBackend::default(),
|
||||
DEFAULT_GRANULARITY,
|
||||
DEFAULT_FALSE_POSITIVE_RATE,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for FulltextOptions {
|
||||
@@ -527,6 +618,10 @@ impl fmt::Display for FulltextOptions {
|
||||
write!(f, ", analyzer={}", self.analyzer)?;
|
||||
write!(f, ", case_sensitive={}", self.case_sensitive)?;
|
||||
write!(f, ", backend={}", self.backend)?;
|
||||
if self.backend == FulltextBackend::Bloom {
|
||||
write!(f, ", granularity={}", self.granularity)?;
|
||||
write!(f, ", false_positive_rate={}", self.false_positive_rate())?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -611,6 +706,45 @@ impl TryFrom<HashMap<String, String>> for FulltextOptions {
|
||||
}
|
||||
}
|
||||
|
||||
if fulltext_options.backend == FulltextBackend::Bloom {
|
||||
// Parse granularity with default value 10240
|
||||
let granularity = match options.get(COLUMN_FULLTEXT_OPT_KEY_GRANULARITY) {
|
||||
Some(value) => value
|
||||
.parse::<u32>()
|
||||
.ok()
|
||||
.filter(|&v| v > 0)
|
||||
.ok_or_else(|| {
|
||||
error::InvalidFulltextOptionSnafu {
|
||||
msg: format!(
|
||||
"Invalid granularity: {value}, expected: positive integer"
|
||||
),
|
||||
}
|
||||
.build()
|
||||
})?,
|
||||
None => DEFAULT_GRANULARITY,
|
||||
};
|
||||
fulltext_options.granularity = granularity;
|
||||
|
||||
// Parse false positive rate with default value 0.01
|
||||
let false_positive_rate = match options.get(COLUMN_FULLTEXT_OPT_KEY_FALSE_POSITIVE_RATE)
|
||||
{
|
||||
Some(value) => value
|
||||
.parse::<f64>()
|
||||
.ok()
|
||||
.filter(|&v| v > 0.0 && v <= 1.0)
|
||||
.ok_or_else(|| {
|
||||
error::InvalidFulltextOptionSnafu {
|
||||
msg: format!(
|
||||
"Invalid false positive rate: {value}, expected: 0.0 < rate <= 1.0"
|
||||
),
|
||||
}
|
||||
.build()
|
||||
})?,
|
||||
None => DEFAULT_FALSE_POSITIVE_RATE,
|
||||
};
|
||||
fulltext_options.false_positive_rate_in_10000 = (false_positive_rate * 10000.0) as u32;
|
||||
}
|
||||
|
||||
Ok(fulltext_options)
|
||||
}
|
||||
}
|
||||
@@ -638,23 +772,72 @@ impl fmt::Display for FulltextAnalyzer {
|
||||
pub struct SkippingIndexOptions {
|
||||
/// The granularity of the skip index.
|
||||
pub granularity: u32,
|
||||
/// The false positive rate of the skip index (in ten-thousandths, e.g., 100 = 1%).
|
||||
pub false_positive_rate_in_10000: u32,
|
||||
/// The type of the skip index.
|
||||
#[serde(default)]
|
||||
pub index_type: SkippingIndexType,
|
||||
}
|
||||
|
||||
impl SkippingIndexOptions {
|
||||
/// Creates a new skipping index options without checking `false_positive_rate` and `granularity`.
|
||||
pub fn new_unchecked(
|
||||
granularity: u32,
|
||||
false_positive_rate: f64,
|
||||
index_type: SkippingIndexType,
|
||||
) -> Self {
|
||||
Self {
|
||||
granularity,
|
||||
false_positive_rate_in_10000: (false_positive_rate * 10000.0) as u32,
|
||||
index_type,
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new skipping index options.
|
||||
pub fn new(
|
||||
granularity: u32,
|
||||
false_positive_rate: f64,
|
||||
index_type: SkippingIndexType,
|
||||
) -> Result<Self> {
|
||||
ensure!(
|
||||
0.0 < false_positive_rate && false_positive_rate <= 1.0,
|
||||
error::InvalidSkippingIndexOptionSnafu {
|
||||
msg: format!("Invalid false positive rate: {false_positive_rate}, expected: 0.0 < rate <= 1.0"),
|
||||
}
|
||||
);
|
||||
ensure!(
|
||||
granularity > 0,
|
||||
error::InvalidSkippingIndexOptionSnafu {
|
||||
msg: format!("Invalid granularity: {granularity}, expected: positive integer"),
|
||||
}
|
||||
);
|
||||
Ok(Self::new_unchecked(
|
||||
granularity,
|
||||
false_positive_rate,
|
||||
index_type,
|
||||
))
|
||||
}
|
||||
|
||||
/// Gets the false positive rate.
|
||||
pub fn false_positive_rate(&self) -> f64 {
|
||||
self.false_positive_rate_in_10000 as f64 / 10000.0
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for SkippingIndexOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
granularity: DEFAULT_GRANULARITY,
|
||||
index_type: SkippingIndexType::default(),
|
||||
}
|
||||
Self::new_unchecked(
|
||||
DEFAULT_GRANULARITY,
|
||||
DEFAULT_FALSE_POSITIVE_RATE,
|
||||
SkippingIndexType::default(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for SkippingIndexOptions {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "granularity={}", self.granularity)?;
|
||||
write!(f, ", false_positive_rate={}", self.false_positive_rate())?;
|
||||
write!(f, ", index_type={}", self.index_type)?;
|
||||
Ok(())
|
||||
}
|
||||
@@ -681,15 +864,37 @@ impl TryFrom<HashMap<String, String>> for SkippingIndexOptions {
|
||||
fn try_from(options: HashMap<String, String>) -> Result<Self> {
|
||||
// Parse granularity with default value 1
|
||||
let granularity = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY) {
|
||||
Some(value) => value.parse::<u32>().map_err(|_| {
|
||||
error::InvalidSkippingIndexOptionSnafu {
|
||||
msg: format!("Invalid granularity: {value}, expected: positive integer"),
|
||||
}
|
||||
.build()
|
||||
})?,
|
||||
Some(value) => value
|
||||
.parse::<u32>()
|
||||
.ok()
|
||||
.filter(|&v| v > 0)
|
||||
.ok_or_else(|| {
|
||||
error::InvalidSkippingIndexOptionSnafu {
|
||||
msg: format!("Invalid granularity: {value}, expected: positive integer"),
|
||||
}
|
||||
.build()
|
||||
})?,
|
||||
None => DEFAULT_GRANULARITY,
|
||||
};
|
||||
|
||||
// Parse false positive rate with default value 100
|
||||
let false_positive_rate =
|
||||
match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_FALSE_POSITIVE_RATE) {
|
||||
Some(value) => value
|
||||
.parse::<f64>()
|
||||
.ok()
|
||||
.filter(|&v| v > 0.0 && v <= 1.0)
|
||||
.ok_or_else(|| {
|
||||
error::InvalidSkippingIndexOptionSnafu {
|
||||
msg: format!(
|
||||
"Invalid false positive rate: {value}, expected: 0.0 < rate <= 1.0"
|
||||
),
|
||||
}
|
||||
.build()
|
||||
})?,
|
||||
None => DEFAULT_FALSE_POSITIVE_RATE,
|
||||
};
|
||||
|
||||
// Parse index type with default value BloomFilter
|
||||
let index_type = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE) {
|
||||
Some(typ) => match typ.to_ascii_uppercase().as_str() {
|
||||
@@ -704,10 +909,11 @@ impl TryFrom<HashMap<String, String>> for SkippingIndexOptions {
|
||||
None => SkippingIndexType::default(),
|
||||
};
|
||||
|
||||
Ok(SkippingIndexOptions {
|
||||
Ok(SkippingIndexOptions::new_unchecked(
|
||||
granularity,
|
||||
false_positive_rate,
|
||||
index_type,
|
||||
})
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user