mirror of
https://github.com/GreptimeTeam/greptimedb.git
synced 2026-05-26 18:00:41 +00:00
feat(mito): add options to ignore building index for specific column ids (#3295)
Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
This commit is contained in:
@@ -24,6 +24,7 @@ use crate::cache::write_cache::SstUploadRequest;
|
||||
use crate::cache::CacheManagerRef;
|
||||
use crate::error::{CleanDirSnafu, DeleteIndexSnafu, DeleteSstSnafu, OpenDalSnafu, Result};
|
||||
use crate::read::Source;
|
||||
use crate::region::options::IndexOptions;
|
||||
use crate::sst::file::{FileHandle, FileId, FileMeta};
|
||||
use crate::sst::index::intermediate::IntermediateManager;
|
||||
use crate::sst::index::IndexerBuilder;
|
||||
@@ -143,6 +144,7 @@ impl AccessLayer {
|
||||
row_group_size: write_opts.row_group_size,
|
||||
object_store: self.object_store.clone(),
|
||||
intermediate_manager: self.intermediate_manager.clone(),
|
||||
index_options: request.index_options,
|
||||
}
|
||||
.build();
|
||||
let mut writer = ParquetWriter::new(
|
||||
@@ -187,6 +189,8 @@ pub(crate) struct SstWriteRequest {
|
||||
pub(crate) mem_threshold_index_create: Option<usize>,
|
||||
/// The size of write buffer for index.
|
||||
pub(crate) index_write_buffer_size: Option<usize>,
|
||||
/// The options of the index for the region.
|
||||
pub(crate) index_options: IndexOptions,
|
||||
}
|
||||
|
||||
/// Creates a fs object store with atomic write dir.
|
||||
|
||||
4
src/mito2/src/cache/write_cache.rs
vendored
4
src/mito2/src/cache/write_cache.rs
vendored
@@ -118,6 +118,7 @@ impl WriteCache {
|
||||
row_group_size: write_opts.row_group_size,
|
||||
object_store: self.file_cache.local_store(),
|
||||
intermediate_manager: self.intermediate_manager.clone(),
|
||||
index_options: write_request.index_options,
|
||||
}
|
||||
.build();
|
||||
|
||||
@@ -235,6 +236,7 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::cache::test_util::new_fs_store;
|
||||
use crate::cache::CacheManager;
|
||||
use crate::region::options::IndexOptions;
|
||||
use crate::sst::file::FileId;
|
||||
use crate::sst::location::{index_file_path, sst_file_path};
|
||||
use crate::sst::parquet::reader::ParquetReaderBuilder;
|
||||
@@ -279,6 +281,7 @@ mod tests {
|
||||
mem_threshold_index_create: None,
|
||||
index_write_buffer_size: None,
|
||||
cache_manager: Default::default(),
|
||||
index_options: IndexOptions::default(),
|
||||
};
|
||||
|
||||
let upload_request = SstUploadRequest {
|
||||
@@ -363,6 +366,7 @@ mod tests {
|
||||
mem_threshold_index_create: None,
|
||||
index_write_buffer_size: None,
|
||||
cache_manager: cache_manager.clone(),
|
||||
index_options: IndexOptions::default(),
|
||||
};
|
||||
let write_opts = WriteOptions {
|
||||
row_group_size: 512,
|
||||
|
||||
@@ -37,6 +37,7 @@ use crate::metrics::{COMPACTION_FAILURE_COUNT, COMPACTION_STAGE_ELAPSED};
|
||||
use crate::read::projection::ProjectionMapper;
|
||||
use crate::read::seq_scan::SeqScan;
|
||||
use crate::read::{BoxedBatchReader, Source};
|
||||
use crate::region::options::IndexOptions;
|
||||
use crate::request::{
|
||||
BackgroundNotify, CompactionFailed, CompactionFinished, OutputTx, WorkerRequest,
|
||||
};
|
||||
@@ -186,6 +187,7 @@ impl Picker for TwcsPicker {
|
||||
start_time,
|
||||
cache_manager,
|
||||
storage: current_version.options.storage.clone(),
|
||||
index_options: current_version.options.index_options.clone(),
|
||||
};
|
||||
Some(Box::new(task))
|
||||
}
|
||||
@@ -251,6 +253,8 @@ pub(crate) struct TwcsCompactionTask {
|
||||
pub(crate) cache_manager: CacheManagerRef,
|
||||
/// Target storage of the region.
|
||||
pub(crate) storage: Option<String>,
|
||||
/// Index options of the region.
|
||||
pub(crate) index_options: IndexOptions,
|
||||
}
|
||||
|
||||
impl Debug for TwcsCompactionTask {
|
||||
@@ -327,6 +331,7 @@ impl TwcsCompactionTask {
|
||||
let file_id = output.output_file_id;
|
||||
let cache_manager = self.cache_manager.clone();
|
||||
let storage = self.storage.clone();
|
||||
let index_options = self.index_options.clone();
|
||||
futs.push(async move {
|
||||
let reader =
|
||||
build_sst_reader(metadata.clone(), sst_layer.clone(), &output.inputs).await?;
|
||||
@@ -341,6 +346,7 @@ impl TwcsCompactionTask {
|
||||
create_inverted_index,
|
||||
mem_threshold_index_create,
|
||||
index_write_buffer_size,
|
||||
index_options,
|
||||
},
|
||||
&write_opts,
|
||||
)
|
||||
|
||||
@@ -34,6 +34,7 @@ use crate::error::{
|
||||
use crate::memtable::MemtableBuilderRef;
|
||||
use crate::metrics::{FLUSH_BYTES_TOTAL, FLUSH_ELAPSED, FLUSH_ERRORS_TOTAL, FLUSH_REQUESTS_TOTAL};
|
||||
use crate::read::Source;
|
||||
use crate::region::options::IndexOptions;
|
||||
use crate::region::version::{VersionControlData, VersionControlRef, VersionRef};
|
||||
use crate::request::{
|
||||
BackgroundNotify, FlushFailed, FlushFinished, OptionOutputTx, OutputTx, SenderDdlRequest,
|
||||
@@ -203,6 +204,9 @@ pub(crate) struct RegionFlushTask {
|
||||
pub(crate) engine_config: Arc<MitoConfig>,
|
||||
pub(crate) row_group_size: Option<usize>,
|
||||
pub(crate) cache_manager: CacheManagerRef,
|
||||
|
||||
/// Index options for the region.
|
||||
pub(crate) index_options: IndexOptions,
|
||||
}
|
||||
|
||||
impl RegionFlushTask {
|
||||
@@ -338,6 +342,7 @@ impl RegionFlushTask {
|
||||
create_inverted_index,
|
||||
mem_threshold_index_create,
|
||||
index_write_buffer_size,
|
||||
index_options: self.index_options.clone(),
|
||||
};
|
||||
let Some(sst_info) = self
|
||||
.access_layer
|
||||
@@ -766,6 +771,7 @@ mod tests {
|
||||
engine_config: Arc::new(MitoConfig::default()),
|
||||
row_group_size: None,
|
||||
cache_manager: Arc::new(CacheManager::default()),
|
||||
index_options: IndexOptions::default(),
|
||||
};
|
||||
task.push_sender(OptionOutputTx::from(output_tx));
|
||||
scheduler
|
||||
|
||||
@@ -259,6 +259,14 @@ impl ScanRegion {
|
||||
self.access_layer.object_store().clone(),
|
||||
file_cache,
|
||||
self.version.metadata.as_ref(),
|
||||
self.version
|
||||
.options
|
||||
.index_options
|
||||
.inverted_index
|
||||
.ignore_column_ids
|
||||
.iter()
|
||||
.copied()
|
||||
.collect(),
|
||||
)
|
||||
.build(&self.request.filters)
|
||||
.inspect_err(|err| warn!(err; "Failed to build index applier"))
|
||||
|
||||
@@ -18,10 +18,12 @@ use std::collections::HashMap;
|
||||
use std::time::Duration;
|
||||
|
||||
use common_wal::options::{WalOptions, WAL_OPTIONS_KEY};
|
||||
use serde::Deserialize;
|
||||
use serde::de::Error as _;
|
||||
use serde::{Deserialize, Deserializer};
|
||||
use serde_json::Value;
|
||||
use serde_with::{serde_as, with_prefix, DisplayFromStr};
|
||||
use snafu::ResultExt;
|
||||
use store_api::storage::ColumnId;
|
||||
|
||||
use crate::error::{Error, JsonOptionsSnafu, Result};
|
||||
|
||||
@@ -40,6 +42,8 @@ pub struct RegionOptions {
|
||||
pub storage: Option<String>,
|
||||
/// Wal options.
|
||||
pub wal_options: WalOptions,
|
||||
/// Index options.
|
||||
pub index_options: IndexOptions,
|
||||
}
|
||||
|
||||
impl TryFrom<&HashMap<String, String>> for RegionOptions {
|
||||
@@ -64,11 +68,14 @@ impl TryFrom<&HashMap<String, String>> for RegionOptions {
|
||||
},
|
||||
)?;
|
||||
|
||||
let index_options: IndexOptions = serde_json::from_str(&json).context(JsonOptionsSnafu)?;
|
||||
|
||||
Ok(RegionOptions {
|
||||
ttl: options.ttl,
|
||||
compaction,
|
||||
storage: options.storage,
|
||||
wal_options,
|
||||
index_options,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -152,6 +159,40 @@ impl Default for RegionOptionsWithoutEnum {
|
||||
}
|
||||
}
|
||||
|
||||
with_prefix!(prefix_inverted_index "index.inverted_index.");
|
||||
|
||||
/// Options for index.
///
/// Deserialized with serde; because of `#[serde(default)]`, any option that is
/// absent from the input falls back to its `Default` value.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Default, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct IndexOptions {
|
||||
/// Options for the inverted index.
///
/// Flattened into the parent map: the keys of these options carry the
/// `index.inverted_index.` prefix declared by `prefix_inverted_index`.
|
||||
#[serde(flatten, with = "prefix_inverted_index")]
|
||||
pub inverted_index: InvertedIndexOptions,
|
||||
}
|
||||
|
||||
/// Options for the inverted index.
///
/// Every field is optional on input (`#[serde(default)]`); the default is an
/// empty ignore list.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Default, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct InvertedIndexOptions {
|
||||
/// The column ids that should be ignored when building the inverted index.
|
||||
/// The column ids are separated by commas. For example, "1,2,3".
///
/// The option value arrives as a single string and is converted to a list
/// by `deserialize_ignore_column_ids`.
|
||||
#[serde(deserialize_with = "deserialize_ignore_column_ids")]
|
||||
pub ignore_column_ids: Vec<ColumnId>,
|
||||
}
|
||||
|
||||
fn deserialize_ignore_column_ids<'de, D>(deserializer: D) -> Result<Vec<ColumnId>, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
let s: String = Deserialize::deserialize(deserializer)?;
|
||||
let mut column_ids = Vec::new();
|
||||
for item in s.split(',') {
|
||||
let column_id = item.parse().map_err(D::Error::custom)?;
|
||||
column_ids.push(column_id);
|
||||
}
|
||||
Ok(column_ids)
|
||||
}
|
||||
|
||||
/// Converts the `options` map to a json object.
|
||||
///
|
||||
/// Converts all key-values to lowercase and replaces "null" strings by `null` json values.
|
||||
@@ -257,6 +298,21 @@ mod tests {
|
||||
expect == got
|
||||
}
|
||||
|
||||
#[test]
fn test_with_index() {
    // Expected result: only the inverted-index ignore list differs from the defaults.
    let inverted_index = InvertedIndexOptions {
        ignore_column_ids: vec![1, 2, 3],
    };
    let expect = RegionOptions {
        index_options: IndexOptions { inverted_index },
        ..Default::default()
    };

    // The ignore list is supplied as a single comma-separated option value.
    let map = make_map(&[("index.inverted_index.ignore_column_ids", "1,2,3")]);
    let options = RegionOptions::try_from(&map).unwrap();

    assert_eq!(expect, options);
}
|
||||
|
||||
// No need to add compatible tests for RegionOptions since the above tests already check for compatibility.
|
||||
#[test]
|
||||
fn test_with_any_wal_options() {
|
||||
@@ -281,6 +337,7 @@ mod tests {
|
||||
("compaction.twcs.time_window", "2h"),
|
||||
("compaction.type", "twcs"),
|
||||
("storage", "S3"),
|
||||
("index.inverted_index.ignore_column_ids", "1,2,3"),
|
||||
(
|
||||
WAL_OPTIONS_KEY,
|
||||
&serde_json::to_string(&wal_options).unwrap(),
|
||||
@@ -296,6 +353,11 @@ mod tests {
|
||||
}),
|
||||
storage: Some("s3".to_string()),
|
||||
wal_options,
|
||||
index_options: IndexOptions {
|
||||
inverted_index: InvertedIndexOptions {
|
||||
ignore_column_ids: vec![1, 2, 3],
|
||||
},
|
||||
},
|
||||
};
|
||||
assert_eq!(expect, options);
|
||||
}
|
||||
|
||||
@@ -27,6 +27,7 @@ use store_api::metadata::RegionMetadataRef;
|
||||
use store_api::storage::RegionId;
|
||||
|
||||
use crate::read::Batch;
|
||||
use crate::region::options::IndexOptions;
|
||||
use crate::sst::file::FileId;
|
||||
use crate::sst::index::intermediate::IntermediateManager;
|
||||
|
||||
@@ -132,6 +133,7 @@ pub(crate) struct IndexerBuilder<'a> {
|
||||
pub(crate) segment_row_count: usize,
|
||||
pub(crate) object_store: ObjectStore,
|
||||
pub(crate) intermediate_manager: IntermediateManager,
|
||||
pub(crate) index_options: IndexOptions,
|
||||
}
|
||||
|
||||
impl<'a> IndexerBuilder<'a> {
|
||||
@@ -184,7 +186,15 @@ impl<'a> IndexerBuilder<'a> {
|
||||
self.mem_threshold_index_create,
|
||||
segment_row_count,
|
||||
)
|
||||
.with_buffer_size(self.write_buffer_size);
|
||||
.with_buffer_size(self.write_buffer_size)
|
||||
.with_ignore_column_ids(
|
||||
self.index_options
|
||||
.inverted_index
|
||||
.ignore_column_ids
|
||||
.iter()
|
||||
.map(|i| i.to_string())
|
||||
.collect(),
|
||||
);
|
||||
|
||||
Indexer {
|
||||
file_id: self.file_id,
|
||||
@@ -281,6 +291,7 @@ mod tests {
|
||||
row_group_size: 1024,
|
||||
object_store: mock_object_store(),
|
||||
intermediate_manager: mock_intm_mgr(),
|
||||
index_options: IndexOptions::default(),
|
||||
}
|
||||
.build();
|
||||
|
||||
@@ -301,6 +312,7 @@ mod tests {
|
||||
row_group_size: 1024,
|
||||
object_store: mock_object_store(),
|
||||
intermediate_manager: mock_intm_mgr(),
|
||||
index_options: IndexOptions::default(),
|
||||
}
|
||||
.build();
|
||||
|
||||
@@ -321,6 +333,7 @@ mod tests {
|
||||
row_group_size: 1024,
|
||||
object_store: mock_object_store(),
|
||||
intermediate_manager: mock_intm_mgr(),
|
||||
index_options: IndexOptions::default(),
|
||||
}
|
||||
.build();
|
||||
|
||||
@@ -341,6 +354,7 @@ mod tests {
|
||||
row_group_size: 0,
|
||||
object_store: mock_object_store(),
|
||||
intermediate_manager: mock_intm_mgr(),
|
||||
index_options: IndexOptions::default(),
|
||||
}
|
||||
.build();
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ mod eq_list;
|
||||
mod in_list;
|
||||
mod regex_match;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
use api::v1::SemanticType;
|
||||
use common_query::logical_plan::Expr;
|
||||
@@ -54,6 +54,9 @@ pub(crate) struct SstIndexApplierBuilder<'a> {
|
||||
/// Metadata of the region, used to get metadata like column type.
|
||||
metadata: &'a RegionMetadata,
|
||||
|
||||
/// Column ids to ignore.
|
||||
ignore_column_ids: HashSet<ColumnId>,
|
||||
|
||||
/// Stores predicates during traversal on the Expr tree.
|
||||
output: HashMap<ColumnId, Vec<Predicate>>,
|
||||
}
|
||||
@@ -65,12 +68,14 @@ impl<'a> SstIndexApplierBuilder<'a> {
|
||||
object_store: ObjectStore,
|
||||
file_cache: Option<FileCacheRef>,
|
||||
metadata: &'a RegionMetadata,
|
||||
ignore_column_ids: HashSet<ColumnId>,
|
||||
) -> Self {
|
||||
Self {
|
||||
region_dir,
|
||||
object_store,
|
||||
file_cache,
|
||||
metadata,
|
||||
ignore_column_ids,
|
||||
output: HashMap::default(),
|
||||
}
|
||||
}
|
||||
@@ -138,7 +143,7 @@ impl<'a> SstIndexApplierBuilder<'a> {
|
||||
}
|
||||
|
||||
/// Helper function to get the column id and the column type of a tag column.
|
||||
/// Returns `None` if the column is not a tag column.
|
||||
/// Returns `None` if the column is not a tag column or if the column is ignored.
|
||||
fn tag_column_id_and_type(
|
||||
&self,
|
||||
column_name: &str,
|
||||
@@ -150,8 +155,18 @@ impl<'a> SstIndexApplierBuilder<'a> {
|
||||
column: column_name,
|
||||
})?;
|
||||
|
||||
Ok((column.semantic_type == SemanticType::Tag)
|
||||
.then(|| (column.column_id, column.column_schema.data_type.clone())))
|
||||
if self.ignore_column_ids.contains(&column.column_id) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
if column.semantic_type != SemanticType::Tag {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
Ok(Some((
|
||||
column.column_id,
|
||||
column.column_schema.data_type.clone(),
|
||||
)))
|
||||
}
|
||||
|
||||
/// Helper function to get a non-null literal.
|
||||
@@ -293,8 +308,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_collect_and_basic() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
let expr = DfExpr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(DfExpr::BinaryExpr(BinaryExpr {
|
||||
|
||||
@@ -58,6 +58,8 @@ impl<'a> SstIndexApplierBuilder<'a> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashSet;
|
||||
|
||||
use super::*;
|
||||
use crate::error::Error;
|
||||
use crate::sst::index::applier::builder::tests::{
|
||||
@@ -68,8 +70,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_collect_between_basic() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
let between = Between {
|
||||
negated: false,
|
||||
@@ -102,8 +109,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_collect_between_negated() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
let between = Between {
|
||||
negated: true,
|
||||
@@ -119,8 +131,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_collect_between_field_column() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
let between = Between {
|
||||
negated: false,
|
||||
@@ -136,8 +153,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_collect_between_type_mismatch() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
let between = Between {
|
||||
negated: false,
|
||||
@@ -154,8 +176,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_collect_between_nonexistent_column() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
let between = Between {
|
||||
negated: false,
|
||||
|
||||
@@ -130,6 +130,8 @@ impl<'a> SstIndexApplierBuilder<'a> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
use super::*;
|
||||
use crate::error::Error;
|
||||
use crate::sst::index::applier::builder::tests::{
|
||||
@@ -223,8 +225,13 @@ mod tests {
|
||||
];
|
||||
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
for ((left, op, right), _) in &cases {
|
||||
builder.collect_comparison_expr(left, op, right).unwrap();
|
||||
@@ -243,8 +250,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_collect_comparison_type_mismatch() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
let res = builder.collect_comparison_expr(&tag_column(), &Operator::Lt, &int64_lit(10));
|
||||
assert!(matches!(res, Err(Error::FieldTypeMismatch { .. })));
|
||||
@@ -254,8 +266,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_collect_comparison_field_column() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
builder
|
||||
.collect_comparison_expr(&field_column(), &Operator::Lt, &string_lit("abc"))
|
||||
@@ -266,8 +283,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_collect_comparison_nonexistent_column() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
let res = builder.collect_comparison_expr(
|
||||
&nonexistent_column(),
|
||||
|
||||
@@ -132,8 +132,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_collect_eq_basic() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
builder
|
||||
.collect_eq(&tag_column(), &string_lit("foo"))
|
||||
@@ -161,8 +166,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_collect_eq_field_column() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
builder
|
||||
.collect_eq(&field_column(), &string_lit("abc"))
|
||||
@@ -173,8 +183,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_collect_eq_nonexistent_column() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
let res = builder.collect_eq(&nonexistent_column(), &string_lit("abc"));
|
||||
assert!(matches!(res, Err(Error::ColumnNotFound { .. })));
|
||||
@@ -184,8 +199,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_collect_eq_type_mismatch() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
let res = builder.collect_eq(&tag_column(), &int64_lit(1));
|
||||
assert!(matches!(res, Err(Error::FieldTypeMismatch { .. })));
|
||||
@@ -195,8 +215,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_collect_or_eq_list_basic() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
let eq_expr = DfExpr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(tag_column()),
|
||||
@@ -245,8 +270,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_collect_or_eq_list_invalid_op() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
let eq_expr = DfExpr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(tag_column()),
|
||||
@@ -274,8 +304,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_collect_or_eq_list_multiple_columns() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
let eq_expr = DfExpr::BinaryExpr(BinaryExpr {
|
||||
left: Box::new(tag_column()),
|
||||
|
||||
@@ -63,8 +63,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_collect_in_list_basic() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
let in_list = InList {
|
||||
expr: Box::new(tag_column()),
|
||||
@@ -87,8 +92,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_collect_in_list_negated() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
let in_list = InList {
|
||||
expr: Box::new(tag_column()),
|
||||
@@ -103,8 +113,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_collect_in_list_field_column() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
let in_list = InList {
|
||||
expr: Box::new(field_column()),
|
||||
@@ -119,8 +134,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_collect_in_list_type_mismatch() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
let in_list = InList {
|
||||
expr: Box::new(tag_column()),
|
||||
@@ -136,8 +156,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_collect_in_list_nonexistent_column() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
let in_list = InList {
|
||||
expr: Box::new(nonexistent_column()),
|
||||
|
||||
@@ -45,6 +45,8 @@ impl<'a> SstIndexApplierBuilder<'a> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashSet;
|
||||
|
||||
use super::*;
|
||||
use crate::error::Error;
|
||||
use crate::sst::index::applier::builder::tests::{
|
||||
@@ -55,8 +57,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_regex_match_basic() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
builder
|
||||
.collect_regex_match(&tag_column(), &string_lit("abc"))
|
||||
@@ -75,8 +82,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_regex_match_field_column() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
builder
|
||||
.collect_regex_match(&field_column(), &string_lit("abc"))
|
||||
@@ -88,8 +100,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_regex_match_type_mismatch() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
builder
|
||||
.collect_regex_match(&tag_column(), &int64_lit(123))
|
||||
@@ -101,8 +118,13 @@ mod tests {
|
||||
#[test]
|
||||
fn test_regex_match_type_nonexist_column() {
|
||||
let metadata = test_region_metadata();
|
||||
let mut builder =
|
||||
SstIndexApplierBuilder::new("test".to_string(), test_object_store(), None, &metadata);
|
||||
let mut builder = SstIndexApplierBuilder::new(
|
||||
"test".to_string(),
|
||||
test_object_store(),
|
||||
None,
|
||||
&metadata,
|
||||
HashSet::default(),
|
||||
);
|
||||
|
||||
let res = builder.collect_regex_match(&nonexistent_column(), &string_lit("abc"));
|
||||
assert!(matches!(res, Err(Error::ColumnNotFound { .. })));
|
||||
|
||||
@@ -37,7 +37,7 @@ impl IndexValueCodec {
|
||||
}
|
||||
}
|
||||
|
||||
type ColumnId = String;
|
||||
pub(crate) type ColumnId = String;
|
||||
|
||||
/// Decodes primary key values into their corresponding column ids, data types and values.
|
||||
pub struct IndexValuesCodec {
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
mod statistics;
|
||||
mod temp_provider;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::num::NonZeroUsize;
|
||||
use std::sync::Arc;
|
||||
|
||||
@@ -40,7 +40,7 @@ use crate::metrics::{
|
||||
};
|
||||
use crate::read::Batch;
|
||||
use crate::sst::file::FileId;
|
||||
use crate::sst::index::codec::{IndexValueCodec, IndexValuesCodec};
|
||||
use crate::sst::index::codec::{ColumnId, IndexValueCodec, IndexValuesCodec};
|
||||
use crate::sst::index::creator::statistics::Statistics;
|
||||
use crate::sst::index::creator::temp_provider::TempFileProvider;
|
||||
use crate::sst::index::intermediate::{IntermediateLocation, IntermediateManager};
|
||||
@@ -72,6 +72,9 @@ pub struct SstIndexCreator {
|
||||
stats: Statistics,
|
||||
/// Whether the index creation is aborted.
|
||||
aborted: bool,
|
||||
|
||||
/// Ignore column IDs for index creation.
|
||||
ignore_column_ids: HashSet<ColumnId>,
|
||||
}
|
||||
|
||||
impl SstIndexCreator {
|
||||
@@ -110,6 +113,8 @@ impl SstIndexCreator {
|
||||
|
||||
stats: Statistics::default(),
|
||||
aborted: false,
|
||||
|
||||
ignore_column_ids: HashSet::default(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -119,6 +124,12 @@ impl SstIndexCreator {
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets the ignore column IDs for index creation.
|
||||
pub fn with_ignore_column_ids(mut self, ignore_column_ids: HashSet<ColumnId>) -> Self {
|
||||
self.ignore_column_ids = ignore_column_ids;
|
||||
self
|
||||
}
|
||||
|
||||
/// Updates index with a batch of rows.
|
||||
/// Garbage will be cleaned up if failed to update.
|
||||
pub async fn update(&mut self, batch: &Batch) -> Result<()> {
|
||||
@@ -189,6 +200,10 @@ impl SstIndexCreator {
|
||||
guard.inc_row_count(n);
|
||||
|
||||
for (column_id, field, value) in self.codec.decode(batch.primary_key())? {
|
||||
if self.ignore_column_ids.contains(column_id) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(value) = value.as_ref() {
|
||||
self.value_buf.clear();
|
||||
IndexValueCodec::encode_value(value.as_value_ref(), field, &mut self.value_buf)?;
|
||||
|
||||
@@ -149,6 +149,7 @@ impl<S> RegionWorkerLoop<S> {
|
||||
engine_config,
|
||||
row_group_size,
|
||||
cache_manager: self.cache_manager.clone(),
|
||||
index_options: region.version().options.index_options.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user