feat(metric-engine): set index options for data region (#3330)

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
This commit is contained in:
Zhenchi
2024-02-20 10:38:35 +08:00
committed by GitHub
parent 43fd87e051
commit 6628c41c36
6 changed files with 56 additions and 1 deletions

1
Cargo.lock generated
View File

@@ -5210,6 +5210,7 @@ dependencies = [
"common-time",
"datafusion",
"datatypes",
"itertools 0.10.5",
"lazy_static",
"mito2",
"mur3",

View File

@@ -17,6 +17,7 @@ common-telemetry.workspace = true
common-time.workspace = true
datafusion.workspace = true
datatypes.workspace = true
itertools.workspace = true
lazy_static = "1.4"
mito2.workspace = true
mur3 = "0.1"

View File

@@ -17,6 +17,7 @@ mod close;
mod create;
mod drop;
mod open;
mod options;
mod put;
mod read;
mod region_metadata;

View File

@@ -36,6 +36,7 @@ use store_api::region_request::{AffectedRows, RegionCreateRequest, RegionRequest
use store_api::storage::consts::ReservedColumnId;
use store_api::storage::RegionId;
use crate::engine::options::set_index_options_for_data_region;
use crate::engine::MetricEngineInner;
use crate::error::{
ConflictRegionOptionSnafu, CreateMitoRegionSnafu, InternalColumnOccupiedSnafu,
@@ -376,6 +377,9 @@ impl MetricEngineInner {
data_region_request.primary_key =
vec![ReservedColumnId::table_id(), ReservedColumnId::tsid()];
// set index options
set_index_options_for_data_region(&mut data_region_request.options);
data_region_request
}

View File

@@ -26,6 +26,7 @@ use store_api::region_request::{AffectedRows, RegionOpenRequest, RegionRequest};
use store_api::storage::RegionId;
use super::MetricEngineInner;
use crate::engine::options::set_index_options_for_data_region;
use crate::error::{OpenMitoRegionSnafu, Result};
use crate::metrics::{LOGICAL_REGION_COUNT, PHYSICAL_REGION_COUNT};
use crate::utils;
@@ -77,9 +78,12 @@ impl MetricEngineInner {
engine: MITO_ENGINE_NAME.to_string(),
skip_wal_replay: request.skip_wal_replay,
};
let mut data_region_options = request.options;
set_index_options_for_data_region(&mut data_region_options);
let open_data_region_request = RegionOpenRequest {
region_dir: data_region_dir,
options: request.options.clone(),
options: data_region_options,
engine: MITO_ENGINE_NAME.to_string(),
skip_wal_replay: request.skip_wal_replay,
};

View File

@@ -0,0 +1,44 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Specific options for the metric engine to create or open a region.
use std::collections::HashMap;
use itertools::Itertools as _;
use store_api::storage::consts::ReservedColumnId;
use store_api::storage::ColumnId;
/// Ids of the columns to ignore when building the inverted index of the data region.
///
/// Only the `tsid` column is listed: it is unfriendly to the inverted index and
/// will occupy excessive space if indexed.
const IGNORE_COLUMN_IDS_FOR_DATA_REGION: [ColumnId; 1] = [ReservedColumnId::tsid()];
/// Empirically chosen segment row count for the metric data region.
///
/// Compared to the mito engine, the pattern of the metric engine constructs smaller
/// indices. A value below the default segment row count of 1024 is therefore used:
/// it appropriately increases the size of the index in exchange for more effective
/// indexing.
const SEG_ROW_COUNT_FOR_DATA_REGION: u32 = 256;
/// Writes the metric engine's inverted-index settings into the options of a data region.
///
/// Two entries are inserted, replacing any value already stored under the same key:
/// - `index.inverted_index.ignore_column_ids`: the ids in
///   `IGNORE_COLUMN_IDS_FOR_DATA_REGION`, joined with `,`.
/// - `index.inverted_index.segment_row_count`: the value of
///   `SEG_ROW_COUNT_FOR_DATA_REGION`.
pub fn set_index_options_for_data_region(options: &mut HashMap<String, String>) {
    let ignored_column_ids = IGNORE_COLUMN_IDS_FOR_DATA_REGION.iter().join(",");
    let segment_row_count = SEG_ROW_COUNT_FOR_DATA_REGION.to_string();

    // `extend` inserts each pair in order and overwrites existing keys, exactly like
    // consecutive `insert` calls.
    options.extend([
        ("index.inverted_index.ignore_column_ids".to_string(), ignored_column_ids),
        ("index.inverted_index.segment_row_count".to_string(), segment_row_count),
    ]);
}