feat: support user config manifest compression (#1579)

* feat: support user config manifest compression

* chore: change style

* chore: enhance test
This commit is contained in:
WU Jingdi
2023-05-16 11:02:59 +08:00
committed by GitHub
parent 856ab5bea7
commit fb1ac0cb9c
18 changed files with 139 additions and 26 deletions

1
Cargo.lock generated
View File

@@ -5002,6 +5002,7 @@ dependencies = [
"async-trait",
"chrono",
"common-catalog",
"common-datasource",
"common-error",
"common-procedure",
"common-procedure-test",

View File

@@ -236,6 +236,7 @@ mod tests {
checkpoint_margin = 9
gc_duration = '7s'
checkpoint_on_startup = true
compress = true
[logging]
level = "debug"
@@ -295,6 +296,7 @@ mod tests {
checkpoint_margin: Some(9),
gc_duration: Some(Duration::from_secs(7)),
checkpoint_on_startup: true,
compress: true
},
options.storage.manifest,
);

View File

@@ -170,6 +170,8 @@ pub struct RegionManifestConfig {
pub gc_duration: Option<Duration>,
/// Whether to try creating a manifest checkpoint on region opening
pub checkpoint_on_startup: bool,
/// Whether to compress manifest and checkpoint files with gzip
pub compress: bool,
}
impl Default for RegionManifestConfig {
@@ -178,6 +180,7 @@ impl Default for RegionManifestConfig {
checkpoint_margin: Some(10u16),
gc_duration: Some(Duration::from_secs(30)),
checkpoint_on_startup: false,
compress: false,
}
}
}
@@ -246,6 +249,7 @@ impl From<&DatanodeOptions> for SchedulerConfig {
impl From<&DatanodeOptions> for StorageEngineConfig {
fn from(value: &DatanodeOptions) -> Self {
Self {
compress_manifest: value.storage.manifest.compress,
manifest_checkpoint_on_startup: value.storage.manifest.checkpoint_on_startup,
manifest_checkpoint_margin: value.storage.manifest.checkpoint_margin,
manifest_gc_duration: value.storage.manifest.gc_duration,

View File

@@ -116,7 +116,9 @@ impl Instance {
let log_store = Arc::new(create_log_store(&opts.wal).await?);
let mito_engine = Arc::new(DefaultEngine::new(
TableEngineConfig::default(),
TableEngineConfig {
compress_manifest: opts.storage.manifest.compress,
},
EngineImpl::new(
StorageEngineConfig::from(opts),
log_store.clone(),

View File

@@ -19,6 +19,7 @@ common-error = { path = "../common/error" }
common-procedure = { path = "../common/procedure" }
common-query = { path = "../common/query" }
common-recordbatch = { path = "../common/recordbatch" }
common-datasource = { path = "../common/datasource" }
common-telemetry = { path = "../common/telemetry" }
common-time = { path = "../common/time" }
dashmap = "5.4"

View File

@@ -15,4 +15,6 @@
//! Table Engine config
#[derive(Debug, Clone, Default)]
pub struct EngineConfig {}
/// Configuration of the table engine.
pub struct EngineConfig {
/// Whether the table manifest should be compressed. The engine converts this
/// flag into a compression type through `manifest_compress_type`.
pub compress_manifest: bool,
}

View File

@@ -21,6 +21,7 @@ use std::sync::Arc;
use async_trait::async_trait;
pub use common_catalog::consts::MITO_ENGINE;
use common_datasource::compression::CompressionType;
use common_error::ext::BoxedError;
use common_procedure::{BoxedProcedure, ProcedureManager};
use common_telemetry::{debug, logging};
@@ -29,6 +30,7 @@ use datatypes::schema::Schema;
use key_lock::KeyLock;
use object_store::ObjectStore;
use snafu::{ensure, OptionExt, ResultExt};
use storage::manifest::manifest_compress_type;
use store_api::storage::{
ColumnDescriptorBuilder, ColumnFamilyDescriptor, ColumnFamilyDescriptorBuilder, ColumnId,
EngineContext as StorageEngineContext, OpenOptions, RegionNumber, RowKeyDescriptor,
@@ -247,6 +249,7 @@ pub(crate) struct MitoEngineInner<S: StorageEngine> {
/// Writing to `tables` should also hold the `table_mutex`.
tables: DashMap<String, Arc<MitoTable<S::Region>>>,
object_store: ObjectStore,
compress_type: CompressionType,
storage_engine: S,
/// Table mutex is used to protect the operations such as creating/opening/closing
/// a table, to avoid things like opening the same table simultaneously.
@@ -638,6 +641,7 @@ impl<S: StorageEngine> MitoEngineInner<S> {
let manifest = MitoTable::<<S as StorageEngine>::Region>::build_manifest(
table_dir,
self.object_store.clone(),
self.compress_type,
);
let Some(table_info) =
MitoTable::<<S as StorageEngine>::Region>::recover_table_info(table_name, &manifest)
@@ -691,11 +695,12 @@ async fn close_table(lock: Arc<KeyLock<String>>, table: TableRef) -> TableResult
}
impl<S: StorageEngine> MitoEngineInner<S> {
fn new(_config: EngineConfig, storage_engine: S, object_store: ObjectStore) -> Self {
/// Builds the inner engine state from the table engine `config`.
///
/// The manifest compression type is derived once from
/// `config.compress_manifest` and kept in `compress_type`; the table map and
/// the table mutex both start out empty.
fn new(config: EngineConfig, storage_engine: S, object_store: ObjectStore) -> Self {
    let compress_type = manifest_compress_type(config.compress_manifest);
    let tables = DashMap::new();
    let table_mutex = Arc::new(KeyLock::new());

    Self {
        tables,
        object_store,
        compress_type,
        storage_engine,
        table_mutex,
    }
}

View File

@@ -367,6 +367,7 @@ impl<S: StorageEngine> TableCreator<S> {
table_info,
self.regions.clone(),
self.engine_inner.object_store.clone(),
self.engine_inner.compress_type,
)
.await?;

View File

@@ -46,7 +46,7 @@ pub type TableManifest = ManifestImpl<NoopCheckpoint, TableMetaActionList>;
#[cfg(test)]
mod tests {
use storage::manifest::MetaActionIteratorImpl;
use storage::manifest::{manifest_compress_type, MetaActionIteratorImpl};
use store_api::manifest::action::ProtocolAction;
use store_api::manifest::{Manifest, MetaActionIterator};
use table::metadata::{RawTableInfo, TableInfo};
@@ -77,10 +77,20 @@ mod tests {
}
#[tokio::test]
async fn test_table_manifest() {
/// Runs the shared table manifest scenario with manifest compression enabled.
async fn test_table_manifest_compress() {
    let compress = true;
    test_table_manifest(compress).await
}
/// Runs the shared table manifest scenario with manifest compression disabled.
#[tokio::test]
async fn test_table_manifest_uncompress() {
    let compress = false;
    test_table_manifest(compress).await
}
async fn test_table_manifest(compress: bool) {
let (_dir, object_store) = test_util::new_test_object_store("test_table_manifest").await;
let manifest = TableManifest::create("manifest/", object_store);
let manifest =
TableManifest::create("manifest/", object_store, manifest_compress_type(compress));
let mut iter = manifest.scan(0, 100).await.unwrap();
assert!(iter.next_action().await.unwrap().is_none());

View File

@@ -22,6 +22,7 @@ use std::sync::Arc;
use arc_swap::ArcSwap;
use async_trait::async_trait;
use common_datasource::compression::CompressionType;
use common_error::ext::BoxedError;
use common_query::logical_plan::Expr;
use common_query::physical_plan::PhysicalPlanRef;
@@ -445,8 +446,10 @@ impl<R: Region> MitoTable<R> {
table_info: TableInfo,
regions: HashMap<RegionNumber, R>,
object_store: ObjectStore,
compress_type: CompressionType,
) -> Result<MitoTable<R>> {
let manifest = TableManifest::create(&table_manifest_dir(table_dir), object_store);
let manifest =
TableManifest::create(&table_manifest_dir(table_dir), object_store, compress_type);
let _timer =
common_telemetry::timer!(crate::metrics::MITO_CREATE_TABLE_UPDATE_MANIFEST_ELAPSED);
@@ -463,8 +466,12 @@ impl<R: Region> MitoTable<R> {
Ok(MitoTable::new(table_info, regions, manifest))
}
pub(crate) fn build_manifest(table_dir: &str, object_store: ObjectStore) -> TableManifest {
TableManifest::create(&table_manifest_dir(table_dir), object_store)
/// Creates the [`TableManifest`] for the table stored under `table_dir`.
///
/// The manifest directory is computed with `table_manifest_dir`, and
/// `compress_type` is forwarded unchanged to `TableManifest::create`.
pub(crate) fn build_manifest(
    table_dir: &str,
    object_store: ObjectStore,
    compress_type: CompressionType,
) -> TableManifest {
    let manifest_dir = table_manifest_dir(table_dir);
    TableManifest::create(&manifest_dir, object_store, compress_type)
}
pub(crate) async fn recover_table_info(

View File

@@ -30,6 +30,7 @@ pub const DEFAULT_PICKER_SCHEDULE_INTERVAL: u32 = 5 * 60 * 1000;
#[derive(Debug, Clone)]
pub struct EngineConfig {
pub manifest_checkpoint_on_startup: bool,
pub compress_manifest: bool,
pub manifest_checkpoint_margin: Option<u16>,
pub manifest_gc_duration: Option<Duration>,
pub max_files_in_l0: usize,
@@ -49,6 +50,7 @@ impl Default for EngineConfig {
fn default() -> Self {
Self {
manifest_checkpoint_on_startup: false,
compress_manifest: false,
manifest_checkpoint_margin: Some(10),
manifest_gc_duration: Some(Duration::from_secs(30)),
max_files_in_l0: 8,

View File

@@ -34,6 +34,7 @@ use crate::flush::{
FlushScheduler, FlushSchedulerRef, FlushStrategyRef, PickerConfig, SizeBasedStrategy,
};
use crate::manifest::region::RegionManifest;
use crate::manifest::storage::manifest_compress_type;
use crate::memtable::{DefaultMemtableBuilder, MemtableBuilderRef};
use crate::metadata::RegionMetadata;
use crate::region::{RegionImpl, StoreConfig};
@@ -449,6 +450,7 @@ impl<S: LogStore> EngineInner<S> {
let manifest = RegionManifest::with_checkpointer(
&manifest_dir,
self.object_store.clone(),
manifest_compress_type(config.compress_manifest),
config.manifest_checkpoint_margin,
config.manifest_gc_duration,
);

View File

@@ -23,3 +23,4 @@ pub(crate) mod storage;
pub mod test_utils;
pub use self::impl_::*;
pub use self::storage::manifest_compress_type;

View File

@@ -19,6 +19,7 @@ use std::time::Duration;
use arc_swap::ArcSwap;
use async_trait::async_trait;
use common_datasource::compression::CompressionType;
use common_runtime::{RepeatedTask, TaskFunction};
use common_telemetry::{debug, logging, warn};
use object_store::ObjectStore;
@@ -52,11 +53,16 @@ impl<S: 'static + Checkpoint<Error = Error>, M: 'static + MetaAction<Error = Err
pub fn new(
manifest_dir: &str,
object_store: ObjectStore,
compress_type: CompressionType,
checkpoint_actions_margin: Option<u16>,
gc_duration: Option<Duration>,
checkpointer: Option<Arc<dyn Checkpointer<Checkpoint = S, MetaAction = M>>>,
) -> Self {
let inner = Arc::new(ManifestImplInner::new(manifest_dir, object_store));
let inner = Arc::new(ManifestImplInner::new(
manifest_dir,
object_store,
compress_type,
));
let gc_task = if checkpointer.is_some() {
// only start gc task when checkpoint is enabled.
Some(Arc::new(RepeatedTask::new(
@@ -79,8 +85,12 @@ impl<S: 'static + Checkpoint<Error = Error>, M: 'static + MetaAction<Error = Err
}
}
pub fn create(manifest_dir: &str, object_store: ObjectStore) -> Self {
Self::new(manifest_dir, object_store, None, None, None)
/// Creates a manifest rooted at `manifest_dir` without a checkpointer.
///
/// Delegates to `Self::new`, passing `compress_type` through and leaving the
/// checkpoint margin, the GC duration and the checkpointer all unset.
pub fn create(
    manifest_dir: &str,
    object_store: ObjectStore,
    compress_type: CompressionType,
) -> Self {
    let checkpoint_actions_margin = None;
    let gc_duration = None;
    let checkpointer = None;

    Self::new(
        manifest_dir,
        object_store,
        compress_type,
        checkpoint_actions_margin,
        gc_duration,
        checkpointer,
    )
}
#[inline]
@@ -275,11 +285,15 @@ impl<S: Checkpoint<Error = Error>, M: MetaAction<Error = Error>> TaskFunction<Er
}
impl<S: Checkpoint<Error = Error>, M: MetaAction<Error = Error>> ManifestImplInner<S, M> {
fn new(manifest_dir: &str, object_store: ObjectStore) -> Self {
fn new(manifest_dir: &str, object_store: ObjectStore, compress_type: CompressionType) -> Self {
let (reader_version, writer_version) = action::supported_protocol_version();
Self {
store: Arc::new(ManifestObjectStore::new(manifest_dir, object_store)),
store: Arc::new(ManifestObjectStore::new(
manifest_dir,
object_store,
compress_type,
)),
version: AtomicU64::new(0),
protocol: ArcSwap::new(Arc::new(ProtocolAction::new())),
supported_reader_version: reader_version,

View File

@@ -19,6 +19,7 @@ use std::sync::Arc;
use std::time::Duration;
use async_trait::async_trait;
use common_datasource::compression::CompressionType;
use common_telemetry::{info, warn};
use object_store::ObjectStore;
use store_api::manifest::action::ProtocolAction;
@@ -148,12 +149,14 @@ impl RegionManifest {
pub fn with_checkpointer(
manifest_dir: &str,
object_store: ObjectStore,
compress_type: CompressionType,
checkpoint_actions_margin: Option<u16>,
gc_duration: Option<Duration>,
) -> Self {
Self::new(
manifest_dir,
object_store,
compress_type,
checkpoint_actions_margin,
gc_duration,
Some(Arc::new(RegionManifestCheckpointer {
@@ -186,19 +189,35 @@ mod tests {
use store_api::manifest::{Manifest, MetaActionIterator, MAX_VERSION};
use super::*;
use crate::manifest::manifest_compress_type;
use crate::manifest::test_utils::*;
use crate::metadata::RegionMetadata;
use crate::sst::FileId;
#[tokio::test]
async fn test_region_manifest() {
/// Runs the shared region manifest scenario with manifest compression enabled.
async fn test_region_manifest_compress() {
    let compress = true;
    test_region_manifest(compress).await
}
/// Runs the shared region manifest scenario with manifest compression disabled.
#[tokio::test]
async fn test_region_manifest_uncompress() {
    let compress = false;
    test_region_manifest(compress).await
}
async fn test_region_manifest(compress: bool) {
common_telemetry::init_default_ut_logging();
let tmp_dir = create_temp_dir("test_region_manifest");
let mut builder = Fs::default();
builder.root(&tmp_dir.path().to_string_lossy());
let object_store = ObjectStore::new(builder).unwrap().finish();
let manifest = RegionManifest::with_checkpointer("/manifest/", object_store, None, None);
let manifest = RegionManifest::with_checkpointer(
"/manifest/",
object_store,
manifest_compress_type(compress),
None,
None,
);
manifest.start().await.unwrap();
let region_meta = Arc::new(build_region_meta());
@@ -306,7 +325,16 @@ mod tests {
}
#[tokio::test]
async fn test_region_manifest_checkpoint() {
/// Runs the shared region manifest checkpoint scenario with manifest
/// compression enabled.
async fn test_region_manifest_checkpoint_compress() {
    let compress = true;
    test_region_manifest_checkpoint(compress).await
}
/// Runs the shared region manifest checkpoint scenario with manifest
/// compression disabled.
#[tokio::test]
async fn test_region_manifest_checkpoint_uncompress() {
    let compress = false;
    test_region_manifest_checkpoint(compress).await
}
async fn test_region_manifest_checkpoint(compress: bool) {
common_telemetry::init_default_ut_logging();
let tmp_dir = create_temp_dir("test_region_manifest_checkpoint");
let mut builder = Fs::default();
@@ -316,6 +344,7 @@ mod tests {
let manifest = RegionManifest::with_checkpointer(
"/manifest/",
object_store,
manifest_compress_type(compress),
None,
Some(Duration::from_millis(50)),
);

View File

@@ -39,11 +39,20 @@ lazy_static! {
}
const LAST_CHECKPOINT_FILE: &str = "_last_checkpoint";
const DEFAULT_MANIFEST_COMPRESSION_TYPE: CompressionType = CompressionType::Uncompressed;
const DEFAULT_MANIFEST_COMPRESSION_TYPE: CompressionType = CompressionType::Gzip;
/// For backward compatibility: a user's existing manifest files may not have been compressed,
/// so when we encounter problems, we need to fall back to `FALL_BACK_COMPRESS_TYPE` for processing.
const FALL_BACK_COMPRESS_TYPE: CompressionType = CompressionType::Uncompressed;
/// Maps the boolean `compress` setting to the manifest [`CompressionType`].
///
/// Returns `DEFAULT_MANIFEST_COMPRESSION_TYPE` when `compress` is `true`, and
/// `FALL_BACK_COMPRESS_TYPE` otherwise.
#[inline]
pub const fn manifest_compress_type(compress: bool) -> CompressionType {
    if !compress {
        return FALL_BACK_COMPRESS_TYPE;
    }
    DEFAULT_MANIFEST_COMPRESSION_TYPE
}
#[inline]
pub fn delta_file(version: ManifestVersion) -> String {
format!("{version:020}.json")
@@ -133,11 +142,10 @@ pub struct ManifestObjectStore {
}
impl ManifestObjectStore {
pub fn new(path: &str, object_store: ObjectStore) -> Self {
pub fn new(path: &str, object_store: ObjectStore, compress_type: CompressionType) -> Self {
Self {
object_store,
//TODO: make it configurable
compress_type: DEFAULT_MANIFEST_COMPRESSION_TYPE,
compress_type,
path: util::normalize_dir(path),
}
}
@@ -528,7 +536,7 @@ mod tests {
let mut builder = Fs::default();
builder.root(&tmp_dir.path().to_string_lossy());
let object_store = ObjectStore::new(builder).unwrap().finish();
ManifestObjectStore::new("/", object_store)
ManifestObjectStore::new("/", object_store, CompressionType::Uncompressed)
}
#[test]

View File

@@ -42,6 +42,7 @@ use super::*;
use crate::chunk::ChunkReaderImpl;
use crate::file_purger::noop::NoopFilePurgeHandler;
use crate::manifest::action::{RegionChange, RegionMetaActionList};
use crate::manifest::manifest_compress_type;
use crate::manifest::test_utils::*;
use crate::memtable::DefaultMemtableBuilder;
use crate::scheduler::{LocalScheduler, SchedulerConfig};
@@ -301,7 +302,16 @@ async fn test_new_region() {
}
#[tokio::test]
async fn test_recover_region_manifets() {
/// Runs the shared region manifest recovery scenario with manifest
/// compression enabled.
async fn test_recover_region_manifets_compress() {
    let compress = true;
    test_recover_region_manifets(compress).await;
}
/// Runs the shared region manifest recovery scenario with manifest
/// compression disabled.
#[tokio::test]
async fn test_recover_region_manifets_uncompress() {
    let compress = false;
    test_recover_region_manifets(compress).await;
}
async fn test_recover_region_manifets(compress: bool) {
common_telemetry::init_default_ut_logging();
let tmp_dir = create_temp_dir("test_recover_region_manifets");
let memtable_builder = Arc::new(DefaultMemtableBuilder::default()) as _;
@@ -310,8 +320,13 @@ async fn test_recover_region_manifets() {
builder.root(&tmp_dir.path().to_string_lossy());
let object_store = ObjectStore::new(builder).unwrap().finish();
let manifest =
RegionManifest::with_checkpointer("/manifest/", object_store.clone(), None, None);
let manifest = RegionManifest::with_checkpointer(
"/manifest/",
object_store.clone(),
manifest_compress_type(compress),
None,
None,
);
let region_meta = Arc::new(build_region_meta());
let sst_layer = Arc::new(FsAccessLayer::new("sst", object_store)) as _;

View File

@@ -14,6 +14,7 @@
use std::sync::Arc;
use common_datasource::compression::CompressionType;
use log_store::raft_engine::log_store::RaftEngineLogStore;
use log_store::LogConfig;
use object_store::services::Fs;
@@ -57,7 +58,13 @@ pub async fn new_store_config_with_object_store(
let manifest_dir = engine::region_manifest_dir(parent_dir, region_name);
let sst_layer = Arc::new(FsAccessLayer::new(&sst_dir, object_store.clone()));
let manifest = RegionManifest::with_checkpointer(&manifest_dir, object_store, None, None);
let manifest = RegionManifest::with_checkpointer(
&manifest_dir,
object_store,
CompressionType::Uncompressed,
None,
None,
);
manifest.start().await.unwrap();
let log_config = LogConfig {
log_file_dir: log_store_dir(store_dir),