feat: support table ttl (#1052)

* feat: purge expired sst on compaction

* chore: add more log

* fix: clippy

* fix: mark expired ssts as compacting before picking candidates

* fix: some CR comments

* fix: remove useless result

* fix: cr comments
This commit is contained in:
Lei, HUANG
2023-02-22 16:56:20 +08:00
committed by GitHub
parent fb2e0c7cf3
commit 2df8143ad5
12 changed files with 231 additions and 41 deletions

View File

@@ -14,14 +14,20 @@
use std::marker::PhantomData;
use std::sync::Arc;
use std::time::Duration;
use common_telemetry::debug;
use common_telemetry::{debug, error, info};
use common_time::Timestamp;
use snafu::ResultExt;
use store_api::logstore::LogStore;
use crate::compaction::scheduler::CompactionRequestImpl;
use crate::compaction::strategy::{SimpleTimeWindowStrategy, StrategyRef};
use crate::compaction::task::{CompactionTask, CompactionTaskImpl};
use crate::error::TtlCalculationSnafu;
use crate::scheduler::Request;
use crate::sst::{FileHandle, Level};
use crate::version::LevelMetasRef;
/// Picker picks input SST files and builds the compaction task.
/// Different compaction strategy may implement different pickers.
@@ -57,6 +63,24 @@ impl<S> SimplePicker<S> {
_phantom_data: Default::default(),
}
}
fn get_expired_ssts(
&self,
levels: &LevelMetasRef,
ttl: Option<Duration>,
) -> crate::error::Result<Vec<FileHandle>> {
let Some(ttl) = ttl else { return Ok(vec![]); };
let expire_time = Timestamp::current_millis()
.sub(ttl)
.context(TtlCalculationSnafu)?;
let mut expired_ssts = vec![];
for level in 0..levels.level_num() {
expired_ssts.extend(levels.level(level as Level).get_expired_files(&expire_time));
}
Ok(expired_ssts)
}
}
impl<S: LogStore> Picker for SimplePicker<S> {
@@ -69,6 +93,22 @@ impl<S: LogStore> Picker for SimplePicker<S> {
req: &CompactionRequestImpl<S>,
) -> crate::error::Result<Option<CompactionTaskImpl<S>>> {
let levels = &req.levels();
let expired_ssts = self
.get_expired_ssts(levels, req.ttl)
.map_err(|e| {
error!(e;"Failed to get region expired SST files, region: {}, ttl: {:?}", req.region_id, req.ttl);
e
})
.unwrap_or_default();
if !expired_ssts.is_empty() {
info!(
"Expired SSTs in region {}: {:?}",
req.region_id, expired_ssts
);
// here we mark expired SSTs as compacting to avoid them being picked.
expired_ssts.iter().for_each(|f| f.mark_compacting(true));
}
for level_num in 0..levels.level_num() {
let level = levels.level(level_num as u8);
@@ -91,6 +131,7 @@ impl<S: LogStore> Picker for SimplePicker<S> {
shared_data: req.shared.clone(),
wal: req.wal.clone(),
manifest: req.manifest.clone(),
expired_ssts,
}));
}

View File

@@ -13,6 +13,7 @@
// limitations under the License.
use std::sync::Arc;
use std::time::Duration;
use common_telemetry::{debug, error, info};
use store_api::logstore::LogStore;
@@ -48,6 +49,7 @@ pub struct CompactionRequestImpl<S: LogStore> {
pub shared: SharedDataRef,
pub manifest: RegionManifest,
pub wal: Wal<S>,
pub ttl: Option<Duration>,
}
impl<S: LogStore> CompactionRequestImpl<S> {

View File

@@ -42,6 +42,7 @@ pub struct CompactionTaskImpl<S: LogStore> {
pub shared_data: SharedDataRef,
pub wal: Wal<S>,
pub manifest: RegionManifest,
pub expired_ssts: Vec<FileHandle>,
}
impl<S: LogStore> Debug for CompactionTaskImpl<S> {
@@ -60,19 +61,14 @@ impl<S: LogStore> Drop for CompactionTaskImpl<S> {
impl<S: LogStore> CompactionTaskImpl<S> {
/// Compacts inputs SSTs, returns `(output file, compacted input file)`.
async fn merge_ssts(&mut self) -> Result<(Vec<FileMeta>, Vec<FileMeta>)> {
async fn merge_ssts(&mut self) -> Result<(HashSet<FileMeta>, HashSet<FileMeta>)> {
let mut futs = Vec::with_capacity(self.outputs.len());
let mut compacted_inputs = HashSet::new();
let region_id = self.shared_data.id();
for output in self.outputs.drain(..) {
let schema = self.schema.clone();
let sst_layer = self.sst_layer.clone();
compacted_inputs.extend(output.inputs.iter().map(|f| FileMeta {
region_id,
file_name: f.file_name().to_string(),
time_range: *f.time_range(),
level: f.level(),
}));
compacted_inputs.extend(output.inputs.iter().map(FileHandle::meta));
// TODO(hl): Maybe spawn to runtime to exploit in-job parallelism.
futs.push(async move {
@@ -94,8 +90,8 @@ impl<S: LogStore> CompactionTaskImpl<S> {
/// Writes updated SST info into manifest.
async fn write_manifest_and_apply(
&self,
output: Vec<FileMeta>,
input: Vec<FileMeta>,
output: HashSet<FileMeta>,
input: HashSet<FileMeta>,
) -> Result<()> {
let version = &self.shared_data.version_control;
let region_version = version.metadata().version();
@@ -103,8 +99,8 @@ impl<S: LogStore> CompactionTaskImpl<S> {
let edit = RegionEdit {
region_version,
flushed_sequence: None,
files_to_add: output,
files_to_remove: input,
files_to_add: Vec::from_iter(output.into_iter()),
files_to_remove: Vec::from_iter(input.into_iter()),
};
info!(
"Compacted region: {}, region edit: {:?}",
@@ -131,10 +127,11 @@ impl<S: LogStore> CompactionTask for CompactionTaskImpl<S> {
async fn run(mut self) -> Result<()> {
self.mark_files_compacting(true);
let (output, compacted) = self.merge_ssts().await.map_err(|e| {
let (output, mut compacted) = self.merge_ssts().await.map_err(|e| {
error!(e; "Failed to compact region: {}", self.shared_data.name());
e
})?;
compacted.extend(self.expired_ssts.iter().map(FileHandle::meta));
self.write_manifest_and_apply(output, compacted)
.await
.map_err(|e| {

View File

@@ -14,6 +14,7 @@
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::time::Duration;
use async_trait::async_trait;
use common_telemetry::logging::info;
@@ -289,7 +290,8 @@ impl<S: LogStore> EngineInner<S> {
let mut guard = SlotGuard::new(name, &self.regions);
let store_config = self.region_store_config(&opts.parent_dir, opts.write_buffer_size, name);
let store_config =
self.region_store_config(&opts.parent_dir, opts.write_buffer_size, name, opts.ttl);
let region = match RegionImpl::open(name.to_string(), store_config, opts).await? {
None => return Ok(None),
@@ -319,8 +321,12 @@ impl<S: LogStore> EngineInner<S> {
.context(error::InvalidRegionDescSnafu {
region: &region_name,
})?;
let store_config =
self.region_store_config(&opts.parent_dir, opts.write_buffer_size, &region_name);
let store_config = self.region_store_config(
&opts.parent_dir,
opts.write_buffer_size,
&region_name,
opts.ttl,
);
let region = RegionImpl::create(metadata, store_config).await?;
@@ -341,6 +347,7 @@ impl<S: LogStore> EngineInner<S> {
parent_dir: &str,
write_buffer_size: Option<usize>,
region_name: &str,
ttl: Option<Duration>,
) -> StoreConfig<S> {
let parent_dir = util::normalize_dir(parent_dir);
@@ -363,6 +370,7 @@ impl<S: LogStore> EngineInner<S> {
compaction_scheduler: self.compaction_scheduler.clone(),
engine_config: self.config.clone(),
file_purger: self.file_purger.clone(),
ttl,
}
}
}

View File

@@ -434,6 +434,12 @@ pub enum Error {
source: object_store::Error,
backtrace: Backtrace,
},
#[snafu(display("Failed to calculate SST expire time, source: {}", source))]
TtlCalculation {
#[snafu(backtrace)]
source: common_time::error::Error,
},
}
pub type Result<T> = std::result::Result<T, Error>;
@@ -508,6 +514,7 @@ impl ErrorExt for Error {
StopScheduler { .. } => StatusCode::Internal,
DeleteSst { .. } => StatusCode::StorageUnavailable,
IllegalSchedulerState { .. } => StatusCode::Unexpected,
TtlCalculation { source, .. } => source.status_code(),
}
}

View File

@@ -19,6 +19,7 @@ mod writer;
use std::collections::BTreeMap;
use std::fmt;
use std::sync::Arc;
use std::time::Duration;
use async_trait::async_trait;
use common_telemetry::logging;
@@ -140,6 +141,7 @@ pub struct StoreConfig<S: LogStore> {
pub compaction_scheduler: CompactionSchedulerRef<S>,
pub engine_config: Arc<EngineConfig>,
pub file_purger: FilePurgerRef,
pub ttl: Option<Duration>,
}
pub type RecoverdMetadata = (SequenceNumber, (ManifestVersion, RawRegionMetadata));
@@ -198,6 +200,7 @@ impl<S: LogStore> RegionImpl<S> {
writer: Arc::new(RegionWriter::new(
store_config.memtable_builder,
store_config.engine_config.clone(),
store_config.ttl,
)),
wal,
flush_strategy: store_config.flush_strategy,
@@ -277,6 +280,7 @@ impl<S: LogStore> RegionImpl<S> {
let writer = Arc::new(RegionWriter::new(
store_config.memtable_builder,
store_config.engine_config.clone(),
store_config.ttl,
));
let writer_ctx = WriterContext {
shared: &shared,

View File

@@ -13,6 +13,7 @@
// limitations under the License.
use std::sync::Arc;
use std::time::Duration;
use common_error::prelude::BoxedError;
use common_telemetry::tracing::log::info;
@@ -60,9 +61,13 @@ pub struct RegionWriter {
}
impl RegionWriter {
pub fn new(memtable_builder: MemtableBuilderRef, config: Arc<EngineConfig>) -> RegionWriter {
pub fn new(
memtable_builder: MemtableBuilderRef,
config: Arc<EngineConfig>,
ttl: Option<Duration>,
) -> RegionWriter {
RegionWriter {
inner: Mutex::new(WriterInner::new(memtable_builder, config)),
inner: Mutex::new(WriterInner::new(memtable_builder, config, ttl)),
version_mutex: Mutex::new(()),
}
}
@@ -324,15 +329,21 @@ struct WriterInner {
/// It should protected by upper mutex
closed: bool,
engine_config: Arc<EngineConfig>,
ttl: Option<Duration>,
}
impl WriterInner {
fn new(memtable_builder: MemtableBuilderRef, engine_config: Arc<EngineConfig>) -> WriterInner {
fn new(
memtable_builder: MemtableBuilderRef,
engine_config: Arc<EngineConfig>,
ttl: Option<Duration>,
) -> WriterInner {
WriterInner {
memtable_builder,
flush_handle: None,
engine_config,
closed: false,
ttl,
}
}
@@ -596,7 +607,7 @@ impl WriterInner {
return Ok(());
}
let cb = Self::build_flush_callback(&current_version, ctx, &self.engine_config);
let cb = Self::build_flush_callback(&current_version, ctx, &self.engine_config, self.ttl);
let flush_req = FlushJob {
max_memtable_id: max_memtable_id.unwrap(),
@@ -624,6 +635,7 @@ impl WriterInner {
version: &VersionRef,
ctx: &WriterContext<S>,
config: &Arc<EngineConfig>,
ttl: Option<Duration>,
) -> Option<FlushCallback> {
let region_id = version.metadata().id();
let compaction_request = CompactionRequestImpl {
@@ -633,6 +645,7 @@ impl WriterInner {
shared: ctx.shared.clone(),
manifest: ctx.manifest.clone(),
wal: ctx.wal.clone(),
ttl,
};
let compaction_scheduler = ctx.compaction_scheduler.clone();
let shared_data = ctx.shared.clone();

View File

@@ -145,6 +145,21 @@ impl LevelMeta {
self.files.len()
}
/// Returns expired SSTs from current level.
pub fn get_expired_files(&self, expire_time: &Timestamp) -> Vec<FileHandle> {
self.files
.iter()
.filter_map(|(_, v)| {
let Some((_, end)) = v.time_range() else { return None; };
if end < expire_time {
Some(v.clone())
} else {
None
}
})
.collect()
}
pub fn files(&self) -> impl Iterator<Item = &FileHandle> {
self.files.values()
}
@@ -212,6 +227,11 @@ impl FileHandle {
pub fn mark_deleted(&self) {
self.inner.deleted.store(true, Ordering::Relaxed);
}
#[inline]
pub fn meta(&self) -> FileMeta {
self.inner.meta.clone()
}
}
/// Actually data of [FileHandle].

View File

@@ -69,5 +69,6 @@ pub async fn new_store_config(
compaction_scheduler,
engine_config: Default::default(),
file_purger,
ttl: None,
}
}