Files
greptimedb/src/mito2/src/worker/handle_flush.rs
Yingwen 648b2ae293 feat(mito): Flush region (#2291)
* chore: call handle_flush_request

* feat: alias SchedulerRef and clean scheduler on drop

* feat: add scheduler to workers

* feat: remove RegionMemtableStats

* feat: pick regions to flush

* feat: add more fields to region flush task

* feat: smallvec workspace dep

* feat: Use list to hold immutable memtables

* feat: flush job wip

* feat: use access layer to read write sst

* feat: flush memtables to l0

* feat: write manifest

* feat: schedule next flush on success

* feat: schedule flush on success and failure

* feat: add purger to region

* feat: apply edit after flush

* feat: collect stats for SSTs

* feat: manual flush

* test: test flush and fix manifest test

* feat: remove flush scheduler job limit

* fix: typo

* style: clippy

* feat: clean flushed files on failure

* chore: address CR comment

* refactor: Use put_rows

* feat: Clean flush scheduler on drop

* feat: remove region flush status on drop and close

* chore: address CR comment
2023-09-12 07:57:15 -05:00

182 lines
6.7 KiB
Rust

// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Handling flush related requests.
use common_query::Output;
use common_telemetry::{error, info};
use common_time::util::current_time_millis;
use store_api::logstore::LogStore;
use store_api::storage::RegionId;
use tokio::sync::oneshot;
use crate::error::{RegionNotFoundSnafu, Result};
use crate::flush::{FlushReason, RegionFlushTask};
use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList};
use crate::region::MitoRegionRef;
use crate::request::{FlushFailed, FlushFinished};
use crate::worker::RegionWorkerLoop;
impl<S: LogStore> RegionWorkerLoop<S> {
/// On region flush job finished.
pub(crate) async fn handle_flush_finished(
&mut self,
region_id: RegionId,
mut request: FlushFinished,
) {
let Some(region) = self.regions.get_region(region_id) else {
// We may dropped or closed the region.
request.on_failure(RegionNotFoundSnafu { region_id }.build());
return;
};
// Write region edit to manifest.
let edit = RegionEdit {
files_to_add: std::mem::take(&mut request.file_metas),
files_to_remove: Vec::new(),
compaction_time_window: None,
flushed_entry_id: Some(request.flushed_entry_id),
};
let action_list = RegionMetaActionList::with_action(RegionMetaAction::Edit(edit.clone()));
if let Err(e) = region.manifest_manager.update(action_list).await {
error!(e; "Failed to write manifest, region: {}", region_id);
request.on_failure(e);
return;
}
// Apply edit to region's version.
region
.version_control
.apply_edit(edit, region.file_purger.clone());
// Delete wal.
info!(
"Region {} flush finished, tries to bump wal to {}",
region_id, request.flushed_entry_id
);
if let Err(e) = self.wal.obsolete(region_id, request.flushed_entry_id).await {
error!(e; "Failed to write wal, region: {}", region_id);
request.on_failure(e);
return;
}
// Handle pending requests of the region.
if let Some(ddl_requests) = self.flush_scheduler.on_flush_success(region_id) {
self.handle_ddl_requests(ddl_requests).await;
}
// Notifies waiters.
request.on_success();
}
}
impl<S> RegionWorkerLoop<S> {
    /// Handles a manual flush request for the region identified by `region_id`.
    ///
    /// If the region is not found, a region-not-found error is sent to `sender`
    /// (when one is given) and nothing is scheduled. Otherwise a flush task with
    /// [`FlushReason::Manual`] is built, `sender` is attached to the task's
    /// `senders`, and the task is handed to the flush scheduler. A scheduling
    /// failure is only logged here.
    pub(crate) async fn handle_flush_request(
        &mut self,
        region_id: RegionId,
        sender: Option<oneshot::Sender<Result<Output>>>,
    ) {
        let Some(region) = self.regions.get_region(region_id) else {
            if let Some(sender) = sender {
                // The send result is ignored on purpose: the receiver may be gone.
                let _ = sender.send(RegionNotFoundSnafu { region_id }.fail());
            }
            return;
        };

        let mut task = self.new_flush_task(&region, FlushReason::Manual);
        if let Some(sender) = sender {
            task.senders.push(sender);
        }
        if let Err(e) = self.flush_scheduler.schedule_flush(&region, task) {
            error!(e; "Failed to schedule flush task for region {}", region.region_id);
        }
    }

    /// On region flush job failed.
    ///
    /// Forwards the error carried by `request` to the flush scheduler.
    pub(crate) async fn handle_flush_failed(&mut self, region_id: RegionId, request: FlushFailed) {
        self.flush_scheduler.on_flush_failed(region_id, request.err);
    }

    /// Checks whether the engine reaches flush threshold. If so, finds regions in this
    /// worker to flush.
    ///
    /// Errors from scheduling the flushes are logged and not propagated.
    pub(crate) fn maybe_flush_worker(&mut self) {
        if !self.write_buffer_manager.should_flush_engine() {
            // No need to flush worker.
            return;
        }

        // If the engine needs flush, each worker will find some regions to flush. We might
        // flush more memory than expected but it should be acceptable.
        if let Err(e) = self.flush_regions_on_engine_full() {
            error!(e; "Failed to flush worker");
        }
    }

    /// Finds some regions to flush to reduce write buffer usage.
    ///
    /// Regions that already have a flush requested are skipped. Among the rest:
    /// - every region whose last flush time is earlier than
    ///   `now - auto_flush_interval` gets a flush scheduled, and
    /// - the region with the largest (non-zero) mutable memtable usage gets a flush
    ///   scheduled as well, unless one was already requested for it.
    ///
    /// # Errors
    ///
    /// Returns the first error reported by the flush scheduler; regions after the
    /// failing one are not examined.
    fn flush_regions_on_engine_full(&mut self) -> Result<()> {
        let regions = self.regions.list_regions();
        let now = current_time_millis();
        // `as_millis()` returns a `u128`. Clamp it to `i64::MAX` before converting so
        // a very large interval cannot wrap into an arbitrary (possibly negative)
        // value, and subtract with saturation so the cutoff cannot overflow.
        let interval_millis = self
            .config
            .auto_flush_interval
            .as_millis()
            .min(i64::MAX as u128) as i64;
        let min_last_flush_time = now.saturating_sub(interval_millis);
        let mut max_mutable_size = 0;
        // Region with max mutable memtable size.
        let mut max_mem_region = None;

        for region in &regions {
            if self.flush_scheduler.is_flush_requested(region.region_id) {
                // Already flushing.
                continue;
            }

            let version = region.version();
            let region_mutable_size = version.memtables.mutable_bytes_usage();
            // Tracks region with max mutable memtable size.
            if region_mutable_size > max_mutable_size {
                max_mem_region = Some(region);
                max_mutable_size = region_mutable_size;
            }

            if region.last_flush_millis() < min_last_flush_time {
                // If flush time of this region is earlier than `min_last_flush_time`, we can flush this region.
                let task = self.new_flush_task(region, FlushReason::EngineFull);
                self.flush_scheduler.schedule_flush(region, task)?;
            }
        }

        // Flush memtable with max mutable memtable.
        // TODO(yingwen): Maybe flush more tables to reduce write buffer size.
        if let Some(region) = max_mem_region {
            // The loop above may already have scheduled this region because of its
            // last flush time, so check again before scheduling.
            if !self.flush_scheduler.is_flush_requested(region.region_id) {
                let task = self.new_flush_task(region, FlushReason::EngineFull);
                self.flush_scheduler.schedule_flush(region, task)?;
            }
        }

        Ok(())
    }

    /// Builds a flush task for `region` with the given `reason`.
    ///
    /// The task starts with no result senders. It gets clones of this worker's
    /// request sender and memtable builder, and of the region's access layer and
    /// file purger.
    fn new_flush_task(&self, region: &MitoRegionRef, reason: FlushReason) -> RegionFlushTask {
        // TODO(yingwen): metrics for flush requested.
        RegionFlushTask {
            region_id: region.region_id,
            reason,
            senders: Vec::new(),
            request_sender: self.sender.clone(),
            access_layer: region.access_layer.clone(),
            memtable_builder: self.memtable_builder.clone(),
            file_purger: region.file_purger.clone(),
        }
    }
}