Add FullAccessTimeline guard in safekeepers (#7887)

This is a preparation for
https://github.com/neondatabase/neon/issues/6337.

The idea is to add FullAccessTimeline, which will act as a guard for
tasks requiring access to WAL files. Eviction will be blocked on these
tasks and WAL won't be deleted from disk until there is at least one
active FullAccessTimeline.

To get FullAccessTimeline, tasks call `tli.full_access_guard().await?`.
After eviction is implemented, this function will be responsible for
downloading missing WAL file and waiting until the download finishes.

This commit also contains other small refactorings:
- Separate `get_tenant_dir` and `get_timeline_dir` functions for
building a local path. This is useful for looking at usages and finding
tasks requiring access to local filesystem.
- `timeline_manager` is now responsible for spawning all background
tasks
- WAL removal task is now spawned instantly after horizon is updated
This commit is contained in:
Arthur Petukhovsky
2024-05-31 14:19:45 +01:00
committed by GitHub
parent 5a394fde56
commit 16b2e74037
23 changed files with 726 additions and 576 deletions

View File

@@ -10,6 +10,7 @@ use std::sync::Arc;
use anyhow::bail;
use anyhow::Result;
use camino::Utf8Path;
use camino::Utf8PathBuf;
use chrono::{DateTime, Utc};
use postgres_ffi::XLogSegNo;
use postgres_ffi::MAX_SEND_SIZE;
@@ -26,7 +27,8 @@ use crate::safekeeper::TermHistory;
use crate::send_wal::WalSenderState;
use crate::state::TimelineMemState;
use crate::state::TimelinePersistentState;
use crate::wal_storage::WalReader;
use crate::timeline::get_timeline_dir;
use crate::timeline::FullAccessTimeline;
use crate::GlobalTimelines;
use crate::SafeKeeperConf;
@@ -68,6 +70,7 @@ pub struct Response {
pub struct TimelineDumpSer {
pub tli: Arc<crate::timeline::Timeline>,
pub args: Args,
pub timeline_dir: Utf8PathBuf,
pub runtime: Arc<tokio::runtime::Runtime>,
}
@@ -85,14 +88,20 @@ impl Serialize for TimelineDumpSer {
where
S: serde::Serializer,
{
let dump = self
.runtime
.block_on(build_from_tli_dump(self.tli.clone(), self.args.clone()));
let dump = self.runtime.block_on(build_from_tli_dump(
&self.tli,
&self.args,
&self.timeline_dir,
));
dump.serialize(serializer)
}
}
async fn build_from_tli_dump(timeline: Arc<crate::timeline::Timeline>, args: Args) -> Timeline {
async fn build_from_tli_dump(
timeline: &Arc<crate::timeline::Timeline>,
args: &Args,
timeline_dir: &Utf8Path,
) -> Timeline {
let control_file = if args.dump_control_file {
let mut state = timeline.get_state().await.1;
if !args.dump_term_history {
@@ -112,7 +121,8 @@ async fn build_from_tli_dump(timeline: Arc<crate::timeline::Timeline>, args: Arg
let disk_content = if args.dump_disk_content {
// build_disk_content can fail, but we don't want to fail the whole
// request because of that.
build_disk_content(&timeline.timeline_dir).ok()
// Note: timeline can be in offloaded state, this is not a problem.
build_disk_content(timeline_dir).ok()
} else {
None
};
@@ -186,6 +196,7 @@ pub struct FileInfo {
pub async fn build(args: Args) -> Result<Response> {
let start_time = Utc::now();
let timelines_count = GlobalTimelines::timelines_count();
let config = GlobalTimelines::get_global_config();
let ptrs_snapshot = if args.tenant_id.is_some() && args.timeline_id.is_some() {
// If both tenant_id and timeline_id are specified, we can just get the
@@ -223,12 +234,11 @@ pub async fn build(args: Args) -> Result<Response> {
timelines.push(TimelineDumpSer {
tli,
args: args.clone(),
timeline_dir: get_timeline_dir(&config, &ttid),
runtime: runtime.clone(),
});
}
let config = GlobalTimelines::get_global_config();
Ok(Response {
start_time,
finish_time: Utc::now(),
@@ -316,27 +326,19 @@ pub struct TimelineDigest {
}
pub async fn calculate_digest(
tli: &Arc<crate::timeline::Timeline>,
tli: &FullAccessTimeline,
request: TimelineDigestRequest,
) -> Result<TimelineDigest> {
if request.from_lsn > request.until_lsn {
bail!("from_lsn is greater than until_lsn");
}
let conf = GlobalTimelines::get_global_config();
let (_, persisted_state) = tli.get_state().await;
if persisted_state.timeline_start_lsn > request.from_lsn {
bail!("requested LSN is before the start of the timeline");
}
let mut wal_reader = WalReader::new(
conf.workdir.clone(),
tli.timeline_dir.clone(),
&persisted_state,
request.from_lsn,
true,
)?;
let mut wal_reader = tli.get_walreader(request.from_lsn).await?;
let mut hasher = Sha256::new();
let mut buf = [0u8; MAX_SEND_SIZE];