Fix 1.82 clippy lint too_long_first_doc_paragraph (#8941)

Addresses the 1.82 beta clippy lint `too_long_first_doc_paragraph` by
splitting the doc comment after the first sentence where it is already
short enough, and by writing a new short first sentence where it is not.
Arpad Müller
2024-09-06 14:33:52 +02:00
committed by GitHub
parent e86fef05dd
commit cbcd4058ed
51 changed files with 180 additions and 103 deletions
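The pattern applied throughout is mechanical. A minimal illustration with a hypothetical function (not code from this diff): clippy 1.82 warns when a doc comment's first paragraph runs long, and the fix is a short first sentence followed by a blank doc line.

```rust
// Before: one long first doc paragraph; the new lint may flag this.
/// Escapes the given identifier for use in a generated SQL statement, doubling
/// embedded quotes and validating the result against the length limits that
/// the server enforces for identifiers of this kind.
pub fn before() {}

// After: short first sentence, blank doc line, then the details.
/// Escapes the given identifier for use in a generated SQL statement.
///
/// Doubles embedded quotes and validates the result against the length limits
/// that the server enforces for identifiers of this kind.
pub fn after() {}
```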

View File

@@ -22,9 +22,10 @@ use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
 const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds
 
-/// Escape a string for including it in a SQL literal. Wrapping the result
-/// with `E'{}'` or `'{}'` is not required, as it returns a ready-to-use
-/// SQL string literal, e.g. `'db'''` or `E'db\\'`.
+/// Escape a string for including it in a SQL literal.
+///
+/// Wrapping the result with `E'{}'` or `'{}'` is not required,
+/// as it returns a ready-to-use SQL string literal, e.g. `'db'''` or `E'db\\'`.
 /// See <https://github.com/postgres/postgres/blob/da98d005cdbcd45af563d0c4ac86d0e9772cd15f/src/backend/utils/adt/quote.c#L47>
 /// for the original implementation.
 pub fn escape_literal(s: &str) -> String {
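As a rough sketch of the quoting rule this doc comment describes (modeled on the referenced quote.c behavior; the crate's actual implementation may differ):

```rust
/// Hypothetical re-implementation, for illustration only.
fn escape_literal_sketch(s: &str) -> String {
    // Double single quotes; if backslashes are present, double them too and
    // use the E'...' form, mirroring PostgreSQL's quote_literal().
    let escaped = s.replace('\\', "\\\\").replace('\'', "''");
    if s.contains('\\') {
        format!("E'{escaped}'")
    } else {
        format!("'{escaped}'")
    }
}

#[test]
fn matches_doc_examples() {
    assert_eq!(escape_literal_sketch("db'"), "'db'''");
    assert_eq!(escape_literal_sketch(r"db\"), r"E'db\\'");
}
```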

View File

@@ -68,6 +68,7 @@ macro_rules! register_uint_gauge {
 static INTERNAL_REGISTRY: Lazy<Registry> = Lazy::new(Registry::new);
 
 /// Register a collector in the internal registry. MUST be called before the first call to `gather()`.
+///
 /// Otherwise, we can have a deadlock in the `gather()` call, trying to register a new collector
 /// while holding the lock.
 pub fn register_internal(c: Box<dyn Collector>) -> prometheus::Result<()> {
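A hedged usage sketch of the contract above, assuming the function is called once at startup (`IntCounter` from the `prometheus` crate implements `Collector`):

```rust
use prometheus::IntCounter;

fn setup_metrics() -> prometheus::Result<()> {
    let requests = IntCounter::new("requests_total", "Total requests served")?;
    // Register before anything can call gather(), per the doc comment,
    // to avoid the registry deadlock described above.
    register_internal(Box::new(requests))?;
    Ok(())
}
```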

View File

@@ -147,6 +147,8 @@ pub struct TenantDescribeResponseShard {
 pub preferred_az_id: Option<String>,
 }
 
+/// Migration request for a given tenant shard to a given node.
+///
 /// Explicitly migrating a particular shard is a low level operation
 /// TODO: higher level "Reschedule tenant" operation where the request
 /// specifies some constraints, e.g. asking it to get off particular node(s)

View File

@@ -305,8 +305,10 @@ pub struct TenantConfig {
 pub lsn_lease_length_for_ts: Option<String>,
 }
 
-/// The policy for the aux file storage. It can be switched through `switch_aux_file_policy`
-/// tenant config. When the first aux file written, the policy will be persisted in the
+/// The policy for the aux file storage.
+///
+/// It can be switched through `switch_aux_file_policy` tenant config.
+/// When the first aux file written, the policy will be persisted in the
 /// `index_part.json` file and has a limited migration path.
 ///
 /// Currently, we only allow the following migration path:
@@ -896,7 +898,9 @@ pub struct WalRedoManagerStatus {
 pub process: Option<WalRedoManagerProcessStatus>,
 }
 
-/// The progress of a secondary tenant is mostly useful when doing a long running download: e.g. initiating
+/// The progress of a secondary tenant.
+///
+/// It is mostly useful when doing a long running download: e.g. initiating
 /// a download job, timing out while waiting for it to run, and then inspecting this status to understand
 /// what's happening.
 #[derive(Default, Debug, Serialize, Deserialize, Clone)]

View File

@@ -69,8 +69,10 @@ impl QueryError {
 }
 
 /// Returns true if the given error is a normal consequence of a network issue,
-/// or the client closing the connection. These errors can happen during normal
-/// operations, and don't indicate a bug in our code.
+/// or the client closing the connection.
+///
+/// These errors can happen during normal operations,
+/// and don't indicate a bug in our code.
 pub fn is_expected_io_error(e: &io::Error) -> bool {
     use io::ErrorKind::*;
     matches!(
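The `matches!` body is truncated above; a self-contained sketch of its shape, with an assumed set of `ErrorKind`s (the real list may differ):

```rust
fn is_expected_io_error_sketch(e: &std::io::Error) -> bool {
    use std::io::ErrorKind::*;
    // Network-ish failures that normal operation can produce.
    matches!(
        e.kind(),
        ConnectionRefused | ConnectionAborted | ConnectionReset | BrokenPipe | TimedOut
    )
}
```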

View File

@@ -7,6 +7,7 @@ use std::fmt;
 use url::Host;
 
 /// Parses a string of format either `host:port` or `host` into a corresponding pair.
+///
 /// The `host` part should be a correct `url::Host`, while `port` (if present) should be
 /// a valid decimal u16 of digits only.
 pub fn parse_host_port<S: AsRef<str>>(host_port: S) -> Result<(Host, Option<u16>), anyhow::Error> {
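A sketch of how a parser with this contract might behave (illustrative; bare IPv6 literals and error details are glossed over):

```rust
use anyhow::Context;
use url::Host;

fn parse_host_port_sketch(host_port: &str) -> anyhow::Result<(Host, Option<u16>)> {
    let (host, port) = match host_port.rsplit_once(':') {
        Some((h, p)) => (h, Some(p.parse::<u16>().context("invalid port")?)),
        None => (host_port, None),
    };
    Ok((Host::parse(host)?, port))
}

// parse_host_port_sketch("localhost:5432") == Ok((Host::Domain("localhost".into()), Some(5432)))
```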

View File

@@ -45,6 +45,8 @@ pub use azure_core::Etag;
 pub use error::{DownloadError, TimeTravelError, TimeoutOrCancel};
 
+/// Default concurrency limit for S3 operations
+///
 /// Currently, sync happens with AWS S3, that has two limits on requests per second:
 /// ~200 RPS for IAM services
 /// <https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/UsingWithRDS.IAMDBAuth.html>
@@ -300,7 +302,9 @@ pub trait RemoteStorage: Send + Sync + 'static {
     ) -> Result<(), TimeTravelError>;
 }
 
-/// DownloadStream is sensitive to the timeout and cancellation used with the original
+/// Data part of an ongoing [`Download`].
+///
+/// `DownloadStream` is sensitive to the timeout and cancellation used with the original
 /// [`RemoteStorage::download`] request. The type yields `std::io::Result<Bytes>` to be compatible
 /// with `tokio::io::copy_buf`.
 // This has 'static because safekeepers do not use cancellation tokens (yet)

View File

@@ -5,9 +5,10 @@
 mod calculation;
 pub mod svg;
 
-/// StorageModel is the input to the synthetic size calculation. It represents
-/// a tree of timelines, with just the information that's needed for the
-/// calculation. This doesn't track timeline names or where each timeline
+/// StorageModel is the input to the synthetic size calculation.
+///
+/// It represents a tree of timelines, with just the information that's needed
+/// for the calculation. This doesn't track timeline names or where each timeline
 /// begins and ends, for example. Instead, it consists of "points of interest"
 /// on the timelines. A point of interest could be the timeline start or end point,
 /// the oldest point on a timeline that needs to be retained because of PITR

View File

@@ -5,8 +5,10 @@ use std::{
 use metrics::IntCounter;
 
-/// Circuit breakers are for operations that are expensive and fallible: if they fail repeatedly,
-/// we will stop attempting them for some period of time, to avoid denial-of-service from retries, and
+/// Circuit breakers are for operations that are expensive and fallible.
+///
+/// If a circuit breaker fails repeatedly, we will stop attempting it for some
+/// period of time, to avoid denial-of-service from retries, and
 /// to mitigate the log spam from repeated failures.
 pub struct CircuitBreaker {
     /// An identifier that enables us to log useful errors when a circuit is broken
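A minimal sketch of the mechanism being documented (illustrative; the real struct also records metrics such as the `IntCounter` imported above):

```rust
use std::time::{Duration, Instant};

struct CircuitBreakerSketch {
    consecutive_failures: usize,
    failure_threshold: usize,
    break_duration: Duration,
    broken_until: Option<Instant>,
}

impl CircuitBreakerSketch {
    fn is_broken(&self) -> bool {
        self.broken_until.is_some_and(|t| Instant::now() < t)
    }
    fn on_failure(&mut self) {
        self.consecutive_failures += 1;
        if self.consecutive_failures >= self.failure_threshold {
            // Stop attempting the operation for a while, per the doc comment.
            self.broken_until = Some(Instant::now() + self.break_duration);
        }
    }
    fn on_success(&mut self) {
        self.consecutive_failures = 0;
        self.broken_until = None;
    }
}
```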

View File

@@ -249,8 +249,10 @@ macro_rules! id_newtype {
     };
 }
 
-/// Neon timeline IDs are different from PostgreSQL timeline
-/// IDs. They serve a similar purpose though: they differentiate
+/// Neon timeline ID.
+///
+/// They are different from PostgreSQL timeline
+/// IDs, but serve a similar purpose: they differentiate
 /// between different "histories" of the same cluster. However,
 /// PostgreSQL timeline IDs are a bit cumbersome, because they are only
 /// 32-bits wide, and they must be in ascending order in any given

View File

@@ -100,7 +100,9 @@ pub enum LockFileRead {
 }
 
 /// Open & try to lock the lock file at the given `path`, returning a [handle][`LockFileRead`] to
-/// inspect its content. It is not an `Err(...)` if the file does not exist or is already locked.
+/// inspect its content.
+///
+/// It is not an `Err(...)` if the file does not exist or is already locked.
 /// Check the [`LockFileRead`] variants for details.
 pub fn read_and_hold_lock_file(path: &Utf8Path) -> anyhow::Result<LockFileRead> {
     let res = fs::OpenOptions::new().read(true).open(path);
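A sketch of the tri-state idea: a missing file and a contended lock are ordinary outcomes, not errors. `fs2` is an assumed stand-in for the real locking mechanism, and `Option` stands in for `LockFileRead`:

```rust
use fs2::FileExt; // assumption: an flock-style crate; the real code may differ
use std::{fs, io};

fn try_read_locked(path: &str) -> io::Result<Option<String>> {
    let file = match fs::File::open(path) {
        Ok(f) => f,
        Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(None),
        Err(e) => return Err(e),
    };
    if file.try_lock_exclusive().is_err() {
        return Ok(None); // exists but already locked elsewhere: a normal outcome
    }
    Ok(Some(io::read_to_string(&file)?))
}
```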

View File

@@ -8,6 +8,7 @@ use tracing::{trace, warn};
 use crate::lsn::Lsn;
 
 /// Feedback pageserver sends to safekeeper and safekeeper resends to compute.
+///
 /// Serialized in custom flexible key/value format. In replication protocol, it
 /// is marked with NEON_STATUS_UPDATE_TAG_BYTE to differentiate from postgres
 /// Standby status update / Hot standby feedback messages.

View File

@@ -65,6 +65,8 @@ impl<T> Poison<T> {
     }
 }
 
+/// Armed pointer to a [`Poison`].
+///
 /// Use [`Self::data`] and [`Self::data_mut`] to access the wrapped state.
 /// Once modifications are done, use [`Self::disarm`].
 /// If [`Guard`] gets dropped instead of calling [`Self::disarm`], the state is poisoned

View File

@@ -13,10 +13,11 @@ pub struct ShardNumber(pub u8);
 #[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)]
 pub struct ShardCount(pub u8);
 
-/// Combination of ShardNumber and ShardCount. For use within the context of a particular tenant,
-/// when we need to know which shard we're dealing with, but do not need to know the full
-/// ShardIdentity (because we won't be doing any page->shard mapping), and do not need to know
-/// the fully qualified TenantShardId.
+/// Combination of ShardNumber and ShardCount.
+///
+/// For use within the context of a particular tenant, when we need to know which shard we're
+/// dealing with, but do not need to know the full ShardIdentity (because we won't be doing
+/// any page->shard mapping), and do not need to know the fully qualified TenantShardId.
 #[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
 pub struct ShardIndex {
     pub shard_number: ShardNumber,

View File

@@ -49,12 +49,11 @@ use std::sync::{RwLock, RwLockWriteGuard};
 use tokio::sync::watch;
 
-///
 /// Rcu allows multiple readers to read and hold onto a value without blocking
-/// (for very long). Storing to the Rcu updates the value, making new readers
-/// immediately see the new value, but it also waits for all current readers to
-/// finish.
+/// (for very long).
 ///
+/// Storing to the Rcu updates the value, making new readers immediately see
+/// the new value, but it also waits for all current readers to finish.
 pub struct Rcu<V> {
     inner: RwLock<RcuInner<V>>,
 }

View File

@@ -5,7 +5,9 @@ use std::sync::{
 use tokio::sync::Semaphore;
 
 /// Custom design like [`tokio::sync::OnceCell`] but using [`OwnedSemaphorePermit`] instead of
-/// `SemaphorePermit`, allowing use of `take` which does not require holding an outer mutex guard
+/// `SemaphorePermit`.
+///
+/// Allows use of `take` which does not require holding an outer mutex guard
 /// for the duration of initialization.
 ///
 /// Has no unsafe, builds upon [`tokio::sync::Semaphore`] and [`std::sync::Mutex`].

View File

@@ -7,6 +7,7 @@ pub enum VecMapOrdering {
 }
 
 /// Ordered map datastructure implemented in a Vec.
+///
 /// Append only - can only add keys that are larger than the
 /// current max key.
 /// Ordering can be adjusted using [`VecMapOrdering`]
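A sketch of the append-only invariant, assuming the ascending ordering (the real type also supports a descending `VecMapOrdering`):

```rust
struct VecMapSketch<K: Ord, V> {
    entries: Vec<(K, V)>, // sorted, because appends must be ascending
}

impl<K: Ord, V> VecMapSketch<K, V> {
    fn append(&mut self, key: K, value: V) -> Result<(), &'static str> {
        if let Some((last, _)) = self.entries.last() {
            if *last >= key {
                return Err("key must be larger than the current max key");
            }
        }
        self.entries.push((key, value));
        Ok(())
    }

    fn get(&self, key: &K) -> Option<&V> {
        // Sortedness makes lookup a plain binary search over the Vec.
        self.entries
            .binary_search_by(|(k, _)| k.cmp(key))
            .ok()
            .map(|i| &self.entries[i].1)
    }
}
```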

View File

@@ -6,9 +6,10 @@ pub enum YieldingLoopError {
     Cancelled,
 }
 
-/// Helper for long synchronous loops, e.g. over all tenants in the system. Periodically
-/// yields to avoid blocking the executor, and after resuming checks the provided
-/// cancellation token to drop out promptly on shutdown.
+/// Helper for long synchronous loops, e.g. over all tenants in the system.
+///
+/// Periodically yields to avoid blocking the executor, and after resuming
+/// checks the provided cancellation token to drop out promptly on shutdown.
 #[inline(always)]
 pub async fn yielding_loop<I, T, F>(
     interval: usize,
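The pattern, sketched with assumed details (`tokio_util`'s `CancellationToken` and a unit error stand in for the real parameter and `YieldingLoopError` types; `interval` is assumed non-zero):

```rust
use tokio_util::sync::CancellationToken;

async fn yielding_loop_sketch<I>(
    interval: usize,
    cancel: &CancellationToken,
    items: impl IntoIterator<Item = I>,
    mut body: impl FnMut(I),
) -> Result<(), ()> {
    for (i, item) in items.into_iter().enumerate() {
        body(item);
        if i % interval == 0 {
            // Yield so other tasks can run, then check for shutdown on resume.
            tokio::task::yield_now().await;
            if cancel.is_cancelled() {
                return Err(()); // corresponds to YieldingLoopError::Cancelled
            }
        }
    }
    Ok(())
}
```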

View File

@@ -180,6 +180,8 @@ pub struct PageServerConf {
     pub io_buffer_alignment: usize,
 }
 
+/// Token for authentication to safekeepers
+///
 /// We do not want to store this in a PageServerConf because the latter may be logged
 /// and/or serialized at a whim, while the token is secret. Currently this token is the
 /// same for accessing all tenants/timelines, but may become per-tenant/per-timeline in

View File

@@ -1,7 +1,9 @@
-//! This module defines `RequestContext`, a structure that we use throughout
-//! the pageserver to propagate high-level context from places
-//! that _originate_ activity down to the shared code paths at the
-//! heart of the pageserver. It's inspired by Golang's `context.Context`.
+//! Defines [`RequestContext`].
+//!
+//! It is a structure that we use throughout the pageserver to propagate
+//! high-level context from places that _originate_ activity down to the
+//! shared code paths at the heart of the pageserver. It's inspired by
+//! Golang's `context.Context`.
 //!
 //! For example, in `Timeline::get(page_nr, lsn)` we need to answer the following questions:
 //! 1. What high-level activity ([`TaskKind`]) needs this page?

View File

@@ -1021,9 +1021,10 @@ impl Timeline {
 }
 
 /// DatadirModification represents an operation to ingest an atomic set of
-/// updates to the repository. It is created by the 'begin_record'
-/// function. It is called for each WAL record, so that all the modifications
-/// by a one WAL record appear atomic.
+/// updates to the repository.
+///
+/// It is created by the 'begin_record' function. It is called for each WAL
+/// record, so that all the modifications by a one WAL record appear atomic.
 pub struct DatadirModification<'a> {
     /// The timeline this modification applies to. You can access this to
     /// read the state, but note that any pending updates are *not* reflected
@@ -2048,6 +2049,7 @@ impl<'a> DatadirModification<'a> {
 /// This struct facilitates accessing either a committed key from the timeline at a
 /// specific LSN, or the latest uncommitted key from a pending modification.
+///
 /// During WAL ingestion, the records from multiple LSNs may be batched in the same
 /// modification before being flushed to the timeline. Hence, the routines in WalIngest
 /// need to look up the keys in the modification first before looking them up in the

View File

@@ -1,8 +1,9 @@
+//! Timeline repository implementation that keeps old data in layer files, and
+//! the recent changes in ephemeral files.
 //!
-//! Timeline repository implementation that keeps old data in files on disk, and
-//! the recent changes in memory. See tenant/*_layer.rs files.
-//! The functions here are responsible for locating the correct layer for the
-//! get/put call, walking back the timeline branching history as needed.
+//! See tenant/*_layer.rs files. The functions here are responsible for locating
+//! the correct layer for the get/put call, walking back the timeline branching
+//! history as needed.
 //!
 //! The files are stored in the .neon/tenants/<tenant_id>/timelines/<timeline_id>
 //! directory. See docs/pageserver-storage.md for how the files are managed.

View File

@@ -1,7 +1,8 @@
-//! Describes the legacy now hopefully no longer modified per-timeline metadata stored in
-//! `index_part.json` managed by [`remote_timeline_client`]. For many tenants and their timelines,
-//! this struct and it's original serialization format is still needed because they were written a
-//! long time ago.
+//! Describes the legacy now hopefully no longer modified per-timeline metadata.
+//!
+//! It is stored in `index_part.json` managed by [`remote_timeline_client`]. For many tenants and
+//! their timelines, this struct and its original serialization format is still needed because
+//! they were written a long time ago.
 //!
 //! Instead of changing and adding versioning to this, just change [`IndexPart`] with soft json
 //! versioning.

View File

@@ -282,9 +282,10 @@ impl BackgroundPurges {
 static TENANTS: Lazy<std::sync::RwLock<TenantsMap>> =
     Lazy::new(|| std::sync::RwLock::new(TenantsMap::Initializing));
 
-/// The TenantManager is responsible for storing and mutating the collection of all tenants
-/// that this pageserver process has state for. Every Tenant and SecondaryTenant instance
-/// lives inside the TenantManager.
+/// Responsible for storing and mutating the collection of all tenants
+/// that this pageserver has state for.
+///
+/// Every Tenant and SecondaryTenant instance lives inside the TenantManager.
 ///
 /// The most important role of the TenantManager is to prevent conflicts: e.g. trying to attach
 /// the same tenant twice concurrently, or trying to configure the same tenant into secondary
@@ -2346,8 +2347,9 @@ pub enum TenantMapError {
     ShuttingDown,
 }
 
-/// Guards a particular tenant_id's content in the TenantsMap. While this
-/// structure exists, the TenantsMap will contain a [`TenantSlot::InProgress`]
+/// Guards a particular tenant_id's content in the TenantsMap.
+///
+/// While this structure exists, the TenantsMap will contain a [`TenantSlot::InProgress`]
 /// for this tenant, which acts as a marker for any operations targeting
 /// this tenant to retry later, or wait for the InProgress state to end.
 ///

View File

@@ -2184,6 +2184,8 @@ pub fn remote_timeline_path(
     remote_timelines_path(tenant_shard_id).join(Utf8Path::new(&timeline_id.to_string()))
 }
 
+/// Obtains the path of the given Layer in the remote
+///
 /// Note that the shard component of a remote layer path is _not_ always the same
 /// as in the TenantShardId of the caller: tenants may reference layers from a different
 /// ShardIndex. Use the ShardIndex from the layer's metadata.

View File

@@ -1,4 +1,5 @@
 //! In-memory index to track the tenant files on the remote storage.
+//!
 //! Able to restore itself from the storage index parts, that are located in every timeline's remote directory and contain all data about
 //! remote timeline layers and its metadata.

View File

@@ -434,10 +434,11 @@ impl ReadableLayer {
     }
 }
 
-/// Layers contain a hint indicating whether they are likely to be used for reads. This is a hint rather
-/// than an authoritative value, so that we do not have to update it synchronously when changing the visibility
-/// of layers (for example when creating a branch that makes some previously covered layers visible). It should
-/// be used for cache management but not for correctness-critical checks.
+/// Layers contain a hint indicating whether they are likely to be used for reads.
+///
+/// This is a hint rather than an authoritative value, so that we do not have to update it synchronously
+/// when changing the visibility of layers (for example when creating a branch that makes some previously
+/// covered layers visible). It should be used for cache management but not for correctness-critical checks.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum LayerVisibilityHint {
     /// A Visible layer might be read while serving a read, because there is not an image layer between it

View File

@@ -136,10 +136,11 @@ impl Summary {
 // Flag indicating that this version initialize the page
 const WILL_INIT: u64 = 1;
 
-/// Struct representing reference to BLOB in layers. Reference contains BLOB
-/// offset, and for WAL records it also contains `will_init` flag. The flag
-/// helps to determine the range of records that needs to be applied, without
-/// reading/deserializing records themselves.
+/// Struct representing reference to BLOB in layers.
+///
+/// Reference contains BLOB offset, and for WAL records it also contains
+/// `will_init` flag. The flag helps to determine the range of records
+/// that needs to be applied, without reading/deserializing records themselves.
 #[derive(Debug, Serialize, Deserialize, Copy, Clone)]
 pub struct BlobRef(pub u64);

View File

@@ -1,7 +1,9 @@
 //! An ImageLayer represents an image or a snapshot of a key-range at
-//! one particular LSN. It contains an image of all key-value pairs
-//! in its key-range. Any key that falls into the image layer's range
-//! but does not exist in the layer, does not exist.
+//! one particular LSN.
+//!
+//! It contains an image of all key-value pairs in its key-range. Any key
+//! that falls into the image layer's range but does not exist in the layer,
+//! does not exist.
 //!
 //! An image layer is stored in a file on disk. The file is stored in
 //! timelines/<timeline_id> directory. Currently, there are no

View File

@@ -12,8 +12,10 @@ use serde::{Deserialize, Serialize};
 #[cfg(test)]
 use utils::id::TenantId;
 
-/// A unique identifier of a persistent layer. This is different from `LayerDescriptor`, which is only used in the
-/// benchmarks. This struct contains all necessary information to find the image / delta layer. It also provides
+/// A unique identifier of a persistent layer.
+///
+/// This is different from `LayerDescriptor`, which is only used in the benchmarks.
+/// This struct contains all necessary information to find the image / delta layer. It also provides
 /// a unified way to generate layer information like file name.
 #[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, Hash)]
 pub struct PersistentLayerDesc {

View File

@@ -217,8 +217,9 @@ impl fmt::Display for ImageLayerName {
     }
 }
 
-/// LayerName is the logical identity of a layer within a LayerMap at a moment in time. The
-/// LayerName is not a unique filename, as the same LayerName may have multiple physical incarnations
+/// LayerName is the logical identity of a layer within a LayerMap at a moment in time.
+///
+/// The LayerName is not a unique filename, as the same LayerName may have multiple physical incarnations
 /// over time (e.g. across shard splits or compression). The physical filenames of layers in local
 /// storage and object names in remote storage consist of the LayerName plus some extra qualifiers
 /// that uniquely identify the physical incarnation of a layer (see [crate::tenant::remote_timeline_client::remote_layer_path])

View File

@@ -226,9 +226,11 @@ impl<'a> IteratorWrapper<'a> {
     }
 }
 
-/// A merge iterator over delta/image layer iterators. When duplicated records are
-/// found, the iterator will not perform any deduplication, and the caller should handle
-/// these situation. By saying duplicated records, there are many possibilities:
+/// A merge iterator over delta/image layer iterators.
+///
+/// When duplicated records are found, the iterator will not perform any
+/// deduplication, and the caller should handle these situation. By saying
+/// duplicated records, there are many possibilities:
 ///
 /// * Two same delta at the same LSN.
 /// * Two same image at the same LSN.

View File

@@ -34,9 +34,10 @@ impl SplitWriterResult {
     }
 }
 
-/// An image writer that takes images and produces multiple image layers. The interface does not
-/// guarantee atomicity (i.e., if the image layer generation fails, there might be leftover files
-/// to be cleaned up)
+/// An image writer that takes images and produces multiple image layers.
+///
+/// The interface does not guarantee atomicity (i.e., if the image layer generation
+/// fails, there might be leftover files to be cleaned up)
 #[must_use]
 pub struct SplitImageLayerWriter {
     inner: ImageLayerWriter,
@@ -193,9 +194,10 @@ impl SplitImageLayerWriter {
     }
 }
 
-/// A delta writer that takes key-lsn-values and produces multiple delta layers. The interface does not
-/// guarantee atomicity (i.e., if the delta layer generation fails, there might be leftover files
-/// to be cleaned up).
+/// A delta writer that takes key-lsn-values and produces multiple delta layers.
+///
+/// The interface does not guarantee atomicity (i.e., if the delta layer generation fails,
+/// there might be leftover files to be cleaned up).
 ///
 /// Note that if updates of a single key exceed the target size limit, all of the updates will be batched
 /// into a single file. This behavior might change in the future. For reference, the legacy compaction algorithm

View File

@@ -593,8 +593,10 @@ impl<'a> VectoredBlobReader<'a> {
     }
 }
 
-/// Read planner used in [`crate::tenant::storage_layer::image_layer::ImageLayerIterator`]. It provides a streaming API for
-/// getting read blobs. It returns a batch when `handle` gets called and when the current key would just exceed the read_size and
+/// Read planner used in [`crate::tenant::storage_layer::image_layer::ImageLayerIterator`].
+///
+/// It provides a streaming API for getting read blobs. It returns a batch when
+/// `handle` gets called and when the current key would just exceed the read_size and
 /// max_cnt constraints.
 pub struct StreamingVectoredReadPlanner {
     read_builder: Option<VectoredReadBuilder>,

View File

@@ -1,6 +1,7 @@
-//!
 //! VirtualFile is like a normal File, but it's not bound directly to
-//! a file descriptor. Instead, the file is opened when it's read from,
+//! a file descriptor.
+//!
+//! Instead, the file is opened when it's read from,
 //! and if too many files are open globally in the system, least-recently
 //! used ones are closed.
 //!

View File

@@ -43,13 +43,12 @@ use utils::lsn::Lsn;
 use utils::sync::gate::GateError;
 use utils::sync::heavier_once_cell;
 
+/// The real implementation that uses a Postgres process to
+/// perform WAL replay.
 ///
-/// This is the real implementation that uses a Postgres process to
-/// perform WAL replay. Only one thread can use the process at a time,
-/// that is controlled by the Mutex. In the future, we might want to
-/// launch a pool of processes to allow concurrent replay of multiple
-/// records.
-///
+/// Only one thread can use the process at a time, that is controlled by the
+/// Mutex. In the future, we might want to launch a pool of processes to allow
+/// concurrent replay of multiple records.
 pub struct PostgresRedoManager {
     tenant_shard_id: TenantShardId,
     conf: &'static PageServerConf,

View File

@@ -14,6 +14,7 @@ use tokio::io::{AsyncRead, AsyncWrite, ReadBuf};
 use tokio_rustls::server::TlsStream;
 
 /// Stream wrapper which implements libpq's protocol.
+///
 /// NOTE: This object deliberately doesn't implement [`AsyncRead`]
 /// or [`AsyncWrite`] to prevent subtle errors (e.g. trying
 /// to pass random malformed bytes through the connection).

View File

@@ -484,6 +484,7 @@ pub async fn validate_temp_timeline(
 }
 
 /// Move timeline from a temp directory to the main storage, and load it to the global map.
+///
 /// This operation is done under a lock to prevent bugs if several concurrent requests are
 /// trying to load the same timeline. Note that it doesn't guard against creating the
 /// timeline with the same ttid, but no one should be doing this anyway.

View File

@@ -448,8 +448,10 @@ async fn network_write<IO: AsyncRead + AsyncWrite + Unpin>(
 const KEEPALIVE_INTERVAL: Duration = Duration::from_secs(1);
 
 /// Encapsulates a task which takes messages from msg_rx, processes and pushes
-/// replies to reply_tx; reading from socket and writing to disk in parallel is
-/// beneficial for performance, this struct provides writing to disk part.
+/// replies to reply_tx.
+///
+/// Reading from socket and writing to disk in parallel is beneficial for
+/// performance, this struct provides the writing to disk part.
 pub struct WalAcceptor {
     tli: WalResidentTimeline,
     msg_rx: Receiver<ProposerAcceptorMessage>,

View File

@@ -147,9 +147,11 @@ pub struct TimelineMemState {
     pub proposer_uuid: PgUuid,
 }
 
-/// Safekeeper persistent state plus in memory layer, to avoid frequent fsyncs
-/// when we update fields like commit_lsn which don't need immediate
-/// persistence. Provides transactional like API to atomically update the state.
+/// Safekeeper persistent state plus in memory layer.
+///
+/// Allows us to avoid frequent fsyncs when we update fields like commit_lsn
+/// which don't need immediate persistence. Provides transactional like API
+/// to atomically update the state.
 ///
 /// Implements Deref into *persistent* part.
 pub struct TimelineState<CTRL: control_file::Storage> {

View File

@@ -169,6 +169,7 @@ impl<'a> Drop for WriteGuardSharedState<'a> {
 }
 
 /// This structure is stored in shared state and represents the state of the timeline.
+///
 /// Usually it holds SafeKeeper, but it also supports offloaded timeline state. In this
 /// case, SafeKeeper is not available (because WAL is not present on disk) and all
 /// operations can be done only with control file.

View File

@@ -1,6 +1,8 @@
-//! Code related to evicting WAL files to remote storage. The actual upload is done by the
-//! partial WAL backup code. This file has code to delete and re-download WAL files,
-//! cross-validate with partial WAL backup if local file is still present.
+//! Code related to evicting WAL files to remote storage.
+//!
+//! The actual upload is done by the partial WAL backup code. This file has
+//! code to delete and re-download WAL files, cross-validate with partial WAL
+//! backup if local file is still present.
 
 use anyhow::Context;
 use camino::Utf8PathBuf;

View File

@@ -1,4 +1,6 @@
-//! Timeline residence guard is needed to ensure that WAL segments are present on disk,
+//! Timeline residence guard
+//!
+//! It is needed to ensure that WAL segments are present on disk,
 //! as long as the code is holding the guard. This file implements guard logic, to issue
 //! and drop guards, and to notify the manager when the guard is dropped.

View File

@@ -1,4 +1,5 @@
 //! The timeline manager task is responsible for managing the timeline's background tasks.
+//!
 //! It is spawned alongside each timeline and exits when the timeline is deleted.
 //! It watches for changes in the timeline state and decides when to spawn or kill background tasks.
 //! It also can manage some reactive state, like should the timeline be active for broker pushes or not.

View File

@@ -60,7 +60,8 @@ impl TimelinesSet {
     }
 }
 
-/// Guard is used to add or remove timeline from the set.
+/// Guard is used to add or remove timelines from the set.
+///
 /// If the timeline present in set, it will be removed from it on drop.
 /// Note: do not use more than one guard for the same timeline, it caches the presence state.
 /// It is designed to be used in the manager task only.

View File

@@ -1,6 +1,8 @@
 //! Safekeeper timeline has a background task which is subscribed to `commit_lsn`
-//! and `flush_lsn` updates. After the partial segment was updated (`flush_lsn`
-//! was changed), the segment will be uploaded to S3 in about 15 minutes.
+//! and `flush_lsn` updates.
+//!
+//! After the partial segment was updated (`flush_lsn` was changed), the segment
+//! will be uploaded to S3 within the configured `partial_backup_timeout`.
 //!
 //! The filename format for partial segments is
 //! `Segment_Term_Flush_Commit_skNN.partial`, where:

View File

@@ -17,6 +17,7 @@ use crate::SafeKeeperConf;
 use postgres_backend::{AuthType, PostgresBackend};
 
 /// Accept incoming TCP connections and spawn them into a background thread.
+///
 /// allowed_auth_scope is either SafekeeperData (wide JWT tokens giving access
 /// to any tenant are allowed) or Tenant (only tokens giving access to specific
 /// tenant are allowed). Doesn't matter if auth is disabled in conf.

View File

@@ -117,7 +117,9 @@ pub(crate) const STARTUP_RECONCILE_TIMEOUT: Duration = Duration::from_secs(30);
 pub const MAX_OFFLINE_INTERVAL_DEFAULT: Duration = Duration::from_secs(30);
 
 /// How long a node may be unresponsive to heartbeats during start up before we declare it
-/// offline. This is much more lenient than [`MAX_OFFLINE_INTERVAL_DEFAULT`] since the pageserver's
+/// offline.
+///
+/// This is much more lenient than [`MAX_OFFLINE_INTERVAL_DEFAULT`] since the pageserver's
 /// handling of the re-attach response may take a long time and blocks heartbeats from
 /// being handled on the pageserver side.
 pub const MAX_WARMING_UP_INTERVAL_DEFAULT: Duration = Duration::from_secs(300);

View File

@@ -1,6 +1,7 @@
-//! Functionality for finding and purging garbage, as in "garbage collection". Garbage means
-//! S3 objects which are either not referenced by any metadata, or are referenced by a
-//! control plane tenant/timeline in a deleted state.
+//! Functionality for finding and purging garbage, as in "garbage collection".
+//!
+//! Garbage means S3 objects which are either not referenced by any metadata,
+//! or are referenced by a control plane tenant/timeline in a deleted state.
 
 use std::{
     collections::{HashMap, HashSet},

View File

@@ -74,7 +74,9 @@ pub async fn stream_tenant_shards<'a>(
 }
 
 /// Given a `TenantShardId`, output a stream of the timelines within that tenant, discovered
-/// using a listing. The listing is done before the stream is built, so that this
+/// using a listing.
+///
+/// The listing is done before the stream is built, so that this
 /// function can be used to generate concurrency on a stream using buffer_unordered.
 pub async fn stream_tenant_timelines<'a>(
     remote_client: &'a GenericRemoteStorage,

View File

@@ -440,9 +440,10 @@ async fn gc_ancestor(
     Ok(())
 }
 
-/// Physical garbage collection: removing unused S3 objects. This is distinct from the garbage collection
-/// done inside the pageserver, which operates at a higher level (keys, layers). This type of garbage collection
-/// is about removing:
+/// Physical garbage collection: removing unused S3 objects.
+///
+/// This is distinct from the garbage collection done inside the pageserver, which operates at a higher level
+/// (keys, layers). This type of garbage collection is about removing:
 /// - Objects that were uploaded but never referenced in the remote index (e.g. because of a shutdown between
 ///   uploading a layer and uploading an index)
 /// - Index objects from historic generations