WIP

cr feedback
Add tracing for incoming request
2026-05-15 12:10:37 +00:00 · 2023-02-27 14:38:46 +02:00 · 2023-02-27 13:00:13 +02:00 · 2023-02-27 12:38:56 +02:00 · 2023-02-27 12:24:15 +02:00 · 2023-02-24 23:30:02 +04:00
20 changed files with 428 additions and 84 deletions
--- a/.github/ansible/prod.us-east-2.hosts.yaml
+++ b/.github/ansible/prod.us-east-2.hosts.yaml
@@ -27,6 +27,8 @@ storage:
          ansible_host:  i-062227ba7f119eb8c
        pageserver-1.us-east-2.aws.neon.tech:
          ansible_host:  i-0b3ec0afab5968938
+        pageserver-2.us-east-2.aws.neon.tech:
+          ansible_host:  i-0d7a1c4325e71421d

    safekeepers:
      hosts:
--- a/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml
@@ -1,6 +1,22 @@
 # Helm chart values for neon-proxy-scram.
 # This is a YAML-formatted file.

+deploymentStrategy:
+  type: RollingUpdate
+  rollingUpdate:
+    maxSurge: 100%
+    maxUnavailable: 50%
+
+# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
+# The pod(s) will stay in Terminating, keeps the existing connections
+# but doesn't receive new ones
+containerLifecycle:
+  preStop:
+    exec:
+      command: ["/bin/sh", "-c", "sleep 604800"]
+terminationGracePeriodSeconds: 604800
+
+
 image:
  repository: neondatabase/neon

--- a/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml
@@ -1,6 +1,22 @@
 # Helm chart values for neon-proxy-scram.
 # This is a YAML-formatted file.

+deploymentStrategy:
+  type: RollingUpdate
+  rollingUpdate:
+    maxSurge: 100%
+    maxUnavailable: 50%
+
+# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
+# The pod(s) will stay in Terminating, keeps the existing connections
+# but doesn't receive new ones
+containerLifecycle:
+  preStop:
+    exec:
+      command: ["/bin/sh", "-c", "sleep 604800"]
+terminationGracePeriodSeconds: 604800
+
+
 image:
  repository: neondatabase/neon

--- a/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml
@@ -1,6 +1,22 @@
 # Helm chart values for neon-proxy-scram.
 # This is a YAML-formatted file.

+deploymentStrategy:
+  type: RollingUpdate
+  rollingUpdate:
+    maxSurge: 100%
+    maxUnavailable: 50%
+
+# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
+# The pod(s) will stay in Terminating, keeps the existing connections
+# but doesn't receive new ones
+containerLifecycle:
+  preStop:
+    exec:
+      command: ["/bin/sh", "-c", "sleep 604800"]
+terminationGracePeriodSeconds: 604800
+
+
 image:
  repository: neondatabase/neon

--- a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml
@@ -1,6 +1,22 @@
 # Helm chart values for neon-proxy-scram.
 # This is a YAML-formatted file.

+deploymentStrategy:
+  type: RollingUpdate
+  rollingUpdate:
+    maxSurge: 100%
+    maxUnavailable: 50%
+
+# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
+# The pod(s) will stay in Terminating, keeps the existing connections
+# but doesn't receive new ones
+containerLifecycle:
+  preStop:
+    exec:
+      command: ["/bin/sh", "-c", "sleep 604800"]
+terminationGracePeriodSeconds: 604800
+
+
 image:
  repository: neondatabase/neon

--- a/Cargo.lock
+++ b/Cargo.lock
@@ -854,6 +854,7 @@ dependencies = [
 "opentelemetry",
 "postgres",
 "regex",
+ "reqwest",
 "serde",
 "serde_json",
 "tar",
@@ -4543,6 +4544,7 @@ dependencies = [
 "tracing",
 "tracing-subscriber",
 "url",
+ "uuid",
 "workspace_hack",
 ]

--- a/Dockerfile.compute-node
+++ b/Dockerfile.compute-node
@@ -32,11 +32,15 @@ RUN cd postgres && \
    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install && \
    # Enable some of contrib extensions
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/autoinc.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/bloom.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrowlocks.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/intagg.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgstattuple.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/earthdistance.control && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/insert_username.control && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/intagg.control && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/moddatetime.control && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrowlocks.control && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgstattuple.control && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/refint.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/xml2.control

 #########################################################################################
@@ -60,10 +64,11 @@ RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar
    DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \
    make clean && cp -R /sfcgal/* /

+ENV PATH "/usr/local/pgsql/bin:$PATH"
+
 RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.2.tar.gz -O postgis.tar.gz && \
    mkdir postgis-src && cd postgis-src && tar xvzf ../postgis.tar.gz --strip-components=1 -C . && \
    ./autogen.sh && \
-    export PATH="/usr/local/pgsql/bin:$PATH" && \
    ./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
    cd extensions/postgis && \
@@ -77,6 +82,15 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.2.tar.gz -O postg
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer_data_us.control

+RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
+    mkdir pgrouting-src && cd pgrouting-src && tar xvzf ../pgrouting.tar.gz --strip-components=1 -C . && \
+    mkdir build && \
+    cd build && \
+    cmake .. && \
+    make -j $(getconf _NPROCESSORS_ONLN) && \
+    make -j $(getconf _NPROCESSORS_ONLN) install && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrouting.control
+
 #########################################################################################
 #
 # Layer "plv8-build"
@@ -181,6 +195,36 @@ RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b214
    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgjwt.control

+#########################################################################################
+#
+# Layer "hypopg-pg-build"
+# compile hypopg extension
+#
+#########################################################################################
+FROM build-deps AS hypopg-pg-build
+COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
+
+RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.3.1.tar.gz -O hypopg.tar.gz && \
+    mkdir hypopg-src && cd hypopg-src && tar xvzf ../hypopg.tar.gz --strip-components=1 -C . && \
+    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
+    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/hypopg.control
+
+#########################################################################################
+#
+# Layer "pg-hashids-pg-build"
+# compile pg_hashids extension
+#
+#########################################################################################
+FROM build-deps AS pg-hashids-pg-build
+COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
+
+RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
+    mkdir pg_hashids-src && cd pg_hashids-src && tar xvzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
+    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
+    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_hashids.control
+
 #########################################################################################
 # 
 # Layer "rust extensions"
@@ -221,6 +265,8 @@ FROM rust-extensions-build AS pg-jsonschema-pg-build
 RUN git clone --depth=1 --single-branch --branch neon_abi_v0.1.4 https://github.com/vadim2404/pg_jsonschema/ && \
    cd pg_jsonschema && \
    cargo pgx install --release && \
+    # it's needed to enable extension because it uses untrusted C language
+    sed -i 's/superuser = false/superuser = true/g' /usr/local/pgsql/share/extension/pg_jsonschema.control && \
    echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_jsonschema.control

 #########################################################################################
@@ -235,6 +281,8 @@ FROM rust-extensions-build AS pg-graphql-pg-build
 RUN git clone --depth=1 --single-branch --branch neon_abi_v1.1.0 https://github.com/vadim2404/pg_graphql && \
    cd pg_graphql && \  
    cargo pgx install --release && \
+    # it's needed to enable extension because it uses untrusted C language
+    sed -i 's/superuser = false/superuser = true/g' /usr/local/pgsql/share/extension/pg_graphql.control && \
    echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_graphql.control

 #########################################################################################
@@ -254,6 +302,8 @@ COPY --from=vector-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pgjwt-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-jsonschema-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-graphql-pg-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=hypopg-pg-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=pg-hashids-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY pgxn/ pgxn/

 RUN make -j $(getconf _NPROCESSORS_ONLN) \
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -17,6 +17,7 @@ regex.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 tar.workspace = true
+reqwest = { workspace = true, features = ["json"] }
 tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
 tokio-postgres.workspace = true
 tracing.workspace = true
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -65,6 +65,9 @@ fn main() -> Result<()> {
    let spec = matches.get_one::<String>("spec");
    let spec_path = matches.get_one::<String>("spec-path");

+    let compute_id = matches.get_one::<String>("compute-id");
+    let control_plane_uri = matches.get_one::<String>("control-plane-uri");
+
    // Try to use just 'postgres' if no path is provided
    let pgbin = matches.get_one::<String>("pgbin").unwrap();

@@ -77,8 +80,27 @@ fn main() -> Result<()> {
                let path = Path::new(sp);
                let file = File::open(path)?;
                serde_json::from_reader(file)?
+            } else if let Some(id) = compute_id {
+                if let Some(cp_base) = control_plane_uri {
+                    let cp_uri = format!("{cp_base}/management/api/v1/{id}/spec");
+                    let jwt: String = match std::env::var("NEON_CONSOLE_JWT") {
+                        Ok(v) => v,
+                        Err(_) => "".to_string(),
+                    };
+
+                    reqwest::blocking::Client::new()
+                        .get(cp_uri)
+                        .header("Authorization", jwt)
+                        .send()?
+                        .json()?
+                } else {
+                    panic!(
+                        "must specify --control-plane-uri \"{:#?}\" and --compute-id \"{:#?}\"",
+                        control_plane_uri, compute_id
+                    );
+                }
            } else {
-                panic!("cluster spec should be provided via --spec or --spec-path argument");
+                panic!("compute spec should be provided via --spec or --spec-path argument");
            }
        }
    };
@@ -227,6 +249,18 @@ fn cli() -> clap::Command {
                .long("spec-path")
                .value_name("SPEC_PATH"),
        )
+        .arg(
+            Arg::new("compute-id")
+                .short('i')
+                .long("compute-id")
+                .value_name("COMPUTE_ID"),
+        )
+        .arg(
+            Arg::new("control-plane-uri")
+                .short('p')
+                .long("control-plane-uri")
+                .value_name("CONTROL_PLANE"),
+        )
 }

 #[test]
--- a/libs/pageserver_api/src/reltag.rs
+++ b/libs/pageserver_api/src/reltag.rs
@@ -98,6 +98,15 @@ impl RelTag {

        name
    }
+
+    pub fn with_forknum(&self, forknum: u8) -> Self {
+        RelTag {
+            forknum,
+            spcnode: self.spcnode,
+            dbnode: self.dbnode,
+            relnode: self.relnode,
+        }
+    }
 }

 ///
--- a/libs/utils/Cargo.toml
+++ b/libs/utils/Cargo.toml
@@ -39,7 +39,7 @@ pq_proto.workspace = true

 workspace_hack.workspace = true
 url.workspace = true
-
+uuid = { version = "1.2", features = ["v4", "serde"] }
 [dev-dependencies]
 byteorder.workspace = true
 bytes.workspace = true
--- a/libs/utils/src/http/endpoint.rs
+++ b/libs/utils/src/http/endpoint.rs
@@ -4,13 +4,14 @@ use anyhow::{anyhow, Context};
 use hyper::header::{HeaderName, AUTHORIZATION};
 use hyper::http::HeaderValue;
 use hyper::{header::CONTENT_TYPE, Body, Request, Response, Server};
+use hyper::{Method, StatusCode};
 use metrics::{register_int_counter, Encoder, IntCounter, TextEncoder};
 use once_cell::sync::Lazy;
 use routerify::ext::RequestExt;
 use routerify::RequestInfo;
 use routerify::{Middleware, Router, RouterBuilder, RouterService};
 use tokio::task::JoinError;
-use tracing::info;
+use tracing;

 use std::future::Future;
 use std::net::TcpListener;
@@ -27,7 +28,14 @@ static SERVE_METRICS_COUNT: Lazy<IntCounter> = Lazy::new(|| {
 });

 async fn logger(res: Response<Body>, info: RequestInfo) -> Result<Response<Body>, ApiError> {
-    info!("{} {} {}", info.method(), info.uri().path(), res.status(),);
+    // cannot factor out the Level to avoid the repetition
+    // because tracing can only work with const Level
+    // which is not the case here
+    if info.method() == Method::GET && res.status() == StatusCode::OK {
+        tracing::debug!("{} {} {}", info.method(), info.uri().path(), res.status());
+    } else {
+        tracing::info!("{} {} {}", info.method(), info.uri().path(), res.status());
+    }
    Ok(res)
 }

@@ -55,8 +63,26 @@ async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<Body
    Ok(response)
 }

+pub fn add_request_id_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>(
+) -> Middleware<B, ApiError> {
+    Middleware::pre(move |mut req| async move {
+        let headers = req.headers_mut();
+        let name = HeaderName::from_str("UUID").expect("created header name");
+        let request_id = uuid::Uuid::new_v4().to_string();
+        let value = HeaderValue::from_str(&request_id).unwrap();
+        headers.insert(name, value);
+        if req.method() == Method::GET {
+            tracing::debug!("{} {} {}", req.method(), req.uri().path(), request_id);
+        } else {
+            tracing::info!("{} {} {}", req.method(), req.uri().path(), request_id);
+        }
+        Ok(req)
+    })
+}
+
 pub fn make_router() -> RouterBuilder<hyper::Body, ApiError> {
    Router::builder()
+        .middleware(add_request_id_middleware())
        .middleware(Middleware::post_with_info(logger))
        .get("/metrics", prometheus_metrics_handler)
        .err_handler(error::handler)
@@ -203,7 +229,7 @@ pub fn serve_thread_main<S>(
 where
    S: Future<Output = ()> + Send + Sync,
 {
-    info!("Starting an HTTP endpoint at {}", listener.local_addr()?);
+    tracing::info!("Starting an HTTP endpoint at {}", listener.local_addr()?);

    // Create a Service from the router above to handle incoming requests.
    let service = RouterService::new(router_builder.build().map_err(|err| anyhow!(err))?).unwrap();
--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -33,6 +33,7 @@ use pageserver_api::reltag::{RelTag, SlruKind};

 use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
 use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PGDATA_SUBDIRS, PG_HBA};
+use postgres_ffi::relfile_utils::{INIT_FORKNUM, MAIN_FORKNUM};
 use postgres_ffi::TransactionId;
 use postgres_ffi::XLogFileName;
 use postgres_ffi::PG_TLI;
@@ -190,14 +191,31 @@ where
        {
            self.add_dbdir(spcnode, dbnode, has_relmap_file).await?;

-            // Gather and send relational files in each database if full backup is requested.
-            if self.full_backup {
-                for rel in self
-                    .timeline
-                    .list_rels(spcnode, dbnode, self.lsn, self.ctx)
-                    .await?
-                {
-                    self.add_rel(rel).await?;
+            // If full backup is requested, include all relation files.
+            // Otherwise only include init forks of unlogged relations.
+            let rels = self
+                .timeline
+                .list_rels(spcnode, dbnode, self.lsn, self.ctx)
+                .await?;
+            for &rel in rels.iter() {
+                // Send init fork as main fork to provide well formed empty
+                // contents of UNLOGGED relations. Postgres copies it in
+                // `reinit.c` during recovery.
+                if rel.forknum == INIT_FORKNUM {
+                    // I doubt we need _init fork itself, but having it at least
+                    // serves as a marker relation is unlogged.
+                    self.add_rel(rel, rel).await?;
+                    self.add_rel(rel, rel.with_forknum(MAIN_FORKNUM)).await?;
+                    continue;
+                }
+
+                if self.full_backup {
+                    if rel.forknum == MAIN_FORKNUM && rels.contains(&rel.with_forknum(INIT_FORKNUM))
+                    {
+                        // skip this, will include it when we reach the init fork
+                        continue;
+                    }
+                    self.add_rel(rel, rel).await?;
                }
            }
        }
@@ -220,15 +238,16 @@ where
        Ok(())
    }

-    async fn add_rel(&mut self, tag: RelTag) -> anyhow::Result<()> {
+    /// Add contents of relfilenode `src`, naming it as `dst`.
+    async fn add_rel(&mut self, src: RelTag, dst: RelTag) -> anyhow::Result<()> {
        let nblocks = self
            .timeline
-            .get_rel_size(tag, self.lsn, false, self.ctx)
+            .get_rel_size(src, self.lsn, false, self.ctx)
            .await?;

        // If the relation is empty, create an empty file
        if nblocks == 0 {
-            let file_name = tag.to_segfile_name(0);
+            let file_name = dst.to_segfile_name(0);
            let header = new_tar_header(&file_name, 0)?;
            self.ar.append(&header, &mut io::empty()).await?;
            return Ok(());
@@ -244,12 +263,12 @@ where
            for blknum in startblk..endblk {
                let img = self
                    .timeline
-                    .get_rel_page_at_lsn(tag, blknum, self.lsn, false, self.ctx)
+                    .get_rel_page_at_lsn(src, blknum, self.lsn, false, self.ctx)
                    .await?;
                segment_data.extend_from_slice(&img[..]);
            }

-            let file_name = tag.to_segfile_name(seg as u32);
+            let file_name = dst.to_segfile_name(seg as u32);
            let header = new_tar_header(&file_name, segment_data.len() as u64)?;
            self.ar.append(&header, segment_data.as_slice()).await?;

--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -1,9 +1,14 @@
 use std::collections::HashMap;
+use std::str::FromStr;
 use std::sync::Arc;

 use anyhow::{anyhow, Context, Result};
-use hyper::StatusCode;
+use futures::Future;
+use hyper::body::HttpBody;
+use hyper::header::HeaderName;
+use hyper::http::HeaderValue;
 use hyper::{Body, Request, Response, Uri};
+use hyper::{Method, StatusCode};
 use metrics::launch_timestamp::LaunchTimestamp;
 use pageserver_api::models::DownloadRemoteLayersTaskSpawnRequest;
 use remote_storage::GenericRemoteStorage;
@@ -1157,6 +1162,42 @@ pub fn make_router(
            "/v1/tenant/:tenant_id/timeline/:timeline_id/layer/:layer_file_name",
            evict_timeline_layer_handler,
        )
-        .get("/v1/panic", always_panic_handler)
+        .get(
+            "/v1/panic",
+            wrap_span("always_panic_handler", always_panic_handler),
+        )
        .any(handler_404))
 }
+
+fn wrap_span<H, R, B, E>(
+    handler_name: &'static str,
+    handler: H,
+) -> impl Fn(Request<hyper::Body>) -> R
+where
+    H: Fn(Request<hyper::Body>) -> R + Send + Sync + 'static,
+    B: HttpBody + Send + Sync + 'static,
+    E: Into<Box<dyn std::error::Error + Send + Sync>>,
+    R: Future<Output = Result<Response<B>, E>> + Send + 'static,
+{
+    move |r| -> R {
+        async {
+            let headers = r.headers_mut();
+            let name = HeaderName::from_str("UUID").expect("created header name");
+            let request_id = "foo";
+            let value = HeaderValue::from_str(&request_id).unwrap();
+            headers.insert(name, value);
+            if r.method() == Method::GET {
+                tracing::debug!("{} {} {}", r.method(), r.uri().path(), request_id);
+            } else {
+                tracing::info!("{} {} {}", r.method(), r.uri().path(), request_id);
+            }
+            handler(r)
+                .instrument(info_span!(
+                    "request",
+                    handler = handler_name,
+                    request_id = request_id
+                ))
+                .await
+        }
+    }
+}
--- a/pageserver/src/walingest.rs
+++ b/pageserver/src/walingest.rs
@@ -37,7 +37,7 @@ use crate::walrecord::*;
 use crate::ZERO_PAGE;
 use pageserver_api::reltag::{RelTag, SlruKind};
 use postgres_ffi::pg_constants;
-use postgres_ffi::relfile_utils::{FSM_FORKNUM, MAIN_FORKNUM, VISIBILITYMAP_FORKNUM};
+use postgres_ffi::relfile_utils::{FSM_FORKNUM, INIT_FORKNUM, MAIN_FORKNUM, VISIBILITYMAP_FORKNUM};
 use postgres_ffi::v14::nonrelfile_utils::mx_offset_to_member_segment;
 use postgres_ffi::v14::xlog_utils::*;
 use postgres_ffi::v14::CheckPoint;
@@ -762,7 +762,7 @@ impl<'a> WalIngest<'a> {
        )?;

        for xnode in &parsed.xnodes {
-            for forknum in MAIN_FORKNUM..=VISIBILITYMAP_FORKNUM {
+            for forknum in MAIN_FORKNUM..=INIT_FORKNUM {
                let rel = RelTag {
                    forknum,
                    spcnode: xnode.spcnode,
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -1669,7 +1669,7 @@ class AbstractNeonCli(abc.ABC):
            timeout=timeout,
        )
        if not res.returncode:
-            log.info(f"Run success: {res.stdout}")
+            log.info(f"Run {res.args} success: {res.stdout}")
        elif check_return_code:
            # this way command output will be in recorded and shown in CI in failure message
            msg = f"""\
@@ -3463,6 +3463,14 @@ def wait_for_last_flush_lsn(
    return wait_for_last_record_lsn(env.pageserver.http_client(), tenant, timeline, last_flush_lsn)


+def wait_for_wal_insert_lsn(
+    env: NeonEnv, pg: Postgres, tenant: TenantId, timeline: TimelineId
+) -> Lsn:
+    """Wait for pageserver to catch up the latest flush LSN, returns the last observed lsn."""
+    last_flush_lsn = Lsn(pg.safe_psql("SELECT pg_current_wal_insert_lsn()")[0][0])
+    return wait_for_last_record_lsn(env.pageserver.http_client(), tenant, timeline, last_flush_lsn)
+
+
 def fork_at_current_lsn(
    env: NeonEnv,
    pg: Postgres,
--- a/test_runner/regress/test_tenant_size.py
+++ b/test_runner/regress/test_tenant_size.py
@@ -3,8 +3,15 @@ from typing import List, Tuple

 import pytest
 from fixtures.log_helper import log
-from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, wait_for_last_flush_lsn
-from fixtures.types import Lsn
+from fixtures.neon_fixtures import (
+    NeonEnv,
+    NeonEnvBuilder,
+    PageserverHttpClient,
+    Postgres,
+    wait_for_last_flush_lsn,
+    wait_for_wal_insert_lsn,
+)
+from fixtures.types import Lsn, TenantId, TimelineId


 def test_empty_tenant_size(neon_simple_env: NeonEnv, test_output_dir: Path):
@@ -324,7 +331,7 @@ def test_single_branch_get_tenant_size_grows(
    # inserts is larger than gc_horizon. for example 0x20000 here hid the fact
    # that there next_gc_cutoff could be smaller than initdb_lsn, which will
    # obviously lead to issues when calculating the size.
-    gc_horizon = 0x30000
+    gc_horizon = 0x38000
    neon_env_builder.pageserver_config_override = f"tenant_config={{compaction_period='0s', gc_period='0s', pitr_interval='0sec', gc_horizon={gc_horizon}}}"

    env = neon_env_builder.init_start()
@@ -334,29 +341,75 @@ def test_single_branch_get_tenant_size_grows(

    http_client = env.pageserver.http_client()

-    collected_responses: List[Tuple[Lsn, int]] = []
+    collected_responses: List[Tuple[str, Lsn, int]] = []

    size_debug_file = open(test_output_dir / "size_debug.html", "w")

-    def check_size_change(current_lsn: Lsn, initdb_lsn: Lsn, gc_horizon: int, size: int, prev: int):
-        if current_lsn - initdb_lsn > gc_horizon:
+    def check_size_change(
+        current_lsn: Lsn, initdb_lsn: Lsn, gc_horizon: int, size: int, prev_size: int
+    ):
+        if current_lsn - initdb_lsn >= gc_horizon:
            assert (
-                size >= prev
+                size >= prev_size
            ), "tenant_size may grow or not grow, because we only add gc_horizon amount of WAL to initial snapshot size"
        else:
            assert (
-                size > prev
+                size > prev_size
            ), "tenant_size should grow, because we continue to add WAL to initial snapshot size"

-    with env.postgres.create_start(branch_name, tenant_id=tenant_id) as pg:
-        initdb_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
+    def get_current_consistent_size(
+        env: NeonEnv,
+        pg: Postgres,
+        size_debug_file,  # apparently there is no public signature for open()...
+        http_client: PageserverHttpClient,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+    ) -> Tuple[Lsn, int]:
+        consistent = False
+        size_debug = None
+
+        current_lsn = wait_for_wal_insert_lsn(env, pg, tenant_id, timeline_id)
+        # We want to make sure we have a self-consistent set of values.
+        # Size changes with WAL, so only if both before and after getting
+        # the size of the tenant reports the same WAL insert LSN, we're OK
+        # to use that (size, LSN) combination.
+        # Note that 'wait_for_wal_flush_lsn' is not accurate enough: There
+        # can be more wal after the flush LSN that can arrive on the
+        # pageserver before we're requesting the page size.
+        # Anyway, in general this is only one iteration, so in general
+        # this is fine.
+        while not consistent:
+            size, sizes = http_client.tenant_size_and_modelinputs(tenant_id)
+            size_debug = http_client.tenant_size_debug(tenant_id)
+
+            after_lsn = wait_for_wal_insert_lsn(env, pg, tenant_id, timeline_id)
+            consistent = current_lsn == after_lsn
+            current_lsn = after_lsn
+        size_debug_file.write(size_debug)
+        return (current_lsn, size)
+
+    with env.postgres.create_start(
+        branch_name,
+        tenant_id=tenant_id,
+        ### autovacuum is disabled to limit WAL logging.
+        config_lines=["autovacuum=off"],
+    ) as pg:
+        (initdb_lsn, size) = get_current_consistent_size(
+            env, pg, size_debug_file, http_client, tenant_id, timeline_id
+        )
+        collected_responses.append(("INITDB", initdb_lsn, size))
+
        with pg.cursor() as cur:
-            cur.execute("CREATE TABLE t0 (i BIGINT NOT NULL)")
+            cur.execute("CREATE TABLE t0 (i BIGINT NOT NULL) WITH (fillfactor = 40)")
+
+        (current_lsn, size) = get_current_consistent_size(
+            env, pg, size_debug_file, http_client, tenant_id, timeline_id
+        )
+        collected_responses.append(("CREATE", current_lsn, size))

        batch_size = 100

-        i = 0
-        while True:
+        for i in range(3):
            with pg.cursor() as cur:
                cur.execute(
                    f"INSERT INTO t0(i) SELECT i FROM generate_series({batch_size} * %s, ({batch_size} * (%s + 1)) - 1) s(i)",
@@ -365,27 +418,24 @@ def test_single_branch_get_tenant_size_grows(

            i += 1

-            current_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
+            (current_lsn, size) = get_current_consistent_size(
+                env, pg, size_debug_file, http_client, tenant_id, timeline_id
+            )

-            size, sizes = http_client.tenant_size_and_modelinputs(tenant_id)
+            prev_size = collected_responses[-1][2]
+            if size == 0:
+                assert prev_size == 0
+            else:
+                # branch start shouldn't be past gc_horizon yet
+                # thus the size should grow as we insert more data
+                # "gc_horizon" is tuned so that it kicks in _after_ the
+                # insert phase, but before the update phase ends.
+                assert (
+                    current_lsn - initdb_lsn <= gc_horizon
+                ), "Tuning of GC window is likely out-of-date"
+                assert size > prev_size

-            size_debug = http_client.tenant_size_debug(tenant_id)
-            size_debug_file.write(size_debug)
-
-            if len(collected_responses) > 0:
-                prev = collected_responses[-1][1]
-                if size == 0:
-                    assert prev == 0
-                else:
-                    # branch start shouldn't be past gc_horizon yet
-                    # thus the size should grow as we insert more data
-                    assert current_lsn - initdb_lsn <= gc_horizon
-                    assert size > prev
-
-            collected_responses.append((current_lsn, size))
-
-            if len(collected_responses) > 2:
-                break
+            collected_responses.append(("INSERT", current_lsn, size))

        while True:
            with pg.cursor() as cur:
@@ -397,18 +447,15 @@ def test_single_branch_get_tenant_size_grows(
            if updated == 0:
                break

-            current_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
+            (current_lsn, size) = get_current_consistent_size(
+                env, pg, size_debug_file, http_client, tenant_id, timeline_id
+            )

-            size, sizes = http_client.tenant_size_and_modelinputs(tenant_id)
+            prev_size = collected_responses[-1][2]

-            size_debug = http_client.tenant_size_debug(tenant_id)
-            size_debug_file.write(size_debug)
+            check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size)

-            prev = collected_responses[-1][1]
-
-            check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev)
-
-            collected_responses.append((current_lsn, size))
+            collected_responses.append(("UPDATE", current_lsn, size))

        while True:
            with pg.cursor() as cur:
@@ -418,40 +465,47 @@ def test_single_branch_get_tenant_size_grows(
            if deleted == 0:
                break

-            current_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
+            (current_lsn, size) = get_current_consistent_size(
+                env, pg, size_debug_file, http_client, tenant_id, timeline_id
+            )

-            size = http_client.tenant_size(tenant_id)
-            prev = collected_responses[-1][1]
+            prev_size = collected_responses[-1][2]

-            check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev)
+            check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size)

-            collected_responses.append((current_lsn, size))
+            collected_responses.append(("DELETE", current_lsn, size))

        with pg.cursor() as cur:
            cur.execute("DROP TABLE t0")

-        current_lsn = wait_for_last_flush_lsn(env, pg, tenant_id, timeline_id)
+        # The size of the tenant should still be as large as before we dropped
+        # the table, because the drop operation can still be undone in the PITR
+        # defined by gc_horizon.
+        (current_lsn, size) = get_current_consistent_size(
+            env, pg, size_debug_file, http_client, tenant_id, timeline_id
+        )

-        size = http_client.tenant_size(tenant_id)
-        prev = collected_responses[-1][1]
+        prev_size = collected_responses[-1][2]

-        check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev)
+        check_size_change(current_lsn, initdb_lsn, gc_horizon, size, prev_size)

-        collected_responses.append((current_lsn, size))
+        collected_responses.append(("DROP", current_lsn, size))

    # this isn't too many lines to forget for a while. observed while
    # developing these tests that locally the value is a bit more than what we
    # get in the ci.
-    for lsn, size in collected_responses:
-        log.info(f"collected: {lsn}, {size}")
+    for phase, lsn, size in collected_responses:
+        log.info(f"collected: {phase}, {lsn}, {size}")

    env.pageserver.stop()
    env.pageserver.start()

+    size_after = http_client.tenant_size(tenant_id)
+    size_debug = http_client.tenant_size_debug(tenant_id)
+    size_debug_file.write(size_debug)
    size_debug_file.close()

-    size_after = http_client.tenant_size(tenant_id)
-    prev = collected_responses[-1][1]
+    prev = collected_responses[-1][2]

    assert size_after == prev, "size after restarting pageserver should not have changed"

--- a/test_runner/regress/test_unlogged.py
+++ b/test_runner/regress/test_unlogged.py
@@ -0,0 +1,34 @@
+from fixtures.neon_fixtures import NeonEnv, fork_at_current_lsn
+
+
+#
+# Test UNLOGGED tables/relations. Postgres copies init fork contents to main
+# fork to reset them during recovery. In Neon, pageserver directly sends init
+# fork contents as main fork during basebackup.
+#
+def test_unlogged(neon_simple_env: NeonEnv):
+    env = neon_simple_env
+    env.neon_cli.create_branch("test_unlogged", "empty")
+    pg = env.postgres.create_start("test_unlogged")
+
+    conn = pg.connect()
+    cur = conn.cursor()
+
+    cur.execute("CREATE UNLOGGED TABLE iut (id int);")
+    # create index to test unlogged index relation as well
+    cur.execute("CREATE UNIQUE INDEX iut_idx ON iut (id);")
+    cur.execute("INSERT INTO iut values (42);")
+
+    # create another compute to fetch inital empty contents from pageserver
+    fork_at_current_lsn(env, pg, "test_unlogged_basebackup", "test_unlogged")
+    pg2 = env.postgres.create_start(
+        "test_unlogged_basebackup",
+    )
+
+    conn2 = pg2.connect()
+    cur2 = conn2.cursor()
+    # after restart table should be empty but valid
+    cur2.execute("PREPARE iut_plan (int) AS INSERT INTO iut VALUES ($1)")
+    cur2.execute("EXECUTE iut_plan (43);")
+    cur2.execute("SELECT * FROM iut")
+    assert cur2.fetchall() == [(43,)]
--- a/vendor/postgres-v14
+++ b/vendor/postgres-v14
--- a/vendor/postgres-v15
+++ b/vendor/postgres-v15
Author	SHA1	Message	Date
Dmitry Rodionov	39fbac354f	WIP	2023-02-27 14:38:46 +02:00
Shany Pozin	84a8089ae7	cr feedback	2023-02-27 13:00:13 +02:00
Shany Pozin	6ce1638df2	Add tracing for incoming request	2023-02-27 12:38:56 +02:00
Shany Pozin	cf4965f95b	Add UUID header to mgmt API	2023-02-27 12:24:15 +02:00
Heikki Linnakangas	f51b48fa49	Fix UNLOGGED tables. Instead of trying to create missing files on the way, send init fork contents as main fork from pageserver during basebackup. Add test for that. Call put_rel_drop for init forks; previously they weren't removed. Bump vendor/postgres to revert previous approach on Postgres side. Co-authored-by: Arseny Sher <sher-ars@yandex.ru> ref https://github.com/neondatabase/postgres/pull/264 ref https://github.com/neondatabase/postgres/pull/259 ref https://github.com/neondatabase/neon/issues/1222	2023-02-24 23:30:02 +04:00
Sergey Melnikov	9f906ff236	Add pageserver-2.us-east-2.aws.neon.tech (#3701 )	2023-02-23 19:56:21 +01:00
Sam Kleinman	c79dd8d458	compute_ctl: support for fetching spec from control plane (#3610 )	2023-02-23 13:19:39 -05:00
Vadim Kharitonov	ec4ecdd543	Enable postgres SPI extensions	2023-02-23 16:49:37 +01:00
MMeent	20a4d817ce	Update vendored PostgreSQL versions to 14.7 and 15.2 (#3581 ) ## Describe your changes Rebase vendored PostgreSQL onto 14.7 and 15.2 ## Issue ticket number and link #3579 ## Checklist before requesting a review - [x] I have performed a self-review of my code. - [x] If it is a core feature, I have added thorough tests. - [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard? - [x] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section. ``` The version of PostgreSQL that we use is updated to 14.7 for PostgreSQL 14 and 15.2 for PostgreSQL 15. ```	2023-02-23 16:10:22 +02:00
Vadim Kharitonov	5ebf7e5619	Fix `pg_jsonschema` and `pg_graphql`	2023-02-23 10:43:46 +01:00
Arseny Sher	0692fffbf3	Bump vendor/postgres to include hotfix for unlogged tables with indexes. https://github.com/neondatabase/postgres/pull/259 https://github.com/neondatabase/postgres/pull/262	2023-02-23 01:34:59 +04:00
Vadim Kharitonov	093570af20	Compile `pg_hashids` extension	2023-02-22 21:00:25 +01:00
Dmitry Rodionov	eb403da814	Use debug level for successful GET http requests (#3681 ) We started rather frequently scrap some apis for metadata. This includes layer eviction tester, I believe console does that too. It should eliminate these logs: https://neonprod.grafana.net/goto/rr_ace1Vz?orgId=1 (Note the rate around 2k messages per minute)	2023-02-22 22:19:05 +03:00
Vadim Kharitonov	f3ad635911	Compile `pgrouting` extension	2023-02-22 20:16:11 +01:00
Vadim Kharitonov	a8d7360881	Compile `hypopg` extension	2023-02-22 20:14:30 +01:00
Lassi Pölönen	b0311cfdeb	Change the production neon-proxy-scram update strategy to RollingUpdate (#3683 ) ## Describe your changes The same change in production as was done in staging by https://github.com/neondatabase/neon/pull/3678 ## Issue ticket number and link https://github.com/neondatabase/neon/issues/3333	2023-02-22 20:15:37 +02:00