diff --git a/.github/actions/run-python-test-set/action.yml b/.github/actions/run-python-test-set/action.yml index 07cb7edbe7..3459449e15 100644 --- a/.github/actions/run-python-test-set/action.yml +++ b/.github/actions/run-python-test-set/action.yml @@ -74,6 +74,7 @@ runs: run: ./scripts/pysync - name: Download compatibility snapshot for Postgres 14 + if: inputs.build_type != 'remote' uses: ./.github/actions/download with: name: compatibility-snapshot-${{ inputs.build_type }}-pg14 diff --git a/.github/ansible/prod.ap-southeast-1.hosts.yaml b/.github/ansible/prod.ap-southeast-1.hosts.yaml new file mode 100644 index 0000000000..bb4af91f71 --- /dev/null +++ b/.github/ansible/prod.ap-southeast-1.hosts.yaml @@ -0,0 +1,35 @@ +storage: + vars: + bucket_name: neon-prod-storage-ap-southeast-1 + bucket_region: ap-southeast-1 + console_mgmt_base_url: http://console-release.local + etcd_endpoints: etcd-0.ap-southeast-1.aws.neon.tech:2379 + pageserver_config_stub: + pg_distrib_dir: /usr/local + remote_storage: + bucket_name: "{{ bucket_name }}" + bucket_region: "{{ bucket_region }}" + prefix_in_bucket: "pageserver/v1" + safekeeper_s3_prefix: safekeeper/v1/wal + hostname_suffix: "" + remote_user: ssm-user + ansible_aws_ssm_region: ap-southeast-1 + ansible_aws_ssm_bucket_name: neon-prod-storage-ap-southeast-1 + console_region_id: aws-ap-southeast-1 + + children: + pageservers: + hosts: + pageserver-0.ap-southeast-1.aws.neon.tech: + ansible_host: i-064de8ea28bdb495b + pageserver-1.ap-southeast-1.aws.neon.tech: + ansible_host: i-0b180defcaeeb6b93 + + safekeepers: + hosts: + safekeeper-0.ap-southeast-1.aws.neon.tech: + ansible_host: i-0d6f1dc5161eef894 + safekeeper-1.ap-southeast-1.aws.neon.tech: + ansible_host: i-0e338adda8eb2d19f + safekeeper-2.ap-southeast-1.aws.neon.tech: + ansible_host: i-04fb63634e4679eb9 diff --git a/.github/ansible/prod.eu-central-1.hosts.yaml b/.github/ansible/prod.eu-central-1.hosts.yaml new file mode 100644 index 0000000000..68b1579746 --- /dev/null +++ b/.github/ansible/prod.eu-central-1.hosts.yaml @@ -0,0 +1,35 @@ +storage: + vars: + bucket_name: neon-prod-storage-eu-central-1 + bucket_region: eu-central-1 + console_mgmt_base_url: http://console-release.local + etcd_endpoints: etcd-0.eu-central-1.aws.neon.tech:2379 + pageserver_config_stub: + pg_distrib_dir: /usr/local + remote_storage: + bucket_name: "{{ bucket_name }}" + bucket_region: "{{ bucket_region }}" + prefix_in_bucket: "pageserver/v1" + safekeeper_s3_prefix: safekeeper/v1/wal + hostname_suffix: "" + remote_user: ssm-user + ansible_aws_ssm_region: eu-central-1 + ansible_aws_ssm_bucket_name: neon-prod-storage-eu-central-1 + console_region_id: aws-eu-central-1 + + children: + pageservers: + hosts: + pageserver-0.eu-central-1.aws.neon.tech: + ansible_host: i-0cd8d316ecbb715be + pageserver-1.eu-central-1.aws.neon.tech: + ansible_host: i-090044ed3d383fef0 + + safekeepers: + hosts: + safekeeper-0.eu-central-1.aws.neon.tech: + ansible_host: i-0b238612d2318a050 + safekeeper-1.eu-central-1.aws.neon.tech: + ansible_host: i-07b9c45e5c2637cd4 + safekeeper-2.eu-central-1.aws.neon.tech: + ansible_host: i-020257302c3c93d88 diff --git a/.github/ansible/prod.us-east-2.hosts.yaml b/.github/ansible/prod.us-east-2.hosts.yaml new file mode 100644 index 0000000000..1d54e2ef0a --- /dev/null +++ b/.github/ansible/prod.us-east-2.hosts.yaml @@ -0,0 +1,36 @@ +storage: + vars: + bucket_name: neon-prod-storage-us-east-2 + bucket_region: us-east-2 + console_mgmt_base_url: http://console-release.local + etcd_endpoints: etcd-0.us-east-2.aws.neon.tech:2379 + pageserver_config_stub: + pg_distrib_dir: /usr/local + remote_storage: + bucket_name: "{{ bucket_name }}" + bucket_region: "{{ bucket_region }}" + prefix_in_bucket: "pageserver/v1" + safekeeper_s3_prefix: safekeeper/v1/wal + hostname_suffix: "" + remote_user: ssm-user + ansible_aws_ssm_region: us-east-2 + ansible_aws_ssm_bucket_name: neon-prod-storage-us-east-2 + console_region_id: aws-us-east-2 + + children: + pageservers: + hosts: + pageserver-0.us-east-2.aws.neon.tech: + ansible_host: i-062227ba7f119eb8c + pageserver-1.us-east-2.aws.neon.tech: + ansible_host: i-0b3ec0afab5968938 + + safekeepers: + hosts: + safekeeper-0.us-east-2.aws.neon.tech: + ansible_host: i-0e94224750c57d346 + safekeeper-1.us-east-2.aws.neon.tech: + ansible_host: i-06d113fb73bfddeb0 + safekeeper-2.us-east-2.aws.neon.tech: + ansible_host: i-09f66c8e04afff2e8 + diff --git a/.github/ansible/ssm_config b/.github/ansible/ssm_config index 94958b4490..0dc67507f2 100644 --- a/.github/ansible/ssm_config +++ b/.github/ansible/ssm_config @@ -1,3 +1,2 @@ ansible_connection: aws_ssm -ansible_aws_ssm_bucket_name: neon-dev-bucket ansible_python_interpreter: /usr/bin/python3 diff --git a/.github/ansible/staging.us-east-2.hosts.yaml b/.github/ansible/staging.us-east-2.hosts.yaml index db3ed87c45..3bbf5fe8cb 100644 --- a/.github/ansible/staging.us-east-2.hosts.yaml +++ b/.github/ansible/staging.us-east-2.hosts.yaml @@ -14,6 +14,7 @@ storage: hostname_suffix: "" remote_user: ssm-user ansible_aws_ssm_region: us-east-2 + ansible_aws_ssm_bucket_name: neon-staging-storage-us-east-2 console_region_id: aws-us-east-2 children: diff --git a/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml b/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml new file mode 100644 index 0000000000..f90f89a516 --- /dev/null +++ b/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml @@ -0,0 +1,31 @@ +# Helm chart values for neon-proxy-scram. +# This is a YAML-formatted file. + +image: + repository: neondatabase/neon + +settings: + authBackend: "console" + authEndpoint: "http://console-release.local/management/api/v2" + domain: "*.ap-southeast-1.aws.neon.tech" + +# -- Additional labels for neon-proxy pods +podLabels: + zenith_service: proxy-scram + zenith_env: prod + zenith_region: ap-southeast-1 + zenith_region_slug: ap-southeast-1 + +exposedService: + annotations: + service.beta.kubernetes.io/aws-load-balancer-type: external + service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip + service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing + external-dns.alpha.kubernetes.io/hostname: ap-southeast-1.aws.neon.tech + +#metrics: +# enabled: true +# serviceMonitor: +# enabled: true +# selector: +# release: kube-prometheus-stack diff --git a/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml b/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml new file mode 100644 index 0000000000..33a1154099 --- /dev/null +++ b/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml @@ -0,0 +1,31 @@ +# Helm chart values for neon-proxy-scram. +# This is a YAML-formatted file. + +image: + repository: neondatabase/neon + +settings: + authBackend: "console" + authEndpoint: "http://console-release.local/management/api/v2" + domain: "*.eu-central-1.aws.neon.tech" + +# -- Additional labels for neon-proxy pods +podLabels: + zenith_service: proxy-scram + zenith_env: prod + zenith_region: eu-central-1 + zenith_region_slug: eu-central-1 + +exposedService: + annotations: + service.beta.kubernetes.io/aws-load-balancer-type: external + service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip + service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing + external-dns.alpha.kubernetes.io/hostname: eu-central-1.aws.neon.tech + +#metrics: +# enabled: true +# serviceMonitor: +# enabled: true +# selector: +# release: kube-prometheus-stack diff --git a/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml b/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml new file mode 100644 index 0000000000..5f9f2d2e66 --- /dev/null +++ b/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml @@ -0,0 +1,31 @@ +# Helm chart values for neon-proxy-scram. +# This is a YAML-formatted file. + +image: + repository: neondatabase/neon + +settings: + authBackend: "console" + authEndpoint: "http://console-release.local/management/api/v2" + domain: "*.us-east-2.aws.neon.tech" + +# -- Additional labels for neon-proxy pods +podLabels: + zenith_service: proxy-scram + zenith_env: prod + zenith_region: us-east-2 + zenith_region_slug: us-east-2 + +exposedService: + annotations: + service.beta.kubernetes.io/aws-load-balancer-type: external + service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip + service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing + external-dns.alpha.kubernetes.io/hostname: us-east-2.aws.neon.tech + +#metrics: +# enabled: true +# serviceMonitor: +# enabled: true +# selector: +# release: kube-prometheus-stack diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 1b8b380179..91d9561e7d 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -625,11 +625,11 @@ jobs: (github.ref_name == 'main' || github.ref_name == 'release') && github.event_name != 'workflow_dispatch' run: | - crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/neon:latest - crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-tools:latest - crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-node:latest - crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-node-v14:latest - crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.us-east-2.amazonaws.com/compute-node-v15:latest + crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/neon:latest + crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:latest + crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node:latest + crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:latest + crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:latest - name: Configure Docker Hub login run: | @@ -756,9 +756,9 @@ jobs: defaults: run: shell: bash - env: - AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }} + strategy: + matrix: + target_region: [ us-east-2 ] steps: - name: Checkout uses: actions/checkout@v3 @@ -781,7 +781,47 @@ jobs: fi ansible-galaxy collection install sivel.toiletwater - ansible-playbook deploy.yaml -i staging.us-east-2.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}} + ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}} + rm -f neon_install.tar.gz .neon_current_version + + deploy-prod-new: + runs-on: prod + container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest + # We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version. + # If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly + needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ] + if: | + (github.ref_name == 'release') && + github.event_name != 'workflow_dispatch' + defaults: + run: + shell: bash + strategy: + matrix: + target_region: [ us-east-2, eu-central-1, ap-southeast-1 ] + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + submodules: true + fetch-depth: 0 + + - name: Redeploy + run: | + export DOCKER_TAG=${{needs.tag.outputs.build-tag}} + cd "$(pwd)/.github/ansible" + + if [[ "$GITHUB_REF_NAME" == "main" ]]; then + ./get_binaries.sh + elif [[ "$GITHUB_REF_NAME" == "release" ]]; then + RELEASE=true ./get_binaries.sh + else + echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'" + exit 1 + fi + + ansible-galaxy collection install sivel.toiletwater + ansible-playbook deploy.yaml -i prod.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_PRODUCTION_API_KEY}} rm -f neon_install.tar.gz .neon_current_version deploy-proxy: @@ -837,6 +877,11 @@ jobs: defaults: run: shell: bash + strategy: + matrix: + include: + - target_region: us-east-2 + target_cluster: dev-us-east-2-beta steps: - name: Checkout uses: actions/checkout@v3 @@ -847,12 +892,49 @@ jobs: - name: Configure environment run: | helm repo add neondatabase https://neondatabase.github.io/helm-charts - aws --region us-east-2 eks update-kubeconfig --name dev-us-east-2-beta --role-arn arn:aws:iam::369495373322:role/github-runner + aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }} - name: Re-deploy proxy run: | DOCKER_TAG=${{needs.tag.outputs.build-tag}} - helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s + helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s + + deploy-proxy-prod-new: + runs-on: prod + container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest + # Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently. + needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ] + if: | + (github.ref_name == 'release') && + github.event_name != 'workflow_dispatch' + defaults: + run: + shell: bash + strategy: + matrix: + include: + - target_region: us-east-2 + target_cluster: prod-us-east-2-delta + - target_region: eu-central-1 + target_cluster: prod-eu-central-1-gamma + - target_region: ap-southeast-1 + target_cluster: prod-ap-southeast-1-epsilon + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + submodules: true + fetch-depth: 0 + + - name: Configure environment + run: | + helm repo add neondatabase https://neondatabase.github.io/helm-charts + aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }} + + - name: Re-deploy proxy + run: | + DOCKER_TAG=${{needs.tag.outputs.build-tag}} + helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s promote-compatibility-test-snapshot: runs-on: dev diff --git a/Dockerfile.compute-node-v14 b/Dockerfile.compute-node-v14 index 035dfc0d08..27e15593ad 100644 --- a/Dockerfile.compute-node-v14 +++ b/Dockerfile.compute-node-v14 @@ -13,7 +13,7 @@ ARG TAG=pinned FROM debian:bullseye-slim AS build-deps RUN apt update && \ apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \ - zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config + zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libssl-dev ######################################################################################### # @@ -24,7 +24,7 @@ RUN apt update && \ FROM build-deps AS pg-build COPY vendor/postgres-v14 postgres RUN cd postgres && \ - ./configure CFLAGS='-O2 -g3' --enable-debug --with-uuid=ossp && \ + ./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp && \ make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \ make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \ # Install headers diff --git a/Dockerfile.compute-node-v15 b/Dockerfile.compute-node-v15 index 0b6e570b44..567848ffd7 100644 --- a/Dockerfile.compute-node-v15 +++ b/Dockerfile.compute-node-v15 @@ -13,7 +13,7 @@ ARG TAG=pinned FROM debian:bullseye-slim AS build-deps RUN apt update && \ apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \ - zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config + zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libssl-dev ######################################################################################### # @@ -24,7 +24,7 @@ RUN apt update && \ FROM build-deps AS pg-build COPY vendor/postgres-v15 postgres RUN cd postgres && \ - ./configure CFLAGS='-O2 -g3' --enable-debug --with-uuid=ossp && \ + ./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp && \ make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \ make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \ # Install headers diff --git a/pageserver/src/tenant/delta_layer.rs b/pageserver/src/tenant/delta_layer.rs index 41715ab0a4..a908d66200 100644 --- a/pageserver/src/tenant/delta_layer.rs +++ b/pageserver/src/tenant/delta_layer.rs @@ -610,9 +610,9 @@ impl DeltaLayer { /// /// 3. Call `finish`. /// -pub struct DeltaLayerWriter { +struct DeltaLayerWriterInner { conf: &'static PageServerConf, - path: PathBuf, + pub path: PathBuf, timeline_id: TimelineId, tenant_id: TenantId, @@ -624,17 +624,17 @@ pub struct DeltaLayerWriter { blob_writer: WriteBlobWriter>, } -impl DeltaLayerWriter { +impl DeltaLayerWriterInner { /// /// Start building a new delta layer. /// - pub fn new( + fn new( conf: &'static PageServerConf, timeline_id: TimelineId, tenant_id: TenantId, key_start: Key, lsn_range: Range, - ) -> Result { + ) -> anyhow::Result { // Create the file initially with a temporary filename. We don't know // the end key yet, so we cannot form the final filename yet. We will // rename it when we're done. @@ -653,7 +653,7 @@ impl DeltaLayerWriter { let block_buf = BlockBuf::new(); let tree_builder = DiskBtreeBuilder::new(block_buf); - Ok(DeltaLayerWriter { + Ok(Self { conf, path, timeline_id, @@ -670,17 +670,17 @@ impl DeltaLayerWriter { /// /// The values must be appended in key, lsn order. /// - pub fn put_value(&mut self, key: Key, lsn: Lsn, val: Value) -> Result<()> { + fn put_value(&mut self, key: Key, lsn: Lsn, val: Value) -> anyhow::Result<()> { self.put_value_bytes(key, lsn, &Value::ser(&val)?, val.will_init()) } - pub fn put_value_bytes( + fn put_value_bytes( &mut self, key: Key, lsn: Lsn, val: &[u8], will_init: bool, - ) -> Result<()> { + ) -> anyhow::Result<()> { assert!(self.lsn_range.start <= lsn); let off = self.blob_writer.write_blob(val)?; @@ -693,14 +693,14 @@ impl DeltaLayerWriter { Ok(()) } - pub fn size(&self) -> u64 { + fn size(&self) -> u64 { self.blob_writer.size() + self.tree.borrow_writer().size() } /// /// Finish writing the delta layer. /// - pub fn finish(self, key_end: Key) -> anyhow::Result { + fn finish(self, key_end: Key) -> anyhow::Result { let index_start_blk = ((self.blob_writer.size() + PAGE_SZ as u64 - 1) / PAGE_SZ as u64) as u32; @@ -768,6 +768,102 @@ impl DeltaLayerWriter { } } +/// A builder object for constructing a new delta layer. +/// +/// Usage: +/// +/// 1. Create the DeltaLayerWriter by calling DeltaLayerWriter::new(...) +/// +/// 2. Write the contents by calling `put_value` for every page +/// version to store in the layer. +/// +/// 3. Call `finish`. +/// +/// # Note +/// +/// As described in https://github.com/neondatabase/neon/issues/2650, it's +/// possible for the writer to drop before `finish` is actually called. So this +/// could lead to odd temporary files in the directory, exhausting file system. +/// This structure wraps `DeltaLayerWriterInner` and also contains `Drop` +/// implementation that cleans up the temporary file in failure. It's not +/// possible to do this directly in `DeltaLayerWriterInner` since `finish` moves +/// out some fields, making it impossible to implement `Drop`. +/// +#[must_use] +pub struct DeltaLayerWriter { + inner: Option, +} + +impl DeltaLayerWriter { + /// + /// Start building a new delta layer. + /// + pub fn new( + conf: &'static PageServerConf, + timeline_id: TimelineId, + tenant_id: TenantId, + key_start: Key, + lsn_range: Range, + ) -> anyhow::Result { + Ok(Self { + inner: Some(DeltaLayerWriterInner::new( + conf, + timeline_id, + tenant_id, + key_start, + lsn_range, + )?), + }) + } + + /// + /// Append a key-value pair to the file. + /// + /// The values must be appended in key, lsn order. + /// + pub fn put_value(&mut self, key: Key, lsn: Lsn, val: Value) -> anyhow::Result<()> { + self.inner.as_mut().unwrap().put_value(key, lsn, val) + } + + pub fn put_value_bytes( + &mut self, + key: Key, + lsn: Lsn, + val: &[u8], + will_init: bool, + ) -> anyhow::Result<()> { + self.inner + .as_mut() + .unwrap() + .put_value_bytes(key, lsn, val, will_init) + } + + pub fn size(&self) -> u64 { + self.inner.as_ref().unwrap().size() + } + + /// + /// Finish writing the delta layer. + /// + pub fn finish(mut self, key_end: Key) -> anyhow::Result { + self.inner.take().unwrap().finish(key_end) + } +} + +impl Drop for DeltaLayerWriter { + fn drop(&mut self) { + if let Some(inner) = self.inner.take() { + match inner.blob_writer.into_inner().into_inner() { + Ok(vfile) => vfile.remove(), + Err(err) => warn!( + "error while flushing buffer of image layer temporary file: {}", + err + ), + } + } + } +} + /// /// Iterator over all key-value pairse stored in a delta layer /// diff --git a/pageserver/src/tenant/image_layer.rs b/pageserver/src/tenant/image_layer.rs index cbfa0134b0..8409d34bc9 100644 --- a/pageserver/src/tenant/image_layer.rs +++ b/pageserver/src/tenant/image_layer.rs @@ -411,7 +411,7 @@ impl ImageLayer { /// /// 3. Call `finish`. /// -pub struct ImageLayerWriter { +struct ImageLayerWriterInner { conf: &'static PageServerConf, path: PathBuf, timeline_id: TimelineId, @@ -423,14 +423,17 @@ pub struct ImageLayerWriter { tree: DiskBtreeBuilder, } -impl ImageLayerWriter { - pub fn new( +impl ImageLayerWriterInner { + /// + /// Start building a new image layer. + /// + fn new( conf: &'static PageServerConf, timeline_id: TimelineId, tenant_id: TenantId, key_range: &Range, lsn: Lsn, - ) -> anyhow::Result { + ) -> anyhow::Result { // Create the file initially with a temporary filename. // We'll atomically rename it to the final name when we're done. let path = ImageLayer::temp_path_for( @@ -455,7 +458,7 @@ impl ImageLayerWriter { let block_buf = BlockBuf::new(); let tree_builder = DiskBtreeBuilder::new(block_buf); - let writer = ImageLayerWriter { + let writer = Self { conf, path, timeline_id, @@ -474,7 +477,7 @@ impl ImageLayerWriter { /// /// The page versions must be appended in blknum order. /// - pub fn put_image(&mut self, key: Key, img: &[u8]) -> Result<()> { + fn put_image(&mut self, key: Key, img: &[u8]) -> anyhow::Result<()> { ensure!(self.key_range.contains(&key)); let off = self.blob_writer.write_blob(img)?; @@ -485,7 +488,10 @@ impl ImageLayerWriter { Ok(()) } - pub fn finish(self) -> anyhow::Result { + /// + /// Finish writing the image layer. + /// + fn finish(self) -> anyhow::Result { let index_start_blk = ((self.blob_writer.size() + PAGE_SZ as u64 - 1) / PAGE_SZ as u64) as u32; @@ -552,3 +558,76 @@ impl ImageLayerWriter { Ok(layer) } } + +/// A builder object for constructing a new image layer. +/// +/// Usage: +/// +/// 1. Create the ImageLayerWriter by calling ImageLayerWriter::new(...) +/// +/// 2. Write the contents by calling `put_page_image` for every key-value +/// pair in the key range. +/// +/// 3. Call `finish`. +/// +/// # Note +/// +/// As described in https://github.com/neondatabase/neon/issues/2650, it's +/// possible for the writer to drop before `finish` is actually called. So this +/// could lead to odd temporary files in the directory, exhausting file system. +/// This structure wraps `ImageLayerWriterInner` and also contains `Drop` +/// implementation that cleans up the temporary file in failure. It's not +/// possible to do this directly in `ImageLayerWriterInner` since `finish` moves +/// out some fields, making it impossible to implement `Drop`. +/// +#[must_use] +pub struct ImageLayerWriter { + inner: Option, +} + +impl ImageLayerWriter { + /// + /// Start building a new image layer. + /// + pub fn new( + conf: &'static PageServerConf, + timeline_id: TimelineId, + tenant_id: TenantId, + key_range: &Range, + lsn: Lsn, + ) -> anyhow::Result { + Ok(Self { + inner: Some(ImageLayerWriterInner::new( + conf, + timeline_id, + tenant_id, + key_range, + lsn, + )?), + }) + } + + /// + /// Write next value to the file. + /// + /// The page versions must be appended in blknum order. + /// + pub fn put_image(&mut self, key: Key, img: &[u8]) -> anyhow::Result<()> { + self.inner.as_mut().unwrap().put_image(key, img) + } + + /// + /// Finish writing the image layer. + /// + pub fn finish(mut self) -> anyhow::Result { + self.inner.take().unwrap().finish() + } +} + +impl Drop for ImageLayerWriter { + fn drop(&mut self) { + if let Some(inner) = self.inner.take() { + inner.blob_writer.into_inner().remove(); + } + } +} diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 6a96254df4..d63429ea6a 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -1541,6 +1541,10 @@ impl Timeline { lsn, )?; + fail_point!("image-layer-writer-fail-before-finish", |_| { + anyhow::bail!("failpoint image-layer-writer-fail-before-finish"); + }); + for range in &partition.ranges { let mut key = range.start; while key < range.end { @@ -1835,6 +1839,11 @@ impl Timeline { }, )?); } + + fail_point!("delta-layer-writer-fail-before-finish", |_| { + anyhow::bail!("failpoint delta-layer-writer-fail-before-finish"); + }); + writer.as_mut().unwrap().put_value(key, lsn, value)?; prev_key = Some(key); } diff --git a/pageserver/src/virtual_file.rs b/pageserver/src/virtual_file.rs index 896c2603a2..46e4acd50c 100644 --- a/pageserver/src/virtual_file.rs +++ b/pageserver/src/virtual_file.rs @@ -319,6 +319,12 @@ impl VirtualFile { Ok(result) } + + pub fn remove(self) { + let path = self.path.clone(); + drop(self); + std::fs::remove_file(path).expect("failed to remove the virtual file"); + } } impl Drop for VirtualFile { diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 38a0db7cf7..e7e0e4ce56 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -1628,6 +1628,8 @@ class NeonPageserver(PgProtocol): Initializes the repository via `neon init`. """ + TEMP_FILE_SUFFIX = "___temp" + def __init__(self, env: NeonEnv, port: PageserverPort, config_override: Optional[str] = None): super().__init__(host="localhost", port=port.pg, user="cloud_admin") self.env = env diff --git a/test_runner/regress/test_compatibility.py b/test_runner/regress/test_compatibility.py index 944ff64390..20a17e449d 100644 --- a/test_runner/regress/test_compatibility.py +++ b/test_runner/regress/test_compatibility.py @@ -19,6 +19,8 @@ from fixtures.neon_fixtures import ( from fixtures.types import Lsn from pytest import FixtureRequest +DEFAILT_LOCAL_SNAPSHOT_DIR = "test_output/test_prepare_snapshot/compatibility_snapshot_pg14" + def dump_differs(first: Path, second: Path, output: Path) -> bool: """ @@ -76,14 +78,18 @@ class PortReplacer(object): raise TypeError(f"unsupported type {type(value)} of {value=}") +@pytest.mark.order(after="test_prepare_snapshot") def test_backward_compatibility( pg_bin: PgBin, port_distributor: PortDistributor, test_output_dir: Path, request: FixtureRequest ): - compatibility_snapshot_dir_env = os.environ.get("COMPATIBILITY_SNAPSHOT_DIR") - assert ( - compatibility_snapshot_dir_env is not None - ), "COMPATIBILITY_SNAPSHOT_DIR is not set. It should be set to `compatibility_snapshot_pg14` path generateted by test_prepare_snapshot" - compatibility_snapshot_dir = Path(compatibility_snapshot_dir_env).resolve() + compatibility_snapshot_dir = Path( + os.environ.get("COMPATIBILITY_SNAPSHOT_DIR", DEFAILT_LOCAL_SNAPSHOT_DIR) + ) + assert compatibility_snapshot_dir.exists(), ( + f"{compatibility_snapshot_dir} doesn't exist. Please run `test_prepare_snapshot` test first " + "to create the snapshot or set COMPATIBILITY_SNAPSHOT_DIR env variable to the existing snapshot" + ) + compatibility_snapshot_dir = compatibility_snapshot_dir.resolve() # Make compatibility snapshot artifacts pickupable by Allure # by copying the snapshot directory to the curent test output directory. @@ -229,7 +235,6 @@ def test_backward_compatibility( assert not initial_dump_differs, "initial dump differs" -@pytest.mark.order(after="test_backward_compatibility") # Note: if renaming this test, don't forget to update a reference to it in a workflow file: # "Upload compatibility snapshot" step in .github/actions/run-python-test-set/action.yml def test_prepare_snapshot(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, test_output_dir: Path): diff --git a/test_runner/regress/test_layer_writers_fail.py b/test_runner/regress/test_layer_writers_fail.py new file mode 100644 index 0000000000..e8ba0e7d91 --- /dev/null +++ b/test_runner/regress/test_layer_writers_fail.py @@ -0,0 +1,92 @@ +import pytest +from fixtures.neon_fixtures import NeonEnv, NeonPageserver + + +@pytest.mark.skip("See https://github.com/neondatabase/neon/issues/2703") +def test_image_layer_writer_fail_before_finish(neon_simple_env: NeonEnv): + env = neon_simple_env + pageserver_http = env.pageserver.http_client() + + tenant_id, timeline_id = env.neon_cli.create_tenant( + conf={ + # small checkpoint distance to create more delta layer files + "checkpoint_distance": f"{1024 ** 2}", + # set the target size to be large to allow the image layer to cover the whole key space + "compaction_target_size": f"{1024 ** 3}", + # tweak the default settings to allow quickly create image layers and L1 layers + "compaction_period": "1 s", + "compaction_threshold": "2", + "image_creation_threshold": "1", + } + ) + + pg = env.postgres.create_start("main", tenant_id=tenant_id) + pg.safe_psql_many( + [ + "CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)", + """INSERT INTO foo + SELECT 'long string to consume some space' || g + FROM generate_series(1, 100000) g""", + ] + ) + + pageserver_http.configure_failpoints(("image-layer-writer-fail-before-finish", "return")) + with pytest.raises(Exception, match="image-layer-writer-fail-before-finish"): + pageserver_http.timeline_checkpoint(tenant_id, timeline_id) + + new_temp_layer_files = list( + filter( + lambda file: str(file).endswith(NeonPageserver.TEMP_FILE_SUFFIX), + [path for path in env.timeline_dir(tenant_id, timeline_id).iterdir()], + ) + ) + + assert ( + len(new_temp_layer_files) == 0 + ), "pageserver should clean its temporary new image layer files on failure" + + +@pytest.mark.skip("See https://github.com/neondatabase/neon/issues/2703") +def test_delta_layer_writer_fail_before_finish(neon_simple_env: NeonEnv): + env = neon_simple_env + pageserver_http = env.pageserver.http_client() + + tenant_id, timeline_id = env.neon_cli.create_tenant( + conf={ + # small checkpoint distance to create more delta layer files + "checkpoint_distance": f"{1024 ** 2}", + # set the target size to be large to allow the image layer to cover the whole key space + "compaction_target_size": f"{1024 ** 3}", + # tweak the default settings to allow quickly create image layers and L1 layers + "compaction_period": "1 s", + "compaction_threshold": "2", + "image_creation_threshold": "1", + } + ) + + pg = env.postgres.create_start("main", tenant_id=tenant_id) + pg.safe_psql_many( + [ + "CREATE TABLE foo (t text) WITH (autovacuum_enabled = off)", + """INSERT INTO foo + SELECT 'long string to consume some space' || g + FROM generate_series(1, 100000) g""", + ] + ) + + pageserver_http.configure_failpoints(("delta-layer-writer-fail-before-finish", "return")) + # Note: we cannot test whether the exception is exactly 'delta-layer-writer-fail-before-finish' + # since our code does it in loop, we cannot get this exact error for our request. + with pytest.raises(Exception): + pageserver_http.timeline_checkpoint(tenant_id, timeline_id) + + new_temp_layer_files = list( + filter( + lambda file: str(file).endswith(NeonPageserver.TEMP_FILE_SUFFIX), + [path for path in env.timeline_dir(tenant_id, timeline_id).iterdir()], + ) + ) + + assert ( + len(new_temp_layer_files) == 0 + ), "pageserver should clean its temporary new delta layer files on failure" diff --git a/vendor/postgres-v15 b/vendor/postgres-v15 index f7c5269e9c..64558b386b 160000 --- a/vendor/postgres-v15 +++ b/vendor/postgres-v15 @@ -1 +1 @@ -Subproject commit f7c5269e9c7e818653ad6fe95ba072d1901c4497 +Subproject commit 64558b386bcd5a3300163ec7ea5d7f31cef8593c