diff --git a/.config/hakari.toml b/.config/hakari.toml index 12d2d1bf9c..15b939e86f 100644 --- a/.config/hakari.toml +++ b/.config/hakari.toml @@ -4,7 +4,7 @@ hakari-package = "workspace_hack" # Format for `workspace-hack = ...` lines in other Cargo.tomls. Requires cargo-hakari 0.9.8 or above. -dep-format-version = "3" +dep-format-version = "4" # Setting workspace.resolver = "2" in the root Cargo.toml is HIGHLY recommended. # Hakari works much better with the new feature resolver. diff --git a/.github/PULL_REQUEST_TEMPLATE/release-pr.md b/.github/PULL_REQUEST_TEMPLATE/release-pr.md index a848077e6a..1e18fd5d44 100644 --- a/.github/PULL_REQUEST_TEMPLATE/release-pr.md +++ b/.github/PULL_REQUEST_TEMPLATE/release-pr.md @@ -10,6 +10,7 @@ ### Checklist after release +- [ ] Make sure instructions from PRs included in this release and labeled `manual_release_instructions` are executed (either by you or by people who wrote them). - [ ] Based on the merged commits write release notes and open a PR into `website` repo ([example](https://github.com/neondatabase/website/pull/219/files)) - [ ] Check [#dev-production-stream](https://neondb.slack.com/archives/C03F5SM1N02) Slack channel - [ ] Check [stuck projects page](https://console.neon.tech/admin/projects?sort=last_active&order=desc&stuck=true) diff --git a/.github/actions/allure-report/action.yml b/.github/actions/allure-report/action.yml index e35cbb20fd..9a1037064a 100644 --- a/.github/actions/allure-report/action.yml +++ b/.github/actions/allure-report/action.yml @@ -45,12 +45,12 @@ runs: shell: bash -euxo pipefail {0} run: | if [ "${{ inputs.action }}" != "store" ] && [ "${{ inputs.action }}" != "generate" ]; then - echo 2>&1 "Unknown inputs.action type '${{ inputs.action }}'; allowed 'generate' or 'store' only" + echo >&2 "Unknown inputs.action type '${{ inputs.action }}'; allowed 'generate' or 'store' only" exit 1 fi if [ -z "${{ inputs.test_selection }}" ] && [ "${{ inputs.action }}" == "store" ]; then - echo 2>&1 
"inputs.test_selection must be set for 'store' action" + echo >&2 "inputs.test_selection must be set for 'store' action" exit 2 fi diff --git a/.github/actions/download/action.yml b/.github/actions/download/action.yml index eb34d4206a..d3f9bc0414 100644 --- a/.github/actions/download/action.yml +++ b/.github/actions/download/action.yml @@ -37,7 +37,7 @@ runs: echo 'SKIPPED=true' >> $GITHUB_OUTPUT exit 0 else - echo 2>&1 "Neither s3://${BUCKET}/${PREFIX}/${FILENAME} nor its version from previous attempts exist" + echo >&2 "Neither s3://${BUCKET}/${PREFIX}/${FILENAME} nor its version from previous attempts exist" exit 1 fi fi diff --git a/.github/actions/neon-branch-create/action.yml b/.github/actions/neon-branch-create/action.yml index 7ee43a3587..f1eea34ab9 100644 --- a/.github/actions/neon-branch-create/action.yml +++ b/.github/actions/neon-branch-create/action.yml @@ -58,7 +58,7 @@ runs: done if [ -z "${branch_id}" ] || [ "${branch_id}" == "null" ]; then - echo 2>&1 "Failed to create branch after 10 attempts, the latest response was: ${branch}" + echo >&2 "Failed to create branch after 10 attempts, the latest response was: ${branch}" exit 1 fi @@ -122,7 +122,7 @@ runs: done if [ -z "${password}" ] || [ "${password}" == "null" ]; then - echo 2>&1 "Failed to reset password after 10 attempts, the latest response was: ${reset_password}" + echo >&2 "Failed to reset password after 10 attempts, the latest response was: ${reset_password}" exit 1 fi diff --git a/.github/actions/neon-branch-delete/action.yml b/.github/actions/neon-branch-delete/action.yml index 5689093e2e..f8cd351dd9 100644 --- a/.github/actions/neon-branch-delete/action.yml +++ b/.github/actions/neon-branch-delete/action.yml @@ -48,7 +48,7 @@ runs: done if [ -z "${branch_id}" ] || [ "${branch_id}" == "null" ]; then - echo 2>&1 "Failed to delete branch after 10 attempts, the latest response was: ${deleted_branch}" + echo >&2 "Failed to delete branch after 10 attempts, the latest response was: 
${deleted_branch}" exit 1 fi env: diff --git a/.github/actions/run-python-test-set/action.yml b/.github/actions/run-python-test-set/action.yml index 11f5c78f19..115f555913 100644 --- a/.github/actions/run-python-test-set/action.yml +++ b/.github/actions/run-python-test-set/action.yml @@ -202,7 +202,7 @@ runs: prefix: latest - name: Create Allure report - if: success() || failure() + if: ${{ !cancelled() }} uses: ./.github/actions/allure-report with: action: store diff --git a/.github/actions/upload/action.yml b/.github/actions/upload/action.yml index 291a2cf3b0..63973dfbe7 100644 --- a/.github/actions/upload/action.yml +++ b/.github/actions/upload/action.yml @@ -23,7 +23,7 @@ runs: mkdir -p $(dirname $ARCHIVE) if [ -f ${ARCHIVE} ]; then - echo 2>&1 "File ${ARCHIVE} already exist. Something went wrong before" + echo >&2 "File ${ARCHIVE} already exist. Something went wrong before" exit 1 fi @@ -33,10 +33,10 @@ runs: elif [ -f ${SOURCE} ]; then time tar -cf ${ARCHIVE} --zstd ${SOURCE} elif ! 
ls ${SOURCE} > /dev/null 2>&1; then - echo 2>&1 "${SOURCE} does not exist" + echo >&2 "${SOURCE} does not exist" exit 2 else - echo 2>&1 "${SOURCE} is neither a directory nor a file, do not know how to handle it" + echo >&2 "${SOURCE} is neither a directory nor a file, do not know how to handle it" exit 3 fi diff --git a/.github/ansible/prod.ap-southeast-1.hosts.yaml b/.github/ansible/prod.ap-southeast-1.hosts.yaml index c185086eef..9c53733491 100644 --- a/.github/ansible/prod.ap-southeast-1.hosts.yaml +++ b/.github/ansible/prod.ap-southeast-1.hosts.yaml @@ -17,7 +17,7 @@ storage: kind: "LayerAccessThreshold" period: "10m" threshold: &default_eviction_threshold "24h" - evictions_low_residence_duration_metric_threshold: *default_eviction_threshold + evictions_low_residence_duration_metric_threshold: *default_eviction_threshold remote_storage: bucket_name: "{{ bucket_name }}" bucket_region: "{{ bucket_region }}" diff --git a/.github/ansible/prod.eu-central-1.hosts.yaml b/.github/ansible/prod.eu-central-1.hosts.yaml index 0a0f974ea4..3186519ca8 100644 --- a/.github/ansible/prod.eu-central-1.hosts.yaml +++ b/.github/ansible/prod.eu-central-1.hosts.yaml @@ -17,7 +17,7 @@ storage: kind: "LayerAccessThreshold" period: "10m" threshold: &default_eviction_threshold "24h" - evictions_low_residence_duration_metric_threshold: *default_eviction_threshold + evictions_low_residence_duration_metric_threshold: *default_eviction_threshold remote_storage: bucket_name: "{{ bucket_name }}" bucket_region: "{{ bucket_region }}" diff --git a/.github/ansible/prod.us-east-1.hosts.yaml b/.github/ansible/prod.us-east-1.hosts.yaml new file mode 100644 index 0000000000..b5b2b076bb --- /dev/null +++ b/.github/ansible/prod.us-east-1.hosts.yaml @@ -0,0 +1,50 @@ +storage: + vars: + bucket_name: neon-prod-storage-us-east-1 + bucket_region: us-east-1 + console_mgmt_base_url: http://neon-internal-api.aws.neon.tech + broker_endpoint: http://storage-broker-lb.theta.us-east-1.internal.aws.neon.tech:50051 
+ pageserver_config_stub: + pg_distrib_dir: /usr/local + metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events + metric_collection_interval: 10min + disk_usage_based_eviction: + max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80 + min_avail_bytes: 0 + period: "10s" + tenant_config: + eviction_policy: + kind: "LayerAccessThreshold" + period: "10m" + threshold: &default_eviction_threshold "24h" + evictions_low_residence_duration_metric_threshold: *default_eviction_threshold + remote_storage: + bucket_name: "{{ bucket_name }}" + bucket_region: "{{ bucket_region }}" + prefix_in_bucket: "pageserver/v1" + safekeeper_s3_prefix: safekeeper/v1/wal + hostname_suffix: "" + remote_user: ssm-user + ansible_aws_ssm_region: us-east-1 + ansible_aws_ssm_bucket_name: neon-prod-storage-us-east-1 + console_region_id: aws-us-east-1 + sentry_environment: production + + children: + pageservers: + hosts: + pageserver-0.us-east-1.aws.neon.tech: + ansible_host: i-085222088b0d2e0c7 + pageserver-1.us-east-1.aws.neon.tech: + ansible_host: i-0969d4f684d23a21e + pageserver-2.us-east-1.aws.neon.tech: + ansible_host: i-05dee87895da58dad + + safekeepers: + hosts: + safekeeper-0.us-east-1.aws.neon.tech: + ansible_host: i-04ce739e88793d864 + safekeeper-1.us-east-1.aws.neon.tech: + ansible_host: i-0e9e6c9227fb81410 + safekeeper-2.us-east-1.aws.neon.tech: + ansible_host: i-072f4dd86a327d52f diff --git a/.github/ansible/prod.us-east-2.hosts.yaml b/.github/ansible/prod.us-east-2.hosts.yaml index 4427bb344e..3062475b20 100644 --- a/.github/ansible/prod.us-east-2.hosts.yaml +++ b/.github/ansible/prod.us-east-2.hosts.yaml @@ -17,7 +17,7 @@ storage: kind: "LayerAccessThreshold" period: "10m" threshold: &default_eviction_threshold "24h" - evictions_low_residence_duration_metric_threshold: *default_eviction_threshold + evictions_low_residence_duration_metric_threshold: *default_eviction_threshold remote_storage: bucket_name: "{{ bucket_name }}" 
bucket_region: "{{ bucket_region }}" diff --git a/.github/ansible/prod.us-west-2.hosts.yaml b/.github/ansible/prod.us-west-2.hosts.yaml index 53626b4f59..9cf847bcb1 100644 --- a/.github/ansible/prod.us-west-2.hosts.yaml +++ b/.github/ansible/prod.us-west-2.hosts.yaml @@ -17,7 +17,7 @@ storage: kind: "LayerAccessThreshold" period: "10m" threshold: &default_eviction_threshold "24h" - evictions_low_residence_duration_metric_threshold: *default_eviction_threshold + evictions_low_residence_duration_metric_threshold: *default_eviction_threshold remote_storage: bucket_name: "{{ bucket_name }}" bucket_region: "{{ bucket_region }}" @@ -34,7 +34,7 @@ storage: pageservers: hosts: pageserver-0.us-west-2.aws.neon.tech: - ansible_host: i-0d9f6dfae0e1c780d + ansible_host: i-0d9f6dfae0e1c780d pageserver-1.us-west-2.aws.neon.tech: ansible_host: i-0c834be1dddba8b3f pageserver-2.us-west-2.aws.neon.tech: @@ -49,5 +49,5 @@ storage: safekeeper-1.us-west-2.aws.neon.tech: ansible_host: i-074682f9d3c712e7c safekeeper-2.us-west-2.aws.neon.tech: - ansible_host: i-042b7efb1729d7966 - + ansible_host: i-042b7efb1729d7966 + diff --git a/.github/ansible/staging.eu-central-1.hosts.yaml b/.github/ansible/staging.eu-central-1.hosts.yaml new file mode 100644 index 0000000000..db1d1adcff --- /dev/null +++ b/.github/ansible/staging.eu-central-1.hosts.yaml @@ -0,0 +1,47 @@ +storage: + vars: + bucket_name: neon-dev-storage-eu-central-1 + bucket_region: eu-central-1 + # We only register/update storage in one preview console and manually copy to other instances + console_mgmt_base_url: http://neon-internal-api.helium.aws.neon.build + broker_endpoint: http://storage-broker-lb.alpha.eu-central-1.internal.aws.neon.build:50051 + pageserver_config_stub: + pg_distrib_dir: /usr/local + metric_collection_endpoint: http://neon-internal-api.helium.aws.neon.build/billing/api/v1/usage_events + metric_collection_interval: 10min + disk_usage_based_eviction: + max_usage_pct: 80 + min_avail_bytes: 0 + period: "10s" + 
tenant_config: + eviction_policy: + kind: "LayerAccessThreshold" + period: "20m" + threshold: &default_eviction_threshold "20m" + evictions_low_residence_duration_metric_threshold: *default_eviction_threshold + remote_storage: + bucket_name: "{{ bucket_name }}" + bucket_region: "{{ bucket_region }}" + prefix_in_bucket: "pageserver/v1" + safekeeper_s3_prefix: safekeeper/v1/wal + hostname_suffix: "" + remote_user: ssm-user + ansible_aws_ssm_region: eu-central-1 + ansible_aws_ssm_bucket_name: neon-dev-storage-eu-central-1 + console_region_id: aws-eu-central-1 + sentry_environment: staging + + children: + pageservers: + hosts: + pageserver-0.eu-central-1.aws.neon.build: + ansible_host: i-011f93ec26cfba2d4 + + safekeepers: + hosts: + safekeeper-0.eu-central-1.aws.neon.build: + ansible_host: i-0ff026d27babf8ddd + safekeeper-1.eu-central-1.aws.neon.build: + ansible_host: i-03983a49ee54725d9 + safekeeper-2.eu-central-1.aws.neon.build: + ansible_host: i-0bd025ecdb61b0db3 diff --git a/.github/ansible/staging.eu-west-1.hosts.yaml b/.github/ansible/staging.eu-west-1.hosts.yaml index 34c8e77280..39f5613935 100644 --- a/.github/ansible/staging.eu-west-1.hosts.yaml +++ b/.github/ansible/staging.eu-west-1.hosts.yaml @@ -17,7 +17,7 @@ storage: kind: "LayerAccessThreshold" period: "20m" threshold: &default_eviction_threshold "20m" - evictions_low_residence_duration_metric_threshold: *default_eviction_threshold + evictions_low_residence_duration_metric_threshold: *default_eviction_threshold remote_storage: bucket_name: "{{ bucket_name }}" bucket_region: "{{ bucket_region }}" diff --git a/.github/ansible/staging.us-east-2.hosts.yaml b/.github/ansible/staging.us-east-2.hosts.yaml index 94f2be83a4..fb218c443d 100644 --- a/.github/ansible/staging.us-east-2.hosts.yaml +++ b/.github/ansible/staging.us-east-2.hosts.yaml @@ -17,7 +17,7 @@ storage: kind: "LayerAccessThreshold" period: "20m" threshold: &default_eviction_threshold "20m" - evictions_low_residence_duration_metric_threshold: 
*default_eviction_threshold + evictions_low_residence_duration_metric_threshold: *default_eviction_threshold remote_storage: bucket_name: "{{ bucket_name }}" bucket_region: "{{ bucket_region }}" @@ -48,9 +48,9 @@ storage: hosts: safekeeper-0.us-east-2.aws.neon.build: ansible_host: i-027662bd552bf5db0 - safekeeper-1.us-east-2.aws.neon.build: - ansible_host: i-0171efc3604a7b907 safekeeper-2.us-east-2.aws.neon.build: ansible_host: i-0de0b03a51676a6ce + safekeeper-3.us-east-2.aws.neon.build: + ansible_host: i-05f8ba2cda243bd18 safekeeper-99.us-east-2.aws.neon.build: ansible_host: i-0d61b6a2ea32028d5 diff --git a/.github/helm-values/dev-eu-central-1-alpha.neon-storage-broker.yaml b/.github/helm-values/dev-eu-central-1-alpha.neon-storage-broker.yaml new file mode 100644 index 0000000000..aaa1ec59b4 --- /dev/null +++ b/.github/helm-values/dev-eu-central-1-alpha.neon-storage-broker.yaml @@ -0,0 +1,52 @@ +# Helm chart values for neon-storage-broker +podLabels: + neon_env: staging + neon_service: storage-broker + +# Use L4 LB +service: + # service.annotations -- Annotations to add to the service + annotations: + service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller + service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip + service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet + # assign service to this name at external-dns + external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.alpha.eu-central-1.internal.aws.neon.build + # service.type -- Service type + type: LoadBalancer + # service.port -- broker listen port + port: 50051 + +ingress: + enabled: false + +metrics: + enabled: false + +extraManifests: + - apiVersion: operator.victoriametrics.com/v1beta1 + kind: VMServiceScrape + metadata: + name: "{{ include \"neon-storage-broker.fullname\" . 
}}" + labels: + helm.sh/chart: neon-storage-broker-{{ .Chart.Version }} + app.kubernetes.io/name: neon-storage-broker + app.kubernetes.io/instance: neon-storage-broker + app.kubernetes.io/version: "{{ .Chart.AppVersion }}" + app.kubernetes.io/managed-by: Helm + namespace: "{{ .Release.Namespace }}" + spec: + selector: + matchLabels: + app.kubernetes.io/name: "neon-storage-broker" + endpoints: + - port: broker + path: /metrics + interval: 10s + scrapeTimeout: 10s + namespaceSelector: + matchNames: + - "{{ .Release.Namespace }}" + +settings: + sentryEnvironment: "staging" diff --git a/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml b/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml index 2307856464..a7d8587ec2 100644 --- a/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml +++ b/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml @@ -7,13 +7,13 @@ deploymentStrategy: maxSurge: 100% maxUnavailable: 50% -# Delay the kill signal by 7 days (7 * 24 * 60 * 60) +# Delay the kill signal by 5 minutes (5 * 60) # The pod(s) will stay in Terminating, keeps the existing connections # but doesn't receive new ones containerLifecycle: preStop: exec: - command: ["/bin/sh", "-c", "sleep 604800"] + command: ["/bin/sh", "-c", "sleep 300"] terminationGracePeriodSeconds: 604800 image: @@ -23,6 +23,7 @@ settings: authBackend: "console" authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2" domain: "*.eu-west-1.aws.neon.build" + otelExporterOtlpEndpoint: "https://otel-collector.zeta.eu-west-1.internal.aws.neon.build" sentryEnvironment: "staging" wssPort: 8443 metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events" diff --git a/.github/helm-values/dev-us-east-2-beta.neon-proxy-link.yaml b/.github/helm-values/dev-us-east-2-beta.neon-proxy-link.yaml index feca05aff6..893e0fab10 100644 --- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-link.yaml +++ 
b/.github/helm-values/dev-us-east-2-beta.neon-proxy-link.yaml @@ -9,6 +9,7 @@ settings: authEndpoint: "https://console.stage.neon.tech/authenticate_proxy_request/" uri: "https://console.stage.neon.tech/psql_session/" domain: "pg.neon.build" + otelExporterOtlpEndpoint: "https://otel-collector.beta.us-east-2.internal.aws.neon.build" sentryEnvironment: "staging" metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events" metricCollectionInterval: "1min" diff --git a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml index feee1b369a..77f6cf080e 100644 --- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml +++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml @@ -1,6 +1,22 @@ # Helm chart values for neon-proxy-scram. # This is a YAML-formatted file. +deploymentStrategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 100% + maxUnavailable: 50% + +# Delay the kill signal by 5 minutes (5 * 60) +# The pod(s) will stay in Terminating, keeps the existing connections +# but doesn't receive new ones +containerLifecycle: + preStop: + exec: + command: ["/bin/sh", "-c", "sleep 300"] +terminationGracePeriodSeconds: 604800 + + image: repository: neondatabase/neon @@ -8,6 +24,7 @@ settings: authBackend: "console" authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2" domain: "*.cloud.stage.neon.tech" + otelExporterOtlpEndpoint: "https://otel-collector.beta.us-east-2.internal.aws.neon.build" sentryEnvironment: "staging" wssPort: 8443 metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events" diff --git a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml index 2a8f028f3b..2510d624cd 100644 --- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml +++ 
b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml @@ -7,15 +7,16 @@ deploymentStrategy: maxSurge: 100% maxUnavailable: 50% -# Delay the kill signal by 7 days (7 * 24 * 60 * 60) +# Delay the kill signal by 5 minutes (5 * 60) # The pod(s) will stay in Terminating, keeps the existing connections # but doesn't receive new ones containerLifecycle: preStop: exec: - command: ["/bin/sh", "-c", "sleep 604800"] + command: ["/bin/sh", "-c", "sleep 300"] terminationGracePeriodSeconds: 604800 + image: repository: neondatabase/neon @@ -24,6 +25,7 @@ settings: authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2" domain: "*.us-east-2.aws.neon.build" extraDomains: ["*.us-east-2.postgres.zenith.tech", "*.us-east-2.retooldb-staging.com"] + otelExporterOtlpEndpoint: "https://otel-collector.beta.us-east-2.internal.aws.neon.build" sentryEnvironment: "staging" wssPort: 8443 metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events" diff --git a/.github/helm-values/preview-template.neon-proxy-scram.yaml b/.github/helm-values/preview-template.neon-proxy-scram.yaml new file mode 100644 index 0000000000..f4bd418e28 --- /dev/null +++ b/.github/helm-values/preview-template.neon-proxy-scram.yaml @@ -0,0 +1,67 @@ +# Helm chart values for neon-proxy-scram. +# This is a YAML-formatted file. 
+ +deploymentStrategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 100% + maxUnavailable: 50% + +image: + repository: neondatabase/neon + +settings: + authBackend: "console" + authEndpoint: "http://neon-internal-api.${PREVIEW_NAME}.aws.neon.build/management/api/v2" + domain: "*.cloud.${PREVIEW_NAME}.aws.neon.build" + sentryEnvironment: "staging" + wssPort: 8443 + metricCollectionEndpoint: "http://neon-internal-api.${PREVIEW_NAME}.aws.neon.build/billing/api/v1/usage_events" + metricCollectionInterval: "1min" + +# -- Additional labels for neon-proxy pods +podLabels: + neon_service: proxy-scram + neon_env: test + neon_region: ${PREVIEW_NAME}.eu-central-1 + + +exposedService: + annotations: + service.beta.kubernetes.io/aws-load-balancer-type: external + service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip + service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing + external-dns.alpha.kubernetes.io/hostname: cloud.${PREVIEW_NAME}.aws.neon.build + httpsPort: 443 + +#metrics: +# enabled: true +# serviceMonitor: +# enabled: true +# selector: +# release: kube-prometheus-stack + +extraManifests: + - apiVersion: operator.victoriametrics.com/v1beta1 + kind: VMServiceScrape + metadata: + name: "{{ include \"neon-proxy.fullname\" . }}" + labels: + helm.sh/chart: neon-proxy-{{ .Chart.Version }} + app.kubernetes.io/name: neon-proxy + app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . 
}}" + app.kubernetes.io/version: "{{ .Chart.AppVersion }}" + app.kubernetes.io/managed-by: Helm + namespace: "{{ .Release.Namespace }}" + spec: + selector: + matchLabels: + app.kubernetes.io/name: "neon-proxy" + endpoints: + - port: http + path: /metrics + interval: 10s + scrapeTimeout: 10s + namespaceSelector: + matchNames: + - "{{ .Release.Namespace }}" diff --git a/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml b/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml index 5a98217bae..6088d62fba 100644 --- a/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml +++ b/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml @@ -7,13 +7,13 @@ deploymentStrategy: maxSurge: 100% maxUnavailable: 50% -# Delay the kill signal by 7 days (7 * 24 * 60 * 60) +# Delay the kill signal by 5 minutes (5 * 60) # The pod(s) will stay in Terminating, keeps the existing connections # but doesn't receive new ones containerLifecycle: preStop: exec: - command: ["/bin/sh", "-c", "sleep 604800"] + command: ["/bin/sh", "-c", "sleep 300"] terminationGracePeriodSeconds: 604800 diff --git a/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml b/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml index a9ee49d82f..7d26f2e02f 100644 --- a/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml +++ b/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml @@ -7,13 +7,13 @@ deploymentStrategy: maxSurge: 100% maxUnavailable: 50% -# Delay the kill signal by 7 days (7 * 24 * 60 * 60) +# Delay the kill signal by 5 minutes (5 * 60) # The pod(s) will stay in Terminating, keeps the existing connections # but doesn't receive new ones containerLifecycle: preStop: exec: - command: ["/bin/sh", "-c", "sleep 604800"] + command: ["/bin/sh", "-c", "sleep 300"] terminationGracePeriodSeconds: 604800 diff --git a/.github/helm-values/prod-us-east-1-theta.neon-proxy-scram.yaml 
b/.github/helm-values/prod-us-east-1-theta.neon-proxy-scram.yaml new file mode 100644 index 0000000000..1c7e646810 --- /dev/null +++ b/.github/helm-values/prod-us-east-1-theta.neon-proxy-scram.yaml @@ -0,0 +1,69 @@ +# Helm chart values for neon-proxy-scram. +# This is a YAML-formatted file. + +deploymentStrategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 100% + maxUnavailable: 50% + +# Delay the kill signal by 5 minutes (5 * 60) +# The pod(s) will stay in Terminating, keeps the existing connections +# but doesn't receive new ones +containerLifecycle: + preStop: + exec: + command: ["/bin/sh", "-c", "sleep 300"] +terminationGracePeriodSeconds: 604800 + +image: + repository: neondatabase/neon + +settings: + authBackend: "console" + authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2" + domain: "*.us-east-1.aws.neon.tech" + # *.us-east-1.retooldb.com hasn't been delegated yet. + extraDomains: ["*.us-east-1.postgres.vercel-storage.com"] + sentryEnvironment: "production" + wssPort: 8443 + metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events" + metricCollectionInterval: "10min" + +podLabels: + neon_service: proxy-scram + neon_env: prod + neon_region: us-east-1 + +exposedService: + annotations: + service.beta.kubernetes.io/aws-load-balancer-type: external + service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip + service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing + external-dns.alpha.kubernetes.io/hostname: us-east-1.aws.neon.tech + httpsPort: 443 + +extraManifests: + - apiVersion: operator.victoriametrics.com/v1beta1 + kind: VMServiceScrape + metadata: + name: "{{ include \"neon-proxy.fullname\" . }}" + labels: + helm.sh/chart: neon-proxy-{{ .Chart.Version }} + app.kubernetes.io/name: neon-proxy + app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . 
}}" + app.kubernetes.io/version: "{{ .Chart.AppVersion }}" + app.kubernetes.io/managed-by: Helm + namespace: "{{ .Release.Namespace }}" + spec: + selector: + matchLabels: + app.kubernetes.io/name: "neon-proxy" + endpoints: + - port: http + path: /metrics + interval: 10s + scrapeTimeout: 10s + namespaceSelector: + matchNames: + - "{{ .Release.Namespace }}" diff --git a/.github/helm-values/prod-us-east-1-theta.neon-storage-broker.yaml b/.github/helm-values/prod-us-east-1-theta.neon-storage-broker.yaml new file mode 100644 index 0000000000..7c16911b5e --- /dev/null +++ b/.github/helm-values/prod-us-east-1-theta.neon-storage-broker.yaml @@ -0,0 +1,52 @@ +# Helm chart values for neon-storage-broker +podLabels: + neon_env: production + neon_service: storage-broker + +# Use L4 LB +service: + # service.annotations -- Annotations to add to the service + annotations: + service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller + service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip + service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet + # assign service to this name at external-dns + external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.theta.us-east-1.internal.aws.neon.tech + # service.type -- Service type + type: LoadBalancer + # service.port -- broker listen port + port: 50051 + +ingress: + enabled: false + +metrics: + enabled: false + +extraManifests: + - apiVersion: operator.victoriametrics.com/v1beta1 + kind: VMServiceScrape + metadata: + name: "{{ include \"neon-storage-broker.fullname\" . 
}}" + labels: + helm.sh/chart: neon-storage-broker-{{ .Chart.Version }} + app.kubernetes.io/name: neon-storage-broker + app.kubernetes.io/instance: neon-storage-broker + app.kubernetes.io/version: "{{ .Chart.AppVersion }}" + app.kubernetes.io/managed-by: Helm + namespace: "{{ .Release.Namespace }}" + spec: + selector: + matchLabels: + app.kubernetes.io/name: "neon-storage-broker" + endpoints: + - port: broker + path: /metrics + interval: 10s + scrapeTimeout: 10s + namespaceSelector: + matchNames: + - "{{ .Release.Namespace }}" + +settings: + sentryEnvironment: "production" diff --git a/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml b/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml index 239a9911c7..ae239fd3c1 100644 --- a/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml +++ b/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml @@ -7,13 +7,13 @@ deploymentStrategy: maxSurge: 100% maxUnavailable: 50% -# Delay the kill signal by 7 days (7 * 24 * 60 * 60) +# Delay the kill signal by 5 minutes (5 * 60) # The pod(s) will stay in Terminating, keeps the existing connections # but doesn't receive new ones containerLifecycle: preStop: exec: - command: ["/bin/sh", "-c", "sleep 604800"] + command: ["/bin/sh", "-c", "sleep 300"] terminationGracePeriodSeconds: 604800 diff --git a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram-legacy.yaml b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram-legacy.yaml index a186fb833f..7378e8abda 100644 --- a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram-legacy.yaml +++ b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram-legacy.yaml @@ -7,13 +7,13 @@ deploymentStrategy: maxSurge: 100% maxUnavailable: 50% -# Delay the kill signal by 7 days (7 * 24 * 60 * 60) +# Delay the kill signal by 5 minutes (5 * 60) # The pod(s) will stay in Terminating, keeps the existing connections # but doesn't receive new ones containerLifecycle: preStop: exec: - command: ["/bin/sh", 
"-c", "sleep 604800"] + command: ["/bin/sh", "-c", "sleep 300"] terminationGracePeriodSeconds: 604800 diff --git a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml index c987ae236a..d9d458f081 100644 --- a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml +++ b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml @@ -7,13 +7,13 @@ deploymentStrategy: maxSurge: 100% maxUnavailable: 50% -# Delay the kill signal by 7 days (7 * 24 * 60 * 60) +# Delay the kill signal by 5 minutes (5 * 60) # The pod(s) will stay in Terminating, keeps the existing connections # but doesn't receive new ones containerLifecycle: preStop: exec: - command: ["/bin/sh", "-c", "sleep 604800"] + command: ["/bin/sh", "-c", "sleep 300"] terminationGracePeriodSeconds: 604800 diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index 028fe8d8ad..a5a27e59a8 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -30,7 +30,7 @@ defaults: concurrency: # Allow only one workflow per any non-`main` branch. 
- group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }} + group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }} cancel-in-progress: true jobs: @@ -42,7 +42,7 @@ jobs: DEFAULT_PG_VERSION: 14 TEST_OUTPUT: /tmp/test_output BUILD_TYPE: remote - SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }} + SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }} PLATFORM: "neon-staging" runs-on: [ self-hosted, us-east-2, x64 ] @@ -92,7 +92,7 @@ jobs: api_key: ${{ secrets.NEON_STAGING_API_KEY }} - name: Create Allure report - if: success() || failure() + if: ${{ !cancelled() }} uses: ./.github/actions/allure-report with: action: generate @@ -174,7 +174,7 @@ jobs: DEFAULT_PG_VERSION: 14 TEST_OUTPUT: /tmp/test_output BUILD_TYPE: remote - SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }} + SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }} PLATFORM: ${{ matrix.platform }} runs-on: [ self-hosted, us-east-2, x64 ] @@ -226,7 +226,7 @@ jobs: CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }} ;; *) - echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-freetier', 'rds-aurora', or 'rds-postgres'" + echo >&2 "Unknown PLATFORM=${PLATFORM}. 
Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-freetier', 'rds-aurora', or 'rds-postgres'" exit 1 ;; esac @@ -282,7 +282,7 @@ jobs: api_key: ${{ secrets.NEON_STAGING_API_KEY }} - name: Create Allure report - if: success() || failure() + if: ${{ !cancelled() }} uses: ./.github/actions/allure-report with: action: generate @@ -305,7 +305,7 @@ jobs: # # *_CLICKBENCH_CONNSTR: Genuine ClickBench DB with ~100M rows # *_CLICKBENCH_10M_CONNSTR: DB with the first 10M rows of ClickBench DB - if: success() || failure() + if: ${{ !cancelled() }} needs: [ generate-matrices, pgbench-compare ] strategy: @@ -317,7 +317,7 @@ jobs: DEFAULT_PG_VERSION: 14 TEST_OUTPUT: /tmp/test_output BUILD_TYPE: remote - SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }} + SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }} PLATFORM: ${{ matrix.platform }} runs-on: [ self-hosted, us-east-2, x64 ] @@ -356,7 +356,7 @@ jobs: CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CLICKBENCH_10M_CONNSTR }} ;; *) - echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'" + echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'" exit 1 ;; esac @@ -379,7 +379,7 @@ jobs: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} - name: Create Allure report - if: success() || failure() + if: ${{ !cancelled() }} uses: ./.github/actions/allure-report with: action: generate @@ -401,7 +401,7 @@ jobs: # We might change it after https://github.com/neondatabase/neon/issues/2900. 
# # *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB) - if: success() || failure() + if: ${{ !cancelled() }} needs: [ generate-matrices, clickbench-compare ] strategy: @@ -413,7 +413,7 @@ jobs: DEFAULT_PG_VERSION: 14 TEST_OUTPUT: /tmp/test_output BUILD_TYPE: remote - SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }} + SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }} PLATFORM: ${{ matrix.platform }} runs-on: [ self-hosted, us-east-2, x64 ] @@ -452,7 +452,7 @@ jobs: CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_TPCH_S10_CONNSTR }} ;; *) - echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'" + echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'" exit 1 ;; esac @@ -475,7 +475,7 @@ jobs: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} - name: Create Allure report - if: success() || failure() + if: ${{ !cancelled() }} uses: ./.github/actions/allure-report with: action: generate @@ -491,7 +491,7 @@ jobs: SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} user-examples-compare: - if: success() || failure() + if: ${{ !cancelled() }} needs: [ generate-matrices, tpch-compare ] strategy: @@ -503,7 +503,7 @@ jobs: DEFAULT_PG_VERSION: 14 TEST_OUTPUT: /tmp/test_output BUILD_TYPE: remote - SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }} + SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }} PLATFORM: ${{ matrix.platform }} runs-on: [ self-hosted, us-east-2, x64 ] @@ -542,7 +542,7 @@ jobs: CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_RDS_POSTGRES_CONNSTR }} ;; *) - echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'" + echo >&2 "Unknown PLATFORM=${PLATFORM}. 
Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'" exit 1 ;; esac @@ -565,7 +565,7 @@ jobs: BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} - name: Create Allure report - if: success() || failure() + if: ${{ !cancelled() }} uses: ./.github/actions/allure-report with: action: generate diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index c096aef4a9..e5ba7aa3eb 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -13,7 +13,7 @@ defaults: concurrency: # Allow only one workflow per any non-`main` branch. - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }} + group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }} cancel-in-progress: true env: @@ -111,8 +111,21 @@ jobs: - name: Get postgres headers run: make postgres-headers -j$(nproc) - - name: Run cargo clippy - run: ./run_clippy.sh + # cargo hack runs the given cargo subcommand (clippy in this case) for all feature combinations. + # This will catch compiler & clippy warnings in all feature combinations. + # TODO: use cargo hack for build and test as well, but, that's quite expensive. 
+ # NB: keep clippy args in sync with ./run_clippy.sh + - run: | + CLIPPY_COMMON_ARGS="$( source .neon_clippy_args; echo "$CLIPPY_COMMON_ARGS")" + if [ "$CLIPPY_COMMON_ARGS" = "" ]; then + echo "No clippy args found in .neon_clippy_args" + exit 1 + fi + echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV + - name: Run cargo clippy (debug) + run: cargo hack --feature-powerset clippy $CLIPPY_COMMON_ARGS + - name: Run cargo clippy (release) + run: cargo hack --feature-powerset clippy --release $CLIPPY_COMMON_ARGS # Use `${{ !cancelled() }}` to run quck tests after the longer clippy run - name: Check formatting @@ -368,7 +381,7 @@ jobs: build_type: ${{ matrix.build_type }} test_selection: performance run_in_parallel: false - save_perf_report: ${{ github.ref == 'refs/heads/main' }} + save_perf_report: ${{ github.ref_name == 'main' }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" @@ -541,7 +554,7 @@ jobs: container: image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned options: --init - needs: [ push-docker-hub, tag ] + needs: [ promote-images, tag ] steps: - name: Set PR's status to pending and request a remote CI test run: | @@ -584,8 +597,7 @@ jobs: neon-image: runs-on: [ self-hosted, gen3, large ] needs: [ tag ] - # https://github.com/GoogleContainerTools/kaniko/issues/2005 - container: gcr.io/kaniko-project/executor:v1.7.0-debug + container: gcr.io/kaniko-project/executor:v1.9.2-debug defaults: run: shell: sh -eu {0} @@ -597,11 +609,32 @@ jobs: submodules: true fetch-depth: 0 - - name: Configure ECR login - run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json + - name: Configure ECR and Docker Hub login + run: | + DOCKERHUB_AUTH=$(echo -n "${{ secrets.NEON_DOCKERHUB_USERNAME }}:${{ secrets.NEON_DOCKERHUB_PASSWORD }}" | base64) + echo "::add-mask::${DOCKERHUB_AUTH}" + + cat <<-EOF > /kaniko/.docker/config.json + { + "auths": { + 
"https://index.docker.io/v1/": { + "auth": "${DOCKERHUB_AUTH}" + } + }, + "credHelpers": { + "369495373322.dkr.ecr.eu-central-1.amazonaws.com": "ecr-login" + } + } + EOF - name: Kaniko build neon - run: /kaniko/executor --reproducible --snapshotMode=redo --skip-unused-stages --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} + run: + /kaniko/executor --reproducible --snapshot-mode=redo --skip-unused-stages --cache=true + --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache + --context . + --build-arg GIT_VERSION=${{ github.sha }} + --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} + --destination neondatabase/neon:${{needs.tag.outputs.build-tag}} # Cleanup script fails otherwise - rm: cannot remove '/nvme/actions-runner/_work/_temp/_github_home/.ecr': Permission denied - name: Cleanup ECR folder @@ -652,7 +685,7 @@ jobs: compute-tools-image: runs-on: [ self-hosted, gen3, large ] needs: [ tag ] - container: gcr.io/kaniko-project/executor:v1.7.0-debug + container: gcr.io/kaniko-project/executor:v1.9.2-debug defaults: run: shell: sh -eu {0} @@ -661,18 +694,41 @@ jobs: - name: Checkout uses: actions/checkout@v1 # v3 won't work with kaniko - - name: Configure ECR login - run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json + - name: Configure ECR and Docker Hub login + run: | + DOCKERHUB_AUTH=$(echo -n "${{ secrets.NEON_DOCKERHUB_USERNAME }}:${{ secrets.NEON_DOCKERHUB_PASSWORD }}" | base64) + echo "::add-mask::${DOCKERHUB_AUTH}" + + cat <<-EOF > /kaniko/.docker/config.json + { + "auths": { + "https://index.docker.io/v1/": { + "auth": "${DOCKERHUB_AUTH}" + } + }, + "credHelpers": { + "369495373322.dkr.ecr.eu-central-1.amazonaws.com": "ecr-login" + } + } + EOF - name: Kaniko build compute tools - run: 
/kaniko/executor --reproducible --snapshotMode=redo --skip-unused-stages --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} + run: + /kaniko/executor --reproducible --snapshot-mode=redo --skip-unused-stages --cache=true + --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache + --context . + --build-arg GIT_VERSION=${{ github.sha }} + --dockerfile Dockerfile.compute-tools + --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} + --destination neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} + # Cleanup script fails otherwise - rm: cannot remove '/nvme/actions-runner/_work/_temp/_github_home/.ecr': Permission denied - name: Cleanup ECR folder run: rm -rf ~/.ecr compute-node-image: runs-on: [ self-hosted, gen3, large ] - container: gcr.io/kaniko-project/executor:v1.7.0-debug + container: gcr.io/kaniko-project/executor:v1.9.2-debug needs: [ tag ] strategy: fail-fast: false @@ -689,12 +745,36 @@ jobs: submodules: true fetch-depth: 0 - - name: Configure ECR login - run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json + - name: Configure ECR and Docker Hub login + run: | + DOCKERHUB_AUTH=$(echo -n "${{ secrets.NEON_DOCKERHUB_USERNAME }}:${{ secrets.NEON_DOCKERHUB_PASSWORD }}" | base64) + echo "::add-mask::${DOCKERHUB_AUTH}" + + cat <<-EOF > /kaniko/.docker/config.json + { + "auths": { + "https://index.docker.io/v1/": { + "auth": "${DOCKERHUB_AUTH}" + } + }, + "credHelpers": { + "369495373322.dkr.ecr.eu-central-1.amazonaws.com": "ecr-login" + } + } + EOF - name: Kaniko build compute node with extensions - run: /kaniko/executor --reproducible --snapshotMode=redo --skip-unused-stages --cache=true --cache-repo 
369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --build-arg PG_VERSION=${{ matrix.version }} --dockerfile Dockerfile.compute-node --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} + run: + /kaniko/executor --reproducible --snapshot-mode=redo --skip-unused-stages --cache=true + --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache + --context . + --build-arg GIT_VERSION=${{ github.sha }} + --build-arg PG_VERSION=${{ matrix.version }} + --dockerfile Dockerfile.compute-node + --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} + --destination neondatabase/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} + # Cleanup script fails otherwise - rm: cannot remove '/nvme/actions-runner/_work/_temp/_github_home/.ecr': Permission denied - name: Cleanup ECR folder run: rm -rf ~/.ecr @@ -786,13 +866,11 @@ jobs: runs-on: [ self-hosted, gen3, small ] needs: [ tag, test-images, vm-compute-node-image ] container: golang:1.19-bullseye - if: github.event_name != 'workflow_dispatch' + # Don't add if-condition here. 
+ # The job should always be run because we have dependant other jobs that shouldn't be skipped steps: - name: Install Crane & ECR helper - if: | - (github.ref_name == 'main' || github.ref_name == 'release') && - github.event_name != 'workflow_dispatch' run: | go install github.com/google/go-containerregistry/cmd/crane@31786c6cbb82d6ec4fb8eb79cd9387905130534e # v0.11.0 go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@69c85dc22db6511932bbf119e1a0cc5c90c69a7f # v0.6.0 @@ -802,10 +880,15 @@ jobs: mkdir /github/home/.docker/ echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json + - name: Copy vm-compute-node images to Docker Hub + run: | + crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} vm-compute-node-v14 + crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} vm-compute-node-v15 + - name: Add latest tag to images if: | (github.ref_name == 'main' || github.ref_name == 'release') && - github.event_name != 'workflow_dispatch' + github.event_name != 'workflow_dispatch' run: | crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} latest crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} latest @@ -814,50 +897,10 @@ jobs: crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} latest crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest - - name: Cleanup ECR folder - run: rm -rf ~/.ecr - - push-docker-hub: - runs-on: [ self-hosted, dev, x64 ] - needs: [ promote-images, tag ] - container: golang:1.19-bullseye - - steps: - - name: Install Crane & ECR helper - run: | - go install github.com/google/go-containerregistry/cmd/crane@31786c6cbb82d6ec4fb8eb79cd9387905130534e # v0.11.0 
- go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@69c85dc22db6511932bbf119e1a0cc5c90c69a7f # v0.6.0 - - - name: Configure ECR login - run: | - mkdir /github/home/.docker/ - echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json - - - name: Pull neon image from ECR - run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} neon - - - name: Pull compute tools image from ECR - run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} compute-tools - - - name: Pull compute node v14 image from ECR - run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} compute-node-v14 - - - name: Pull vm compute node v14 image from ECR - run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} vm-compute-node-v14 - - - name: Pull compute node v15 image from ECR - run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} compute-node-v15 - - - name: Pull vm compute node v15 image from ECR - run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} vm-compute-node-v15 - - - name: Pull rust image from ECR - run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned rust - - name: Push images to production ECR if: | (github.ref_name == 'main' || github.ref_name == 'release') && - github.event_name != 'workflow_dispatch' + github.event_name != 'workflow_dispatch' run: | crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/neon:latest crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} 
093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:latest @@ -872,28 +915,12 @@ jobs: echo "" > /github/home/.docker/config.json crane auth login -u ${{ secrets.NEON_DOCKERHUB_USERNAME }} -p ${{ secrets.NEON_DOCKERHUB_PASSWORD }} index.docker.io - - name: Push neon image to Docker Hub - run: crane push neon neondatabase/neon:${{needs.tag.outputs.build-tag}} + - name: Push vm-compute-node to Docker Hub + run: | + crane push vm-compute-node-v14 neondatabase/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} + crane push vm-compute-node-v15 neondatabase/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} - - name: Push compute tools image to Docker Hub - run: crane push compute-tools neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} - - - name: Push compute node v14 image to Docker Hub - run: crane push compute-node-v14 neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}} - - - name: Push vm compute node v14 image to Docker Hub - run: crane push vm-compute-node-v14 neondatabase/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} - - - name: Push compute node v15 image to Docker Hub - run: crane push compute-node-v15 neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}} - - - name: Push vm compute node v15 image to Docker Hub - run: crane push vm-compute-node-v15 neondatabase/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} - - - name: Push rust image to Docker Hub - run: crane push rust neondatabase/rust:pinned - - - name: Add latest tag to images in Docker Hub + - name: Push latest tags to Docker Hub if: | (github.ref_name == 'main' || github.ref_name == 'release') && github.event_name != 'workflow_dispatch' @@ -913,7 +940,7 @@ jobs: container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned # We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version. # If it notices a fresh storage it may bump the compute version. 
And if compute image failed to build it may break things badly - needs: [ push-docker-hub, tag, regress-tests ] + needs: [ promote-images, tag, regress-tests ] if: | contains(github.event.pull_request.labels.*.name, 'deploy-test-storage') && github.event_name != 'workflow_dispatch' @@ -947,7 +974,7 @@ jobs: deploy: runs-on: [ self-hosted, gen3, small ] container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest - needs: [ push-docker-hub, tag, regress-tests ] + needs: [ promote-images, tag, regress-tests ] if: ( github.ref_name == 'main' || github.ref_name == 'release' ) && github.event_name != 'workflow_dispatch' steps: - name: Fix git ownership @@ -984,7 +1011,7 @@ jobs: container: image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned options: --init - needs: [ push-docker-hub, tag, regress-tests ] + needs: [ promote-images, tag, regress-tests ] if: github.ref_name == 'release' && github.event_name != 'workflow_dispatch' steps: - name: Promote compatibility snapshot for the release @@ -1007,7 +1034,7 @@ jobs: S3_KEY=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${OLD_PREFIX} | jq -r '.Contents[].Key' | grep ${FILENAME} | sort --version-sort | tail -1 || true) if [ -z "${S3_KEY}" ]; then - echo 2>&1 "Neither s3://${BUCKET}/${OLD_PREFIX}/${FILENAME} nor its version from previous attempts exist" + echo >&2 "Neither s3://${BUCKET}/${OLD_PREFIX}/${FILENAME} nor its version from previous attempts exist" exit 1 fi diff --git a/.github/workflows/deploy-dev.yml b/.github/workflows/deploy-dev.yml index b080a29f7c..5d1c6e0e16 100644 --- a/.github/workflows/deploy-dev.yml +++ b/.github/workflows/deploy-dev.yml @@ -48,7 +48,8 @@ jobs: shell: bash strategy: matrix: - target_region: [ eu-west-1, us-east-2 ] + # TODO(sergey): Fix storage deploy in eu-central-1 + target_region: [ eu-west-1, us-east-2] environment: name: dev-${{ matrix.target_region }} steps: @@ -133,6 +134,53 @@ jobs: - name: Cleanup helm folder run: rm -rf ~/.cache + + 
deploy-preview-proxy-new: + runs-on: [ self-hosted, gen3, small ] + container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned + if: inputs.deployProxy + defaults: + run: + shell: bash + strategy: + matrix: + include: + - target_region: eu-central-1 + target_cluster: dev-eu-central-1-alpha + environment: + name: dev-${{ matrix.target_region }} + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + submodules: true + fetch-depth: 0 + ref: ${{ inputs.branch }} + + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v1-node16 + with: + role-to-assume: arn:aws:iam::369495373322:role/github-runner + aws-region: eu-central-1 + role-skip-session-tagging: true + role-duration-seconds: 1800 + + - name: Configure environment + run: | + helm repo add neondatabase https://neondatabase.github.io/helm-charts + aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }} + + - name: Re-deploy preview proxies + run: | + DOCKER_TAG=${{ inputs.dockerTag }} + for PREVIEW_NAME in helium argon krypton xenon radon oganesson hydrogen nitrogen oxygen fluorine chlorine; do + export PREVIEW_NAME + envsubst <.github/helm-values/preview-template.neon-proxy-scram.yaml >preview-${PREVIEW_NAME}.neon-proxy-scram.yaml + helm upgrade neon-proxy-scram-${PREVIEW_NAME} neondatabase/neon-proxy --namespace neon-proxy-${PREVIEW_NAME} --create-namespace --install --atomic -f preview-${PREVIEW_NAME}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s + done + + - name: Cleanup helm folder + run: rm -rf ~/.cache deploy-storage-broker-new: runs-on: [ self-hosted, gen3, small ] @@ -148,6 +196,8 @@ jobs: target_cluster: dev-us-east-2-beta - target_region: eu-west-1 target_cluster: dev-eu-west-1-zeta + - target_region: eu-central-1 + target_cluster: dev-eu-central-1-alpha environment: name: dev-${{ matrix.target_region }} steps: diff --git 
a/.github/workflows/deploy-prod.yml b/.github/workflows/deploy-prod.yml index 6096ac8ab9..9fa31b3225 100644 --- a/.github/workflows/deploy-prod.yml +++ b/.github/workflows/deploy-prod.yml @@ -49,7 +49,7 @@ jobs: shell: bash strategy: matrix: - target_region: [ us-east-2, us-west-2, eu-central-1, ap-southeast-1 ] + target_region: [ us-east-2, us-west-2, eu-central-1, ap-southeast-1, us-east-1 ] environment: name: prod-${{ matrix.target_region }} steps: @@ -97,6 +97,10 @@ jobs: target_cluster: prod-ap-southeast-1-epsilon deploy_link_proxy: false deploy_legacy_scram_proxy: false + - target_region: us-east-1 + target_cluster: prod-us-east-1-theta + deploy_link_proxy: false + deploy_legacy_scram_proxy: false environment: name: prod-${{ matrix.target_region }} steps: @@ -147,6 +151,8 @@ jobs: target_cluster: prod-eu-central-1-gamma - target_region: ap-southeast-1 target_cluster: prod-ap-southeast-1-epsilon + - target_region: us-east-1 + target_cluster: prod-us-east-1-theta environment: name: prod-${{ matrix.target_region }} steps: diff --git a/.github/workflows/neon_extra_builds.yml b/.github/workflows/neon_extra_builds.yml index ef4c293e31..1196881541 100644 --- a/.github/workflows/neon_extra_builds.yml +++ b/.github/workflows/neon_extra_builds.yml @@ -12,7 +12,7 @@ defaults: concurrency: # Allow only one workflow per any non-`main` branch. - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }} + group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }} cancel-in-progress: true env: diff --git a/.github/workflows/pg_clients.yml b/.github/workflows/pg_clients.yml index 9f57519589..224b7b4a6d 100644 --- a/.github/workflows/pg_clients.yml +++ b/.github/workflows/pg_clients.yml @@ -14,7 +14,7 @@ on: concurrency: # Allow only one workflow per any non-`main` branch. 
- group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }} + group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }} cancel-in-progress: true jobs: diff --git a/.neon_clippy_args b/.neon_clippy_args new file mode 100644 index 0000000000..25e09c61a6 --- /dev/null +++ b/.neon_clippy_args @@ -0,0 +1,4 @@ +# * `-A unknown_lints` – do not warn about unknown lint suppressions +# that people with newer toolchains might use +# * `-D warnings` - fail on any warnings (`cargo` returns non-zero exit status) +export CLIPPY_COMMON_ARGS="--locked --workspace --all-targets -- -A unknown_lints -D warnings" diff --git a/Cargo.lock b/Cargo.lock index 5b99e93e76..5f3a83ce2d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -64,28 +64,77 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] -name = "anyhow" -version = "1.0.68" +name = "anstream" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cb2f989d18dd141ab8ae82f64d1a8cdd37e0840f73a406896cf5e99502fab61" +checksum = "9e579a7752471abc2a8268df8b20005e3eadd975f585398f17efcfd8d4927371" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is-terminal", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41ed9a86bf92ae6580e0a31281f65a1b1d867c0cc68d5346e2ae128dddfa6a7d" + +[[package]] +name = "anstyle-parse" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e765fd216e48e067936442276d1d57399e37bce53c264d6fefbe298080cb57ee" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +dependencies = [ + "windows-sys 0.48.0", +] + +[[package]] +name = "anstyle-wincon" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bcd8291a340dd8ac70e18878bc4501dd7b4ff970cfa21c207d36ece51ea88fd" +dependencies = [ + "anstyle", + "windows-sys 0.48.0", +] + +[[package]] +name = "anyhow" +version = "1.0.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7de8ce5e0f9f8d88245311066a578d72b7af3e7088f32783804676302df237e4" dependencies = [ "backtrace", ] [[package]] name = "archery" -version = "0.4.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a8da9bc4c4053ee067669762bcaeea6e241841295a2b6c948312dad6ef4cc02" +checksum = "b6cd774058b1b415c4855d8b86436c04bf050c003156fe24bc326fb3fe75c343" dependencies = [ "static_assertions", ] [[package]] name = "asn1-rs" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf6690c370453db30743b373a60ba498fc0d6d83b11f4abfd87a84a075db5dd4" +checksum = "7f6fd5ddaf0351dff5b8da21b2fb4ff8e08ddd02857f0bf69c47639106c0fff0" dependencies = [ "asn1-rs-derive", "asn1-rs-impl", @@ -105,7 +154,7 @@ checksum = "726535892e8eae7e70657b4c8ea93d26b8553afb1ce617caee529ef96d7dee6c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", "synstructure", ] @@ -117,46 +166,47 @@ checksum = "2777730b2039ac0f95f093556e61b6d26cebed5393ca6f152717777cec3a42ed" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] name = "async-stream" -version = "0.3.3" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dad5c83079eae9969be7fadefe640a1c566901f05ff91ab221de4b6f68d9507e" +checksum = "cd56dd203fef61ac097dd65721a419ddccb106b2d2b70ba60a6b529f03961a51" dependencies = [ "async-stream-impl", "futures-core", + "pin-project-lite", 
] [[package]] name = "async-stream-impl" -version = "0.3.3" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10f203db73a71dfa2fb6dd22763990fa26f3d2625a6da2da900d23b87d26be27" +checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.15", ] [[package]] name = "async-trait" -version = "0.1.64" +version = "0.1.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd7fce9ba8c3c042128ce72d8b2ddbf3a05747efb67ea0313c635e10bda47a2" +checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.15", ] [[package]] name = "atomic-polyfill" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d299f547288d6db8d5c3a2916f7b2f66134b15b8c1ac1c4357dd3b8752af7bb2" +checksum = "c314e70d181aa6053b26e3f7fbf86d1dfff84f816a6175b967666b3506ef7289" dependencies = [ "critical-section", ] @@ -187,13 +237,13 @@ dependencies = [ "aws-http", "aws-sdk-sso", "aws-sdk-sts", - "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", - "aws-smithy-http-tower", + "aws-smithy-async 0.51.0", + "aws-smithy-client 0.51.0", + "aws-smithy-http 0.51.0", + "aws-smithy-http-tower 0.51.0", "aws-smithy-json", - "aws-smithy-types", - "aws-types", + "aws-smithy-types 0.51.0", + "aws-types 0.51.0", "bytes", "hex", "http", @@ -206,15 +256,29 @@ dependencies = [ "zeroize", ] +[[package]] +name = "aws-credential-types" +version = "0.55.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4232d3729eefc287adc0d5a8adc97b7d94eefffe6bbe94312cc86c7ab6b06ce" +dependencies = [ + "aws-smithy-async 0.55.1", + "aws-smithy-types 0.55.1", + "fastrand", + "tokio", + "tracing", + "zeroize", +] + [[package]] name = "aws-endpoint" version = "0.51.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "6ca8f374874f6459aaa88dc861d7f5d834ca1ff97668eae190e97266b5f6c3fb" dependencies = [ - "aws-smithy-http", - "aws-smithy-types", - "aws-types", + "aws-smithy-http 0.51.0", + "aws-smithy-types 0.51.0", + "aws-types 0.51.0", "http", "regex", "tracing", @@ -226,9 +290,9 @@ version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78d41e19e779b73463f5f0c21b3aacc995f4ba783ab13a7ae9f5dfb159a551b4" dependencies = [ - "aws-smithy-http", - "aws-smithy-types", - "aws-types", + "aws-smithy-http 0.51.0", + "aws-smithy-types 0.51.0", + "aws-types 0.51.0", "bytes", "http", "http-body", @@ -248,15 +312,15 @@ dependencies = [ "aws-http", "aws-sig-auth", "aws-sigv4", - "aws-smithy-async", + "aws-smithy-async 0.51.0", "aws-smithy-checksums", - "aws-smithy-client", + "aws-smithy-client 0.51.0", "aws-smithy-eventstream", - "aws-smithy-http", - "aws-smithy-http-tower", - "aws-smithy-types", + "aws-smithy-http 0.51.0", + "aws-smithy-http-tower 0.51.0", + "aws-smithy-types 0.51.0", "aws-smithy-xml", - "aws-types", + "aws-types 0.51.0", "bytes", "bytes-utils", "http", @@ -275,13 +339,13 @@ dependencies = [ "aws-endpoint", "aws-http", "aws-sig-auth", - "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", - "aws-smithy-http-tower", + "aws-smithy-async 0.51.0", + "aws-smithy-client 0.51.0", + "aws-smithy-http 0.51.0", + "aws-smithy-http-tower 0.51.0", "aws-smithy-json", - "aws-smithy-types", - "aws-types", + "aws-smithy-types 0.51.0", + "aws-types 0.51.0", "bytes", "http", "tokio-stream", @@ -297,14 +361,14 @@ dependencies = [ "aws-endpoint", "aws-http", "aws-sig-auth", - "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", - "aws-smithy-http-tower", + "aws-smithy-async 0.51.0", + "aws-smithy-client 0.51.0", + "aws-smithy-http 0.51.0", + "aws-smithy-http-tower 0.51.0", "aws-smithy-query", - "aws-smithy-types", + "aws-smithy-types 0.51.0", "aws-smithy-xml", - 
"aws-types", + "aws-types 0.51.0", "bytes", "http", "tower", @@ -318,20 +382,20 @@ checksum = "12cbe7b2be9e185c1fbce27fc9c41c66b195b32d89aa099f98768d9544221308" dependencies = [ "aws-sigv4", "aws-smithy-eventstream", - "aws-smithy-http", - "aws-types", + "aws-smithy-http 0.51.0", + "aws-types 0.51.0", "http", "tracing", ] [[package]] name = "aws-sigv4" -version = "0.51.0" +version = "0.51.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03ff4cff8c4a101962d593ba94e72cd83891aecd423f0c6e3146bff6fb92c9e3" +checksum = "5c0b2658d2cb66dbf02f0e8dee80810ef1e0ca3530ede463e0ef994c301087d1" dependencies = [ "aws-smithy-eventstream", - "aws-smithy-http", + "aws-smithy-http 0.51.0", "bytes", "form_urlencoded", "hex", @@ -356,14 +420,26 @@ dependencies = [ "tokio-stream", ] +[[package]] +name = "aws-smithy-async" +version = "0.55.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88573bcfbe1dcfd54d4912846df028b42d6255cbf9ce07be216b1bbfd11fc4b9" +dependencies = [ + "futures-util", + "pin-project-lite", + "tokio", + "tokio-stream", +] + [[package]] name = "aws-smithy-checksums" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc227e36e346f45298288359f37123e1a92628d1cec6b11b5eb335553278bd9e" dependencies = [ - "aws-smithy-http", - "aws-smithy-types", + "aws-smithy-http 0.51.0", + "aws-smithy-types 0.51.0", "bytes", "crc32c", "crc32fast", @@ -383,10 +459,10 @@ version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff28d553714f8f54cd921227934fc13a536a1c03f106e56b362fd57e16d450ad" dependencies = [ - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-http-tower", - "aws-smithy-types", + "aws-smithy-async 0.51.0", + "aws-smithy-http 0.51.0", + "aws-smithy-http-tower 0.51.0", + "aws-smithy-types 0.51.0", "bytes", "fastrand", "http", @@ -400,13 +476,33 @@ dependencies = [ "tracing", ] +[[package]] +name = "aws-smithy-client" +version = 
"0.55.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2f52352bae50d3337d5d6151b695d31a8c10ebea113eca5bead531f8301b067" +dependencies = [ + "aws-smithy-async 0.55.1", + "aws-smithy-http 0.55.1", + "aws-smithy-http-tower 0.55.1", + "aws-smithy-types 0.55.1", + "bytes", + "fastrand", + "http", + "http-body", + "pin-project-lite", + "tokio", + "tower", + "tracing", +] + [[package]] name = "aws-smithy-eventstream" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7ea0df7161ce65b5c8ca6eb709a1a907376fa18226976e41c748ce02ccccf24" dependencies = [ - "aws-smithy-types", + "aws-smithy-types 0.51.0", "bytes", "crc32fast", ] @@ -418,7 +514,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf58ed4fefa61dbf038e5421a521cbc2c448ef69deff0ab1d915d8a10eda5664" dependencies = [ "aws-smithy-eventstream", - "aws-smithy-types", + "aws-smithy-types 0.51.0", "bytes", "bytes-utils", "futures-core", @@ -434,13 +530,49 @@ dependencies = [ "tracing", ] +[[package]] +name = "aws-smithy-http" +version = "0.55.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03bcc02d7ed9649d855c8ce4a735e9848d7b8f7568aad0504c158e3baa955df8" +dependencies = [ + "aws-smithy-types 0.55.1", + "bytes", + "bytes-utils", + "futures-core", + "http", + "http-body", + "hyper", + "once_cell", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] + [[package]] name = "aws-smithy-http-tower" version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20c96d7bd35e7cf96aca1134b2f81b1b59ffe493f7c6539c051791cbbf7a42d3" dependencies = [ - "aws-smithy-http", + "aws-smithy-http 0.51.0", + "bytes", + "http", + "http-body", + "pin-project-lite", + "tower", + "tracing", +] + +[[package]] +name = "aws-smithy-http-tower" +version = "0.55.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"da88b3a860f65505996c29192d800f1aeb9480440f56d63aad33a3c12045017a" +dependencies = [ + "aws-smithy-http 0.55.1", + "aws-smithy-types 0.55.1", "bytes", "http", "http-body", @@ -455,7 +587,7 @@ version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8324ba98c8a94187723cc16c37aefa09504646ee65c3d2c3af495bab5ea701b" dependencies = [ - "aws-smithy-types", + "aws-smithy-types 0.51.0", ] [[package]] @@ -464,7 +596,7 @@ version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83834ed2ff69ea6f6657baf205267dc2c0abe940703503a3e5d60ce23be3d306" dependencies = [ - "aws-smithy-types", + "aws-smithy-types 0.51.0", "urlencoding", ] @@ -480,6 +612,19 @@ dependencies = [ "time", ] +[[package]] +name = "aws-smithy-types" +version = "0.55.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd0afc731fd1417d791f9145a1e0c30e23ae0beaab9b4814017708ead2fc20f1" +dependencies = [ + "base64-simd", + "itoa", + "num-integer", + "ryu", + "time", +] + [[package]] name = "aws-smithy-xml" version = "0.51.0" @@ -495,10 +640,10 @@ version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05701d32da168b44f7ee63147781aed8723e792cc131cb9b18363b5393f17f70" dependencies = [ - "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", - "aws-smithy-types", + "aws-smithy-async 0.51.0", + "aws-smithy-client 0.51.0", + "aws-smithy-http 0.51.0", + "aws-smithy-types 0.51.0", "http", "rustc_version", "tracing", @@ -506,10 +651,26 @@ dependencies = [ ] [[package]] -name = "axum" -version = "0.6.4" +name = "aws-types" +version = "0.55.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5694b64066a2459918d8074c2ce0d5a88f409431994c2356617c8ae0c4721fc" +checksum = "b9b082e329d9a304d39e193ad5c7ab363a0d6507aca6965e0673a746686fb0cc" +dependencies = [ + "aws-credential-types", + "aws-smithy-async 0.55.1", + "aws-smithy-client 0.55.1", + 
"aws-smithy-http 0.55.1", + "aws-smithy-types 0.55.1", + "http", + "rustc_version", + "tracing", +] + +[[package]] +name = "axum" +version = "0.6.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b32c5ea3aabaf4deb5f5ced2d688ec0844c881c9e6c696a8b769a05fc691e62" dependencies = [ "async-trait", "axum-core", @@ -529,16 +690,15 @@ dependencies = [ "serde", "sync_wrapper", "tower", - "tower-http", "tower-layer", "tower-service", ] [[package]] name = "axum-core" -version = "0.3.2" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cae3e661676ffbacb30f1a824089a8c9150e71017f7e1e38f2aa32009188d34" +checksum = "759fa577a247914fd3f7f76d62972792636412fbfd634cd452f6a385a74d2d2c" dependencies = [ "async-trait", "bytes", @@ -584,6 +744,16 @@ version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" +[[package]] +name = "base64-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" +dependencies = [ + "outref", + "vsimd", +] + [[package]] name = "bincode" version = "1.3.3" @@ -595,9 +765,9 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.61.0" +version = "0.65.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a022e58a142a46fea340d68012b9201c094e93ec3d033a944a24f8fd4a4f09a" +checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" dependencies = [ "bitflags", "cexpr", @@ -606,12 +776,13 @@ dependencies = [ "lazycell", "log", "peeking_take_while", + "prettyplease 0.2.4", "proc-macro2", "quote", "regex", "rustc-hash", "shlex", - "syn", + "syn 2.0.15", "which", ] @@ -623,18 +794,18 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "block-buffer" -version = "0.10.3" +version = 
"0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cce20737498f97b993470a6e536b8523f0af7892a4f928cceb1ac5e52ebe7e" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" dependencies = [ "generic-array", ] [[package]] name = "bstr" -version = "1.2.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7f0778972c64420fdedc63f09919c8a88bda7b25135357fd25a5d9f3257e832" +checksum = "c3d4260bcc2e8fc9df1eac4919a720effeb63a3f0952f5bf4944adfa18897f09" dependencies = [ "memchr", "once_cell", @@ -702,9 +873,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.23" +version = "0.4.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f" +checksum = "4e3c5919066adf22df73762e50cffcde3a758f2a848b113b586d1f86728b673b" dependencies = [ "iana-time-zone", "num-integer", @@ -742,9 +913,9 @@ dependencies = [ [[package]] name = "clang-sys" -version = "1.4.0" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa2e27ae6ab525c3d369ded447057bca5438d86dc3a68f6faafb8269ba82ebf3" +checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f" dependencies = [ "glob", "libc", @@ -765,30 +936,38 @@ dependencies = [ [[package]] name = "clap" -version = "4.1.4" +version = "4.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f13b9c79b5d1dd500d20ef541215a6423c75829ef43117e1b4d17fd8af0b5d76" +checksum = "9b802d85aaf3a1cdb02b224ba472ebdea62014fccfcb269b95a4d76443b5ee5a" dependencies = [ - "bitflags", + "clap_builder", "clap_derive", - "clap_lex 0.3.1", - "is-terminal", "once_cell", +] + +[[package]] +name = "clap_builder" +version = "4.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"14a1a858f532119338887a4b8e1af9c60de8249cd7bafd68036a489e261e37b6" +dependencies = [ + "anstream", + "anstyle", + "bitflags", + "clap_lex 0.4.1", "strsim", - "termcolor", ] [[package]] name = "clap_derive" -version = "4.1.0" +version = "4.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "684a277d672e91966334af371f1a7b5833f9aa00b07c84e92fbce95e00208ce8" +checksum = "3f9644cd56d6b87dbe899ef8b053e331c0637664e9e21a33dfcdc36093f5c5c4" dependencies = [ "heck", - "proc-macro-error", "proc-macro2", "quote", - "syn", + "syn 2.0.15", ] [[package]] @@ -802,12 +981,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.3.1" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "783fe232adfca04f90f56201b26d79682d4cd2625e0bc7290b95123afe558ade" -dependencies = [ - "os_str_bytes", -] +checksum = "8a2dd5a6fe8c6e3502f568a6353e5273bbb15193ad9a89e457b9970798efbea1" [[package]] name = "close_fds" @@ -829,6 +1005,12 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + [[package]] name = "comfy-table" version = "6.1.4" @@ -859,7 +1041,7 @@ version = "0.1.0" dependencies = [ "anyhow", "chrono", - "clap 4.1.4", + "clap 4.2.2", "compute_api", "futures", "hyper", @@ -879,6 +1061,7 @@ dependencies = [ "tracing-subscriber", "tracing-utils", "url", + "utils", "workspace_hack", ] @@ -920,7 +1103,7 @@ name = "control_plane" version = "0.1.0" dependencies = [ "anyhow", - "clap 4.1.4", + "clap 4.2.2", "comfy-table", "git-version", "nix", @@ -956,15 +1139,15 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" +checksum = 
"e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "cpufeatures" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d997bd5e24a5928dd43e46dc529867e207907fe0b239c3477d924f7f2ca320" +checksum = "280a9f2d8b3a38871a3c8a46fb80db65e5e5ed97da80c4d08bf27fb63e35e181" dependencies = [ "libc", ] @@ -1031,9 +1214,9 @@ checksum = "6548a0ad5d2549e111e1f6a11a6c2e2d00ce6a3dafe22948d67c2b443f775e52" [[package]] name = "crossbeam-channel" -version = "0.5.6" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" +checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" dependencies = [ "cfg-if", "crossbeam-utils", @@ -1041,9 +1224,9 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" +checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" dependencies = [ "cfg-if", "crossbeam-epoch", @@ -1052,22 +1235,22 @@ dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.13" +version = "0.9.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a" +checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695" dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", - "memoffset 0.7.1", + "memoffset 0.8.0", "scopeguard", ] [[package]] name = "crossbeam-utils" -version = "0.8.14" +version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f" +checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b" dependencies = [ "cfg-if", 
] @@ -1109,9 +1292,9 @@ dependencies = [ [[package]] name = "cxx" -version = "1.0.89" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc831ee6a32dd495436e317595e639a587aa9907bef96fe6e6abc290ab6204e9" +checksum = "f61f1b6389c3fe1c316bf8a4dccc90a38208354b330925bce1f74a6c4756eb93" dependencies = [ "cc", "cxxbridge-flags", @@ -1121,9 +1304,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.89" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94331d54f1b1a8895cd81049f7eaaaef9d05a7dcb4d1fd08bf3ff0806246789d" +checksum = "12cee708e8962df2aeb38f594aae5d827c022b6460ac71a7a3e2c3c2aae5a07b" dependencies = [ "cc", "codespan-reporting", @@ -1131,31 +1314,31 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn", + "syn 2.0.15", ] [[package]] name = "cxxbridge-flags" -version = "1.0.89" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48dcd35ba14ca9b40d6e4b4b39961f23d835dbb8eed74565ded361d93e1feb8a" +checksum = "7944172ae7e4068c533afbb984114a56c46e9ccddda550499caa222902c7f7bb" [[package]] name = "cxxbridge-macro" -version = "1.0.89" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81bbeb29798b407ccd82a3324ade1a7286e0d29851475990b612670f6f5124d2" +checksum = "2345488264226bf682893e25de0769f3360aac9957980ec49361b083ddaa5bc5" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.15", ] [[package]] name = "darling" -version = "0.14.2" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0dd3cd20dc6b5a876612a6e5accfe7f3dd883db6d07acfbf14c128f61550dfa" +checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" dependencies = [ "darling_core", "darling_macro", @@ -1163,27 +1346,27 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.14.2" +version = "0.14.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a784d2ccaf7c98501746bf0be29b2022ba41fd62a2e622af997a03e9f972859f" +checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" dependencies = [ "fnv", "ident_case", "proc-macro2", "quote", "strsim", - "syn", + "syn 1.0.109", ] [[package]] name = "darling_macro" -version = "0.14.2" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7618812407e9402654622dd402b0a89dff9ba93badd6540781526117b92aab7e" +checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" dependencies = [ "darling_core", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1217,9 +1400,9 @@ dependencies = [ [[package]] name = "der-parser" -version = "8.1.0" +version = "8.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42d4bc9b0db0a0df9ae64634ac5bdefb7afcb534e182275ca0beadbe486701c1" +checksum = "dbd676fbbab537128ef0278adb5576cf363cff6aa22a7b24effe97347cfab61e" dependencies = [ "asn1-rs", "displaydoc", @@ -1248,7 +1431,7 @@ checksum = "3bf95dc3f046b9da4f2d51833c0d3547d8564ef6910f5c1ed130306a75b92886" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1268,9 +1451,9 @@ dependencies = [ [[package]] name = "enum-map" -version = "2.4.2" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50c25992259941eb7e57b936157961b217a4fc8597829ddef0596d6c3cd86e1a" +checksum = "988f0d17a0fa38291e5f41f71ea8d46a5d5497b9054d5a759fae2cbb819f2356" dependencies = [ "enum-map-derive", ] @@ -1283,7 +1466,7 @@ checksum = "2a4da76b3b6116d758c7ba93f7ec6a35d2e2cf24feda76c6e38a375f4d5c59f2" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1304,7 +1487,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1322,13 +1505,13 @@ dependencies = [ [[package]] name = "errno" -version = "0.2.8" 
+version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" dependencies = [ "errno-dragonfly", "libc", - "winapi", + "windows-sys 0.48.0", ] [[package]] @@ -1360,23 +1543,23 @@ checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" [[package]] name = "fastrand" -version = "1.8.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499" +checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" dependencies = [ "instant", ] [[package]] name = "filetime" -version = "0.2.19" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e884668cd0c7480504233e951174ddc3b382f7c2666e3b7310b5c4e7b0c37f9" +checksum = "5cbc844cecaee9d4443931972e1289c8ff485cb4cc2767cb03ca139ed6885153" dependencies = [ "cfg-if", "libc", - "redox_syscall", - "windows-sys 0.42.0", + "redox_syscall 0.2.16", + "windows-sys 0.48.0", ] [[package]] @@ -1421,9 +1604,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13e2792b0ff0340399d58445b88fd9770e3489eff258a4cbc1523418f12abf84" +checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" dependencies = [ "futures-channel", "futures-core", @@ -1436,9 +1619,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e5317663a9089767a1ec00a487df42e0ca174b61b4483213ac24448e4664df5" +checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" dependencies = [ "futures-core", "futures-sink", @@ -1446,15 +1629,15 @@ 
dependencies = [ [[package]] name = "futures-core" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec90ff4d0fe1f57d600049061dc6bb68ed03c7d2fbd697274c41805dcb3f8608" +checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" [[package]] name = "futures-executor" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8de0a35a6ab97ec8869e32a2473f4b1324459e14c29275d14b10cb1fd19b50e" +checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" dependencies = [ "futures-core", "futures-task", @@ -1463,32 +1646,32 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfb8371b6fb2aeb2d280374607aeabfc99d95c72edfe51692e42d3d7f0d08531" +checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" [[package]] name = "futures-macro" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95a73af87da33b5acf53acfebdc339fe592ecf5357ac7c0a7734ab9d8c876a70" +checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.15", ] [[package]] name = "futures-sink" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f310820bb3e8cfd46c80db4d7fb8353e15dfff853a127158425f31e0be6c8364" +checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" [[package]] name = "futures-task" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf79a1bf610b10f42aea489289c5a2c478a786509693b80cd39c44ccd936366" +checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" [[package]] name = "futures-timer" @@ 
-1498,9 +1681,9 @@ checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c" [[package]] name = "futures-util" -version = "0.3.26" +version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c1d6de3acfef38d2be4b1f543f553131788603495be83da675e180c8d6b7bd1" +checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" dependencies = [ "futures-channel", "futures-core", @@ -1516,9 +1699,9 @@ dependencies = [ [[package]] name = "generic-array" -version = "0.14.6" +version = "0.14.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", @@ -1526,20 +1709,22 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi", + "wasm-bindgen", ] [[package]] name = "gimli" -version = "0.27.1" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "221996f774192f0f718773def8201c4ae31f02616a54ccfc2d358bb0e5cefdec" +checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4" [[package]] name = "git-version" @@ -1560,7 +1745,7 @@ dependencies = [ "proc-macro-hack", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1571,9 +1756,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "h2" -version = "0.3.15" +version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f9f29bc9dda355256b2916cf526ab02ce0aeaaaf2bad60d65ef3f12f11dd0f4" +checksum 
= "17f8a914c2987b688368b5138aa05321db91f4090cf26118185672ad588bce21" dependencies = [ "bytes", "fnv", @@ -1638,7 +1823,7 @@ dependencies = [ "atomic-polyfill", "hash32", "rustc_version", - "spin 0.9.4", + "spin 0.9.8", "stable_deref_trait", ] @@ -1666,6 +1851,12 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" + [[package]] name = "hex" version = "0.4.3" @@ -1677,9 +1868,9 @@ dependencies = [ [[package]] name = "hex-literal" -version = "0.3.4" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ebdb29d2ea9ed0083cd8cece49bbd968021bd99b0849edb4a9a7ee0fdf6a4e0" +checksum = "6fe2267d4ed49bc07b63801559be28c718ea06c4738b7a03c94df7386d2cde46" [[package]] name = "hmac" @@ -1703,9 +1894,9 @@ dependencies = [ [[package]] name = "http" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399" +checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" dependencies = [ "bytes", "fnv", @@ -1723,12 +1914,6 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "http-range-header" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bfe8eed0a9285ef776bb792479ea3834e8b94e13d615c2f66d03dd50a435a29" - [[package]] name = "httparse" version = "1.8.0" @@ -1759,9 +1944,9 @@ dependencies = [ [[package]] name = "hyper" -version = "0.14.23" +version = "0.14.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "034711faac9d2166cb1baf1a2fb0b60b1f277f8492fd72176c17f3515e1abd3c" +checksum = "ab302d72a6f11a3b910431ff93aae7e773078c769f0a3ef15fb9ec692ed147d4" dependencies = [ "bytes", "futures-channel", @@ -1774,7 +1959,7 @@ dependencies = [ "httpdate", 
"itoa", "pin-project-lite", - "socket2", + "socket2 0.4.9", "tokio", "tower-service", "tracing", @@ -1790,10 +1975,10 @@ dependencies = [ "http", "hyper", "log", - "rustls", + "rustls 0.20.8", "rustls-native-certs", "tokio", - "tokio-rustls", + "tokio-rustls 0.23.4", ] [[package]] @@ -1823,16 +2008,16 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.53" +version = "0.1.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64c122667b287044802d6ce17ee2ddf13207ed924c712de9a66a5814d5b64765" +checksum = "0722cd7114b7de04316e7ea5456a0bbb20e4adb46fd27a3697adb812cff0f37c" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", "wasm-bindgen", - "winapi", + "windows", ] [[package]] @@ -1863,9 +2048,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "1.9.2" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", "hashbrown 0.12.3", @@ -1903,30 +2088,31 @@ dependencies = [ [[package]] name = "io-lifetimes" -version = "1.0.4" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7d6c6f8c91b4b9ed43484ad1a938e393caf35960fce7f82a040497207bd8e9e" +checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220" dependencies = [ + "hermit-abi 0.3.1", "libc", - "windows-sys 0.42.0", + "windows-sys 0.48.0", ] [[package]] name = "ipnet" -version = "2.7.1" +version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30e22bd8629359895450b59ea7a776c850561b96a3b1d31321c1949d9e6c9146" +checksum = "12b6ee2129af8d4fb011108c73d99a1b83a85977f23b82460c0ae2e25bb4b57f" [[package]] name = "is-terminal" -version = "0.4.2" +version = "0.4.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "28dfb6c8100ccc63462345b67d1bbc3679177c75ee4bf59bf29c8b1d110b8189" +checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" dependencies = [ - "hermit-abi 0.2.6", + "hermit-abi 0.3.1", "io-lifetimes", - "rustix", - "windows-sys 0.42.0", + "rustix 0.37.11", + "windows-sys 0.48.0", ] [[package]] @@ -1940,9 +2126,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440" +checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" [[package]] name = "js-sys" @@ -1955,11 +2141,11 @@ dependencies = [ [[package]] name = "jsonwebtoken" -version = "8.2.0" +version = "8.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09f4f04699947111ec1733e71778d763555737579e44b85844cae8e1940a1828" +checksum = "6971da4d9c3aa03c3d8f3ff0f4155b534aad021292003895a469716b2a230378" dependencies = [ - "base64 0.13.1", + "base64 0.21.0", "pem", "ring", "serde", @@ -2001,9 +2187,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.139" +version = "0.2.141" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" +checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5" [[package]] name = "libloading" @@ -2030,6 +2216,12 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" +[[package]] +name = "linux-raw-sys" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d59d8c75012853d2e872fb56bc8a2e53718e2cafe1a4c823143141c6d90c322f" + [[package]] name = "lock_api" 
version = "0.4.9" @@ -2122,9 +2314,9 @@ dependencies = [ [[package]] name = "mime" -version = "0.3.16" +version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "mime_guess" @@ -2144,23 +2336,23 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.6.4" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2e212582ede878b109755efd0773a4f0f4ec851584cf0aefbeb4d9ecc114822" +checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa" dependencies = [ "adler", ] [[package]] name = "mio" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de" +checksum = "5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9" dependencies = [ "libc", "log", "wasi", - "windows-sys 0.42.0", + "windows-sys 0.45.0", ] [[package]] @@ -2193,15 +2385,6 @@ dependencies = [ "minimal-lexical", ] -[[package]] -name = "nom8" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae01545c9c7fc4486ab7debaf2aad7003ac19431791868fb2e8066df97fad2f8" -dependencies = [ - "memchr", -] - [[package]] name = "notify" version = "5.1.0" @@ -2290,9 +2473,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.17.0" +version = "1.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66" +checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" [[package]] name = "oorandom" @@ -2357,8 +2540,8 @@ dependencies = [ "futures-util", "opentelemetry", "prost", - "tonic", - "tonic-build", + 
"tonic 0.8.3", + "tonic-build 0.8.4", ] [[package]] @@ -2410,9 +2593,9 @@ dependencies = [ [[package]] name = "os_info" -version = "3.6.0" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c424bc68d15e0778838ac013b5b3449544d8133633d8016319e7e05a820b8c0" +checksum = "006e42d5b888366f1880eda20371fedde764ed2213dc8496f49622fa0c99cd5e" dependencies = [ "log", "serde", @@ -2421,9 +2604,15 @@ dependencies = [ [[package]] name = "os_str_bytes" -version = "6.4.1" +version = "6.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee" +checksum = "ceedf44fb00f2d1984b0bc98102627ce622e083e49a5bacdb3e514fa4238e267" + +[[package]] +name = "outref" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" [[package]] name = "overload" @@ -2441,7 +2630,7 @@ dependencies = [ "byteorder", "bytes", "chrono", - "clap 4.1.4", + "clap 4.2.2", "close_fds", "const_format", "consumption_metrics", @@ -2492,6 +2681,7 @@ dependencies = [ "tenant_size_model", "thiserror", "tokio", + "tokio-io-timeout", "tokio-postgres", "tokio-tar", "tokio-util", @@ -2516,6 +2706,8 @@ dependencies = [ "serde", "serde_json", "serde_with", + "strum", + "strum_macros", "utils", "workspace_hack", ] @@ -2538,7 +2730,7 @@ checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521" dependencies = [ "cfg-if", "libc", - "redox_syscall", + "redox_syscall 0.2.16", "smallvec", "windows-sys 0.45.0", ] @@ -2566,9 +2758,9 @@ checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" [[package]] name = "petgraph" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d5014253a1331579ce62aa67443b4a658c5e7dd03d4bc6d302b94474888143" +checksum = 
"4dd7d28ee937e54fe3080c91faa1c3a46c06de6252988a7f4592ba2310ef22a4" dependencies = [ "fixedbitset", "indexmap", @@ -2609,7 +2801,7 @@ checksum = "069bdb1e05adc7a8990dce9cc75370895fbe4e3d58b9b73bf1aee56359344a55" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -2703,14 +2895,14 @@ dependencies = [ "futures", "once_cell", "pq_proto", - "rustls", + "rustls 0.20.8", "rustls-pemfile", "serde", "thiserror", "tokio", "tokio-postgres", "tokio-postgres-rustls", - "tokio-rustls", + "tokio-rustls 0.23.4", "tracing", "workspace_hack", ] @@ -2767,7 +2959,6 @@ dependencies = [ "pin-project-lite", "postgres-protocol", "rand", - "serde", "thiserror", "tokio", "tracing", @@ -2776,36 +2967,22 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.1.23" +version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e97e3215779627f01ee256d2fad52f3d95e8e1c11e9fc6fd08f7cd455d5d5c78" +checksum = "6c8646e95016a7a6c4adea95bafa8a16baab64b583356217f2c85db4a39d9a86" dependencies = [ "proc-macro2", - "syn", + "syn 1.0.109", ] [[package]] -name = "proc-macro-error" -version = "1.0.4" +name = "prettyplease" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" -dependencies = [ - "proc-macro-error-attr", - "proc-macro2", - "quote", - "syn", - "version_check", -] - -[[package]] -name = "proc-macro-error-attr" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +checksum = "1ceca8aaf45b5c46ec7ed39fff75f57290368c1846d33d24a122ca81416ab058" dependencies = [ "proc-macro2", - "quote", - "version_check", + "syn 2.0.15", ] [[package]] @@ -2816,9 +2993,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068" [[package]] name = "proc-macro2" -version = "1.0.50" +version = "1.0.56" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ef7d57beacfaf2d8aee5937dab7b7f28de3cb8b1828479bb5de2a7106f2bae2" +checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" dependencies = [ "unicode-ident", ] @@ -2833,7 +3010,7 @@ dependencies = [ "byteorder", "hex", "lazy_static", - "rustix", + "rustix 0.36.12", ] [[package]] @@ -2854,9 +3031,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.11.6" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21dc42e00223fc37204bd4aa177e69420c604ca4a183209a8f9de30c6d934698" +checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd" dependencies = [ "bytes", "prost-derive", @@ -2864,9 +3041,9 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.11.6" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f8ad728fb08fe212df3c05169e940fbb6d9d16a877ddde14644a983ba2012e" +checksum = "119533552c9a7ffacc21e099c24a0ac8bb19c2a2a3f363de84cd9b844feab270" dependencies = [ "bytes", "heck", @@ -2875,35 +3052,34 @@ dependencies = [ "log", "multimap", "petgraph", - "prettyplease", + "prettyplease 0.1.25", "prost", "prost-types", "regex", - "syn", + "syn 1.0.109", "tempfile", "which", ] [[package]] name = "prost-derive" -version = "0.11.6" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bda8c0881ea9f722eb9629376db3d0b903b462477c1aafcb0566610ac28ac5d" +checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4" dependencies = [ "anyhow", "itertools", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] name = "prost-types" -version = "0.11.6" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e0526209433e96d83d750dd81a99118edbc55739e7e61a46764fd2ad537788" +checksum = 
"213622a1460818959ac1181aaeb2dc9c7f63df720db7d788b3e24eacd1983e13" dependencies = [ - "bytes", "prost", ] @@ -2918,7 +3094,7 @@ dependencies = [ "bstr", "bytes", "chrono", - "clap 4.1.4", + "clap 4.2.2", "consumption_metrics", "futures", "git-version", @@ -2948,20 +3124,21 @@ dependencies = [ "reqwest-tracing", "routerify", "rstest", - "rustls", + "rustls 0.20.8", "rustls-pemfile", "scopeguard", "serde", "serde_json", "sha2", - "socket2", + "socket2 0.5.2", "sync_wrapper", "thiserror", "tls-listener", "tokio", "tokio-postgres", "tokio-postgres-rustls", - "tokio-rustls", + "tokio-rustls 0.23.4", + "tokio-util", "tracing", "tracing-opentelemetry", "tracing-subscriber", @@ -2969,16 +3146,16 @@ dependencies = [ "url", "utils", "uuid", - "webpki-roots", + "webpki-roots 0.23.0", "workspace_hack", "x509-parser", ] [[package]] name = "quote" -version = "1.0.23" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" dependencies = [ "proc-macro2", ] @@ -3015,9 +3192,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.6.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7" +checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" dependencies = [ "either", "rayon-core", @@ -3025,9 +3202,9 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.10.2" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "356a0625f1954f730c0201cdab48611198dc6ce21f4acff55089b5a78e6e835b" +checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" dependencies = [ "crossbeam-channel", "crossbeam-deque", @@ -3057,10 +3234,19 @@ dependencies = [ ] [[package]] -name = "regex" -version = "1.7.1" +name = 
"redox_syscall" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" dependencies = [ "aho-corasick", "memchr", @@ -3078,9 +3264,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.28" +version = "0.6.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "remote_storage" @@ -3090,8 +3276,8 @@ dependencies = [ "async-trait", "aws-config", "aws-sdk-s3", - "aws-smithy-http", - "aws-types", + "aws-smithy-http 0.51.0", + "aws-types 0.55.1", "hyper", "metrics", "once_cell", @@ -3110,9 +3296,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.14" +version = "0.11.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21eed90ec8570952d53b772ecf8f206aa1ec9a3d76b2521c56c42973f2d91ee9" +checksum = "27b71749df584b7f4cac2c426c127a7c785a5106cc98f7a8feb044115f0fa254" dependencies = [ "base64 0.21.0", "bytes", @@ -3132,27 +3318,27 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls", + "rustls 0.20.8", "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", "tokio", - "tokio-rustls", + "tokio-rustls 0.23.4", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "webpki-roots", + "webpki-roots 0.22.6", "winreg", ] [[package]] name = "reqwest-middleware" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "4a1c03e9011a8c59716ad13115550469e081e2e9892656b0ba6a47c907921894" +checksum = "99c50db2c7ccd815f976473dd7d0bde296f8c3b77c383acf4fc021cdcf10852b" dependencies = [ "anyhow", "async-trait", @@ -3165,11 +3351,12 @@ dependencies = [ [[package]] name = "reqwest-tracing" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b739d87a6b2cf4743968ad2b4cef648fbe0204c19999509824425babb2097bce" +checksum = "8a71d77945a1c5ae9604f0504901e77a1e2e71f2932b1cb8103078179ca62ff8" dependencies = [ "async-trait", + "getrandom", "opentelemetry", "reqwest", "reqwest-middleware", @@ -3208,18 +3395,18 @@ dependencies = [ [[package]] name = "rpds" -version = "0.12.0" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66262ea963eff99163e6b741fbc3417a52cc13074728c1047e9911789df9b000" +checksum = "9bd6ce569b15c331b1e5fd8cf6adb0bf240678b5f0cdc4d0f41e11683f6feba9" dependencies = [ "archery", ] [[package]] name = "rstest" -version = "0.16.0" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b07f2d176c472198ec1e6551dc7da28f1c089652f66a7b722676c2238ebc0edf" +checksum = "de1bb486a691878cd320c2f0d319ba91eeaa2e894066d8b5f8f117c000e9d962" dependencies = [ "futures", "futures-timer", @@ -3229,23 +3416,23 @@ dependencies = [ [[package]] name = "rstest_macros" -version = "0.16.0" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7229b505ae0706e64f37ffc54a9c163e11022a6636d58fe1f3f52018257ff9f7" +checksum = "290ca1a1c8ca7edb7c3283bd44dc35dd54fdec6253a3912e201ba1072018fca8" dependencies = [ "cfg-if", "proc-macro2", "quote", "rustc_version", - "syn", + "syn 1.0.109", "unicode-ident", ] [[package]] name = "rustc-demangle" -version = "0.1.21" +version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" +checksum = "d4a36c42d1873f9a77c53bde094f9664d9891bc604a45b4798fd2c389ed12e5b" [[package]] name = "rustc-hash" @@ -3273,16 +3460,30 @@ dependencies = [ [[package]] name = "rustix" -version = "0.36.7" +version = "0.36.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4fdebc4b395b7fbb9ab11e462e20ed9051e7b16e42d24042c776eca0ac81b03" +checksum = "e0af200a3324fa5bcd922e84e9b55a298ea9f431a489f01961acdebc6e908f25" dependencies = [ "bitflags", "errno", "io-lifetimes", "libc", - "linux-raw-sys", - "windows-sys 0.42.0", + "linux-raw-sys 0.1.4", + "windows-sys 0.45.0", +] + +[[package]] +name = "rustix" +version = "0.37.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85597d61f83914ddeba6a47b3b8ffe7365107221c2e557ed94426489fefb5f77" +dependencies = [ + "bitflags", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys 0.3.1", + "windows-sys 0.48.0", ] [[package]] @@ -3297,6 +3498,18 @@ dependencies = [ "webpki", ] +[[package]] +name = "rustls" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07180898a28ed6a7f7ba2311594308f595e3dd2e3c3812fa0a80a47b45f17e5d" +dependencies = [ + "log", + "ring", + "rustls-webpki", + "sct", +] + [[package]] name = "rustls-native-certs" version = "0.6.2" @@ -3319,16 +3532,26 @@ dependencies = [ ] [[package]] -name = "rustversion" -version = "1.0.11" +name = "rustls-webpki" +version = "0.100.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5583e89e108996506031660fe09baa5011b9dd0341b89029313006d1fb508d70" +checksum = "d6207cd5ed3d8dca7816f8f3725513a34609c0c765bf652b8c3cb4cfd87db46b" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "rustversion" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06" 
[[package]] name = "ryu" -version = "1.0.12" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b4b9743ed687d4b4bcedf9ff5eaa7398495ae14e61cba0a295704edbc7decde" +checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" [[package]] name = "safekeeper" @@ -3340,7 +3563,7 @@ dependencies = [ "byteorder", "bytes", "chrono", - "clap 4.1.4", + "clap 4.2.2", "const_format", "crc32c", "fs2", @@ -3413,9 +3636,9 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "scratch" -version = "1.0.3" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddccb15bcce173023b3fedd9436f882a0739b8dfb45e4f6b6002bee5929f61b2" +checksum = "1792db035ce95be60c3f8853017b3999209281c24e2ba5bc8e59bf97a0c590c1" [[package]] name = "sct" @@ -3452,33 +3675,33 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.16" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58bc9567378fc7690d6b2addae4e60ac2eeea07becb2c64b9f218b53865cba2a" +checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" [[package]] name = "sentry" -version = "0.29.2" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6097dc270a9c4555c5d6222ed243eaa97ff38e29299ed7c5cb36099033c604e" +checksum = "b5ce6d3512e2617c209ec1e86b0ca2fea06454cd34653c91092bf0f3ec41f8e3" dependencies = [ "httpdate", "reqwest", - "rustls", + "rustls 0.20.8", "sentry-backtrace", "sentry-contexts", "sentry-core", "sentry-panic", "tokio", "ureq", - "webpki-roots", + "webpki-roots 0.22.6", ] [[package]] name = "sentry-backtrace" -version = "0.29.2" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d92d1e4d591534ae4f872d6142f3b500f4ffc179a6aed8a3e86c7cc96d10a6a" +checksum = "0e7fe408d4d1f8de188a9309916e02e129cbe51ca19e55badea5a64899399b1a" 
dependencies = [ "backtrace", "once_cell", @@ -3488,9 +3711,9 @@ dependencies = [ [[package]] name = "sentry-contexts" -version = "0.29.2" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3afa877b1898ff67dd9878cf4bec4e53cef7d3be9f14b1fc9e4fcdf36f8e4259" +checksum = "5695096a059a89973ec541062d331ff4c9aeef9c2951416c894f0fff76340e7d" dependencies = [ "hostname", "libc", @@ -3502,9 +3725,9 @@ dependencies = [ [[package]] name = "sentry-core" -version = "0.29.2" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc43eb7e4e3a444151a0fe8a0e9ce60eabd905dae33d66e257fa26f1b509c1bd" +checksum = "5b22828bfd118a7b660cf7a155002a494755c0424cebb7061e4743ecde9c7dbc" dependencies = [ "once_cell", "rand", @@ -3515,9 +3738,9 @@ dependencies = [ [[package]] name = "sentry-panic" -version = "0.29.2" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccab4fab11e3e63c45f4524bee2e75cde39cdf164cb0b0cbe6ccd1948ceddf66" +checksum = "1f4ced2a7a8c14899d58eec402d946f69d5ed26a3fc363a7e8b1e5cb88473a01" dependencies = [ "sentry-backtrace", "sentry-core", @@ -3525,9 +3748,9 @@ dependencies = [ [[package]] name = "sentry-types" -version = "0.29.2" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63708ec450b6bdcb657af760c447416d69c38ce421f34e5e2e9ce8118410bc7" +checksum = "360ee3270f7a4a1eee6c667f7d38360b995431598a73b740dfe420da548d9cc9" dependencies = [ "debugid", "getrandom", @@ -3542,35 +3765,44 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.152" +version = "1.0.160" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb" +checksum = "bb2f3770c8bce3bcda7e149193a069a0f4365bda1fa5cd88e03bca26afc1216c" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.152" +version = 
"1.0.160" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e" +checksum = "291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.15", ] [[package]] name = "serde_json" -version = "1.0.91" +version = "1.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877c235533714907a8c2464236f5c4b2a17262ef1bd71f38f35ea592c8da6883" +checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1" dependencies = [ "itoa", "ryu", "serde", ] +[[package]] +name = "serde_spanned" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0efd8caf556a6cebd3b285caf480045fcc1ac04f6bd786b09a6f11af30c4fcf4" +dependencies = [ + "serde", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -3585,9 +3817,9 @@ dependencies = [ [[package]] name = "serde_with" -version = "2.2.0" +version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30d904179146de381af4c93d3af6ca4984b3152db687dacb9c3c35e86f39809c" +checksum = "331bb8c3bf9b92457ab7abecf07078c13f7d270ba490103e84e8b014490cd0b0" dependencies = [ "base64 0.13.1", "chrono", @@ -3601,14 +3833,14 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "2.2.0" +version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1966009f3c05f095697c537312f5415d1e3ed31ce0a56942bac4c771c5c335e" +checksum = "859011bddcc11f289f07f467cc1fe01c7a941daa4d8f6c40d4d1c92eb6d9319c" dependencies = [ "darling", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -3650,9 +3882,9 @@ checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" [[package]] name = "signal-hook" -version = "0.3.14" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a253b5e89e2698464fc26b545c9edceb338e18a89effeeecfea192c3025be29d" +checksum = "732768f1176d21d09e076c23a93123d40bba92d50c4058da34d45c8de8e682b9" dependencies = [ "libc", "signal-hook-registry", @@ -3671,9 +3903,9 @@ dependencies = [ [[package]] name = "signal-hook-registry" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" +checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" dependencies = [ "libc", ] @@ -3698,9 +3930,9 @@ checksum = "7bd3e3206899af3f8b12af284fafc038cc1dc2b41d1b89dd17297221c5d225de" [[package]] name = "slab" -version = "0.4.7" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4614a76b2a8be0058caa9dbbaf66d988527d86d003c11a94fbd335d7661edcef" +checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d" dependencies = [ "autocfg", ] @@ -3713,14 +3945,24 @@ checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" [[package]] name = "socket2" -version = "0.4.7" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd" +checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" dependencies = [ "libc", "winapi", ] +[[package]] +name = "socket2" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d283f86695ae989d1e18440a943880967156325ba025f05049946bff47bcc2b" +dependencies = [ + "libc", + "windows-sys 0.48.0", +] + [[package]] name = "spin" version = "0.5.2" @@ -3729,9 +3971,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" [[package]] name = "spin" -version = "0.9.4" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7f6002a767bff9e83f8eeecf883ecb8011875a21ae8da43bffb817a57e78cc09" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" dependencies = [ "lock_api", ] @@ -3755,7 +3997,7 @@ dependencies = [ "anyhow", "async-stream", "bytes", - "clap 4.1.4", + "clap 4.2.2", "const_format", "futures", "futures-core", @@ -3769,8 +4011,8 @@ dependencies = [ "prost", "tokio", "tokio-stream", - "tonic", - "tonic-build", + "tonic 0.9.1", + "tonic-build 0.9.1", "tracing", "utils", "workspace_hack", @@ -3808,7 +4050,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn", + "syn 1.0.109", ] [[package]] @@ -3825,9 +4067,20 @@ checksum = "8fb1df15f412ee2e9dfc1c504260fa695c1c3f10fe9f4a6ee2d2184d7d6450e2" [[package]] name = "syn" -version = "1.0.107" +version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822" dependencies = [ "proc-macro2", "quote", @@ -3848,7 +4101,7 @@ checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", "unicode-xid", ] @@ -3865,24 +4118,24 @@ dependencies = [ [[package]] name = "task-local-extensions" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4167afbec18ae012de40f8cf1b9bf48420abb390678c34821caa07d924941cc4" +checksum = "ba323866e5d033818e3240feeb9f7db2c4296674e4d9e16b97b7bf8f490434e8" dependencies = [ - "tokio", + "pin-utils", ] [[package]] name = "tempfile" -version = "3.4.0" +version = "3.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "af18f7ae1acd354b992402e9ec5864359d693cd8a79dcbef59f76891701c1e95" +checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998" dependencies = [ "cfg-if", "fastrand", - "redox_syscall", - "rustix", - "windows-sys 0.42.0", + "redox_syscall 0.3.5", + "rustix 0.37.11", + "windows-sys 0.45.0", ] [[package]] @@ -3922,7 +4175,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8901a55b0a7a06ebc4a674dcca925170da8e613fa3b163a1df804ed10afb154d" dependencies = [ "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -3933,38 +4186,39 @@ checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" [[package]] name = "thiserror" -version = "1.0.38" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" +checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.38" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" +checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.15", ] [[package]] name = "thread_local" -version = "1.1.4" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" +checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" dependencies = [ + "cfg-if", "once_cell", ] [[package]] name = "time" -version = "0.3.17" +version = "0.3.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a561bf4617eebd33bca6434b988f39ed798e527f51a1e797d0ee4f61c0a38376" +checksum = 
"cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890" dependencies = [ "itoa", "serde", @@ -3980,9 +4234,9 @@ checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" [[package]] name = "time-macros" -version = "0.2.6" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d967f99f534ca7e495c575c62638eebc2898a8c84c119b89e250477bc4ba16b2" +checksum = "fd80a657e71da814b8e5d60d3374fc6d35045062245d80224748ae522dd76f36" dependencies = [ "time-core", ] @@ -4008,9 +4262,9 @@ dependencies = [ [[package]] name = "tinyvec_macros" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tls-listener" @@ -4023,26 +4277,25 @@ dependencies = [ "pin-project-lite", "thiserror", "tokio", - "tokio-rustls", + "tokio-rustls 0.23.4", ] [[package]] name = "tokio" -version = "1.25.0" +version = "1.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e00990ebabbe4c14c08aca901caed183ecd5c09562a12c824bb53d3c3fd3af" +checksum = "d0de47a4eecbe11f498978a9b29d792f0d2692d1dd003650c24c76510e3bc001" dependencies = [ "autocfg", "bytes", "libc", - "memchr", "mio", "num_cpus", "pin-project-lite", "signal-hook-registry", - "socket2", + "socket2 0.4.9", "tokio-macros", - "windows-sys 0.42.0", + "windows-sys 0.45.0", ] [[package]] @@ -4057,13 +4310,13 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "1.8.2" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d266c00fde287f55d3f1c3e96c500c362a2b8c695076ec180f27918820bc6df8" +checksum = "61a573bdc87985e9d6ddeed1b3d864e8a302c847e40d647746df2f1de209d1ce" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.15", ] [[package]] @@ -4084,7 +4337,7 @@ dependencies = [ 
"pin-project-lite", "postgres-protocol", "postgres-types", - "socket2", + "socket2 0.4.9", "tokio", "tokio-util", ] @@ -4097,10 +4350,10 @@ checksum = "606f2b73660439474394432239c82249c0d45eb5f23d91f401be1e33590444a7" dependencies = [ "futures", "ring", - "rustls", + "rustls 0.20.8", "tokio", "tokio-postgres", - "tokio-rustls", + "tokio-rustls 0.23.4", ] [[package]] @@ -4109,16 +4362,26 @@ version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" dependencies = [ - "rustls", + "rustls 0.20.8", "tokio", "webpki", ] [[package]] -name = "tokio-stream" -version = "0.1.11" +name = "tokio-rustls" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d660770404473ccd7bc9f8b28494a811bc18542b915c0855c51e8f419d5223ce" +checksum = "e0d409377ff5b1e3ca6437aa86c1eb7d40c134bfec254e44c830defa92669db5" +dependencies = [ + "rustls 0.21.0", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fb52b74f05dbf495a8fba459fdc331812b96aa086d9eb78101fa0d4569c3313" dependencies = [ "futures-core", "pin-project-lite", @@ -4133,7 +4396,7 @@ dependencies = [ "filetime", "futures-core", "libc", - "redox_syscall", + "redox_syscall 0.2.16", "tokio", "tokio-stream", "xattr", @@ -4153,9 +4416,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.4" +version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bb2e075f03b3d66d8d8785356224ba688d2906a371015e225beeb65ca92c740" +checksum = "5427d89453009325de0d8f342c9490009f76e999cb7672d77e46267448f7e6b2" dependencies = [ "bytes", "futures-core", @@ -4167,33 +4430,36 @@ dependencies = [ [[package]] name = "toml" -version = "0.5.11" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +checksum = "b403acf6f2bb0859c93c7f0d967cb4a75a7ac552100f9322faf64dc047669b21" dependencies = [ "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", ] [[package]] name = "toml_datetime" -version = "0.5.1" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4553f467ac8e3d374bc9a177a26801e5d0f9b211aa1673fb137a403afd1c9cf5" +checksum = "3ab8ed2edee10b50132aed5f331333428b011c99402b5a534154ed15746f9622" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.17.1" +version = "0.19.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a34cc558345efd7e88b9eda9626df2138b80bb46a7606f695e751c892bc7dac6" +checksum = "239410c8609e8125456927e6707163a3b1fdb40561e4b803bc041f466ccfdc13" dependencies = [ "indexmap", - "itertools", - "nom8", "serde", + "serde_spanned", "toml_datetime", + "winnow", ] [[package]] @@ -4218,10 +4484,7 @@ dependencies = [ "pin-project", "prost", "prost-derive", - "rustls-native-certs", - "rustls-pemfile", "tokio", - "tokio-rustls", "tokio-stream", "tokio-util", "tower", @@ -4231,17 +4494,62 @@ dependencies = [ "tracing-futures", ] +[[package]] +name = "tonic" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38bd8e87955eb13c1986671838177d6792cdc52af9bffced0d2c8a9a7f741ab3" +dependencies = [ + "async-stream", + "async-trait", + "axum", + "base64 0.21.0", + "bytes", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-timeout", + "percent-encoding", + "pin-project", + "prost", + "rustls-native-certs", + "rustls-pemfile", + "tokio", + "tokio-rustls 0.24.0", + "tokio-stream", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "tonic-build" version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"5bf5e9b9c0f7e0a7c027dcfaba7b2c60816c7049171f679d99ee2ff65d0de8c4" dependencies = [ - "prettyplease", + "prettyplease 0.1.25", "proc-macro2", "prost-build", "quote", - "syn", + "syn 1.0.109", +] + +[[package]] +name = "tonic-build" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f60a933bbea70c95d633c04c951197ddf084958abaa2ed502a3743bdd8d8dd7" +dependencies = [ + "prettyplease 0.1.25", + "proc-macro2", + "prost-build", + "quote", + "syn 1.0.109", ] [[package]] @@ -4264,25 +4572,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "tower-http" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f873044bf02dd1e8239e9c1293ea39dad76dc594ec16185d0a1bf31d8dc8d858" -dependencies = [ - "bitflags", - "bytes", - "futures-core", - "futures-util", - "http", - "http-body", - "http-range-header", - "pin-project-lite", - "tower", - "tower-layer", - "tower-service", -] - [[package]] name = "tower-layer" version = "0.3.2" @@ -4300,7 +4589,7 @@ name = "trace" version = "0.1.0" dependencies = [ "anyhow", - "clap 4.1.4", + "clap 4.2.2", "pageserver_api", "utils", "workspace_hack", @@ -4327,7 +4616,7 @@ checksum = "4017f8f45139870ca7e672686113917c71c7a6e02d4924eda67186083c03081a" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -4340,6 +4629,16 @@ dependencies = [ "valuable", ] +[[package]] +name = "tracing-error" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d686ec1c0f384b1277f097b2f279a2ecc11afe8c133c1aabf036a27cb4cd206e" +dependencies = [ + "tracing", + "tracing-subscriber", +] + [[package]] name = "tracing-futures" version = "0.2.5" @@ -4473,15 +4772,15 @@ dependencies = [ [[package]] name = "unicode-bidi" -version = "0.3.10" +version = "0.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54675592c1dbefd78cbd98db9bacd89886e1ca50692a0692baefffdeb92dd58" +checksum = 
"92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" [[package]] name = "unicode-ident" -version = "1.0.6" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" +checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" [[package]] name = "unicode-normalization" @@ -4519,10 +4818,10 @@ dependencies = [ "base64 0.13.1", "log", "once_cell", - "rustls", + "rustls 0.20.8", "url", "webpki", - "webpki-roots", + "webpki-roots 0.22.6", ] [[package]] @@ -4549,6 +4848,12 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + [[package]] name = "utils" version = "0.1.0" @@ -4559,6 +4864,7 @@ dependencies = [ "bincode", "byteorder", "bytes", + "chrono", "criterion", "futures", "heapless", @@ -4570,6 +4876,7 @@ dependencies = [ "nix", "once_cell", "pin-project-lite", + "pq_proto", "rand", "regex", "routerify", @@ -4584,6 +4891,7 @@ dependencies = [ "thiserror", "tokio", "tracing", + "tracing-error", "tracing-subscriber", "url", "uuid", @@ -4592,9 +4900,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.3.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1674845326ee10d37ca60470760d4288a6f80f304007d92e5c53bab78c9cfd79" +checksum = "5b55a3fef2a1e3b3a00ce878640918820d3c51081576ac657d23af9fc7928fdb" dependencies = [ "getrandom", "serde", @@ -4612,12 +4920,18 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "vsimd" +version = "0.8.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + [[package]] name = "wal_craft" version = "0.1.0" dependencies = [ "anyhow", - "clap 4.1.4", + "clap 4.2.2", "env_logger", "log", "once_cell", @@ -4629,12 +4943,11 @@ dependencies = [ [[package]] name = "walkdir" -version = "2.3.2" +version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" dependencies = [ "same-file", - "winapi", "winapi-util", ] @@ -4675,7 +4988,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 1.0.109", "wasm-bindgen-shared", ] @@ -4709,7 +5022,7 @@ checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4749,6 +5062,15 @@ dependencies = [ "webpki", ] +[[package]] +name = "webpki-roots" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa54963694b65584e170cf5dc46aeb4dcaa5584e652ff5f3952e56d66aff0125" +dependencies = [ + "rustls-webpki", +] + [[package]] name = "which" version = "4.4.0" @@ -4791,19 +5113,28 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +dependencies = [ + "windows-targets 0.48.0", +] + [[package]] name = "windows-sys" version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" dependencies = [ - 
"windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", ] [[package]] @@ -4812,65 +5143,140 @@ version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" dependencies = [ - "windows-targets", + "windows-targets 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.0", ] [[package]] name = "windows-targets" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = "windows-targets" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" +dependencies = [ + "windows_aarch64_gnullvm 0.48.0", + "windows_aarch64_msvc 0.48.0", + "windows_i686_gnu 0.48.0", + "windows_i686_msvc 0.48.0", + 
"windows_x86_64_gnu 0.48.0", + "windows_x86_64_gnullvm 0.48.0", + "windows_x86_64_msvc 0.48.0", ] [[package]] name = "windows_aarch64_gnullvm" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" [[package]] name = "windows_aarch64_msvc" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" [[package]] name = "windows_i686_gnu" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" [[package]] name = "windows_i686_msvc" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + +[[package]] +name = "windows_i686_msvc" +version = 
"0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" [[package]] name = "windows_x86_64_gnu" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" [[package]] name = "windows_x86_64_gnullvm" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" [[package]] name = "windows_x86_64_msvc" -version = "0.42.1" +version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" + +[[package]] +name = "winnow" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae8970b36c66498d8ff1d66685dc86b91b29db0c7739899012f63a63814b4b28" +dependencies = [ + "memchr", +] [[package]] name = "winreg" @@ -4886,12 +5292,11 @@ name = 
"workspace_hack" version = "0.1.0" dependencies = [ "anyhow", - "byteorder", "bytes", "chrono", - "clap 4.1.4", + "clap 4.2.2", + "clap_builder", "crossbeam-utils", - "digest", "either", "fail", "futures", @@ -4901,7 +5306,6 @@ dependencies = [ "futures-sink", "futures-util", "hashbrown 0.12.3", - "indexmap", "itertools", "libc", "log", @@ -4916,16 +5320,18 @@ dependencies = [ "regex-syntax", "reqwest", "ring", - "rustls", + "rustls 0.20.8", "scopeguard", "serde", "serde_json", - "socket2", - "syn", + "socket2 0.4.9", + "syn 1.0.109", + "syn 2.0.15", "tokio", - "tokio-rustls", + "tokio-rustls 0.23.4", "tokio-util", - "tonic", + "toml_datetime", + "toml_edit", "tower", "tracing", "tracing-core", @@ -4935,12 +5341,11 @@ dependencies = [ [[package]] name = "x509-parser" -version = "0.14.0" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0ecbeb7b67ce215e40e3cc7f2ff902f94a223acf44995934763467e7b1febc8" +checksum = "bab0c2f54ae1d92f4fcb99c0b7ccf0b1e3451cbd395e5f115ccbdbcb18d4f634" dependencies = [ "asn1-rs", - "base64 0.13.1", "data-encoding", "der-parser", "lazy_static", @@ -4968,15 +5373,15 @@ checksum = "4d25c75bf9ea12c4040a97f829154768bbbce366287e2dc044af160cd79a13fd" [[package]] name = "yasna" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aed2e7a52e3744ab4d0c05c20aa065258e84c49fd4226f5191b2ed29712710b4" +checksum = "e17bb3549cc1321ae1296b9cdc2698e2b6cb1992adfa19a8c72e5b7a738f44cd" dependencies = [ "time", ] [[package]] name = "zeroize" -version = "1.5.7" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c394b5bd0c6f669e7275d9c20aa90ae064cb22e75a1cad54e1b34088034b149f" +checksum = "2a0956f1ba7c7909bfb66c2e9e4124ab6f6482560f6628b5aaeba39207c9aad9" diff --git a/Cargo.toml b/Cargo.toml index 679605dc1d..f4872433cd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,10 +24,10 @@ atty = "0.2.14" aws-config = { 
version = "0.51.0", default-features = false, features=["rustls"] } aws-sdk-s3 = "0.21.0" aws-smithy-http = "0.51.0" -aws-types = "0.51.0" +aws-types = "0.55" base64 = "0.13.0" bincode = "1.3" -bindgen = "0.61" +bindgen = "0.65" bstr = "1.0" byteorder = "1.4" bytes = "1.0" @@ -50,7 +50,7 @@ git-version = "0.3" hashbrown = "0.13" hashlink = "0.8.1" hex = "0.4" -hex-literal = "0.3" +hex-literal = "0.4" hmac = "0.12.1" hostname = "0.3.1" humantime = "2.1" @@ -80,18 +80,18 @@ reqwest = { version = "0.11", default-features = false, features = ["rustls-tls" reqwest-tracing = { version = "0.4.0", features = ["opentelemetry_0_18"] } reqwest-middleware = "0.2.0" routerify = "3" -rpds = "0.12.0" +rpds = "0.13" rustls = "0.20" rustls-pemfile = "1" rustls-split = "0.3" scopeguard = "1.1" -sentry = { version = "0.29", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] } +sentry = { version = "0.30", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] } serde = { version = "1.0", features = ["derive"] } serde_json = "1" serde_with = "2.0" sha2 = "0.10.2" signal-hook = "0.3" -socket2 = "0.4.4" +socket2 = "0.5" strum = "0.24" strum_macros = "0.24" svg_fmt = "0.4.1" @@ -106,17 +106,18 @@ tokio-postgres-rustls = "0.9.0" tokio-rustls = "0.23" tokio-stream = "0.1" tokio-util = { version = "0.7", features = ["io"] } -toml = "0.5" -toml_edit = { version = "0.17", features = ["easy"] } -tonic = {version = "0.8", features = ["tls", "tls-roots"]} +toml = "0.7" +toml_edit = "0.19" +tonic = {version = "0.9", features = ["tls", "tls-roots"]} tracing = "0.1" +tracing-error = "0.2.0" tracing-opentelemetry = "0.18.0" tracing-subscriber = { version = "0.3", features = ["env-filter"] } url = "2.2" uuid = { version = "1.2", features = ["v4", "serde"] } walkdir = "2.3.2" -webpki-roots = "0.22.5" -x509-parser = "0.14" +webpki-roots = "0.23" +x509-parser = "0.15" ## TODO replace this with tracing env_logger = "0.10" 
@@ -154,9 +155,9 @@ workspace_hack = { version = "0.1", path = "./workspace_hack/" } ## Build dependencies criterion = "0.4" rcgen = "0.10" -rstest = "0.16" +rstest = "0.17" tempfile = "3.4" -tonic-build = "0.8" +tonic-build = "0.9" # This is only needed for proxy's tests. # TODO: we should probably fork `tokio-postgres-rustls` instead. diff --git a/Dockerfile.compute-node b/Dockerfile.compute-node index 3473487444..229e09aa98 100644 --- a/Dockerfile.compute-node +++ b/Dockerfile.compute-node @@ -12,7 +12,7 @@ FROM debian:bullseye-slim AS build-deps RUN apt update && \ apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \ zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libssl-dev \ - libicu-dev libxslt1-dev + libicu-dev libxslt1-dev liblz4-dev libzstd-dev ######################################################################################### # @@ -24,8 +24,13 @@ FROM build-deps AS pg-build ARG PG_VERSION COPY vendor/postgres-${PG_VERSION} postgres RUN cd postgres && \ - ./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp --with-icu \ - --with-libxml --with-libxslt && \ + export CONFIGURE_CMD="./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp \ + --with-icu --with-libxml --with-libxslt --with-lz4" && \ + if [ "${PG_VERSION}" != "v14" ]; then \ + # zstd is available only from PG15 + export CONFIGURE_CMD="${CONFIGURE_CMD} --with-zstd"; \ + fi && \ + eval $CONFIGURE_CMD && \ make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \ make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \ # Install headers @@ -60,6 +65,7 @@ RUN apt update && \ # SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2 RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \ + echo "4e39b3b2adada6254a7bdba6d297bb28e1a9835a9f879b74f37e2dab70203232 SFCGAL.tar.gz" | sha256sum --check && \ mkdir 
sfcgal-src && cd sfcgal-src && tar xvzf ../SFCGAL.tar.gz --strip-components=1 -C . && \ cmake . && make -j $(getconf _NPROCESSORS_ONLN) && \ DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \ @@ -68,6 +74,7 @@ RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar ENV PATH "/usr/local/pgsql/bin:$PATH" RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.2.tar.gz -O postgis.tar.gz && \ + echo "9a2a219da005a1730a39d1959a1c7cec619b1efb009b65be80ffc25bad299068 postgis.tar.gz" | sha256sum --check && \ mkdir postgis-src && cd postgis-src && tar xvzf ../postgis.tar.gz --strip-components=1 -C . && \ ./autogen.sh && \ ./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \ @@ -84,6 +91,7 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.2.tar.gz -O postg echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer_data_us.control RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \ + echo "cac297c07d34460887c4f3b522b35c470138760fe358e351ad1db4edb6ee306e pgrouting.tar.gz" | sha256sum --check && \ mkdir pgrouting-src && cd pgrouting-src && tar xvzf ../pgrouting.tar.gz --strip-components=1 -C . && \ mkdir build && \ cd build && \ @@ -104,6 +112,7 @@ RUN apt update && \ apt install -y ninja-build python3-dev libncurses5 binutils clang RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.5.tar.gz -O plv8.tar.gz && \ + echo "1e108d5df639e4c189e1c5bdfa2432a521c126ca89e7e5a969d46899ca7bf106 plv8.tar.gz" | sha256sum --check && \ mkdir plv8-src && cd plv8-src && tar xvzf ../plv8.tar.gz --strip-components=1 -C . 
&& \ export PATH="/usr/local/pgsql/bin:$PATH" && \ make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \ @@ -125,11 +134,13 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ # packaged cmake is too old RUN wget https://github.com/Kitware/CMake/releases/download/v3.24.2/cmake-3.24.2-linux-x86_64.sh \ -q -O /tmp/cmake-install.sh \ + && echo "739d372726cb23129d57a539ce1432453448816e345e1545f6127296926b6754 /tmp/cmake-install.sh" | sha256sum --check \ && chmod u+x /tmp/cmake-install.sh \ && /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \ && rm /tmp/cmake-install.sh RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \ + echo "ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz" | sha256sum --check && \ mkdir h3-src && cd h3-src && tar xvzf ../h3.tar.gz --strip-components=1 -C . && \ mkdir build && cd build && \ cmake .. -DCMAKE_BUILD_TYPE=Release && \ @@ -139,6 +150,7 @@ RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz rm -rf build RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.2.tar.gz -O h3-pg.tar.gz && \ + echo "c135aa45999b2ad1326d2537c1cadef96d52660838e4ca371706c08fdea1a956 h3-pg.tar.gz" | sha256sum --check && \ mkdir h3-pg-src && cd h3-pg-src && tar xvzf ../h3-pg.tar.gz --strip-components=1 -C . && \ export PATH="/usr/local/pgsql/bin:$PATH" && \ make -j $(getconf _NPROCESSORS_ONLN) && \ @@ -156,6 +168,7 @@ FROM build-deps AS unit-pg-build COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \ + echo "411d05beeb97e5a4abf17572bfcfbb5a68d98d1018918feff995f6ee3bb03e79 postgresql-unit.tar.gz" | sha256sum --check && \ mkdir postgresql-unit-src && cd postgresql-unit-src && tar xvzf ../postgresql-unit.tar.gz --strip-components=1 -C . 
&& \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ @@ -176,6 +189,7 @@ FROM build-deps AS vector-pg-build COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.4.0.tar.gz -O pgvector.tar.gz && \ + echo "b76cf84ddad452cc880a6c8c661d137ddd8679c000a16332f4f03ecf6e10bcc8 pgvector.tar.gz" | sha256sum --check && \ mkdir pgvector-src && cd pgvector-src && tar xvzf ../pgvector.tar.gz --strip-components=1 -C . && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ @@ -192,6 +206,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ # 9742dab1b2f297ad3811120db7b21451bca2d3c9 made on 13/11/2021 RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \ + echo "cfdefb15007286f67d3d45510f04a6a7a495004be5b3aecb12cda667e774203f pgjwt.tar.gz" | sha256sum --check && \ mkdir pgjwt-src && cd pgjwt-src && tar xvzf ../pgjwt.tar.gz --strip-components=1 -C . && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgjwt.control @@ -206,6 +221,7 @@ FROM build-deps AS hypopg-pg-build COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.3.1.tar.gz -O hypopg.tar.gz && \ + echo "e7f01ee0259dc1713f318a108f987663d60f3041948c2ada57a94b469565ca8e hypopg.tar.gz" | sha256sum --check && \ mkdir hypopg-src && cd hypopg-src && tar xvzf ../hypopg.tar.gz --strip-components=1 -C . 
&& \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ @@ -221,6 +237,7 @@ FROM build-deps AS pg-hashids-pg-build COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \ + echo "74576b992d9277c92196dd8d816baa2cc2d8046fe102f3dcd7f3c3febed6822a pg_hashids.tar.gz" | sha256sum --check && \ mkdir pg_hashids-src && cd pg_hashids-src && tar xvzf ../pg_hashids.tar.gz --strip-components=1 -C . && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \ @@ -236,6 +253,7 @@ FROM build-deps AS rum-pg-build COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ RUN wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \ + echo "6ab370532c965568df6210bd844ac6ba649f53055e48243525b0b7e5c4d69a7d rum.tar.gz" | sha256sum --check && \ mkdir rum-src && cd rum-src && tar xvzf ../rum.tar.gz --strip-components=1 -C . && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \ @@ -251,11 +269,28 @@ FROM build-deps AS pgtap-pg-build COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \ + echo "9c7c3de67ea41638e14f06da5da57bac6f5bd03fea05c165a0ec862205a5c052 pgtap.tar.gz" | sha256sum --check && \ mkdir pgtap-src && cd pgtap-src && tar xvzf ../pgtap.tar.gz --strip-components=1 -C . 
&& \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgtap.control +######################################################################################### +# +# Layer "ip4r-pg-build" +# compile ip4r extension +# +######################################################################################### +FROM build-deps AS ip4r-pg-build +COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ + +RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.1.tar.gz -O ip4r.tar.gz && \ + echo "78b9f0c1ae45c22182768fe892a32d533c82281035e10914111400bf6301c726 ip4r.tar.gz" | sha256sum --check && \ + mkdir ip4r-src && cd ip4r-src && tar xvzf ../ip4r.tar.gz --strip-components=1 -C . && \ + make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ + make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ + echo 'trusted = true' >> /usr/local/pgsql/share/extension/ip4r.control + ######################################################################################### # # Layer "prefix-pg-build" @@ -266,6 +301,7 @@ FROM build-deps AS prefix-pg-build COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.9.tar.gz -O prefix.tar.gz && \ + echo "38d30a08d0241a8bbb8e1eb8f0152b385051665a8e621c8899e7c5068f8b511e prefix.tar.gz" | sha256sum --check && \ mkdir prefix-src && cd prefix-src && tar xvzf ../prefix.tar.gz --strip-components=1 -C . 
&& \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ @@ -281,6 +317,7 @@ FROM build-deps AS hll-pg-build COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.17.tar.gz -O hll.tar.gz && \ + echo "9a18288e884f197196b0d29b9f178ba595b0dfc21fbf7a8699380e77fa04c1e9 hll.tar.gz" | sha256sum --check && \ mkdir hll-src && cd hll-src && tar xvzf ../hll.tar.gz --strip-components=1 -C . && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \ @@ -296,6 +333,7 @@ FROM build-deps AS plpgsql-check-pg-build COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.3.2.tar.gz -O plpgsql_check.tar.gz && \ + echo "9d81167c4bbeb74eebf7d60147b21961506161addc2aee537f95ad8efeae427b plpgsql_check.tar.gz" | sha256sum --check && \ mkdir plpgsql_check-src && cd plpgsql_check-src && tar xvzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \ make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \ make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \ @@ -315,6 +353,7 @@ ENV PATH "/usr/local/pgsql/bin:$PATH" RUN apt-get update && \ apt-get install -y cmake && \ wget https://github.com/timescale/timescaledb/archive/refs/tags/2.10.1.tar.gz -O timescaledb.tar.gz && \ + echo "6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 timescaledb.tar.gz" | sha256sum --check && \ mkdir timescaledb-src && cd timescaledb-src && tar xvzf ../timescaledb.tar.gz --strip-components=1 -C . 
&& \ ./bootstrap -DSEND_TELEMETRY_DEFAULT:BOOL=OFF -DUSE_TELEMETRY:BOOL=OFF -DAPACHE_ONLY:BOOL=ON && \ cd build && \ @@ -323,7 +362,39 @@ RUN apt-get update && \ echo "trusted = true" >> /usr/local/pgsql/share/extension/timescaledb.control ######################################################################################### -# +# +# Layer "pg-hint-plan-pg-build" +# compile pg_hint_plan extension +# +######################################################################################### +FROM build-deps AS pg-hint-plan-pg-build +COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ + +ARG PG_VERSION +ENV PATH "/usr/local/pgsql/bin:$PATH" + +RUN case "${PG_VERSION}" in \ + "v14") \ + export PG_HINT_PLAN_VERSION=14_1_4_1 \ + export PG_HINT_PLAN_CHECKSUM=c3501becf70ead27f70626bce80ea401ceac6a77e2083ee5f3ff1f1444ec1ad1 \ + ;; \ + "v15") \ + export PG_HINT_PLAN_VERSION=15_1_5_0 \ + export PG_HINT_PLAN_CHECKSUM=564cbbf4820973ffece63fbf76e3c0af62c4ab23543142c7caaa682bc48918be \ + ;; \ + *) \ + echo "Export the valid PG_HINT_PLAN_VERSION variable" && exit 1 \ + ;; \ + esac && \ + wget https://github.com/ossc-db/pg_hint_plan/archive/refs/tags/REL${PG_HINT_PLAN_VERSION}.tar.gz -O pg_hint_plan.tar.gz && \ + echo "${PG_HINT_PLAN_CHECKSUM} pg_hint_plan.tar.gz" | sha256sum --check && \ + mkdir pg_hint_plan-src && cd pg_hint_plan-src && tar xvzf ../pg_hint_plan.tar.gz --strip-components=1 -C . 
&& \ + make -j $(getconf _NPROCESSORS_ONLN) && \ + make install -j $(getconf _NPROCESSORS_ONLN) && \ + echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_hint_plan.control + +######################################################################################### +# # Layer "rust extensions" # This layer is used to build `pgx` deps # @@ -351,7 +422,7 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux USER root ######################################################################################### -# +# # Layer "pg-jsonschema-pg-build" # Compile "pg_jsonschema" extension # @@ -359,15 +430,17 @@ USER root FROM rust-extensions-build AS pg-jsonschema-pg-build -# there is no release tag yet, but we need it due to the superuser fix in the control file +# caeab60d70b2fd3ae421ec66466a3abbb37b7ee6 made on 06/03/2023 +# there is no release tag yet, but we need it due to the superuser fix in the control file, switch to git tag after release >= 0.1.5 RUN wget https://github.com/supabase/pg_jsonschema/archive/caeab60d70b2fd3ae421ec66466a3abbb37b7ee6.tar.gz -O pg_jsonschema.tar.gz && \ + echo "54129ce2e7ee7a585648dbb4cef6d73f795d94fe72f248ac01119992518469a4 pg_jsonschema.tar.gz" | sha256sum --check && \ mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xvzf ../pg_jsonschema.tar.gz --strip-components=1 -C . 
&& \ sed -i 's/pgx = "0.7.1"/pgx = { version = "0.7.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \ cargo pgx install --release && \ echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_jsonschema.control ######################################################################################### -# +# # Layer "pg-graphql-pg-build" # Compile "pg_graphql" extension # @@ -375,11 +448,13 @@ RUN wget https://github.com/supabase/pg_jsonschema/archive/caeab60d70b2fd3ae421e FROM rust-extensions-build AS pg-graphql-pg-build +# b4988843647450a153439be367168ed09971af85 made on 22/02/2023 (from remove-pgx-contrib-spiext branch) # Currently pgx version bump to >= 0.7.2 causes "call to unsafe function" compliation errors in # pgx-contrib-spiext. There is a branch that removes that dependency, so use it. It is on the # same 1.1 version we've used before. -RUN git clone -b remove-pgx-contrib-spiext --single-branch https://github.com/yrashk/pg_graphql && \ - cd pg_graphql && \ +RUN wget https://github.com/yrashk/pg_graphql/archive/b4988843647450a153439be367168ed09971af85.tar.gz -O pg_graphql.tar.gz && \ + echo "0c7b0e746441b2ec24187d0e03555faf935c2159e2839bddd14df6dafbc8c9bd pg_graphql.tar.gz" | sha256sum --check && \ + mkdir pg_graphql-src && cd pg_graphql-src && tar xvzf ../pg_graphql.tar.gz --strip-components=1 -C . 
&& \ sed -i 's/pgx = "~0.7.1"/pgx = { version = "0.7.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \ sed -i 's/pgx-tests = "~0.7.1"/pgx-tests = "0.7.3"/g' Cargo.toml && \ cargo pgx install --release && \ @@ -396,8 +471,10 @@ RUN git clone -b remove-pgx-contrib-spiext --single-branch https://github.com/yr FROM rust-extensions-build AS pg-tiktoken-pg-build -RUN git clone --depth=1 --single-branch https://github.com/kelvich/pg_tiktoken && \ - cd pg_tiktoken && \ +# 801f84f08c6881c8aa30f405fafbf00eec386a72 made on 10/03/2023 +RUN wget https://github.com/kelvich/pg_tiktoken/archive/801f84f08c6881c8aa30f405fafbf00eec386a72.tar.gz -O pg_tiktoken.tar.gz && \ + echo "52f60ac800993a49aa8c609961842b611b6b1949717b69ce2ec9117117e16e4a pg_tiktoken.tar.gz" | sha256sum --check && \ + mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xvzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \ cargo pgx install --release && \ echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_tiktoken.control @@ -423,10 +500,12 @@ COPY --from=hypopg-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-hashids-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=rum-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pgtap-pg-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=ip4r-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=prefix-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=hll-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=plpgsql-check-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=timescaledb-pg-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pg-hint-plan-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY pgxn/ pgxn/ RUN make -j $(getconf _NPROCESSORS_ONLN) \ @@ -491,13 +570,17 @@ COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-deb # Install: # libreadline8 for psql # libicu67, locales for collations (including ICU and plpgsql_check) +# liblz4-1 for lz4 # libossp-uuid16 for 
extension ossp-uuid # libgeos, libgdal, libsfcgal1, libproj and libprotobuf-c1 for PostGIS # libxml2, libxslt1.1 for xml2 +# libzstd1 for zstd RUN apt update && \ apt install --no-install-recommends -y \ + gdb \ locales \ libicu67 \ + liblz4-1 \ libreadline8 \ libossp-uuid16 \ libgeos-c1v5 \ @@ -507,7 +590,8 @@ RUN apt update && \ libsfcgal1 \ libxml2 \ libxslt1.1 \ - gdb && \ + libzstd1 \ + procps && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \ localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 diff --git a/Dockerfile.vm-compute-node b/Dockerfile.vm-compute-node index 957166ecd1..aabb3c9953 100644 --- a/Dockerfile.vm-compute-node +++ b/Dockerfile.vm-compute-node @@ -54,7 +54,7 @@ RUN set -e \ RUN set -e \ && echo "::sysinit:cgconfigparser -l /etc/cgconfig.conf -s 1664" >> /etc/inittab \ - && CONNSTR="dbname=neondb user=cloud_admin sslmode=disable" \ + && CONNSTR="dbname=postgres user=cloud_admin sslmode=disable" \ && ARGS="--auto-restart --cgroup=neon-postgres --pgconnstr=\"$CONNSTR\"" \ && echo "::respawn:su vm-informant -c '/usr/local/bin/vm-informant $ARGS'" >> /etc/inittab diff --git a/README.md b/README.md index 55df67f6c7..810937aff7 100644 --- a/README.md +++ b/README.md @@ -147,15 +147,15 @@ Created an initial timeline 'de200bd42b49cc1814412c7e592dd6e9' at Lsn 0/16B5A50 Setting tenant 9ef87a5bf0d92544f6fafeeb3239695c as a default one # start postgres compute node -> ./target/debug/neon_local pg start main -Starting new postgres (v14) main on timeline de200bd42b49cc1814412c7e592dd6e9 ... +> ./target/debug/neon_local endpoint start main +Starting new endpoint main (PostgreSQL v14) on timeline de200bd42b49cc1814412c7e592dd6e9 ... 
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432 -Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres' +Starting postgres at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres' # check list of running postgres instances -> ./target/debug/neon_local pg list - NODE ADDRESS TIMELINE BRANCH NAME LSN STATUS - main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16B5BA8 running +> ./target/debug/neon_local endpoint list + ENDPOINT ADDRESS TIMELINE BRANCH NAME LSN STATUS + main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16B5BA8 running ``` 2. Now, it is possible to connect to postgres and run some queries: @@ -184,14 +184,14 @@ Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant: (L) ┗━ @0/16F9A00: migration_check [b3b863fa45fa9e57e615f9f2d944e601] # start postgres on that branch -> ./target/debug/neon_local pg start migration_check --branch-name migration_check -Starting new postgres migration_check on timeline b3b863fa45fa9e57e615f9f2d944e601 ... +> ./target/debug/neon_local endpoint start migration_check --branch-name migration_check +Starting new endpoint migration_check (PostgreSQL v14) on timeline b3b863fa45fa9e57e615f9f2d944e601 ... 
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433 -Starting postgres node at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres' +Starting postgres at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres' # check the new list of running postgres instances -> ./target/debug/neon_local pg list - NODE ADDRESS TIMELINE BRANCH NAME LSN STATUS +> ./target/debug/neon_local endpoint list + ENDPOINT ADDRESS TIMELINE BRANCH NAME LSN STATUS main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16F9A38 running migration_check 127.0.0.1:55433 b3b863fa45fa9e57e615f9f2d944e601 migration_check 0/16F9A70 running diff --git a/compute_tools/Cargo.toml b/compute_tools/Cargo.toml index f315d2b7d9..21226249cf 100644 --- a/compute_tools/Cargo.toml +++ b/compute_tools/Cargo.toml @@ -28,4 +28,5 @@ tracing-utils.workspace = true url.workspace = true compute_api.workspace = true +utils.workspace = true workspace_hack.workspace = true diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index d61eae5f7a..36dbc382b5 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -34,7 +34,7 @@ use std::fs::File; use std::panic; use std::path::Path; use std::process::exit; -use std::sync::{Arc, Condvar, Mutex}; +use std::sync::{mpsc, Arc, Condvar, Mutex}; use std::{thread, time::Duration}; use anyhow::{Context, Result}; @@ -45,12 +45,12 @@ use url::Url; use compute_api::responses::ComputeStatus; -use compute_tools::compute::{ComputeNode, ComputeState}; +use compute_tools::compute::{ComputeNode, ComputeState, ParsedSpec}; +use compute_tools::configurator::launch_configurator; use compute_tools::http::api::launch_http_server; use compute_tools::logger::*; use compute_tools::monitor::launch_monitor; use compute_tools::params::*; -use compute_tools::pg_helpers::*; use compute_tools::spec::*; fn main() -> Result<()> { 
@@ -73,29 +73,29 @@ fn main() -> Result<()> { // Try to use just 'postgres' if no path is provided let pgbin = matches.get_one::("pgbin").unwrap(); - let mut spec = Default::default(); - let mut spec_set = false; + let spec; let mut live_config_allowed = false; match spec_json { // First, try to get cluster spec from the cli argument Some(json) => { - spec = serde_json::from_str(json)?; - spec_set = true; + spec = Some(serde_json::from_str(json)?); } None => { // Second, try to read it from the file if path is provided if let Some(sp) = spec_path { let path = Path::new(sp); let file = File::open(path)?; - spec = serde_json::from_reader(file)?; - spec_set = true; + spec = Some(serde_json::from_reader(file)?); } else if let Some(id) = compute_id { if let Some(cp_base) = control_plane_uri { live_config_allowed = true; - if let Ok(s) = get_spec_from_control_plane(cp_base, id) { - spec = s; - spec_set = true; - } + spec = match get_spec_from_control_plane(cp_base, id) { + Ok(s) => s, + Err(e) => { + error!("cannot get response from control plane: {}", e); + panic!("neither spec nor confirmation that compute is in the Empty state was received"); + } + }; } else { panic!("must specify both --control-plane-uri and --compute-id or none"); } @@ -109,11 +109,15 @@ fn main() -> Result<()> { }; let mut new_state = ComputeState::new(); - if spec_set { - new_state.spec = spec; + let spec_set; + if let Some(spec) = spec { + let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow::anyhow!(msg))?; + new_state.pspec = Some(pspec); + spec_set = true; + } else { + spec_set = false; } let compute_node = ComputeNode { - start_time: Utc::now(), connstr: Url::parse(connstr).context("cannot parse connstr as a URL")?, pgdata: pgdata.to_string(), pgbin: pgbin.to_string(), @@ -142,33 +146,21 @@ fn main() -> Result<()> { } } - // We got all we need, fill in the state. + // We got all we need, update the state. 
let mut state = compute.state.lock().unwrap(); - let pageserver_connstr = state - .spec - .cluster - .settings - .find("neon.pageserver_connstring") - .expect("pageserver connstr should be provided"); - let storage_auth_token = state.spec.storage_auth_token.clone(); - let tenant = state - .spec - .cluster - .settings - .find("neon.tenant_id") - .expect("tenant id should be provided"); - let timeline = state - .spec - .cluster - .settings - .find("neon.timeline_id") - .expect("tenant id should be provided"); - let startup_tracing_context = state.spec.startup_tracing_context.clone(); + let pspec = state.pspec.as_ref().expect("spec must be set"); + let startup_tracing_context = pspec.spec.startup_tracing_context.clone(); + + // Record for how long we slept waiting for the spec. + state.metrics.wait_for_spec_ms = Utc::now() + .signed_duration_since(state.start_time) + .to_std() + .unwrap() + .as_millis() as u64; + // Reset start time to the actual start of the configuration, so that + // total startup time was properly measured at the end. + state.start_time = Utc::now(); - state.pageserver_connstr = pageserver_connstr; - state.storage_auth_token = storage_auth_token; - state.tenant = tenant; - state.timeline = timeline; state.status = ComputeStatus::Init; compute.state_changed.notify_all(); drop(state); @@ -198,6 +190,8 @@ fn main() -> Result<()> { // Launch remaining service threads let _monitor_handle = launch_monitor(&compute).expect("cannot launch compute monitor thread"); + let _configurator_handle = + launch_configurator(&compute).expect("cannot launch configurator thread"); // Start Postgres let mut delay_exit = false; @@ -239,10 +233,25 @@ fn main() -> Result<()> { thread::sleep(Duration::from_secs(30)); } - info!("shutting down tracing"); // Shutdown trace pipeline gracefully, so that it has a chance to send any - // pending traces before we exit. - tracing_utils::shutdown_tracing(); + // pending traces before we exit. 
Shutting down OTEL tracing provider may + // hang for quite some time, see, for example: + // - https://github.com/open-telemetry/opentelemetry-rust/issues/868 + // - and our problems with staging https://github.com/neondatabase/cloud/issues/3707#issuecomment-1493983636 + // + // Yet, we want computes to shut down fast enough, as we may need a new one + // for the same timeline ASAP. So wait no longer than 2s for the shutdown to + // complete, then just error out and exit the main thread. + info!("shutting down tracing"); + let (sender, receiver) = mpsc::channel(); + let _ = thread::spawn(move || { + tracing_utils::shutdown_tracing(); + sender.send(()).ok() + }); + let shutdown_res = receiver.recv_timeout(Duration::from_millis(2000)); + if shutdown_res.is_err() { + error!("timed out while shutting down tracing, exiting anyway"); + } info!("shutting down"); exit(exit_code.unwrap_or(1)) diff --git a/compute_tools/src/checker.rs b/compute_tools/src/checker.rs index b8413de516..b6a287bdeb 100644 --- a/compute_tools/src/checker.rs +++ b/compute_tools/src/checker.rs @@ -1,12 +1,28 @@ use anyhow::{anyhow, Result}; -use postgres::Client; use tokio_postgres::NoTls; use tracing::{error, instrument}; use crate::compute::ComputeNode; +/// Update timestamp in a row in a special service table to check +/// that we can actually write some data in this particular timeline. +/// Create table if it's missing. #[instrument(skip_all)] -pub fn create_writability_check_data(client: &mut Client) -> Result<()> { +pub async fn check_writability(compute: &ComputeNode) -> Result<()> { + // Connect to the database. + let (client, connection) = tokio_postgres::connect(compute.connstr.as_str(), NoTls).await?; + if client.is_closed() { + return Err(anyhow!("connection to postgres closed")); + } + + // The connection object performs the actual communication with the database, + // so spawn it off to run on its own. 
+ tokio::spawn(async move { + if let Err(e) = connection.await { + error!("connection error: {}", e); + } + }); + let query = " CREATE TABLE IF NOT EXISTS health_check ( id serial primary key, @@ -15,31 +31,15 @@ pub fn create_writability_check_data(client: &mut Client) -> Result<()> { INSERT INTO health_check VALUES (1, now()) ON CONFLICT (id) DO UPDATE SET updated_at = now();"; - let result = client.simple_query(query)?; - if result.len() < 2 { - return Err(anyhow::format_err!("executed {} queries", result.len())); - } - Ok(()) -} - -#[instrument(skip_all)] -pub async fn check_writability(compute: &ComputeNode) -> Result<()> { - let (client, connection) = tokio_postgres::connect(compute.connstr.as_str(), NoTls).await?; - if client.is_closed() { - return Err(anyhow!("connection to postgres closed")); - } - tokio::spawn(async move { - if let Err(e) = connection.await { - error!("connection error: {}", e); - } - }); - - let result = client - .simple_query("UPDATE health_check SET updated_at = now() WHERE id = 1;") - .await?; - - if result.len() != 1 { - return Err(anyhow!("statement can't be executed")); + + let result = client.simple_query(query).await?; + + if result.len() != 2 { + return Err(anyhow::format_err!( + "expected 2 query results, but got {}", + result.len() + )); } + Ok(()) } diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 689aa6ef43..b6bc234beb 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -26,18 +26,18 @@ use chrono::{DateTime, Utc}; use postgres::{Client, NoTls}; use tokio_postgres; use tracing::{info, instrument, warn}; +use utils::id::{TenantId, TimelineId}; +use utils::lsn::Lsn; use compute_api::responses::{ComputeMetrics, ComputeStatus}; use compute_api::spec::ComputeSpec; -use crate::checker::create_writability_check_data; use crate::config; use crate::pg_helpers::*; use crate::spec::*; /// Compute node info shared across several `compute_ctl` threads. 
pub struct ComputeNode { - pub start_time: DateTime, // Url type maintains proper escaping pub connstr: url::Url, pub pgdata: String, @@ -65,30 +65,23 @@ pub struct ComputeNode { #[derive(Clone, Debug)] pub struct ComputeState { + pub start_time: DateTime, pub status: ComputeStatus, /// Timestamp of the last Postgres activity pub last_active: DateTime, pub error: Option, - pub spec: ComputeSpec, - pub tenant: String, - pub timeline: String, - pub pageserver_connstr: String, - pub storage_auth_token: Option, - + pub pspec: Option, pub metrics: ComputeMetrics, } impl ComputeState { pub fn new() -> Self { Self { + start_time: Utc::now(), status: ComputeStatus::Empty, last_active: Utc::now(), error: None, - spec: ComputeSpec::default(), - tenant: String::new(), - timeline: String::new(), - pageserver_connstr: String::new(), - storage_auth_token: None, + pspec: None, metrics: ComputeMetrics::default(), } } @@ -100,6 +93,49 @@ impl Default for ComputeState { } } +#[derive(Clone, Debug)] +pub struct ParsedSpec { + pub spec: ComputeSpec, + pub tenant_id: TenantId, + pub timeline_id: TimelineId, + pub pageserver_connstr: String, + pub storage_auth_token: Option, +} + +impl TryFrom for ParsedSpec { + type Error = String; + fn try_from(spec: ComputeSpec) -> Result { + let pageserver_connstr = spec + .cluster + .settings + .find("neon.pageserver_connstring") + .ok_or("pageserver connstr should be provided")?; + let storage_auth_token = spec.storage_auth_token.clone(); + let tenant_id: TenantId = spec + .cluster + .settings + .find("neon.tenant_id") + .ok_or("tenant id should be provided") + .map(|s| TenantId::from_str(&s))? + .or(Err("invalid tenant id"))?; + let timeline_id: TimelineId = spec + .cluster + .settings + .find("neon.timeline_id") + .ok_or("timeline id should be provided") + .map(|s| TimelineId::from_str(&s))? 
+ .or(Err("invalid timeline id"))?; + + Ok(ParsedSpec { + spec, + pageserver_connstr, + storage_auth_token, + tenant_id, + timeline_id, + }) + } +} + impl ComputeNode { pub fn set_status(&self, status: ComputeStatus) { let mut state = self.state.lock().unwrap(); @@ -125,14 +161,15 @@ impl ComputeNode { // Get basebackup from the libpq connection to pageserver using `connstr` and // unarchive it to `pgdata` directory overriding all its previous content. #[instrument(skip(self, compute_state))] - fn get_basebackup(&self, compute_state: &ComputeState, lsn: &str) -> Result<()> { + fn get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> { + let spec = compute_state.pspec.as_ref().expect("spec must be set"); let start_time = Utc::now(); - let mut config = postgres::Config::from_str(&compute_state.pageserver_connstr)?; + let mut config = postgres::Config::from_str(&spec.pageserver_connstr)?; // Use the storage auth token from the config file, if given. // Note: this overrides any password set in the connection string. 
- if let Some(storage_auth_token) = &compute_state.storage_auth_token { + if let Some(storage_auth_token) = &spec.storage_auth_token { info!("Got storage auth token from spec file"); config.password(storage_auth_token); } else { @@ -141,14 +178,8 @@ impl ComputeNode { let mut client = config.connect(NoTls)?; let basebackup_cmd = match lsn { - "0/0" => format!( - "basebackup {} {}", - &compute_state.tenant, &compute_state.timeline - ), // First start of the compute - _ => format!( - "basebackup {} {} {}", - &compute_state.tenant, &compute_state.timeline, lsn - ), + Lsn(0) => format!("basebackup {} {}", spec.tenant_id, spec.timeline_id), // First start of the compute + _ => format!("basebackup {} {} {}", spec.tenant_id, spec.timeline_id, lsn), }; let copyreader = client.copy_out(basebackup_cmd.as_str())?; @@ -172,7 +203,7 @@ impl ComputeNode { // Run `postgres` in a special mode with `--sync-safekeepers` argument // and return the reported LSN back to the caller. #[instrument(skip(self, storage_auth_token))] - fn sync_safekeepers(&self, storage_auth_token: Option) -> Result { + fn sync_safekeepers(&self, storage_auth_token: Option) -> Result { let start_time = Utc::now(); let sync_handle = Command::new(&self.pgbin) @@ -209,7 +240,7 @@ impl ComputeNode { .unwrap() .as_millis() as u64; - let lsn = String::from(String::from_utf8(sync_output.stdout)?.trim()); + let lsn = Lsn::from_str(String::from_utf8(sync_output.stdout)?.trim())?; Ok(lsn) } @@ -218,33 +249,85 @@ impl ComputeNode { /// safekeepers sync, basebackup, etc. 
#[instrument(skip(self, compute_state))] pub fn prepare_pgdata(&self, compute_state: &ComputeState) -> Result<()> { - let spec = &compute_state.spec; + #[derive(Clone)] + enum Replication { + Primary, + Static { lsn: Lsn }, + HotStandby, + } + + let pspec = compute_state.pspec.as_ref().expect("spec must be set"); + let spec = &pspec.spec; let pgdata_path = Path::new(&self.pgdata); + let hot_replica = if let Some(option) = spec.cluster.settings.find_ref("hot_standby") { + if let Some(value) = &option.value { + anyhow::ensure!(option.vartype == "bool"); + matches!(value.as_str(), "on" | "yes" | "true") + } else { + false + } + } else { + false + }; + + let replication = if hot_replica { + Replication::HotStandby + } else if let Some(lsn) = spec.cluster.settings.find("recovery_target_lsn") { + Replication::Static { + lsn: Lsn::from_str(&lsn)?, + } + } else { + Replication::Primary + }; + // Remove/create an empty pgdata directory and put configuration there. self.create_pgdata()?; - config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?; + config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), &pspec.spec)?; - info!("starting safekeepers syncing"); - let lsn = self - .sync_safekeepers(compute_state.storage_auth_token.clone()) - .with_context(|| "failed to sync safekeepers")?; - info!("safekeepers synced at LSN {}", lsn); + // Syncing safekeepers is only safe with primary nodes: if a primary + // is already connected it will be kicked out, so a secondary (standby) + // cannot sync safekeepers. 
+ let lsn = match &replication { + Replication::Primary => { + info!("starting safekeepers syncing"); + let lsn = self + .sync_safekeepers(pspec.storage_auth_token.clone()) + .with_context(|| "failed to sync safekeepers")?; + info!("safekeepers synced at LSN {}", lsn); + lsn + } + Replication::Static { lsn } => { + info!("Starting read-only node at static LSN {}", lsn); + *lsn + } + Replication::HotStandby => { + info!("Initializing standby from latest Pageserver LSN"); + Lsn(0) + } + }; info!( "getting basebackup@{} from pageserver {}", - lsn, &compute_state.pageserver_connstr + lsn, &pspec.pageserver_connstr ); - self.get_basebackup(compute_state, &lsn).with_context(|| { + self.get_basebackup(compute_state, lsn).with_context(|| { format!( "failed to get basebackup@{} from pageserver {}", - lsn, &compute_state.pageserver_connstr + lsn, &pspec.pageserver_connstr ) })?; // Update pg_hba.conf received with basebackup. update_pg_hba(pgdata_path)?; + match &replication { + Replication::Primary | Replication::Static { .. } => {} + Replication::HotStandby => { + add_standby_signal(pgdata_path)?; + } + } + Ok(()) } @@ -306,19 +389,61 @@ impl ComputeNode { }; // Proceed with post-startup configuration. Note, that order of operations is important. 
- handle_roles(&compute_state.spec, &mut client)?; - handle_databases(&compute_state.spec, &mut client)?; - handle_role_deletions(&compute_state.spec, self.connstr.as_str(), &mut client)?; - handle_grants(&compute_state.spec, self.connstr.as_str(), &mut client)?; - create_writability_check_data(&mut client)?; - handle_extensions(&compute_state.spec, &mut client)?; + let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec; + handle_roles(spec, &mut client)?; + handle_databases(spec, &mut client)?; + handle_role_deletions(spec, self.connstr.as_str(), &mut client)?; + handle_grants(spec, self.connstr.as_str(), &mut client)?; + handle_extensions(spec, &mut client)?; // 'Close' connection drop(client); info!( "finished configuration of compute for project {}", - compute_state.spec.cluster.cluster_id + spec.cluster.cluster_id + ); + + Ok(()) + } + + // We could've wrapped this around `pg_ctl reload`, but right now we don't use + // `pg_ctl` for start / stop, so this just seems much easier to do as we already + // have opened connection to Postgres and superuser access. + #[instrument(skip(self, client))] + fn pg_reload_conf(&self, client: &mut Client) -> Result<()> { + client.simple_query("SELECT pg_reload_conf()")?; + Ok(()) + } + + /// Similar to `apply_config()`, but does a bit different sequence of operations, + /// as it's used to reconfigure a previously started and configured Postgres node. + #[instrument(skip(self))] + pub fn reconfigure(&self) -> Result<()> { + let spec = self.state.lock().unwrap().pspec.clone().unwrap().spec; + + // Write new config + let pgdata_path = Path::new(&self.pgdata); + config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), &spec)?; + + let mut client = Client::connect(self.connstr.as_str(), NoTls)?; + self.pg_reload_conf(&mut client)?; + + // Proceed with post-startup configuration. Note, that order of operations is important. 
+ handle_roles(&spec, &mut client)?; + handle_databases(&spec, &mut client)?; + handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?; + handle_grants(&spec, self.connstr.as_str(), &mut client)?; + handle_extensions(&spec, &mut client)?; + + // 'Close' connection + drop(client); + + let unknown_op = "unknown".to_string(); + let op_id = spec.operation_uuid.as_ref().unwrap_or(&unknown_op); + info!( + "finished reconfiguration of compute node for operation {}", + op_id ); Ok(()) @@ -327,19 +452,20 @@ impl ComputeNode { #[instrument(skip(self))] pub fn start_compute(&self) -> Result { let compute_state = self.state.lock().unwrap().clone(); + let spec = compute_state.pspec.as_ref().expect("spec must be set"); info!( "starting compute for project {}, operation {}, tenant {}, timeline {}", - compute_state.spec.cluster.cluster_id, - compute_state.spec.operation_uuid.as_ref().unwrap(), - compute_state.tenant, - compute_state.timeline, + spec.spec.cluster.cluster_id, + spec.spec.operation_uuid.as_deref().unwrap_or("None"), + spec.tenant_id, + spec.timeline_id, ); self.prepare_pgdata(&compute_state)?; let start_time = Utc::now(); - let pg = self.start_postgres(compute_state.storage_auth_token.clone())?; + let pg = self.start_postgres(spec.storage_auth_token.clone())?; self.apply_config(&compute_state)?; @@ -352,7 +478,7 @@ impl ComputeNode { .unwrap() .as_millis() as u64; state.metrics.total_startup_ms = startup_end_time - .signed_duration_since(self.start_time) + .signed_duration_since(compute_state.start_time) .to_std() .unwrap() .as_millis() as u64; diff --git a/compute_tools/src/configurator.rs b/compute_tools/src/configurator.rs new file mode 100644 index 0000000000..a07fd0b8cd --- /dev/null +++ b/compute_tools/src/configurator.rs @@ -0,0 +1,54 @@ +use std::sync::Arc; +use std::thread; + +use anyhow::Result; +use tracing::{error, info, instrument}; + +use compute_api::responses::ComputeStatus; + +use crate::compute::ComputeNode; + 
+#[instrument(skip(compute))] +fn configurator_main_loop(compute: &Arc) { + info!("waiting for reconfiguration requests"); + loop { + let state = compute.state.lock().unwrap(); + let mut state = compute.state_changed.wait(state).unwrap(); + + if state.status == ComputeStatus::ConfigurationPending { + info!("got configuration request"); + state.status = ComputeStatus::Configuration; + compute.state_changed.notify_all(); + drop(state); + + let mut new_status = ComputeStatus::Failed; + if let Err(e) = compute.reconfigure() { + error!("could not configure compute node: {}", e); + } else { + new_status = ComputeStatus::Running; + info!("compute node configured"); + } + + // XXX: used to test that API is blocking + // std::thread::sleep(std::time::Duration::from_millis(10000)); + + compute.set_status(new_status); + } else if state.status == ComputeStatus::Failed { + info!("compute node is now in Failed state, exiting"); + break; + } else { + info!("woken up for compute status: {:?}, sleeping", state.status); + } + } +} + +pub fn launch_configurator(compute: &Arc) -> Result> { + let compute = Arc::clone(compute); + + Ok(thread::Builder::new() + .name("compute-configurator".into()) + .spawn(move || { + configurator_main_loop(&compute); + info!("configurator thread is exited"); + })?) 
+} diff --git a/compute_tools/src/http/api.rs b/compute_tools/src/http/api.rs index cea45dc596..4468f6f5e4 100644 --- a/compute_tools/src/http/api.rs +++ b/compute_tools/src/http/api.rs @@ -3,7 +3,7 @@ use std::net::SocketAddr; use std::sync::Arc; use std::thread; -use crate::compute::{ComputeNode, ComputeState}; +use crate::compute::{ComputeNode, ComputeState, ParsedSpec}; use compute_api::requests::ConfigurationRequest; use compute_api::responses::{ComputeStatus, ComputeStatusResponse, GenericAPIError}; @@ -18,8 +18,15 @@ use tracing_utils::http::OtelName; fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse { ComputeStatusResponse { - tenant: state.tenant.clone(), - timeline: state.timeline.clone(), + start_time: state.start_time, + tenant: state + .pspec + .as_ref() + .map(|pspec| pspec.tenant_id.to_string()), + timeline: state + .pspec + .as_ref() + .map(|pspec| pspec.timeline_id.to_string()), status: state.status, last_active: state.last_active, error: state.error.clone(), @@ -79,7 +86,10 @@ async fn routes(req: Request, compute: &Arc) -> Response Response::new(Body::from("true")), - Err(e) => Response::new(Body::from(e.to_string())), + Err(e) => { + error!("check_writability failed: {}", e); + Response::new(Body::from(e.to_string())) + } } } @@ -135,6 +145,12 @@ async fn handle_configure_request( let spec_raw = String::from_utf8(body_bytes.to_vec()).unwrap(); if let Ok(request) = serde_json::from_str::(&spec_raw) { let spec = request.spec; + + let parsed_spec = match ParsedSpec::try_from(spec) { + Ok(ps) => ps, + Err(msg) => return Err((msg, StatusCode::PRECONDITION_FAILED)), + }; + // XXX: wrap state update under lock in code blocks. 
Otherwise, // we will try to `Send` `mut state` into the spawned thread // bellow, which will cause error: @@ -143,14 +159,14 @@ async fn handle_configure_request( // ``` { let mut state = compute.state.lock().unwrap(); - if state.status != ComputeStatus::Empty { + if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running { let msg = format!( "invalid compute status for configuration request: {:?}", state.status.clone() ); return Err((msg, StatusCode::PRECONDITION_FAILED)); } - state.spec = spec; + state.pspec = Some(parsed_spec); state.status = ComputeStatus::ConfigurationPending; compute.state_changed.notify_all(); drop(state); diff --git a/compute_tools/src/http/openapi_spec.yaml b/compute_tools/src/http/openapi_spec.yaml index bdb09d4a6b..cc8f074a50 100644 --- a/compute_tools/src/http/openapi_spec.yaml +++ b/compute_tools/src/http/openapi_spec.yaml @@ -152,11 +152,14 @@ components: type: object description: Compute startup metrics. required: + - wait_for_spec_ms - sync_safekeepers_ms - basebackup_ms - config_ms - total_startup_ms properties: + wait_for_spec_ms: + type: integer sync_safekeepers_ms: type: integer basebackup_ms: @@ -181,6 +184,13 @@ components: - status - last_active properties: + start_time: + type: string + description: | + Time when compute was started. If initially compute was started in the `empty` + state and then provided with valid spec, `start_time` will be reset to the + moment, when spec was received. + example: "2022-10-12T07:20:50.52Z" status: $ref: '#/components/schemas/ComputeStatus' last_active: diff --git a/compute_tools/src/lib.rs b/compute_tools/src/lib.rs index aee6b53e6a..24811f75ee 100644 --- a/compute_tools/src/lib.rs +++ b/compute_tools/src/lib.rs @@ -4,6 +4,7 @@ //! 
pub mod checker; pub mod config; +pub mod configurator; pub mod http; #[macro_use] pub mod logger; diff --git a/compute_tools/src/pg_helpers.rs b/compute_tools/src/pg_helpers.rs index bb787d0506..40dbea6907 100644 --- a/compute_tools/src/pg_helpers.rs +++ b/compute_tools/src/pg_helpers.rs @@ -94,6 +94,7 @@ impl PgOptionsSerialize for GenericOptions { pub trait GenericOptionsSearch { fn find(&self, name: &str) -> Option; + fn find_ref(&self, name: &str) -> Option<&GenericOption>; } impl GenericOptionsSearch for GenericOptions { @@ -103,6 +104,12 @@ impl GenericOptionsSearch for GenericOptions { let op = ops.iter().find(|s| s.name == name)?; op.value.clone() } + + /// Lookup option by name, returning ref + fn find_ref(&self, name: &str) -> Option<&GenericOption> { + let ops = self.as_ref()?; + ops.iter().find(|s| s.name == name) + } } pub trait RoleExt { diff --git a/compute_tools/src/spec.rs b/compute_tools/src/spec.rs index 2350113c39..bf3c407202 100644 --- a/compute_tools/src/spec.rs +++ b/compute_tools/src/spec.rs @@ -1,38 +1,121 @@ +use std::fs::File; use std::path::Path; use std::str::FromStr; -use anyhow::Result; +use anyhow::{anyhow, bail, Result}; use postgres::config::Config; use postgres::{Client, NoTls}; -use tracing::{info, info_span, instrument, span_enabled, warn, Level}; +use reqwest::StatusCode; +use tracing::{error, info, info_span, instrument, span_enabled, warn, Level}; use crate::config; use crate::params::PG_HBA_ALL_MD5; use crate::pg_helpers::*; +use compute_api::responses::{ControlPlaneComputeStatus, ControlPlaneSpecResponse}; use compute_api::spec::{ComputeSpec, Database, PgIdent, Role}; +// Do control plane request and return response if any. In case of error it +// returns a bool flag indicating whether it makes sense to retry the request +// and a string with error message. 
+fn do_control_plane_request( + uri: &str, + jwt: &str, +) -> Result { + let resp = reqwest::blocking::Client::new() + .get(uri) + .header("Authorization", jwt) + .send() + .map_err(|e| { + ( + true, + format!("could not perform spec request to control plane: {}", e), + ) + })?; + + match resp.status() { + StatusCode::OK => match resp.json::() { + Ok(spec_resp) => Ok(spec_resp), + Err(e) => Err(( + true, + format!("could not deserialize control plane response: {}", e), + )), + }, + StatusCode::SERVICE_UNAVAILABLE => { + Err((true, "control plane is temporarily unavailable".to_string())) + } + StatusCode::BAD_GATEWAY => { + // We have a problem with intermittent 502 errors now + // https://github.com/neondatabase/cloud/issues/2353 + // It's fine to retry GET request in this case. + Err((true, "control plane request failed with 502".to_string())) + } + // Another code, likely 500 or 404, means that compute is unknown to the control plane + // or some internal failure happened. Doesn't make much sense to retry in this case. + _ => Err(( + false, + format!( + "unexpected control plane response status code: {}", + resp.status() + ), + )), + } +} + /// Request spec from the control-plane by compute_id. If `NEON_CONSOLE_JWT` /// env variable is set, it will be used for authorization. -pub fn get_spec_from_control_plane(base_uri: &str, compute_id: &str) -> Result { +pub fn get_spec_from_control_plane( + base_uri: &str, + compute_id: &str, +) -> Result> { let cp_uri = format!("{base_uri}/management/api/v2/computes/{compute_id}/spec"); - let jwt: String = match std::env::var("NEON_CONSOLE_JWT") { + let jwt: String = match std::env::var("NEON_CONTROL_PLANE_TOKEN") { Ok(v) => v, Err(_) => "".to_string(), }; + let mut attempt = 1; + let mut spec: Result> = Ok(None); + info!("getting spec from control plane: {}", cp_uri); - // TODO: check the response. 
We should distinguish cases when it's - // - network error, then retry - // - no spec for compute yet, then wait - // - compute id is unknown or any other error, then bail out - let spec = reqwest::blocking::Client::new() - .get(cp_uri) - .header("Authorization", jwt) - .send()? - .json()?; + // Do 3 attempts to get spec from the control plane using the following logic: + // - network error -> then retry + // - compute id is unknown or any other error -> bail out + // - no spec for compute yet (Empty state) -> return Ok(None) + // - got spec -> return Ok(Some(spec)) + while attempt < 4 { + spec = match do_control_plane_request(&cp_uri, &jwt) { + Ok(spec_resp) => match spec_resp.status { + ControlPlaneComputeStatus::Empty => Ok(None), + ControlPlaneComputeStatus::Attached => { + if let Some(spec) = spec_resp.spec { + Ok(Some(spec)) + } else { + bail!("compute is attached, but spec is empty") + } + } + }, + Err((retry, msg)) => { + if retry { + Err(anyhow!(msg)) + } else { + bail!(msg); + } + } + }; - Ok(spec) + if let Err(e) = &spec { + error!("attempt {} to get spec failed with: {}", attempt, e); + } else { + return spec; + } + + attempt += 1; + std::thread::sleep(std::time::Duration::from_millis(100)); + } + + // All attempts failed, return error. + spec } /// It takes cluster specification and does the following: @@ -63,6 +146,21 @@ pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> { Ok(()) } +/// Create a standby.signal file +pub fn add_standby_signal(pgdata_path: &Path) -> Result<()> { + // XXX: consider making it a part of spec.json + info!("adding standby.signal"); + let signalfile = pgdata_path.join("standby.signal"); + + if !signalfile.exists() { + info!("created standby.signal"); + File::create(signalfile)?; + } else { + info!("reused pre-existing standby.signal"); + } + Ok(()) +} + /// Given a cluster spec json and open transaction it handles roles creation, /// deletion and update. 
#[instrument(skip_all)] diff --git a/control_plane/src/bin/neon_local.rs b/control_plane/src/bin/neon_local.rs index a9b66f479a..09278e1726 100644 --- a/control_plane/src/bin/neon_local.rs +++ b/control_plane/src/bin/neon_local.rs @@ -7,7 +7,8 @@ //! use anyhow::{anyhow, bail, Context, Result}; use clap::{value_parser, Arg, ArgAction, ArgMatches, Command}; -use control_plane::compute::ComputeControlPlane; +use control_plane::endpoint::ComputeControlPlane; +use control_plane::endpoint::Replication; use control_plane::local_env::LocalEnv; use control_plane::pageserver::PageServerNode; use control_plane::safekeeper::SafekeeperNode; @@ -106,8 +107,9 @@ fn main() -> Result<()> { "start" => handle_start_all(sub_args, &env), "stop" => handle_stop_all(sub_args, &env), "pageserver" => handle_pageserver(sub_args, &env), - "pg" => handle_pg(sub_args, &env), "safekeeper" => handle_safekeeper(sub_args, &env), + "endpoint" => handle_endpoint(sub_args, &env), + "pg" => bail!("'pg' subcommand has been renamed to 'endpoint'"), _ => bail!("unexpected subcommand {sub_name}"), }; @@ -470,10 +472,17 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) - let mut cplane = ComputeControlPlane::load(env.clone())?; println!("Importing timeline into pageserver ..."); pageserver.timeline_import(tenant_id, timeline_id, base, pg_wal, pg_version)?; - println!("Creating node for imported timeline ..."); env.register_branch_mapping(name.to_string(), tenant_id, timeline_id)?; - cplane.new_node(tenant_id, name, timeline_id, None, None, pg_version)?; + println!("Creating endpoint for imported timeline ..."); + cplane.new_endpoint( + tenant_id, + name, + timeline_id, + None, + pg_version, + Replication::Primary, + )?; println!("Done"); } Some(("branch", branch_match)) => { @@ -521,10 +530,10 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) - Ok(()) } -fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { - let 
(sub_name, sub_args) = match pg_match.subcommand() { - Some(pg_subcommand_data) => pg_subcommand_data, - None => bail!("no pg subcommand provided"), +fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { + let (sub_name, sub_args) = match ep_match.subcommand() { + Some(ep_subcommand_data) => ep_subcommand_data, + None => bail!("no endpoint subcommand provided"), }; let mut cplane = ComputeControlPlane::load(env.clone())?; @@ -546,7 +555,7 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { table.load_preset(comfy_table::presets::NOTHING); table.set_header([ - "NODE", + "ENDPOINT", "ADDRESS", "TIMELINE", "BRANCH NAME", @@ -554,39 +563,39 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { "STATUS", ]); - for ((_, node_name), node) in cplane - .nodes + for (endpoint_id, endpoint) in cplane + .endpoints .iter() - .filter(|((node_tenant_id, _), _)| node_tenant_id == &tenant_id) + .filter(|(_, endpoint)| endpoint.tenant_id == tenant_id) { - let lsn_str = match node.lsn { - None => { - // -> primary node - // Use the LSN at the end of the timeline. - timeline_infos - .get(&node.timeline_id) - .map(|bi| bi.last_record_lsn.to_string()) - .unwrap_or_else(|| "?".to_string()) - } - Some(lsn) => { - // -> read-only node + let lsn_str = match endpoint.replication { + Replication::Static(lsn) => { + // -> read-only endpoint // Use the node's LSN. lsn.to_string() } + _ => { + // -> primary endpoint or hot replica + // Use the LSN at the end of the timeline. 
+ timeline_infos + .get(&endpoint.timeline_id) + .map(|bi| bi.last_record_lsn.to_string()) + .unwrap_or_else(|| "?".to_string()) + } }; let branch_name = timeline_name_mappings - .get(&TenantTimelineId::new(tenant_id, node.timeline_id)) + .get(&TenantTimelineId::new(tenant_id, endpoint.timeline_id)) .map(|name| name.as_str()) .unwrap_or("?"); table.add_row([ - node_name.as_str(), - &node.address.to_string(), - &node.timeline_id.to_string(), + endpoint_id.as_str(), + &endpoint.address.to_string(), + &endpoint.timeline_id.to_string(), branch_name, lsn_str.as_str(), - node.status(), + endpoint.status(), ]); } @@ -597,10 +606,10 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { .get_one::("branch-name") .map(|s| s.as_str()) .unwrap_or(DEFAULT_BRANCH_NAME); - let node_name = sub_args - .get_one::("node") - .map(|node_name| node_name.to_string()) - .unwrap_or_else(|| format!("{branch_name}_node")); + let endpoint_id = sub_args + .get_one::("endpoint_id") + .map(String::to_string) + .unwrap_or_else(|| format!("ep-{branch_name}")); let lsn = sub_args .get_one::("lsn") @@ -618,15 +627,34 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { .copied() .context("Failed to parse postgres version from the argument string")?; - cplane.new_node(tenant_id, &node_name, timeline_id, lsn, port, pg_version)?; + let hot_standby = sub_args + .get_one::("hot-standby") + .copied() + .unwrap_or(false); + + let replication = match (lsn, hot_standby) { + (Some(lsn), false) => Replication::Static(lsn), + (None, true) => Replication::Replica, + (None, false) => Replication::Primary, + (Some(_), true) => anyhow::bail!("cannot specify both lsn and hot-standby"), + }; + + cplane.new_endpoint( + tenant_id, + &endpoint_id, + timeline_id, + port, + pg_version, + replication, + )?; } "start" => { let port: Option = sub_args.get_one::("port").copied(); - let node_name = sub_args - .get_one::("node") - .ok_or_else(|| anyhow!("No node name was 
provided to start"))?; + let endpoint_id = sub_args + .get_one::("endpoint_id") + .ok_or_else(|| anyhow!("No endpoint ID was provided to start"))?; - let node = cplane.nodes.get(&(tenant_id, node_name.to_string())); + let endpoint = cplane.endpoints.get(endpoint_id.as_str()); let auth_token = if matches!(env.pageserver.pg_auth_type, AuthType::NeonJWT) { let claims = Claims::new(Some(tenant_id), Scope::Tenant); @@ -636,9 +664,23 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { None }; - if let Some(node) = node { - println!("Starting existing postgres {node_name}..."); - node.start(&auth_token)?; + let hot_standby = sub_args + .get_one::("hot-standby") + .copied() + .unwrap_or(false); + + if let Some(endpoint) = endpoint { + match (&endpoint.replication, hot_standby) { + (Replication::Static(_), true) => { + bail!("Cannot start a node in hot standby mode when it is already configured as a static replica") + } + (Replication::Primary, true) => { + bail!("Cannot start a node as a hot standby replica, it is already configured as primary node") + } + _ => {} + } + println!("Starting existing endpoint {endpoint_id}..."); + endpoint.start(&auth_token)?; } else { let branch_name = sub_args .get_one::("branch-name") @@ -658,32 +700,46 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { .get_one::("pg-version") .copied() .context("Failed to `pg-version` from the argument string")?; + + let replication = match (lsn, hot_standby) { + (Some(lsn), false) => Replication::Static(lsn), + (None, true) => Replication::Replica, + (None, false) => Replication::Primary, + (Some(_), true) => anyhow::bail!("cannot specify both lsn and hot-standby"), + }; + // when used with custom port this results in non obvious behaviour // port is remembered from first start command, i e // start --port X // stop // start <-- will also use port X even without explicit port argument - println!("Starting new postgres (v{pg_version}) 
{node_name} on timeline {timeline_id} ..."); + println!("Starting new endpoint {endpoint_id} (PostgreSQL v{pg_version}) on timeline {timeline_id} ..."); - let node = - cplane.new_node(tenant_id, node_name, timeline_id, lsn, port, pg_version)?; - node.start(&auth_token)?; + let ep = cplane.new_endpoint( + tenant_id, + endpoint_id, + timeline_id, + port, + pg_version, + replication, + )?; + ep.start(&auth_token)?; } } "stop" => { - let node_name = sub_args - .get_one::("node") - .ok_or_else(|| anyhow!("No node name was provided to stop"))?; + let endpoint_id = sub_args + .get_one::("endpoint_id") + .ok_or_else(|| anyhow!("No endpoint ID was provided to stop"))?; let destroy = sub_args.get_flag("destroy"); - let node = cplane - .nodes - .get(&(tenant_id, node_name.to_string())) - .with_context(|| format!("postgres {node_name} is not found"))?; - node.stop(destroy)?; + let endpoint = cplane + .endpoints + .get(endpoint_id.as_str()) + .with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?; + endpoint.stop(destroy)?; } - _ => bail!("Unexpected pg subcommand '{sub_name}'"), + _ => bail!("Unexpected endpoint subcommand '{sub_name}'"), } Ok(()) @@ -802,7 +858,7 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul } fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::Result<()> { - // Postgres nodes are not started automatically + // Endpoints are not started automatically broker::start_broker_process(env)?; @@ -836,10 +892,10 @@ fn handle_stop_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result< fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) { let pageserver = PageServerNode::from_env(env); - // Stop all compute nodes + // Stop all endpoints match ComputeControlPlane::load(env.clone()) { Ok(cplane) => { - for (_k, node) in cplane.nodes { + for (_k, node) in cplane.endpoints { if let Err(e) = node.stop(false) { eprintln!("postgres stop failed: {e:#}"); } @@ -872,7 
+928,9 @@ fn cli() -> Command { .help("Name of the branch to be created or used as an alias for other services") .required(false); - let pg_node_arg = Arg::new("node").help("Postgres node name").required(false); + let endpoint_id_arg = Arg::new("endpoint_id") + .help("Postgres endpoint id") + .required(false); let safekeeper_id_arg = Arg::new("id").help("safekeeper id").required(false); @@ -919,6 +977,12 @@ fn cli() -> Command { .help("Specify Lsn on the timeline to start from. By default, end of the timeline would be used.") .required(false); + let hot_standby_arg = Arg::new("hot-standby") + .value_parser(value_parser!(bool)) + .long("hot-standby") + .help("If set, the node will be a hot replica on the specified timeline") + .required(false); + Command::new("Neon CLI") .arg_required_else_help(true) .version(GIT_VERSION) @@ -1026,37 +1090,39 @@ fn cli() -> Command { ) ) .subcommand( - Command::new("pg") + Command::new("endpoint") .arg_required_else_help(true) .about("Manage postgres instances") .subcommand(Command::new("list").arg(tenant_id_arg.clone())) .subcommand(Command::new("create") - .about("Create a postgres compute node") - .arg(pg_node_arg.clone()) + .about("Create a compute endpoint") + .arg(endpoint_id_arg.clone()) .arg(branch_name_arg.clone()) .arg(tenant_id_arg.clone()) .arg(lsn_arg.clone()) .arg(port_arg.clone()) .arg( Arg::new("config-only") - .help("Don't do basebackup, create compute node with only config files") + .help("Don't do basebackup, create endpoint directory with only config files") .long("config-only") .required(false)) .arg(pg_version_arg.clone()) + .arg(hot_standby_arg.clone()) ) .subcommand(Command::new("start") - .about("Start a postgres compute node.\n This command actually creates new node from scratch, but preserves existing config files") - .arg(pg_node_arg.clone()) + .about("Start postgres.\n If the endpoint doesn't exist yet, it is created.") + .arg(endpoint_id_arg.clone()) .arg(tenant_id_arg.clone()) .arg(branch_name_arg) 
.arg(timeline_id_arg) .arg(lsn_arg) .arg(port_arg) .arg(pg_version_arg) + .arg(hot_standby_arg) ) .subcommand( Command::new("stop") - .arg(pg_node_arg) + .arg(endpoint_id_arg) .arg(tenant_id_arg) .arg( Arg::new("destroy") @@ -1068,6 +1134,13 @@ fn cli() -> Command { ) ) + // Obsolete old name for 'endpoint'. We now just print an error if it's used. + .subcommand( + Command::new("pg") + .hide(true) + .arg(Arg::new("ignore-rest").allow_hyphen_values(true).num_args(0..).required(false)) + .trailing_var_arg(true) + ) .subcommand( Command::new("start") .about("Start page server and safekeepers") diff --git a/control_plane/src/compute.rs b/control_plane/src/endpoint.rs similarity index 61% rename from control_plane/src/compute.rs rename to control_plane/src/endpoint.rs index bc81107706..7d3485518f 100644 --- a/control_plane/src/compute.rs +++ b/control_plane/src/endpoint.rs @@ -25,107 +25,117 @@ use crate::postgresql_conf::PostgresConf; // pub struct ComputeControlPlane { base_port: u16, - pageserver: Arc, - pub nodes: BTreeMap<(TenantId, String), Arc>, + + // endpoint ID is the key + pub endpoints: BTreeMap>, + env: LocalEnv, + pageserver: Arc, } impl ComputeControlPlane { - // Load current nodes with ports from data directories on disk - // Directory structure has the following layout: - // pgdatadirs - // |- tenants - // | |- - // | | |- + // Load current endpoints from the endpoints/ subdirectories pub fn load(env: LocalEnv) -> Result { let pageserver = Arc::new(PageServerNode::from_env(&env)); - let mut nodes = BTreeMap::default(); - let pgdatadirspath = &env.pg_data_dirs_path(); - - for tenant_dir in fs::read_dir(pgdatadirspath) - .with_context(|| format!("failed to list {}", pgdatadirspath.display()))? + let mut endpoints = BTreeMap::default(); + for endpoint_dir in fs::read_dir(env.endpoints_path()) + .with_context(|| format!("failed to list {}", env.endpoints_path().display()))? 
{ - let tenant_dir = tenant_dir?; - for timeline_dir in fs::read_dir(tenant_dir.path()) - .with_context(|| format!("failed to list {}", tenant_dir.path().display()))? - { - let node = PostgresNode::from_dir_entry(timeline_dir?, &env, &pageserver)?; - nodes.insert((node.tenant_id, node.name.clone()), Arc::new(node)); - } + let ep = Endpoint::from_dir_entry(endpoint_dir?, &env, &pageserver)?; + endpoints.insert(ep.name.clone(), Arc::new(ep)); } Ok(ComputeControlPlane { base_port: 55431, - pageserver, - nodes, + endpoints, env, + pageserver, }) } fn get_port(&mut self) -> u16 { 1 + self - .nodes + .endpoints .values() - .map(|node| node.address.port()) + .map(|ep| ep.address.port()) .max() .unwrap_or(self.base_port) } - pub fn new_node( + pub fn new_endpoint( &mut self, tenant_id: TenantId, name: &str, timeline_id: TimelineId, - lsn: Option, port: Option, pg_version: u32, - ) -> Result> { + replication: Replication, + ) -> Result> { let port = port.unwrap_or_else(|| self.get_port()); - let node = Arc::new(PostgresNode { + + let ep = Arc::new(Endpoint { name: name.to_owned(), address: SocketAddr::new("127.0.0.1".parse().unwrap(), port), env: self.env.clone(), pageserver: Arc::clone(&self.pageserver), timeline_id, - lsn, + replication, tenant_id, pg_version, }); - node.create_pgdata()?; - node.setup_pg_conf()?; + ep.create_pgdata()?; + ep.setup_pg_conf()?; - self.nodes - .insert((tenant_id, node.name.clone()), Arc::clone(&node)); + self.endpoints.insert(ep.name.clone(), Arc::clone(&ep)); - Ok(node) + Ok(ep) } } /////////////////////////////////////////////////////////////////////////////// -#[derive(Debug)] -pub struct PostgresNode { - pub address: SocketAddr, - name: String, - pub env: LocalEnv, - pageserver: Arc, - pub timeline_id: TimelineId, - pub lsn: Option, // if it's a read-only node. 
None for primary - pub tenant_id: TenantId, - pg_version: u32, +#[derive(Debug, Clone, Eq, PartialEq)] +pub enum Replication { + // Regular read-write node + Primary, + // if recovery_target_lsn is provided, and we want to pin the node to a specific LSN + Static(Lsn), + // Hot standby; read-only replica. + // Future versions may want to distinguish between replicas with hot standby + // feedback and other kinds of replication configurations. + Replica, } -impl PostgresNode { +#[derive(Debug)] +pub struct Endpoint { + /// used as the directory name + name: String, + pub tenant_id: TenantId, + pub timeline_id: TimelineId, + // Some(lsn) if this is a read-only endpoint anchored at 'lsn'. None for the primary. + pub replication: Replication, + + // port and address of the Postgres server + pub address: SocketAddr, + pg_version: u32, + + // These are not part of the endpoint as such, but the environment + // the endpoint runs in. + pub env: LocalEnv, + pageserver: Arc, +} + +impl Endpoint { fn from_dir_entry( entry: std::fs::DirEntry, env: &LocalEnv, pageserver: &Arc, - ) -> Result { + ) -> Result { if !entry.file_type()?.is_dir() { anyhow::bail!( - "PostgresNode::from_dir_entry failed: '{}' is not a directory", + "Endpoint::from_dir_entry failed: '{}' is not a directory", entry.path().display() ); } @@ -135,7 +145,7 @@ impl PostgresNode { let name = fname.to_str().unwrap().to_string(); // Read config file into memory - let cfg_path = entry.path().join("postgresql.conf"); + let cfg_path = entry.path().join("pgdata").join("postgresql.conf"); let cfg_path_str = cfg_path.to_string_lossy(); let mut conf_file = File::open(&cfg_path) .with_context(|| format!("failed to open config file in {}", cfg_path_str))?; @@ -156,18 +166,26 @@ impl PostgresNode { fs::read_to_string(pg_version_path).unwrap_or_else(|_| DEFAULT_PG_VERSION.to_string()); let pg_version = u32::from_str(&pg_version_str)?; - // parse recovery_target_lsn, if any - let recovery_target_lsn: Option = - 
conf.parse_field_optional("recovery_target_lsn", &context)?; + // parse recovery_target_lsn and primary_conninfo into Recovery Target, if any + let replication = if let Some(lsn_str) = conf.get("recovery_target_lsn") { + Replication::Static(Lsn::from_str(lsn_str)?) + } else if let Some(slot_name) = conf.get("primary_slot_name") { + let slot_name = slot_name.to_string(); + let prefix = format!("repl_{}_", timeline_id); + assert!(slot_name.starts_with(&prefix)); + Replication::Replica + } else { + Replication::Primary + }; // ok now - Ok(PostgresNode { + Ok(Endpoint { address: SocketAddr::new("127.0.0.1".parse().unwrap(), port), name, env: env.clone(), pageserver: Arc::clone(pageserver), timeline_id, - lsn: recovery_target_lsn, + replication, tenant_id, pg_version, }) @@ -269,7 +287,7 @@ impl PostgresNode { } // Write postgresql.conf with default configuration - // and PG_VERSION file to the data directory of a new node. + // and PG_VERSION file to the data directory of a new endpoint. fn setup_pg_conf(&self) -> Result<()> { let mut conf = PostgresConf::new(); conf.append("max_wal_senders", "10"); @@ -289,7 +307,7 @@ impl PostgresNode { // walproposer panics when basebackup is invalid, it is pointless to restart in this case. 
conf.append("restart_after_crash", "off"); - // Configure the node to fetch pages from pageserver + // Configure the Neon Postgres extension to fetch pages from pageserver let pageserver_connstr = { let config = &self.pageserver.pg_connection_config; let (host, port) = (config.host(), config.port()); @@ -302,50 +320,83 @@ impl PostgresNode { conf.append("neon.pageserver_connstring", &pageserver_connstr); conf.append("neon.tenant_id", &self.tenant_id.to_string()); conf.append("neon.timeline_id", &self.timeline_id.to_string()); - if let Some(lsn) = self.lsn { - conf.append("recovery_target_lsn", &lsn.to_string()); - } conf.append_line(""); - // Configure backpressure - // - Replication write lag depends on how fast the walreceiver can process incoming WAL. - // This lag determines latency of get_page_at_lsn. Speed of applying WAL is about 10MB/sec, - // so to avoid expiration of 1 minute timeout, this lag should not be larger than 600MB. - // Actually latency should be much smaller (better if < 1sec). But we assume that recently - // updates pages are not requested from pageserver. - // - Replication flush lag depends on speed of persisting data by checkpointer (creation of - // delta/image layers) and advancing disk_consistent_lsn. Safekeepers are able to - // remove/archive WAL only beyond disk_consistent_lsn. Too large a lag can cause long - // recovery time (in case of pageserver crash) and disk space overflow at safekeepers. - // - Replication apply lag depends on speed of uploading changes to S3 by uploader thread. - // To be able to restore database in case of pageserver node crash, safekeeper should not - // remove WAL beyond this point. Too large lag can cause space exhaustion in safekeepers - // (if they are not able to upload WAL to S3). 
- conf.append("max_replication_write_lag", "15MB"); - conf.append("max_replication_flush_lag", "10GB"); + // Replication-related configurations, such as WAL sending + match &self.replication { + Replication::Primary => { + // Configure backpressure + // - Replication write lag depends on how fast the walreceiver can process incoming WAL. + // This lag determines latency of get_page_at_lsn. Speed of applying WAL is about 10MB/sec, + // so to avoid expiration of 1 minute timeout, this lag should not be larger than 600MB. + // Actually latency should be much smaller (better if < 1sec). But we assume that recently + // updates pages are not requested from pageserver. + // - Replication flush lag depends on speed of persisting data by checkpointer (creation of + // delta/image layers) and advancing disk_consistent_lsn. Safekeepers are able to + // remove/archive WAL only beyond disk_consistent_lsn. Too large a lag can cause long + // recovery time (in case of pageserver crash) and disk space overflow at safekeepers. + // - Replication apply lag depends on speed of uploading changes to S3 by uploader thread. + // To be able to restore database in case of pageserver node crash, safekeeper should not + // remove WAL beyond this point. Too large lag can cause space exhaustion in safekeepers + // (if they are not able to upload WAL to S3). 
+ conf.append("max_replication_write_lag", "15MB"); + conf.append("max_replication_flush_lag", "10GB"); - if !self.env.safekeepers.is_empty() { - // Configure the node to connect to the safekeepers - conf.append("synchronous_standby_names", "walproposer"); + if !self.env.safekeepers.is_empty() { + // Configure Postgres to connect to the safekeepers + conf.append("synchronous_standby_names", "walproposer"); - let safekeepers = self - .env - .safekeepers - .iter() - .map(|sk| format!("localhost:{}", sk.pg_port)) - .collect::>() - .join(","); - conf.append("neon.safekeepers", &safekeepers); - } else { - // We only use setup without safekeepers for tests, - // and don't care about data durability on pageserver, - // so set more relaxed synchronous_commit. - conf.append("synchronous_commit", "remote_write"); + let safekeepers = self + .env + .safekeepers + .iter() + .map(|sk| format!("localhost:{}", sk.pg_port)) + .collect::>() + .join(","); + conf.append("neon.safekeepers", &safekeepers); + } else { + // We only use setup without safekeepers for tests, + // and don't care about data durability on pageserver, + // so set more relaxed synchronous_commit. + conf.append("synchronous_commit", "remote_write"); - // Configure the node to stream WAL directly to the pageserver - // This isn't really a supported configuration, but can be useful for - // testing. - conf.append("synchronous_standby_names", "pageserver"); + // Configure the node to stream WAL directly to the pageserver + // This isn't really a supported configuration, but can be useful for + // testing. + conf.append("synchronous_standby_names", "pageserver"); + } + } + Replication::Static(lsn) => { + conf.append("recovery_target_lsn", &lsn.to_string()); + } + Replication::Replica => { + assert!(!self.env.safekeepers.is_empty()); + + // TODO: use future host field from safekeeper spec + // Pass the list of safekeepers to the replica so that it can connect to any of them, + // whichever is availiable. 
+ let sk_ports = self + .env + .safekeepers + .iter() + .map(|x| x.pg_port.to_string()) + .collect::>() + .join(","); + let sk_hosts = vec!["localhost"; self.env.safekeepers.len()].join(","); + + let connstr = format!( + "host={} port={} options='-c timeline_id={} tenant_id={}' application_name=replica replication=true", + sk_hosts, + sk_ports, + &self.timeline_id.to_string(), + &self.tenant_id.to_string(), + ); + + let slot_name = format!("repl_{}_", self.timeline_id); + conf.append("primary_conninfo", connstr.as_str()); + conf.append("primary_slot_name", slot_name.as_str()); + conf.append("hot_standby", "on"); + } } let mut file = File::create(self.pgdata().join("postgresql.conf"))?; @@ -358,21 +409,27 @@ impl PostgresNode { } fn load_basebackup(&self, auth_token: &Option) -> Result<()> { - let backup_lsn = if let Some(lsn) = self.lsn { - Some(lsn) - } else if !self.env.safekeepers.is_empty() { - // LSN 0 means that it is bootstrap and we need to download just - // latest data from the pageserver. That is a bit clumsy but whole bootstrap - // procedure evolves quite actively right now, so let's think about it again - // when things would be more stable (TODO). - let lsn = self.sync_safekeepers(auth_token, self.pg_version)?; - if lsn == Lsn(0) { - None - } else { - Some(lsn) + let backup_lsn = match &self.replication { + Replication::Primary => { + if !self.env.safekeepers.is_empty() { + // LSN 0 means that it is bootstrap and we need to download just + // latest data from the pageserver. That is a bit clumsy but whole bootstrap + // procedure evolves quite actively right now, so let's think about it again + // when things would be more stable (TODO). 
+ let lsn = self.sync_safekeepers(auth_token, self.pg_version)?; + if lsn == Lsn(0) { + None + } else { + Some(lsn) + } + } else { + None + } + } + Replication::Static(lsn) => Some(*lsn), + Replication::Replica => { + None // Take the latest snapshot available to start with } - } else { - None }; self.do_basebackup(backup_lsn)?; @@ -380,8 +437,12 @@ impl PostgresNode { Ok(()) } + pub fn endpoint_path(&self) -> PathBuf { + self.env.endpoints_path().join(&self.name) + } + pub fn pgdata(&self) -> PathBuf { - self.env.pg_data_dir(&self.tenant_id, &self.name) + self.endpoint_path().join("pgdata") } pub fn status(&self) -> &str { @@ -443,12 +504,11 @@ impl PostgresNode { } pub fn start(&self, auth_token: &Option) -> Result<()> { - // Bail if the node already running. if self.status() == "running" { - anyhow::bail!("The node is already running"); + anyhow::bail!("The endpoint is already running"); } - // 1. We always start compute node from scratch, so + // 1. We always start Postgres from scratch, so // if old dir exists, preserve 'postgresql.conf' and drop the directory let postgresql_conf_path = self.pgdata().join("postgresql.conf"); let postgresql_conf = fs::read(&postgresql_conf_path).with_context(|| { @@ -466,12 +526,12 @@ impl PostgresNode { // 3. Load basebackup self.load_basebackup(auth_token)?; - if self.lsn.is_some() { + if self.replication != Replication::Primary { File::create(self.pgdata().join("standby.signal"))?; } - // 4. Finally start the compute node postgres - println!("Starting postgres node at '{}'", self.connstr()); + // 4. Finally start postgres + println!("Starting postgres at '{}'", self.connstr()); self.pg_ctl(&["start"], auth_token) } @@ -480,7 +540,7 @@ impl PostgresNode { // use immediate shutdown mode, otherwise, // shutdown gracefully to leave the data directory sane. // - // Compute node always starts from scratch, so stop + // Postgres is always started from scratch, so stop // without destroy only used for testing and debugging. 
// if destroy { @@ -489,7 +549,7 @@ impl PostgresNode { "Destroying postgres data directory '{}'", self.pgdata().to_str().unwrap() ); - fs::remove_dir_all(self.pgdata())?; + fs::remove_dir_all(self.endpoint_path())?; } else { self.pg_ctl(&["stop"], &None)?; } diff --git a/control_plane/src/lib.rs b/control_plane/src/lib.rs index 6829479ad5..a773b8dcc3 100644 --- a/control_plane/src/lib.rs +++ b/control_plane/src/lib.rs @@ -9,7 +9,7 @@ mod background_process; pub mod broker; -pub mod compute; +pub mod endpoint; pub mod local_env; pub mod pageserver; pub mod postgresql_conf; diff --git a/control_plane/src/local_env.rs b/control_plane/src/local_env.rs index 8cc6329ce6..2b1eec7c4b 100644 --- a/control_plane/src/local_env.rs +++ b/control_plane/src/local_env.rs @@ -200,14 +200,8 @@ impl LocalEnv { self.neon_distrib_dir.join("storage_broker") } - pub fn pg_data_dirs_path(&self) -> PathBuf { - self.base_data_dir.join("pgdatadirs").join("tenants") - } - - pub fn pg_data_dir(&self, tenant_id: &TenantId, branch_name: &str) -> PathBuf { - self.pg_data_dirs_path() - .join(tenant_id.to_string()) - .join(branch_name) + pub fn endpoints_path(&self) -> PathBuf { + self.base_data_dir.join("endpoints") } // TODO: move pageserver files into ./pageserver @@ -427,7 +421,7 @@ impl LocalEnv { } } - fs::create_dir_all(self.pg_data_dirs_path())?; + fs::create_dir_all(self.endpoints_path())?; for safekeeper in &self.safekeepers { fs::create_dir_all(SafekeeperNode::datadir_path_by_id(self, safekeeper.id))?; diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index 094069e4c0..75991045a4 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -359,8 +359,8 @@ impl PageServerNode { .transpose() .context("Failed to parse 'trace_read_requests' as bool")?, eviction_policy: settings - .get("eviction_policy") - .map(|x| serde_json::from_str(x)) + .remove("eviction_policy") + .map(serde_json::from_str) .transpose() .context("Failed to parse 
'eviction_policy' json")?, min_resident_size_override: settings @@ -368,6 +368,9 @@ impl PageServerNode { .map(|x| x.parse::()) .transpose() .context("Failed to parse 'min_resident_size_override' as integer")?, + evictions_low_residence_duration_metric_threshold: settings + .remove("evictions_low_residence_duration_metric_threshold") + .map(|x| x.to_string()), }; if !settings.is_empty() { bail!("Unrecognized tenant settings: {settings:?}") @@ -445,6 +448,9 @@ impl PageServerNode { .map(|x| x.parse::()) .transpose() .context("Failed to parse 'min_resident_size_override' as an integer")?, + evictions_low_residence_duration_metric_threshold: settings + .get("evictions_low_residence_duration_metric_threshold") + .map(|x| x.to_string()), }) .send()? .error_from_body()?; diff --git a/control_plane/src/postgresql_conf.rs b/control_plane/src/postgresql_conf.rs index 34dc769e78..638575eb82 100644 --- a/control_plane/src/postgresql_conf.rs +++ b/control_plane/src/postgresql_conf.rs @@ -13,7 +13,7 @@ use std::io::BufRead; use std::str::FromStr; /// In-memory representation of a postgresql.conf file -#[derive(Default)] +#[derive(Default, Debug)] pub struct PostgresConf { lines: Vec, hash: HashMap, diff --git a/docker-compose/compute_wrapper/var/db/postgres/specs/spec.json b/docker-compose/compute_wrapper/var/db/postgres/specs/spec.json index 10ae0b0ecf..565e5e368e 100644 --- a/docker-compose/compute_wrapper/var/db/postgres/specs/spec.json +++ b/docker-compose/compute_wrapper/var/db/postgres/specs/spec.json @@ -28,11 +28,6 @@ "value": "replica", "vartype": "enum" }, - { - "name": "hot_standby", - "value": "on", - "vartype": "bool" - }, { "name": "wal_log_hints", "value": "on", diff --git a/libs/compute_api/src/responses.rs b/libs/compute_api/src/responses.rs index 43289a5e3e..c409563b56 100644 --- a/libs/compute_api/src/responses.rs +++ b/libs/compute_api/src/responses.rs @@ -1,7 +1,9 @@ //! Structs representing the JSON formats used in the compute_ctl's HTTP API. 
use chrono::{DateTime, Utc}; -use serde::{Serialize, Serializer}; +use serde::{Deserialize, Serialize, Serializer}; + +use crate::spec::ComputeSpec; #[derive(Serialize, Debug)] pub struct GenericAPIError { @@ -12,8 +14,9 @@ pub struct GenericAPIError { #[derive(Serialize, Debug)] #[serde(rename_all = "snake_case")] pub struct ComputeStatusResponse { - pub tenant: String, - pub timeline: String, + pub start_time: DateTime, + pub tenant: Option, + pub timeline: Option, pub status: ComputeStatus, #[serde(serialize_with = "rfc3339_serialize")] pub last_active: DateTime, @@ -43,6 +46,8 @@ pub enum ComputeStatus { Init, // Compute is configured and running. Running, + // New spec is being applied. + Configuration, // Either startup or configuration failed, // compute will exit soon or is waiting for // control-plane to terminate it. @@ -59,8 +64,29 @@ where /// Response of the /metrics.json API #[derive(Clone, Debug, Default, Serialize)] pub struct ComputeMetrics { + pub wait_for_spec_ms: u64, pub sync_safekeepers_ms: u64, pub basebackup_ms: u64, pub config_ms: u64, pub total_startup_ms: u64, } + +/// Response of the `/computes/{compute_id}/spec` control-plane API. +/// This is not actually a compute API response, so consider moving +/// to a different place. +#[derive(Deserialize, Debug)] +pub struct ControlPlaneSpecResponse { + pub spec: Option, + pub status: ControlPlaneComputeStatus, +} + +#[derive(Deserialize, Clone, Copy, Debug, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum ControlPlaneComputeStatus { + // Compute is known to control-plane, but it's not + // yet attached to any timeline / endpoint. + Empty, + // Compute is attached to some timeline / endpoint and + // should be able to start with provided spec. 
+ Attached, +} diff --git a/libs/compute_api/src/spec.rs b/libs/compute_api/src/spec.rs index 37fe133b68..f771910329 100644 --- a/libs/compute_api/src/spec.rs +++ b/libs/compute_api/src/spec.rs @@ -15,7 +15,10 @@ pub type PgIdent = String; #[derive(Clone, Debug, Default, Deserialize)] pub struct ComputeSpec { pub format_version: f32, - pub timestamp: String, + + // The control plane also includes a 'timestamp' field in the JSON document, + // but we don't use it for anything. Serde ignores unknown fields by default + // when deserializing, so it is simply skipped. pub operation_uuid: Option, /// Expected cluster state at the end of transition process. pub cluster: Cluster, diff --git a/libs/consumption_metrics/Cargo.toml b/libs/consumption_metrics/Cargo.toml index f26aa2fbc5..3f290821c2 100644 --- a/libs/consumption_metrics/Cargo.toml +++ b/libs/consumption_metrics/Cargo.toml @@ -4,13 +4,12 @@ version = "0.1.0" edition = "2021" license = "Apache-2.0" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - [dependencies] -anyhow = "1.0.68" -chrono = { version = "0.4", default-features = false, features = ["clock", "serde"] } -rand = "0.8.3" -serde = "1.0.152" -serde_with = "2.1.0" -utils = { version = "0.1.0", path = "../utils" } -workspace_hack = { version = "0.1.0", path = "../../workspace_hack" } +anyhow.workspace = true +chrono.workspace = true +rand.workspace = true +serde.workspace = true +serde_with.workspace = true +utils.workspace = true + +workspace_hack.workspace = true diff --git a/libs/pageserver_api/Cargo.toml b/libs/pageserver_api/Cargo.toml index 7709da1072..f97ec54e91 100644 --- a/libs/pageserver_api/Cargo.toml +++ b/libs/pageserver_api/Cargo.toml @@ -7,6 +7,7 @@ license.workspace = true [dependencies] serde.workspace = true serde_with.workspace = true +serde_json.workspace = true const_format.workspace = true anyhow.workspace = true bytes.workspace = true @@ -14,6 +15,7 @@ byteorder.workspace = true utils.workspace = true
postgres_ffi.workspace = true enum-map.workspace = true -serde_json.workspace = true +strum.workspace = true +strum_macros.workspace = true workspace_hack.workspace = true diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 98a4b56858..15c37b9453 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -7,6 +7,7 @@ use std::{ use byteorder::{BigEndian, ReadBytesExt}; use serde::{Deserialize, Serialize}; use serde_with::{serde_as, DisplayFromStr}; +use strum_macros; use utils::{ history_buffer::HistoryBufferWithDropCounter, id::{NodeId, TenantId, TimelineId}, @@ -18,11 +19,23 @@ use anyhow::bail; use bytes::{BufMut, Bytes, BytesMut}; /// A state of a tenant in pageserver's memory. -#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[derive( + Clone, + PartialEq, + Eq, + serde::Serialize, + serde::Deserialize, + strum_macros::Display, + strum_macros::EnumString, + strum_macros::EnumVariantNames, + strum_macros::AsRefStr, + strum_macros::IntoStaticStr, +)] +#[serde(tag = "slug", content = "data")] pub enum TenantState { - // This tenant is being loaded from local disk + /// This tenant is being loaded from local disk Loading, - // This tenant is being downloaded from cloud storage. + /// This tenant is being downloaded from cloud storage. Attaching, /// Tenant is fully operational Active, @@ -31,15 +44,7 @@ pub enum TenantState { Stopping, /// A tenant is recognized by the pageserver, but can no longer be used for /// any operations, because it failed to be activated. 
- Broken, -} - -pub mod state { - pub const LOADING: &str = "loading"; - pub const ATTACHING: &str = "attaching"; - pub const ACTIVE: &str = "active"; - pub const STOPPING: &str = "stopping"; - pub const BROKEN: &str = "broken"; + Broken { reason: String, backtrace: String }, } impl TenantState { @@ -49,17 +54,26 @@ impl TenantState { Self::Attaching => true, Self::Active => false, Self::Stopping => false, - Self::Broken => false, + Self::Broken { .. } => false, } } - pub fn as_str(&self) -> &'static str { + pub fn broken_from_reason(reason: String) -> Self { + let backtrace_str: String = format!("{}", std::backtrace::Backtrace::force_capture()); + Self::Broken { + reason, + backtrace: backtrace_str, + } + } +} + +impl std::fmt::Debug for TenantState { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - TenantState::Loading => state::LOADING, - TenantState::Attaching => state::ATTACHING, - TenantState::Active => state::ACTIVE, - TenantState::Stopping => state::STOPPING, - TenantState::Broken => state::BROKEN, + Self::Broken { reason, backtrace } if !reason.is_empty() => { + write!(f, "Broken due to: {reason}. Backtrace:\n{backtrace}") + } + _ => write!(f, "{self}"), } } } @@ -121,6 +135,7 @@ pub struct TenantCreateRequest { // For now, this field is not even documented in the openapi_spec.yml. pub eviction_policy: Option, pub min_resident_size_override: Option, + pub evictions_low_residence_duration_metric_threshold: Option, } #[serde_as] @@ -167,6 +182,7 @@ pub struct TenantConfigRequest { // For now, this field is not even documented in the openapi_spec.yml. 
pub eviction_policy: Option, pub min_resident_size_override: Option, + pub evictions_low_residence_duration_metric_threshold: Option, } impl TenantConfigRequest { @@ -188,6 +204,7 @@ impl TenantConfigRequest { trace_read_requests: None, eviction_policy: None, min_resident_size_override: None, + evictions_low_residence_duration_metric_threshold: None, } } } @@ -615,6 +632,7 @@ impl PagestreamBeMessage { #[cfg(test)] mod tests { use bytes::Buf; + use serde_json::json; use super::*; @@ -665,4 +683,57 @@ mod tests { assert!(msg == reconstructed); } } + + #[test] + fn test_tenantinfo_serde() { + // Test serialization/deserialization of TenantInfo + let original_active = TenantInfo { + id: TenantId::generate(), + state: TenantState::Active, + current_physical_size: Some(42), + has_in_progress_downloads: Some(false), + }; + let expected_active = json!({ + "id": original_active.id.to_string(), + "state": { + "slug": "Active", + }, + "current_physical_size": 42, + "has_in_progress_downloads": false, + }); + + let original_broken = TenantInfo { + id: TenantId::generate(), + state: TenantState::Broken { + reason: "reason".into(), + backtrace: "backtrace info".into(), + }, + current_physical_size: Some(42), + has_in_progress_downloads: Some(false), + }; + let expected_broken = json!({ + "id": original_broken.id.to_string(), + "state": { + "slug": "Broken", + "data": { + "backtrace": "backtrace info", + "reason": "reason", + } + }, + "current_physical_size": 42, + "has_in_progress_downloads": false, + }); + + assert_eq!( + serde_json::to_value(&original_active).unwrap(), + expected_active + ); + + assert_eq!( + serde_json::to_value(&original_broken).unwrap(), + expected_broken + ); + assert!(format!("{:?}", &original_broken.state).contains("reason")); + assert!(format!("{:?}", &original_broken.state).contains("backtrace info")); + } } diff --git a/libs/postgres_ffi/build.rs b/libs/postgres_ffi/build.rs index 66221af522..f7e39751ef 100644 --- a/libs/postgres_ffi/build.rs +++ 
b/libs/postgres_ffi/build.rs @@ -5,7 +5,7 @@ use std::path::PathBuf; use std::process::Command; use anyhow::{anyhow, Context}; -use bindgen::callbacks::ParseCallbacks; +use bindgen::callbacks::{DeriveInfo, ParseCallbacks}; #[derive(Debug)] struct PostgresFfiCallbacks; @@ -20,7 +20,7 @@ impl ParseCallbacks for PostgresFfiCallbacks { // Add any custom #[derive] attributes to the data structures that bindgen // creates. - fn add_derives(&self, name: &str) -> Vec { + fn add_derives(&self, derive_info: &DeriveInfo) -> Vec { // This is the list of data structures that we want to serialize/deserialize. let serde_list = [ "XLogRecord", @@ -31,7 +31,7 @@ impl ParseCallbacks for PostgresFfiCallbacks { "ControlFileData", ]; - if serde_list.contains(&name) { + if serde_list.contains(&derive_info.name) { vec![ "Default".into(), // Default allows us to easily fill the padding fields with 0. "Serialize".into(), diff --git a/libs/postgres_ffi/src/lib.rs b/libs/postgres_ffi/src/lib.rs index 492ec9748a..b8eb469cb0 100644 --- a/libs/postgres_ffi/src/lib.rs +++ b/libs/postgres_ffi/src/lib.rs @@ -95,10 +95,13 @@ pub fn generate_wal_segment( segno: u64, system_id: u64, pg_version: u32, + lsn: Lsn, ) -> Result { + assert_eq!(segno, lsn.segment_number(WAL_SEGMENT_SIZE)); + match pg_version { - 14 => v14::xlog_utils::generate_wal_segment(segno, system_id), - 15 => v15::xlog_utils::generate_wal_segment(segno, system_id), + 14 => v14::xlog_utils::generate_wal_segment(segno, system_id, lsn), + 15 => v15::xlog_utils::generate_wal_segment(segno, system_id, lsn), _ => Err(SerializeError::BadInput), } } diff --git a/libs/postgres_ffi/src/pg_constants.rs b/libs/postgres_ffi/src/pg_constants.rs index 09678353af..6bc89ed37e 100644 --- a/libs/postgres_ffi/src/pg_constants.rs +++ b/libs/postgres_ffi/src/pg_constants.rs @@ -195,6 +195,7 @@ pub const FIRST_NORMAL_OBJECT_ID: u32 = 16384; pub const XLOG_CHECKPOINT_SHUTDOWN: u8 = 0x00; pub const XLOG_CHECKPOINT_ONLINE: u8 = 0x10; +pub const 
XLP_FIRST_IS_CONTRECORD: u16 = 0x0001; pub const XLP_LONG_HEADER: u16 = 0x0002; /* From fsm_internals.h */ diff --git a/libs/postgres_ffi/src/xlog_utils.rs b/libs/postgres_ffi/src/xlog_utils.rs index 272c4d6dcc..4d7bb61883 100644 --- a/libs/postgres_ffi/src/xlog_utils.rs +++ b/libs/postgres_ffi/src/xlog_utils.rs @@ -270,6 +270,11 @@ impl XLogPageHeaderData { use utils::bin_ser::LeSer; XLogPageHeaderData::des_from(&mut buf.reader()) } + + pub fn encode(&self) -> Result { + use utils::bin_ser::LeSer; + self.ser().map(|b| b.into()) + } } impl XLogLongPageHeaderData { @@ -328,22 +333,32 @@ impl CheckPoint { } } -// -// Generate new, empty WAL segment. -// We need this segment to start compute node. -// -pub fn generate_wal_segment(segno: u64, system_id: u64) -> Result { +/// Generate new, empty WAL segment, with correct block headers at the first +/// page of the segment and the page that contains the given LSN. +/// We need this segment to start compute node. +pub fn generate_wal_segment(segno: u64, system_id: u64, lsn: Lsn) -> Result { let mut seg_buf = BytesMut::with_capacity(WAL_SEGMENT_SIZE); let pageaddr = XLogSegNoOffsetToRecPtr(segno, 0, WAL_SEGMENT_SIZE); + + let page_off = lsn.block_offset(); + let seg_off = lsn.segment_offset(WAL_SEGMENT_SIZE); + + let first_page_only = seg_off < XLOG_BLCKSZ; + let (shdr_rem_len, infoflags) = if first_page_only { + (seg_off, pg_constants::XLP_FIRST_IS_CONTRECORD) + } else { + (0, 0) + }; + let hdr = XLogLongPageHeaderData { std: { XLogPageHeaderData { xlp_magic: XLOG_PAGE_MAGIC as u16, - xlp_info: pg_constants::XLP_LONG_HEADER, + xlp_info: pg_constants::XLP_LONG_HEADER | infoflags, xlp_tli: PG_TLI, xlp_pageaddr: pageaddr, - xlp_rem_len: 0, + xlp_rem_len: shdr_rem_len as u32, ..Default::default() // Put 0 in padding fields. 
} }, @@ -357,6 +372,33 @@ pub fn generate_wal_segment(segno: u64, system_id: u64) -> Result= pg_constants::SIZE_OF_PAGE_HEADER as u64 { + pg_constants::XLP_FIRST_IS_CONTRECORD + } else { + 0 + }, + xlp_tli: PG_TLI, + xlp_pageaddr: lsn.page_lsn().0, + xlp_rem_len: if page_off >= pg_constants::SIZE_OF_PAGE_HEADER as u64 { + page_off as u32 + } else { + 0u32 + }, + ..Default::default() // Put 0 in padding fields. + }; + let hdr_bytes = header.encode()?; + + debug_assert!(seg_buf.len() > block_offset + hdr_bytes.len()); + debug_assert_ne!(block_offset, 0); + + seg_buf[block_offset..block_offset + hdr_bytes.len()].copy_from_slice(&hdr_bytes[..]); + } + Ok(seg_buf.freeze()) } diff --git a/libs/postgres_ffi/wal_craft/src/lib.rs b/libs/postgres_ffi/wal_craft/src/lib.rs index 969befc8e7..88ae41c636 100644 --- a/libs/postgres_ffi/wal_craft/src/lib.rs +++ b/libs/postgres_ffi/wal_craft/src/lib.rs @@ -6,9 +6,8 @@ use postgres::Client; use postgres_ffi::{WAL_SEGMENT_SIZE, XLOG_BLCKSZ}; use postgres_ffi::{XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD}; use std::cmp::Ordering; -use std::fs; use std::path::{Path, PathBuf}; -use std::process::{Command, Stdio}; +use std::process::Command; use std::time::Instant; use tempfile::{tempdir, TempDir}; @@ -95,12 +94,6 @@ impl Conf { pub fn start_server(&self) -> Result { info!("Starting Postgres server in {:?}", self.datadir); - let log_file = fs::File::create(self.datadir.join("pg.log")).with_context(|| { - format!( - "Failed to create pg.log file in directory {}", - self.datadir.display() - ) - })?; let unix_socket_dir = tempdir()?; // We need a directory with a short name for Unix socket (up to 108 symbols) let unix_socket_dir_path = unix_socket_dir.path().to_owned(); let server_process = self @@ -110,9 +103,7 @@ impl Conf { .arg(unix_socket_dir_path.as_os_str()) .arg("-D") .arg(self.datadir.as_os_str()) - .args(["-c", "logging_collector=on"]) // stderr will mess up with tests output 
.args(REQUIRED_POSTGRES_CONFIG.iter().flat_map(|cfg| ["-c", cfg])) - .stderr(Stdio::from(log_file)) .spawn()?; let server = PostgresServer { process: server_process, @@ -121,7 +112,7 @@ impl Conf { let mut c = postgres::Config::new(); c.host_path(&unix_socket_dir_path); c.user("postgres"); - c.connect_timeout(Duration::from_millis(1000)); + c.connect_timeout(Duration::from_millis(10000)); c }, }; diff --git a/libs/pq_proto/Cargo.toml b/libs/pq_proto/Cargo.toml index 76b71729ed..b286eb0358 100644 --- a/libs/pq_proto/Cargo.toml +++ b/libs/pq_proto/Cargo.toml @@ -10,7 +10,6 @@ byteorder.workspace = true pin-project-lite.workspace = true postgres-protocol.workspace = true rand.workspace = true -serde.workspace = true tokio.workspace = true tracing.workspace = true thiserror.workspace = true diff --git a/libs/pq_proto/src/lib.rs b/libs/pq_proto/src/lib.rs index ed0239072a..2143ad2530 100644 --- a/libs/pq_proto/src/lib.rs +++ b/libs/pq_proto/src/lib.rs @@ -6,15 +6,10 @@ pub mod framed; use byteorder::{BigEndian, ReadBytesExt}; use bytes::{Buf, BufMut, Bytes, BytesMut}; -use postgres_protocol::PG_EPOCH; -use serde::{Deserialize, Serialize}; -use std::{ - borrow::Cow, - collections::HashMap, - fmt, io, str, - time::{Duration, SystemTime}, -}; -use tracing::{trace, warn}; +use std::{borrow::Cow, collections::HashMap, fmt, io, str}; + +// re-export for use in utils pageserver_feedback.rs +pub use postgres_protocol::PG_EPOCH; pub type Oid = u32; pub type SystemId = u64; @@ -664,7 +659,7 @@ fn write_cstr(s: impl AsRef<[u8]>, buf: &mut BytesMut) -> Result<(), ProtocolErr } /// Read cstring from buf, advancing it. -fn read_cstr(buf: &mut Bytes) -> Result { +pub fn read_cstr(buf: &mut Bytes) -> Result { let pos = buf .iter() .position(|x| *x == 0) @@ -939,175 +934,10 @@ impl<'a> BeMessage<'a> { } } -/// Feedback pageserver sends to safekeeper and safekeeper resends to compute. -/// Serialized in custom flexible key/value format. 
In replication protocol, it -/// is marked with NEON_STATUS_UPDATE_TAG_BYTE to differentiate from postgres -/// Standby status update / Hot standby feedback messages. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -pub struct PageserverFeedback { - /// Last known size of the timeline. Used to enforce timeline size limit. - pub current_timeline_size: u64, - /// LSN last received and ingested by the pageserver. - pub last_received_lsn: u64, - /// LSN up to which data is persisted by the pageserver to its local disc. - pub disk_consistent_lsn: u64, - /// LSN up to which data is persisted by the pageserver on s3; safekeepers - /// consider WAL before it can be removed. - pub remote_consistent_lsn: u64, - pub replytime: SystemTime, -} - -// NOTE: Do not forget to increment this number when adding new fields to PageserverFeedback. -// Do not remove previously available fields because this might be backwards incompatible. -pub const PAGESERVER_FEEDBACK_FIELDS_NUMBER: u8 = 5; - -impl PageserverFeedback { - pub fn empty() -> PageserverFeedback { - PageserverFeedback { - current_timeline_size: 0, - last_received_lsn: 0, - remote_consistent_lsn: 0, - disk_consistent_lsn: 0, - replytime: SystemTime::now(), - } - } - - // Serialize PageserverFeedback using custom format - // to support protocol extensibility. - // - // Following layout is used: - // char - number of key-value pairs that follow. - // - // key-value pairs: - // null-terminated string - key, - // uint32 - value length in bytes - // value itself - // - // TODO: change serialized fields names once all computes migrate to rename. 
- pub fn serialize(&self, buf: &mut BytesMut) { - buf.put_u8(PAGESERVER_FEEDBACK_FIELDS_NUMBER); // # of keys - buf.put_slice(b"current_timeline_size\0"); - buf.put_i32(8); - buf.put_u64(self.current_timeline_size); - - buf.put_slice(b"ps_writelsn\0"); - buf.put_i32(8); - buf.put_u64(self.last_received_lsn); - buf.put_slice(b"ps_flushlsn\0"); - buf.put_i32(8); - buf.put_u64(self.disk_consistent_lsn); - buf.put_slice(b"ps_applylsn\0"); - buf.put_i32(8); - buf.put_u64(self.remote_consistent_lsn); - - let timestamp = self - .replytime - .duration_since(*PG_EPOCH) - .expect("failed to serialize pg_replytime earlier than PG_EPOCH") - .as_micros() as i64; - - buf.put_slice(b"ps_replytime\0"); - buf.put_i32(8); - buf.put_i64(timestamp); - } - - // Deserialize PageserverFeedback message - // TODO: change serialized fields names once all computes migrate to rename. - pub fn parse(mut buf: Bytes) -> PageserverFeedback { - let mut rf = PageserverFeedback::empty(); - let nfields = buf.get_u8(); - for _ in 0..nfields { - let key = read_cstr(&mut buf).unwrap(); - match key.as_ref() { - b"current_timeline_size" => { - let len = buf.get_i32(); - assert_eq!(len, 8); - rf.current_timeline_size = buf.get_u64(); - } - b"ps_writelsn" => { - let len = buf.get_i32(); - assert_eq!(len, 8); - rf.last_received_lsn = buf.get_u64(); - } - b"ps_flushlsn" => { - let len = buf.get_i32(); - assert_eq!(len, 8); - rf.disk_consistent_lsn = buf.get_u64(); - } - b"ps_applylsn" => { - let len = buf.get_i32(); - assert_eq!(len, 8); - rf.remote_consistent_lsn = buf.get_u64(); - } - b"ps_replytime" => { - let len = buf.get_i32(); - assert_eq!(len, 8); - let raw_time = buf.get_i64(); - if raw_time > 0 { - rf.replytime = *PG_EPOCH + Duration::from_micros(raw_time as u64); - } else { - rf.replytime = *PG_EPOCH - Duration::from_micros(-raw_time as u64); - } - } - _ => { - let len = buf.get_i32(); - warn!( - "PageserverFeedback parse. unknown key {} of len {len}. 
Skip it.", - String::from_utf8_lossy(key.as_ref()) - ); - buf.advance(len as usize); - } - } - } - trace!("PageserverFeedback parsed is {:?}", rf); - rf - } -} - #[cfg(test)] mod tests { use super::*; - #[test] - fn test_replication_feedback_serialization() { - let mut rf = PageserverFeedback::empty(); - // Fill rf with some values - rf.current_timeline_size = 12345678; - // Set rounded time to be able to compare it with deserialized value, - // because it is rounded up to microseconds during serialization. - rf.replytime = *PG_EPOCH + Duration::from_secs(100_000_000); - let mut data = BytesMut::new(); - rf.serialize(&mut data); - - let rf_parsed = PageserverFeedback::parse(data.freeze()); - assert_eq!(rf, rf_parsed); - } - - #[test] - fn test_replication_feedback_unknown_key() { - let mut rf = PageserverFeedback::empty(); - // Fill rf with some values - rf.current_timeline_size = 12345678; - // Set rounded time to be able to compare it with deserialized value, - // because it is rounded up to microseconds during serialization. 
- rf.replytime = *PG_EPOCH + Duration::from_secs(100_000_000); - let mut data = BytesMut::new(); - rf.serialize(&mut data); - - // Add an extra field to the buffer and adjust number of keys - if let Some(first) = data.first_mut() { - *first = PAGESERVER_FEEDBACK_FIELDS_NUMBER + 1; - } - - data.put_slice(b"new_field_one\0"); - data.put_i32(8); - data.put_u64(42); - - // Parse serialized data and check that new field is not parsed - let rf_parsed = PageserverFeedback::parse(data.freeze()); - assert_eq!(rf, rf_parsed); - } - #[test] fn test_startup_message_params_options_escaped() { fn split_options(params: &StartupMessageParams) -> Vec> { diff --git a/libs/remote_storage/tests/pagination_tests.rs b/libs/remote_storage/tests/pagination_tests.rs index eb52409c44..86a6888f98 100644 --- a/libs/remote_storage/tests/pagination_tests.rs +++ b/libs/remote_storage/tests/pagination_tests.rs @@ -99,7 +99,11 @@ struct S3WithTestBlobs { #[async_trait::async_trait] impl AsyncTestContext for MaybeEnabledS3 { async fn setup() -> Self { - utils::logging::init(utils::logging::LogFormat::Test).expect("logging init failed"); + utils::logging::init( + utils::logging::LogFormat::Test, + utils::logging::TracingErrorLayerEnablement::Disabled, + ) + .expect("logging init failed"); if env::var(ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME).is_err() { info!( "`{}` env variable is not set, skipping the test", @@ -204,12 +208,7 @@ async fn upload_s3_data( let data = format!("remote blob data {i}").into_bytes(); let data_len = data.len(); task_client - .upload( - Box::new(std::io::Cursor::new(data)), - data_len, - &blob_path, - None, - ) + .upload(std::io::Cursor::new(data), data_len, &blob_path, None) .await?; Ok::<_, anyhow::Error>((blob_prefix, blob_path)) diff --git a/libs/tracing-utils/Cargo.toml b/libs/tracing-utils/Cargo.toml index 8c3d3f9063..b285c9b5b0 100644 --- a/libs/tracing-utils/Cargo.toml +++ b/libs/tracing-utils/Cargo.toml @@ -14,4 +14,5 @@ tokio = { workspace = true, features = 
["rt", "rt-multi-thread"] } tracing.workspace = true tracing-opentelemetry.workspace = true tracing-subscriber.workspace = true -workspace_hack = { version = "0.1", path = "../../workspace_hack" } + +workspace_hack.workspace = true diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml index 391bc52a80..8239ffff57 100644 --- a/libs/utils/Cargo.toml +++ b/libs/utils/Cargo.toml @@ -11,6 +11,7 @@ async-trait.workspace = true anyhow.workspace = true bincode.workspace = true bytes.workspace = true +chrono.workspace = true heapless.workspace = true hex = { workspace = true, features = ["serde"] } hyper = { workspace = true, features = ["full"] } @@ -27,14 +28,16 @@ signal-hook.workspace = true thiserror.workspace = true tokio.workspace = true tracing.workspace = true -tracing-subscriber = { workspace = true, features = ["json"] } +tracing-error.workspace = true +tracing-subscriber = { workspace = true, features = ["json", "registry"] } rand.workspace = true serde_with.workspace = true strum.workspace = true strum_macros.workspace = true url.workspace = true -uuid = { version = "1.2", features = ["v4", "serde"] } +uuid.workspace = true +pq_proto.workspace = true metrics.workspace = true workspace_hack.workspace = true diff --git a/libs/utils/src/http/endpoint.rs b/libs/utils/src/http/endpoint.rs index 616f2b8468..b11aef9892 100644 --- a/libs/utils/src/http/endpoint.rs +++ b/libs/utils/src/http/endpoint.rs @@ -76,6 +76,7 @@ where let log_quietly = method == Method::GET; async move { + let cancellation_guard = RequestCancelled::warn_when_dropped_without_responding(); if log_quietly { debug!("Handling request"); } else { @@ -87,7 +88,11 @@ where // Usage of the error handler also means that we expect only the `ApiError` errors to be raised in this call. // // Panics are not handled separately, there's a `tracing_panic_hook` from another module to do that globally. 
- match (self.0)(request).await { + let res = (self.0)(request).await; + + cancellation_guard.disarm(); + + match res { Ok(response) => { let response_status = response.status(); if log_quietly && response_status.is_success() { @@ -105,6 +110,38 @@ where } } +/// Drop guard to WARN in case the request was dropped before completion. +struct RequestCancelled { + warn: Option, +} + +impl RequestCancelled { + /// Create the drop guard using the [`tracing::Span::current`] as the span. + fn warn_when_dropped_without_responding() -> Self { + RequestCancelled { + warn: Some(tracing::Span::current()), + } + } + + /// Consume the drop guard without logging anything. + fn disarm(mut self) { + self.warn = None; + } +} + +impl Drop for RequestCancelled { + fn drop(&mut self) { + if let Some(span) = self.warn.take() { + // the span has all of the info already, but the outer `.instrument(span)` has already + // been dropped, so we need to manually re-enter it for this message. + // + // this is what the instrument would do before polling so it is fine. + let _g = span.entered(); + warn!("request was dropped before completing"); + } + } +} + async fn prometheus_metrics_handler(_req: Request) -> Result, ApiError> { SERVE_METRICS_COUNT.inc(); diff --git a/libs/utils/src/http/json.rs b/libs/utils/src/http/json.rs index 40e61e3d0c..8981fdd1dd 100644 --- a/libs/utils/src/http/json.rs +++ b/libs/utils/src/http/json.rs @@ -1,9 +1,7 @@ -use std::fmt::Display; - use anyhow::Context; use bytes::Buf; use hyper::{header, Body, Request, Response, StatusCode}; -use serde::{Deserialize, Serialize, Serializer}; +use serde::{Deserialize, Serialize}; use super::error::ApiError; @@ -33,12 +31,3 @@ pub fn json_response( .map_err(|e| ApiError::InternalServerError(e.into()))?; Ok(response) } - -/// Serialize through Display trait. 
-pub fn display_serialize(z: &F, s: S) -> Result -where - S: Serializer, - F: Display, -{ - s.serialize_str(&format!("{}", z)) -} diff --git a/libs/utils/src/id.rs b/libs/utils/src/id.rs index b27c5cda35..20b601f68d 100644 --- a/libs/utils/src/id.rs +++ b/libs/utils/src/id.rs @@ -265,6 +265,26 @@ impl fmt::Display for TenantTimelineId { } } +impl FromStr for TenantTimelineId { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + let mut parts = s.split('/'); + let tenant_id = parts + .next() + .ok_or_else(|| anyhow::anyhow!("TenantTimelineId must contain tenant_id"))? + .parse()?; + let timeline_id = parts + .next() + .ok_or_else(|| anyhow::anyhow!("TenantTimelineId must contain timeline_id"))? + .parse()?; + if parts.next().is_some() { + anyhow::bail!("TenantTimelineId must contain only tenant_id and timeline_id"); + } + Ok(TenantTimelineId::new(tenant_id, timeline_id)) + } +} + // Unique ID of a storage node (safekeeper or pageserver). Supposed to be issued // by the console. #[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd, Hash, Debug, Serialize, Deserialize)] diff --git a/libs/utils/src/lib.rs b/libs/utils/src/lib.rs index d4176911ac..82701ed4b0 100644 --- a/libs/utils/src/lib.rs +++ b/libs/utils/src/lib.rs @@ -54,6 +54,10 @@ pub mod measured_stream; pub mod serde_percent; pub mod serde_regex; +pub mod pageserver_feedback; + +pub mod tracing_span_assert; + /// use with fail::cfg("$name", "return(2000)") #[macro_export] macro_rules! 
failpoint_sleep_millis_async { diff --git a/libs/utils/src/logging.rs b/libs/utils/src/logging.rs index f770622a60..2b8c852d86 100644 --- a/libs/utils/src/logging.rs +++ b/libs/utils/src/logging.rs @@ -1,6 +1,7 @@ use std::str::FromStr; use anyhow::Context; +use once_cell::sync::Lazy; use strum_macros::{EnumString, EnumVariantNames}; #[derive(EnumString, EnumVariantNames, Eq, PartialEq, Debug, Clone, Copy)] @@ -23,24 +24,81 @@ impl LogFormat { } } -pub fn init(log_format: LogFormat) -> anyhow::Result<()> { - let default_filter_str = "info"; +static TRACING_EVENT_COUNT: Lazy = Lazy::new(|| { + metrics::register_int_counter_vec!( + "libmetrics_tracing_event_count", + "Number of tracing events, by level", + &["level"] + ) + .expect("failed to define metric") +}); +struct TracingEventCountLayer(&'static metrics::IntCounterVec); + +impl tracing_subscriber::layer::Layer for TracingEventCountLayer +where + S: tracing::Subscriber, +{ + fn on_event( + &self, + event: &tracing::Event<'_>, + _ctx: tracing_subscriber::layer::Context<'_, S>, + ) { + let level = event.metadata().level(); + let level = match *level { + tracing::Level::ERROR => "error", + tracing::Level::WARN => "warn", + tracing::Level::INFO => "info", + tracing::Level::DEBUG => "debug", + tracing::Level::TRACE => "trace", + }; + self.0.with_label_values(&[level]).inc(); + } +} + +/// Whether to add the `tracing_error` crate's `ErrorLayer` +/// to the global tracing subscriber. +/// +pub enum TracingErrorLayerEnablement { + /// Do not add the `ErrorLayer`. + Disabled, + /// Add the `ErrorLayer` with the filter specified by RUST_LOG, defaulting to `info` if `RUST_LOG` is unset. + EnableWithRustLogFilter, +} + +pub fn init( + log_format: LogFormat, + tracing_error_layer_enablement: TracingErrorLayerEnablement, +) -> anyhow::Result<()> { // We fall back to printing all spans at info-level or above if // the RUST_LOG environment variable is not set. 
- let env_filter = tracing_subscriber::EnvFilter::try_from_default_env() - .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_filter_str)); + let rust_log_env_filter = || { + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")) + }; - let base_logger = tracing_subscriber::fmt() - .with_env_filter(env_filter) - .with_target(false) - .with_ansi(atty::is(atty::Stream::Stdout)) - .with_writer(std::io::stdout); - - match log_format { - LogFormat::Json => base_logger.json().init(), - LogFormat::Plain => base_logger.init(), - LogFormat::Test => base_logger.with_test_writer().init(), + // NB: the order of the with() calls does not matter. + // See https://docs.rs/tracing-subscriber/0.3.16/tracing_subscriber/layer/index.html#per-layer-filtering + use tracing_subscriber::prelude::*; + let r = tracing_subscriber::registry(); + let r = r.with({ + let log_layer = tracing_subscriber::fmt::layer() + .with_target(false) + .with_ansi(atty::is(atty::Stream::Stdout)) + .with_writer(std::io::stdout); + let log_layer = match log_format { + LogFormat::Json => log_layer.json().boxed(), + LogFormat::Plain => log_layer.boxed(), + LogFormat::Test => log_layer.with_test_writer().boxed(), + }; + log_layer.with_filter(rust_log_env_filter()) + }); + let r = r.with(TracingEventCountLayer(&TRACING_EVENT_COUNT).with_filter(rust_log_env_filter())); + match tracing_error_layer_enablement { + TracingErrorLayerEnablement::EnableWithRustLogFilter => r + .with(tracing_error::ErrorLayer::default().with_filter(rust_log_env_filter())) + .init(), + TracingErrorLayerEnablement::Disabled => r.init(), } Ok(()) @@ -157,3 +215,33 @@ impl std::fmt::Debug for PrettyLocation<'_, '_> { ::fmt(self, f) } } + +#[cfg(test)] +mod tests { + use metrics::{core::Opts, IntCounterVec}; + + use super::TracingEventCountLayer; + + #[test] + fn tracing_event_count_metric() { + let counter_vec = + IntCounterVec::new(Opts::new("testmetric", "testhelp"), 
&["level"]).unwrap(); + let counter_vec = Box::leak(Box::new(counter_vec)); // make it 'static + let layer = TracingEventCountLayer(counter_vec); + use tracing_subscriber::prelude::*; + + tracing::subscriber::with_default(tracing_subscriber::registry().with(layer), || { + tracing::trace!("foo"); + tracing::debug!("foo"); + tracing::info!("foo"); + tracing::warn!("foo"); + tracing::error!("foo"); + }); + + assert_eq!(counter_vec.with_label_values(&["trace"]).get(), 1); + assert_eq!(counter_vec.with_label_values(&["debug"]).get(), 1); + assert_eq!(counter_vec.with_label_values(&["info"]).get(), 1); + assert_eq!(counter_vec.with_label_values(&["warn"]).get(), 1); + assert_eq!(counter_vec.with_label_values(&["error"]).get(), 1); + } +} diff --git a/libs/utils/src/lsn.rs b/libs/utils/src/lsn.rs index acf5ea28d7..0493d43088 100644 --- a/libs/utils/src/lsn.rs +++ b/libs/utils/src/lsn.rs @@ -62,29 +62,48 @@ impl Lsn { } /// Compute the offset into a segment + #[inline] pub fn segment_offset(self, seg_sz: usize) -> usize { (self.0 % seg_sz as u64) as usize } /// Compute LSN of the segment start. + #[inline] pub fn segment_lsn(self, seg_sz: usize) -> Lsn { Lsn(self.0 - (self.0 % seg_sz as u64)) } /// Compute the segment number + #[inline] pub fn segment_number(self, seg_sz: usize) -> u64 { self.0 / seg_sz as u64 } /// Compute the offset into a block + #[inline] pub fn block_offset(self) -> u64 { const BLCKSZ: u64 = XLOG_BLCKSZ as u64; self.0 % BLCKSZ } + /// Compute the LSN of the start of the block (page) that contains + /// this Lsn + #[inline] + pub fn page_lsn(self) -> Lsn { + Lsn(self.0 - self.block_offset()) + } + + /// Compute the offset, from the start of its segment, of the block + /// (page) that contains this Lsn + #[inline] + pub fn page_offset_in_segment(self, seg_sz: usize) -> u64 { + (self.0 - self.block_offset()) - self.segment_lsn(seg_sz).0 + } + /// Compute the bytes remaining in this block /// /// If the LSN is already at the block boundary, it will return `XLOG_BLCKSZ`.
+ #[inline] pub fn remaining_in_block(self) -> u64 { const BLCKSZ: u64 = XLOG_BLCKSZ as u64; BLCKSZ - (self.0 % BLCKSZ) diff --git a/libs/utils/src/pageserver_feedback.rs b/libs/utils/src/pageserver_feedback.rs new file mode 100644 index 0000000000..a3b53201d3 --- /dev/null +++ b/libs/utils/src/pageserver_feedback.rs @@ -0,0 +1,214 @@ +use std::time::{Duration, SystemTime}; + +use bytes::{Buf, BufMut, Bytes, BytesMut}; +use pq_proto::{read_cstr, PG_EPOCH}; +use serde::{Deserialize, Serialize}; +use serde_with::{serde_as, DisplayFromStr}; +use tracing::{trace, warn}; + +use crate::lsn::Lsn; + +/// Feedback pageserver sends to safekeeper and safekeeper resends to compute. +/// Serialized in custom flexible key/value format. In replication protocol, it +/// is marked with NEON_STATUS_UPDATE_TAG_BYTE to differentiate from postgres +/// Standby status update / Hot standby feedback messages. +/// +/// serde Serialize is used only for human readable dump to json (e.g. in +/// safekeepers debug_dump). +#[serde_as] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +pub struct PageserverFeedback { + /// Last known size of the timeline. Used to enforce timeline size limit. + pub current_timeline_size: u64, + /// LSN last received and ingested by the pageserver. Controls backpressure. + #[serde_as(as = "DisplayFromStr")] + pub last_received_lsn: Lsn, + /// LSN up to which data is persisted by the pageserver to its local disc. + /// Controls backpressure. + #[serde_as(as = "DisplayFromStr")] + pub disk_consistent_lsn: Lsn, + /// LSN up to which data is persisted by the pageserver on s3; safekeepers + /// consider WAL before it can be removed. + #[serde_as(as = "DisplayFromStr")] + pub remote_consistent_lsn: Lsn, + // Serialize with RFC3339 format. + #[serde(with = "serde_systemtime")] + pub replytime: SystemTime, +} + +// NOTE: Do not forget to increment this number when adding new fields to PageserverFeedback. 
+// Do not remove previously available fields because this might be backwards incompatible. +pub const PAGESERVER_FEEDBACK_FIELDS_NUMBER: u8 = 5; + +impl PageserverFeedback { + pub fn empty() -> PageserverFeedback { + PageserverFeedback { + current_timeline_size: 0, + last_received_lsn: Lsn::INVALID, + remote_consistent_lsn: Lsn::INVALID, + disk_consistent_lsn: Lsn::INVALID, + replytime: *PG_EPOCH, + } + } + + // Serialize PageserverFeedback using custom format + // to support protocol extensibility. + // + // Following layout is used: + // char - number of key-value pairs that follow. + // + // key-value pairs: + // null-terminated string - key, + // uint32 - value length in bytes + // value itself + // + // TODO: change serialized fields names once all computes migrate to rename. + pub fn serialize(&self, buf: &mut BytesMut) { + buf.put_u8(PAGESERVER_FEEDBACK_FIELDS_NUMBER); // # of keys + buf.put_slice(b"current_timeline_size\0"); + buf.put_i32(8); + buf.put_u64(self.current_timeline_size); + + buf.put_slice(b"ps_writelsn\0"); + buf.put_i32(8); + buf.put_u64(self.last_received_lsn.0); + buf.put_slice(b"ps_flushlsn\0"); + buf.put_i32(8); + buf.put_u64(self.disk_consistent_lsn.0); + buf.put_slice(b"ps_applylsn\0"); + buf.put_i32(8); + buf.put_u64(self.remote_consistent_lsn.0); + + let timestamp = self + .replytime + .duration_since(*PG_EPOCH) + .expect("failed to serialize pg_replytime earlier than PG_EPOCH") + .as_micros() as i64; + + buf.put_slice(b"ps_replytime\0"); + buf.put_i32(8); + buf.put_i64(timestamp); + } + + // Deserialize PageserverFeedback message + // TODO: change serialized fields names once all computes migrate to rename. 
+ pub fn parse(mut buf: Bytes) -> PageserverFeedback { + let mut rf = PageserverFeedback::empty(); + let nfields = buf.get_u8(); + for _ in 0..nfields { + let key = read_cstr(&mut buf).unwrap(); + match key.as_ref() { + b"current_timeline_size" => { + let len = buf.get_i32(); + assert_eq!(len, 8); + rf.current_timeline_size = buf.get_u64(); + } + b"ps_writelsn" => { + let len = buf.get_i32(); + assert_eq!(len, 8); + rf.last_received_lsn = Lsn(buf.get_u64()); + } + b"ps_flushlsn" => { + let len = buf.get_i32(); + assert_eq!(len, 8); + rf.disk_consistent_lsn = Lsn(buf.get_u64()); + } + b"ps_applylsn" => { + let len = buf.get_i32(); + assert_eq!(len, 8); + rf.remote_consistent_lsn = Lsn(buf.get_u64()); + } + b"ps_replytime" => { + let len = buf.get_i32(); + assert_eq!(len, 8); + let raw_time = buf.get_i64(); + if raw_time > 0 { + rf.replytime = *PG_EPOCH + Duration::from_micros(raw_time as u64); + } else { + rf.replytime = *PG_EPOCH - Duration::from_micros(-raw_time as u64); + } + } + _ => { + let len = buf.get_i32(); + warn!( + "PageserverFeedback parse. unknown key {} of len {len}. Skip it.", + String::from_utf8_lossy(key.as_ref()) + ); + buf.advance(len as usize); + } + } + } + trace!("PageserverFeedback parsed is {:?}", rf); + rf + } +} + +mod serde_systemtime { + use std::time::SystemTime; + + use chrono::{DateTime, Utc}; + use serde::{Deserialize, Deserializer, Serializer}; + + pub fn serialize(ts: &SystemTime, serializer: S) -> Result + where + S: Serializer, + { + let chrono_dt: DateTime = (*ts).into(); + serializer.serialize_str(&chrono_dt.to_rfc3339()) + } + + pub fn deserialize<'de, D>(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let time: String = Deserialize::deserialize(deserializer)?; + Ok(DateTime::parse_from_rfc3339(&time) + .map_err(serde::de::Error::custom)? 
+ .into()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_replication_feedback_serialization() { + let mut rf = PageserverFeedback::empty(); + // Fill rf with some values + rf.current_timeline_size = 12345678; + // Use a whole-second time so it compares equal to the deserialized value, + // because serialization truncates the time to microseconds. + rf.replytime = *PG_EPOCH + Duration::from_secs(100_000_000); + let mut data = BytesMut::new(); + rf.serialize(&mut data); + + let rf_parsed = PageserverFeedback::parse(data.freeze()); + assert_eq!(rf, rf_parsed); + } + + #[test] + fn test_replication_feedback_unknown_key() { + let mut rf = PageserverFeedback::empty(); + // Fill rf with some values + rf.current_timeline_size = 12345678; + // Use a whole-second time so it compares equal to the deserialized value, + // because serialization truncates the time to microseconds. + rf.replytime = *PG_EPOCH + Duration::from_secs(100_000_000); + let mut data = BytesMut::new(); + rf.serialize(&mut data); + + // Add an extra field to the buffer and adjust number of keys + if let Some(first) = data.first_mut() { + *first = PAGESERVER_FEEDBACK_FIELDS_NUMBER + 1; + } + + data.put_slice(b"new_field_one\0"); + data.put_i32(8); + data.put_u64(42); + + // Parse serialized data and check that new field is not parsed + let rf_parsed = PageserverFeedback::parse(data.freeze()); + assert_eq!(rf, rf_parsed); + } +} diff --git a/libs/utils/src/tracing_span_assert.rs b/libs/utils/src/tracing_span_assert.rs new file mode 100644 index 0000000000..b9f7986442 --- /dev/null +++ b/libs/utils/src/tracing_span_assert.rs @@ -0,0 +1,287 @@ +//! Assert that the current [`tracing::Span`] has a given set of fields. +//! +//! # Usage +//! +//! ``` +//! use tracing_subscriber::prelude::*; +//! let registry = tracing_subscriber::registry() +//! .with(tracing_error::ErrorLayer::default()); +//! +//! // Register the registry as the global subscriber. +//! 
// In this example, we'll only use it as a thread-local subscriber. +//! let _guard = tracing::subscriber::set_default(registry); +//! +//! // Then, in the main code: +//! +//! let span = tracing::info_span!("TestSpan", test_id = 1); +//! let _guard = span.enter(); +//! +//! // ... down the call stack +//! +//! use utils::tracing_span_assert::{check_fields_present, MultiNameExtractor}; +//! let extractor = MultiNameExtractor::new("TestExtractor", ["test", "test_id"]); +//! match check_fields_present([&extractor]) { +//! Ok(()) => {}, +//! Err(missing) => { +//! panic!("Missing fields: {:?}", missing.into_iter().map(|f| f.name() ).collect::>()); +//! } +//! } +//! ``` +//! +//! Recommended reading: https://docs.rs/tracing-subscriber/0.3.16/tracing_subscriber/layer/index.html#per-layer-filtering +//! + +use std::{ + collections::HashSet, + fmt::{self}, + hash::{Hash, Hasher}, +}; + +pub enum ExtractionResult { + Present, + Absent, +} + +pub trait Extractor: Send + Sync + std::fmt::Debug { + fn name(&self) -> &str; + fn extract(&self, fields: &tracing::field::FieldSet) -> ExtractionResult; +} + +#[derive(Debug)] +pub struct MultiNameExtractor { + name: &'static str, + field_names: [&'static str; L], +} + +impl MultiNameExtractor { + pub fn new(name: &'static str, field_names: [&'static str; L]) -> MultiNameExtractor { + MultiNameExtractor { name, field_names } + } +} +impl Extractor for MultiNameExtractor { + fn name(&self) -> &str { + self.name + } + fn extract(&self, fields: &tracing::field::FieldSet) -> ExtractionResult { + if fields.iter().any(|f| self.field_names.contains(&f.name())) { + ExtractionResult::Present + } else { + ExtractionResult::Absent + } + } +} + +struct MemoryIdentity<'a>(&'a dyn Extractor); + +impl<'a> MemoryIdentity<'a> { + fn as_ptr(&self) -> *const () { + self.0 as *const _ as *const () + } +} +impl<'a> PartialEq for MemoryIdentity<'a> { + fn eq(&self, other: &Self) -> bool { + self.as_ptr() == other.as_ptr() + } +} +impl<'a> Eq for 
MemoryIdentity<'a> {} +impl<'a> Hash for MemoryIdentity<'a> { + fn hash(&self, state: &mut H) { + self.as_ptr().hash(state); + } +} +impl<'a> fmt::Debug for MemoryIdentity<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:p}: {}", self.as_ptr(), self.0.name()) + } +} + +/// Checks that each extractor in `must_be_present` matches a field in the current span or one of its ancestors; returns the unmatched extractors as `Err`. +pub fn check_fields_present( + must_be_present: [&dyn Extractor; L], +) -> Result<(), Vec<&dyn Extractor>> { + let mut missing: HashSet = + HashSet::from_iter(must_be_present.into_iter().map(|r| MemoryIdentity(r))); + let trace = tracing_error::SpanTrace::capture(); + trace.with_spans(|md, _formatted_fields| { + missing.retain(|extractor| match extractor.0.extract(md.fields()) { + ExtractionResult::Present => false, + ExtractionResult::Absent => true, + }); + !missing.is_empty() // continue walking up until we've found all missing + }); + if missing.is_empty() { + Ok(()) + } else { + Err(missing.into_iter().map(|mi| mi.0).collect()) + } +} + +#[cfg(test)] +mod tests { + + use tracing_subscriber::prelude::*; + + use super::*; + + struct Setup { + _current_thread_subscriber_guard: tracing::subscriber::DefaultGuard, + tenant_extractor: MultiNameExtractor<2>, + timeline_extractor: MultiNameExtractor<2>, + } + + fn setup_current_thread() -> Setup { + let tenant_extractor = MultiNameExtractor::new("TenantId", ["tenant_id", "tenant"]); + let timeline_extractor = MultiNameExtractor::new("TimelineId", ["timeline_id", "timeline"]); + + let registry = tracing_subscriber::registry() + .with(tracing_subscriber::fmt::layer()) + .with(tracing_error::ErrorLayer::default()); + + let guard = tracing::subscriber::set_default(registry); + + Setup { + _current_thread_subscriber_guard: guard, + tenant_extractor, + timeline_extractor, + } + } + + fn assert_missing(missing: Vec<&dyn Extractor>, expected: Vec<&dyn Extractor>) { + let missing: HashSet = + HashSet::from_iter(missing.into_iter().map(MemoryIdentity)); + let expected: 
HashSet = + HashSet::from_iter(expected.into_iter().map(MemoryIdentity)); + assert_eq!(missing, expected); + } + + #[test] + fn positive_one_level() { + let setup = setup_current_thread(); + let span = tracing::info_span!("root", tenant_id = "tenant-1", timeline_id = "timeline-1"); + let _guard = span.enter(); + check_fields_present([&setup.tenant_extractor, &setup.timeline_extractor]).unwrap(); + } + + #[test] + fn negative_one_level() { + let setup = setup_current_thread(); + let span = tracing::info_span!("root", timeline_id = "timeline-1"); + let _guard = span.enter(); + let missing = + check_fields_present([&setup.tenant_extractor, &setup.timeline_extractor]).unwrap_err(); + assert_missing(missing, vec![&setup.tenant_extractor]); + } + + #[test] + fn positive_multiple_levels() { + let setup = setup_current_thread(); + + let span = tracing::info_span!("root"); + let _guard = span.enter(); + + let span = tracing::info_span!("child", tenant_id = "tenant-1"); + let _guard = span.enter(); + + let span = tracing::info_span!("grandchild", timeline_id = "timeline-1"); + let _guard = span.enter(); + + check_fields_present([&setup.tenant_extractor, &setup.timeline_extractor]).unwrap(); + } + + #[test] + fn negative_multiple_levels() { + let setup = setup_current_thread(); + + let span = tracing::info_span!("root"); + let _guard = span.enter(); + + let span = tracing::info_span!("child", timeline_id = "timeline-1"); + let _guard = span.enter(); + + let missing = check_fields_present([&setup.tenant_extractor]).unwrap_err(); + assert_missing(missing, vec![&setup.tenant_extractor]); + } + + #[test] + fn positive_subset_one_level() { + let setup = setup_current_thread(); + let span = tracing::info_span!("root", tenant_id = "tenant-1", timeline_id = "timeline-1"); + let _guard = span.enter(); + check_fields_present([&setup.tenant_extractor]).unwrap(); + } + + #[test] + fn positive_subset_multiple_levels() { + let setup = setup_current_thread(); + + let span = 
tracing::info_span!("root"); + let _guard = span.enter(); + + let span = tracing::info_span!("child", tenant_id = "tenant-1"); + let _guard = span.enter(); + + let span = tracing::info_span!("grandchild", timeline_id = "timeline-1"); + let _guard = span.enter(); + + check_fields_present([&setup.tenant_extractor]).unwrap(); + } + + #[test] + fn negative_subset_one_level() { + let setup = setup_current_thread(); + let span = tracing::info_span!("root", timeline_id = "timeline-1"); + let _guard = span.enter(); + let missing = check_fields_present([&setup.tenant_extractor]).unwrap_err(); + assert_missing(missing, vec![&setup.tenant_extractor]); + } + + #[test] + fn negative_subset_multiple_levels() { + let setup = setup_current_thread(); + + let span = tracing::info_span!("root"); + let _guard = span.enter(); + + let span = tracing::info_span!("child", timeline_id = "timeline-1"); + let _guard = span.enter(); + + let missing = check_fields_present([&setup.tenant_extractor]).unwrap_err(); + assert_missing(missing, vec![&setup.tenant_extractor]); + } + + #[test] + fn tracing_error_subscriber_not_set_up() { + // no setup + + let span = tracing::info_span!("foo", e = "some value"); + let _guard = span.enter(); + + let extractor = MultiNameExtractor::new("E", ["e"]); + let missing = check_fields_present([&extractor]).unwrap_err(); + assert_missing(missing, vec![&extractor]); + } + + #[test] + #[should_panic] + fn panics_if_tracing_error_subscriber_has_wrong_filter() { + let r = tracing_subscriber::registry().with({ + tracing_error::ErrorLayer::default().with_filter( + tracing_subscriber::filter::dynamic_filter_fn(|md, _| { + if md.is_span() && *md.level() == tracing::Level::INFO { + return false; + } + true + }), + ) + }); + + let _guard = tracing::subscriber::set_default(r); + + let span = tracing::info_span!("foo", e = "some value"); + let _guard = span.enter(); + + let extractor = MultiNameExtractor::new("E", ["e"]); + let missing = 
check_fields_present([&extractor]).unwrap_err(); + assert_missing(missing, vec![&extractor]); + } +} diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index 0bc7eba95e..ea81544cbe 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -52,6 +52,7 @@ sync_wrapper.workspace = true tokio-tar.workspace = true thiserror.workspace = true tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time"] } +tokio-io-timeout.workspace = true tokio-postgres.workspace = true tokio-util.workspace = true toml_edit = { workspace = true, features = [ "serde" ] } diff --git a/pageserver/benches/bench_layer_map.rs b/pageserver/benches/bench_layer_map.rs index 5edfa84d8a..ee5980212e 100644 --- a/pageserver/benches/bench_layer_map.rs +++ b/pageserver/benches/bench_layer_map.rs @@ -13,7 +13,7 @@ use std::time::Instant; use utils::lsn::Lsn; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; fn build_layer_map(filename_dump: PathBuf) -> LayerMap { let mut layer_map = LayerMap::::default(); @@ -114,7 +114,7 @@ fn bench_from_captest_env(c: &mut Criterion) { c.bench_function("captest_uniform_queries", |b| { b.iter(|| { for q in queries.clone().into_iter() { - layer_map.search(q.0, q.1); + black_box(layer_map.search(q.0, q.1)); } }); }); @@ -122,11 +122,11 @@ fn bench_from_captest_env(c: &mut Criterion) { // test with a key that corresponds to the RelDir entry. See pgdatadir_mapping.rs. 
c.bench_function("captest_rel_dir_query", |b| { b.iter(|| { - let result = layer_map.search( + let result = black_box(layer_map.search( Key::from_hex("000000067F00008000000000000000000001").unwrap(), // This LSN is higher than any of the LSNs in the tree Lsn::from_str("D0/80208AE1").unwrap(), - ); + )); result.unwrap(); }); }); @@ -183,7 +183,7 @@ fn bench_from_real_project(c: &mut Criterion) { group.bench_function("uniform_queries", |b| { b.iter(|| { for q in queries.clone().into_iter() { - layer_map.search(q.0, q.1); + black_box(layer_map.search(q.0, q.1)); } }); }); @@ -232,7 +232,7 @@ fn bench_sequential(c: &mut Criterion) { group.bench_function("uniform_queries", |b| { b.iter(|| { for q in queries.clone().into_iter() { - layer_map.search(q.0, q.1); + black_box(layer_map.search(q.0, q.1)); } }); }); diff --git a/pageserver/src/basebackup.rs b/pageserver/src/basebackup.rs index 41fa0a67bb..c666fc785c 100644 --- a/pageserver/src/basebackup.rs +++ b/pageserver/src/basebackup.rs @@ -463,9 +463,13 @@ where let wal_file_path = format!("pg_wal/{}", wal_file_name); let header = new_tar_header(&wal_file_path, WAL_SEGMENT_SIZE as u64)?; - let wal_seg = - postgres_ffi::generate_wal_segment(segno, system_identifier, self.timeline.pg_version) - .map_err(|e| anyhow!(e).context("Failed generating wal segment"))?; + let wal_seg = postgres_ffi::generate_wal_segment( + segno, + system_identifier, + self.timeline.pg_version, + self.lsn, + ) + .map_err(|e| anyhow!(e).context("Failed generating wal segment"))?; ensure!(wal_seg.len() == WAL_SEGMENT_SIZE); self.ar.append(&header, &wal_seg[..]).await?; Ok(()) diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index ed23a18ee0..d843b01ed7 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -25,6 +25,7 @@ use pageserver::{ virtual_file, }; use postgres_backend::AuthType; +use utils::logging::TracingErrorLayerEnablement; use utils::signals::ShutdownSignals; use utils::{ 
auth::JwtAuth, logging, project_git_version, sentry_init::init_sentry, signals::Signal, @@ -86,8 +87,19 @@ fn main() -> anyhow::Result<()> { } }; - // Initialize logging, which must be initialized before the custom panic hook is installed. - logging::init(conf.log_format)?; + // Initialize logging. + // + // It must be initialized before the custom panic hook is installed below. + // + // Regarding tracing_error enablement: at this time, we only use the + // tracing_error crate to debug_assert that log spans contain tenant and timeline ids. + // See `debug_assert_current_span_has_tenant_and_timeline_id` in the timeline module + let tracing_error_layer_enablement = if cfg!(debug_assertions) { + TracingErrorLayerEnablement::EnableWithRustLogFilter + } else { + TracingErrorLayerEnablement::Disabled + }; + logging::init(conf.log_format, tracing_error_layer_enablement)?; // mind the order required here: 1. logging, 2. panic_hook, 3. sentry. // disarming this hook on pageserver, because we never tear down tracing. 
@@ -226,6 +238,7 @@ fn start_pageserver( ); set_build_info_metric(GIT_VERSION); set_launch_timestamp_metric(launch_ts); + pageserver::preinitialize_metrics(); // If any failpoints were set from FAILPOINTS environment variable, // print them to the log for debugging purposes diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index 19f0f22815..9e341230cf 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -6,6 +6,7 @@ use anyhow::{anyhow, bail, ensure, Context, Result}; use remote_storage::{RemotePath, RemoteStorageConfig}; +use serde::de::IntoDeserializer; use std::env; use storage_broker::Uri; use utils::crashsafe::path_with_suffix_extension; @@ -62,7 +63,6 @@ pub mod defaults { pub const DEFAULT_CACHED_METRIC_COLLECTION_INTERVAL: &str = "1 hour"; pub const DEFAULT_METRIC_COLLECTION_ENDPOINT: Option = None; pub const DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL: &str = "10 min"; - pub const DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD: &str = "24 hour"; /// /// Default built-in configuration file. @@ -91,7 +91,6 @@ pub mod defaults { #cached_metric_collection_interval = '{DEFAULT_CACHED_METRIC_COLLECTION_INTERVAL}' #synthetic_size_calculation_interval = '{DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL}' -#evictions_low_residence_duration_metric_threshold = '{DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD}' #disk_usage_based_eviction = {{ max_usage_pct = .., min_avail_bytes = .., period = "10s"}} @@ -108,6 +107,7 @@ pub mod defaults { #pitr_interval = '{DEFAULT_PITR_INTERVAL}' #min_resident_size_override = .. # in bytes +#evictions_low_residence_duration_metric_threshold = '{DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD}' # [remote_storage] @@ -182,9 +182,6 @@ pub struct PageServerConf { pub metric_collection_endpoint: Option, pub synthetic_size_calculation_interval: Duration, - // See the corresponding metric's help string. 
- pub evictions_low_residence_duration_metric_threshold: Duration, - pub disk_usage_based_eviction: Option, pub test_remote_failures: u64, @@ -257,8 +254,6 @@ struct PageServerConfigBuilder { metric_collection_endpoint: BuilderValue>, synthetic_size_calculation_interval: BuilderValue, - evictions_low_residence_duration_metric_threshold: BuilderValue, - disk_usage_based_eviction: BuilderValue>, test_remote_failures: BuilderValue, @@ -316,11 +311,6 @@ impl Default for PageServerConfigBuilder { .expect("cannot parse default synthetic size calculation interval")), metric_collection_endpoint: Set(DEFAULT_METRIC_COLLECTION_ENDPOINT), - evictions_low_residence_duration_metric_threshold: Set(humantime::parse_duration( - DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD, - ) - .expect("cannot parse DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD")), - disk_usage_based_eviction: Set(None), test_remote_failures: Set(0), @@ -438,10 +428,6 @@ impl PageServerConfigBuilder { self.test_remote_failures = BuilderValue::Set(fail_first); } - pub fn evictions_low_residence_duration_metric_threshold(&mut self, value: Duration) { - self.evictions_low_residence_duration_metric_threshold = BuilderValue::Set(value); - } - pub fn disk_usage_based_eviction(&mut self, value: Option) { self.disk_usage_based_eviction = BuilderValue::Set(value); } @@ -525,11 +511,6 @@ impl PageServerConfigBuilder { synthetic_size_calculation_interval: self .synthetic_size_calculation_interval .ok_or(anyhow!("missing synthetic_size_calculation_interval"))?, - evictions_low_residence_duration_metric_threshold: self - .evictions_low_residence_duration_metric_threshold - .ok_or(anyhow!( - "missing evictions_low_residence_duration_metric_threshold" - ))?, disk_usage_based_eviction: self .disk_usage_based_eviction .ok_or(anyhow!("missing disk_usage_based_eviction"))?, @@ -721,12 +702,12 @@ impl PageServerConf { "synthetic_size_calculation_interval" => 
builder.synthetic_size_calculation_interval(parse_toml_duration(key, item)?), "test_remote_failures" => builder.test_remote_failures(parse_toml_u64(key, item)?), - "evictions_low_residence_duration_metric_threshold" => builder.evictions_low_residence_duration_metric_threshold(parse_toml_duration(key, item)?), "disk_usage_based_eviction" => { tracing::info!("disk_usage_based_eviction: {:#?}", &item); builder.disk_usage_based_eviction( - toml_edit::de::from_item(item.clone()) - .context("parse disk_usage_based_eviction")?) + deserialize_from_item("disk_usage_based_eviction", item) + .context("parse disk_usage_based_eviction")? + ) }, "ondemand_download_behavior_treat_error_as_warn" => builder.ondemand_download_behavior_treat_error_as_warn(parse_toml_bool(key, item)?), _ => bail!("unrecognized pageserver option '{key}'"), @@ -827,18 +808,25 @@ impl PageServerConf { if let Some(eviction_policy) = item.get("eviction_policy") { t_conf.eviction_policy = Some( - toml_edit::de::from_item(eviction_policy.clone()) + deserialize_from_item("eviction_policy", eviction_policy) .context("parse eviction_policy")?, ); } if let Some(item) = item.get("min_resident_size_override") { t_conf.min_resident_size_override = Some( - toml_edit::de::from_item(item.clone()) + deserialize_from_item("min_resident_size_override", item) .context("parse min_resident_size_override")?, ); } + if let Some(item) = item.get("evictions_low_residence_duration_metric_threshold") { + t_conf.evictions_low_residence_duration_metric_threshold = Some(parse_toml_duration( + "evictions_low_residence_duration_metric_threshold", + item, + )?); + } + Ok(t_conf) } @@ -877,10 +865,6 @@ impl PageServerConf { cached_metric_collection_interval: Duration::from_secs(60 * 60), metric_collection_endpoint: defaults::DEFAULT_METRIC_COLLECTION_ENDPOINT, synthetic_size_calculation_interval: Duration::from_secs(60), - evictions_low_residence_duration_metric_threshold: humantime::parse_duration( - 
defaults::DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD, - ) - .unwrap(), disk_usage_based_eviction: None, test_remote_failures: 0, ondemand_download_behavior_treat_error_as_warn: false, @@ -938,6 +922,18 @@ where }) } +fn deserialize_from_item(name: &str, item: &Item) -> anyhow::Result +where + T: serde::de::DeserializeOwned, +{ + // ValueDeserializer::new is not public, so use the ValueDeserializer's documented way + let deserializer = match item.clone().into_value() { + Ok(value) => value.into_deserializer(), + Err(item) => anyhow::bail!("toml_edit::Item '{item}' is not a toml_edit::Value"), + }; + T::deserialize(deserializer).with_context(|| format!("deserializing item for node {name}")) +} + /// Configurable semaphore permits setting. /// /// Does not allow semaphore permits to be zero, because at runtime initially zero permits and empty @@ -1004,9 +1000,10 @@ mod tests { use remote_storage::{RemoteStorageKind, S3Config}; use tempfile::{tempdir, TempDir}; + use utils::serde_percent::Percent; use super::*; - use crate::DEFAULT_PG_VERSION; + use crate::{tenant::config::EvictionPolicy, DEFAULT_PG_VERSION}; const ALL_BASE_VALUES_TOML: &str = r#" # Initial configuration file created by 'pageserver --init' @@ -1029,8 +1026,6 @@ cached_metric_collection_interval = '22200 s' metric_collection_endpoint = 'http://localhost:80/metrics' synthetic_size_calculation_interval = '333 s' -evictions_low_residence_duration_metric_threshold = '444 s' - log_format = 'json' "#; @@ -1087,9 +1082,6 @@ log_format = 'json' synthetic_size_calculation_interval: humantime::parse_duration( defaults::DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL )?, - evictions_low_residence_duration_metric_threshold: humantime::parse_duration( - defaults::DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD - )?, disk_usage_based_eviction: None, test_remote_failures: 0, ondemand_download_behavior_treat_error_as_warn: false, @@ -1144,7 +1136,6 @@ log_format = 'json' 
cached_metric_collection_interval: Duration::from_secs(22200), metric_collection_endpoint: Some(Url::parse("http://localhost:80/metrics")?), synthetic_size_calculation_interval: Duration::from_secs(333), - evictions_low_residence_duration_metric_threshold: Duration::from_secs(444), disk_usage_based_eviction: None, test_remote_failures: 0, ondemand_download_behavior_treat_error_as_warn: false, @@ -1310,6 +1301,71 @@ trace_read_requests = {trace_read_requests}"#, Ok(()) } + #[test] + fn eviction_pageserver_config_parse() -> anyhow::Result<()> { + let tempdir = tempdir()?; + let (workdir, pg_distrib_dir) = prepare_fs(&tempdir)?; + + let pageserver_conf_toml = format!( + r#"pg_distrib_dir = "{}" +metric_collection_endpoint = "http://sample.url" +metric_collection_interval = "10min" +id = 222 + +[disk_usage_based_eviction] +max_usage_pct = 80 +min_avail_bytes = 0 +period = "10s" + +[tenant_config] +evictions_low_residence_duration_metric_threshold = "20m" + +[tenant_config.eviction_policy] +kind = "LayerAccessThreshold" +period = "20m" +threshold = "20m" +"#, + pg_distrib_dir.display(), + ); + let toml: Document = pageserver_conf_toml.parse()?; + let conf = PageServerConf::parse_and_validate(&toml, &workdir)?; + + assert_eq!(conf.pg_distrib_dir, pg_distrib_dir); + assert_eq!( + conf.metric_collection_endpoint, + Some("http://sample.url".parse().unwrap()) + ); + assert_eq!( + conf.metric_collection_interval, + Duration::from_secs(10 * 60) + ); + assert_eq!( + conf.default_tenant_conf + .evictions_low_residence_duration_metric_threshold, + Duration::from_secs(20 * 60) + ); + assert_eq!(conf.id, NodeId(222)); + assert_eq!( + conf.disk_usage_based_eviction, + Some(DiskUsageEvictionTaskConfig { + max_usage_pct: Percent::new(80).unwrap(), + min_avail_bytes: 0, + period: Duration::from_secs(10), + #[cfg(feature = "testing")] + mock_statvfs: None, + }) + ); + match &conf.default_tenant_conf.eviction_policy { + EvictionPolicy::NoEviction => panic!("Unexpected eviction opolicy 
tenant settings"), + EvictionPolicy::LayerAccessThreshold(eviction_thresold) => { + assert_eq!(eviction_thresold.period, Duration::from_secs(20 * 60)); + assert_eq!(eviction_thresold.threshold, Duration::from_secs(20 * 60)); + } + } + + Ok(()) + } + fn prepare_fs(tempdir: &TempDir) -> anyhow::Result<(PathBuf, PathBuf)> { let tempdir_path = tempdir.path(); diff --git a/pageserver/src/http/openapi_spec.yml b/pageserver/src/http/openapi_spec.yml index 478e9d228a..95f6e96a5b 100644 --- a/pageserver/src/http/openapi_spec.yml +++ b/pageserver/src/http/openapi_spec.yml @@ -520,6 +520,43 @@ paths: schema: $ref: "#/components/schemas/Error" + /v1/tenant/{tenant_id}/synthetic_size: + parameters: + - name: tenant_id + in: path + required: true + schema: + type: string + format: hex + get: + description: | + Calculate tenant's synthetic size + responses: + "200": + description: Tenant's synthetic size + content: + application/json: + schema: + $ref: "#/components/schemas/SyntheticSizeResponse" + "401": + description: Unauthorized Error + content: + application/json: + schema: + $ref: "#/components/schemas/UnauthorizedError" + "403": + description: Forbidden Error + content: + application/json: + schema: + $ref: "#/components/schemas/ForbiddenError" + "500": + description: Generic operation error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + /v1/tenant/{tenant_id}/size: parameters: - name: tenant_id @@ -829,12 +866,9 @@ components: type: object required: - id - - state properties: id: type: string - state: - type: string current_physical_size: type: integer has_in_progress_downloads: @@ -951,6 +985,84 @@ components: latest_gc_cutoff_lsn: type: string format: hex + + SyntheticSizeResponse: + type: object + required: + - id + - size + - segment_sizes + - inputs + properties: + id: + type: string + format: hex + size: + type: integer + segment_sizes: + type: array + items: + $ref: "#/components/schemas/SegmentSize" + inputs: + type: object + 
properties: + segments: + type: array + items: + $ref: "#/components/schemas/SegmentData" + timeline_inputs: + type: array + items: + $ref: "#/components/schemas/TimelineInput" + + SegmentSize: + type: object + required: + - method + - accum_size + properties: + method: + type: string + accum_size: + type: integer + + SegmentData: + type: object + required: + - segment + properties: + segment: + type: object + required: + - lsn + properties: + parent: + type: integer + lsn: + type: integer + size: + type: integer + needed: + type: boolean + timeline_id: + type: string + format: hex + kind: + type: string + + TimelineInput: + type: object + required: + - timeline_id + properties: + ancestor_id: + type: string + ancestor_lsn: + type: string + timeline_id: + type: string + format: hex + Error: type: object required: diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 2db60f557d..b1251123b2 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -465,7 +465,7 @@ async fn tenant_list_handler(request: Request) -> Result, A .iter() .map(|(id, state)| TenantInfo { id: *id, - state: *state, + state: state.clone(), current_physical_size: None, has_in_progress_downloads: Some(state.has_in_progress_downloads()), }) @@ -490,7 +490,7 @@ async fn tenant_status(request: Request) -> Result, ApiErro let state = tenant.current_state(); Ok(TenantInfo { id: tenant_id, - state, + state: state.clone(), current_physical_size: Some(current_physical_size), has_in_progress_downloads: Some(state.has_in_progress_downloads()), }) @@ -781,6 +781,19 @@ async fn tenant_create_handler(mut request: Request) -> Result) -> Result, ApiErro .await .map_err(|_| ApiError::Conflict(String::from("no active tenant found")))?; - tenant.set_broken("broken from test"); + tenant.set_broken("broken from test".to_owned()); json_response(StatusCode::OK, ()) } @@ -1175,6 +1201,37 @@ async fn handler_404(_: Request) -> Result, ApiError> { ) } +#[cfg(feature = 
"testing")] +async fn post_tracing_event_handler(mut r: Request) -> Result, ApiError> { + #[derive(Debug, serde::Deserialize)] + #[serde(rename_all = "lowercase")] + enum Level { + Error, + Warn, + Info, + Debug, + Trace, + } + #[derive(Debug, serde::Deserialize)] + struct Request { + level: Level, + message: String, + } + let body: Request = json_request(&mut r) + .await + .map_err(|_| ApiError::BadRequest(anyhow::anyhow!("invalid JSON body")))?; + + match body.level { + Level::Error => tracing::error!(?body.message), + Level::Warn => tracing::warn!(?body.message), + Level::Info => tracing::info!(?body.message), + Level::Debug => tracing::debug!(?body.message), + Level::Trace => tracing::trace!(?body.message), + } + + json_response(StatusCode::OK, ()) +} + pub fn make_router( conf: &'static PageServerConf, launch_ts: &'static LaunchTimestamp, @@ -1315,5 +1372,9 @@ pub fn make_router( testing_api!("set tenant state to broken", handle_tenant_break), ) .get("/v1/panic", |r| RequestSpan(always_panic_handler).handle(r)) + .post( + "/v1/tracing/event", + testing_api!("emit a tracing event", post_tracing_event_handler), + ) .any(handler_404)) } diff --git a/pageserver/src/import_datadir.rs b/pageserver/src/import_datadir.rs index 39e434a023..936de35eb9 100644 --- a/pageserver/src/import_datadir.rs +++ b/pageserver/src/import_datadir.rs @@ -114,7 +114,7 @@ async fn import_rel( path: &Path, spcoid: Oid, dboid: Oid, - reader: &mut (impl AsyncRead + Send + Sync + Unpin), + reader: &mut (impl AsyncRead + Unpin), len: usize, ctx: &RequestContext, ) -> anyhow::Result<()> { @@ -200,7 +200,7 @@ async fn import_slru( modification: &mut DatadirModification<'_>, slru: SlruKind, path: &Path, - reader: &mut (impl AsyncRead + Send + Sync + Unpin), + reader: &mut (impl AsyncRead + Unpin), len: usize, ctx: &RequestContext, ) -> anyhow::Result<()> { @@ -612,8 +612,8 @@ async fn import_file( Ok(None) } -async fn read_all_bytes(reader: &mut (impl AsyncRead + Send + Sync + Unpin)) -> Result 
{ +async fn read_all_bytes(reader: &mut (impl AsyncRead + Unpin)) -> Result { let mut buf: Vec = vec![]; reader.read_to_end(&mut buf).await?; - Ok(Bytes::copy_from_slice(&buf[..])) + Ok(Bytes::from(buf)) } diff --git a/pageserver/src/lib.rs b/pageserver/src/lib.rs index 278658eba3..04863886cb 100644 --- a/pageserver/src/lib.rs +++ b/pageserver/src/lib.rs @@ -44,6 +44,8 @@ pub const DELTA_FILE_MAGIC: u16 = 0x5A61; static ZERO_PAGE: bytes::Bytes = bytes::Bytes::from_static(&[0u8; 8192]); +pub use crate::metrics::preinitialize_metrics; + pub async fn shutdown_pageserver(exit_code: i32) { // Shut down the libpq endpoint task. This prevents new connections from // being accepted. diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 1f31e5a8fb..deb20f21f8 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -1,12 +1,13 @@ use metrics::core::{AtomicU64, GenericCounter}; use metrics::{ register_counter_vec, register_histogram, register_histogram_vec, register_int_counter, - register_int_counter_vec, register_int_gauge, register_int_gauge_vec, register_uint_gauge_vec, - Counter, CounterVec, Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, - UIntGauge, UIntGaugeVec, + register_int_counter_vec, register_int_gauge_vec, register_uint_gauge_vec, Counter, CounterVec, + Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, + UIntGaugeVec, }; use once_cell::sync::Lazy; -use pageserver_api::models::state; +use pageserver_api::models::TenantState; +use strum::VariantNames; use utils::id::{TenantId, TimelineId}; /// Prometheus histogram buckets (in seconds) for operations in the critical @@ -147,15 +148,6 @@ static CURRENT_LOGICAL_SIZE: Lazy = Lazy::new(|| { .expect("failed to define current logical size metric") }); -// Metrics collected on tenant states. 
-const TENANT_STATE_OPTIONS: &[&str] = &[ - state::LOADING, - state::ATTACHING, - state::ACTIVE, - state::STOPPING, - state::BROKEN, -]; - pub static TENANT_STATE_METRIC: Lazy = Lazy::new(|| { register_uint_gauge_vec!( "pageserver_tenant_states_count", @@ -213,6 +205,15 @@ static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy = Lazy::new(|| .expect("failed to define a metric") }); +pub static UNEXPECTED_ONDEMAND_DOWNLOADS: Lazy = Lazy::new(|| { + register_int_counter!( + "pageserver_unexpected_ondemand_downloads_count", + "Number of unexpected on-demand downloads. \ + We log more context for each increment, so, forgo any labels in this metric.", + ) + .expect("failed to define a metric") +}); + /// Each [`Timeline`]'s [`EVICTIONS_WITH_LOW_RESIDENCE_DURATION`] metric. #[derive(Debug)] pub struct EvictionsWithLowResidenceDuration { @@ -265,6 +266,22 @@ impl EvictionsWithLowResidenceDuration { } } + pub fn change_threshold( + &mut self, + tenant_id: &str, + timeline_id: &str, + new_threshold: Duration, + ) { + if new_threshold == self.threshold { + return; + } + let mut with_new = + EvictionsWithLowResidenceDurationBuilder::new(self.data_source, new_threshold) + .build(tenant_id, timeline_id); + std::mem::swap(self, &mut with_new); + with_new.remove(tenant_id, timeline_id); + } + // This could be a `Drop` impl, but, we need the `tenant_id` and `timeline_id`. fn remove(&mut self, tenant_id: &str, timeline_id: &str) { let Some(_counter) = self.counter.take() else { @@ -342,11 +359,6 @@ pub static LIVE_CONNECTIONS_COUNT: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); -pub static NUM_ONDISK_LAYERS: Lazy = Lazy::new(|| { - register_int_gauge!("pageserver_ondisk_layers", "Number of layers on-disk") - .expect("failed to define a metric") -}); - // remote storage metrics /// NB: increment _after_ recording the current value into [`REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST`]. 
@@ -377,6 +389,26 @@ static REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST: Lazy = Lazy::new .expect("failed to define a metric") }); +static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "pageserver_remote_timeline_client_bytes_started", + "Incremented by the number of bytes associated with a remote timeline client operation. \ + The increment happens when the operation is scheduled.", + &["tenant_id", "timeline_id", "file_kind", "op_kind"], + ) + .expect("failed to define a metric") +}); + +static REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "pageserver_remote_timeline_client_bytes_finished", + "Incremented by the number of bytes associated with a remote timeline client operation. \ + The increment happens when the operation finishes (regardless of success/failure/shutdown).", + &["tenant_id", "timeline_id", "file_kind", "op_kind"], + ) + .expect("failed to define a metric") +}); + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum RemoteOpKind { Upload, @@ -597,7 +629,7 @@ pub struct TimelineMetrics { pub num_persistent_files_created: IntCounter, pub persistent_bytes_written: IntCounter, pub evictions: IntCounter, - pub evictions_with_low_residence_duration: EvictionsWithLowResidenceDuration, + pub evictions_with_low_residence_duration: std::sync::RwLock, } impl TimelineMetrics { @@ -664,7 +696,9 @@ impl TimelineMetrics { num_persistent_files_created, persistent_bytes_written, evictions, - evictions_with_low_residence_duration, + evictions_with_low_residence_duration: std::sync::RwLock::new( + evictions_with_low_residence_duration, + ), } } } @@ -683,6 +717,8 @@ impl Drop for TimelineMetrics { let _ = PERSISTENT_BYTES_WRITTEN.remove_label_values(&[tenant_id, timeline_id]); let _ = EVICTIONS.remove_label_values(&[tenant_id, timeline_id]); self.evictions_with_low_residence_duration + .write() + .unwrap() .remove(tenant_id, timeline_id); for op in 
STORAGE_TIME_OPERATIONS { let _ = @@ -707,7 +743,7 @@ impl Drop for TimelineMetrics { pub fn remove_tenant_metrics(tenant_id: &TenantId) { let tid = tenant_id.to_string(); let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]); - for state in TENANT_STATE_OPTIONS { + for state in TenantState::VARIANTS { let _ = TENANT_STATE_METRIC.remove_label_values(&[&tid, state]); } } @@ -727,6 +763,8 @@ pub struct RemoteTimelineClientMetrics { remote_operation_time: Mutex>, calls_unfinished_gauge: Mutex>, calls_started_hist: Mutex>, + bytes_started_counter: Mutex>, + bytes_finished_counter: Mutex>, } impl RemoteTimelineClientMetrics { @@ -737,6 +775,8 @@ impl RemoteTimelineClientMetrics { remote_operation_time: Mutex::new(HashMap::default()), calls_unfinished_gauge: Mutex::new(HashMap::default()), calls_started_hist: Mutex::new(HashMap::default()), + bytes_started_counter: Mutex::new(HashMap::default()), + bytes_finished_counter: Mutex::new(HashMap::default()), remote_physical_size_gauge: Mutex::new(None), } } @@ -775,6 +815,7 @@ impl RemoteTimelineClientMetrics { }); metric.clone() } + fn calls_unfinished_gauge( &self, file_kind: &RemoteOpFileKind, @@ -816,32 +857,125 @@ impl RemoteTimelineClientMetrics { }); metric.clone() } + + fn bytes_started_counter( + &self, + file_kind: &RemoteOpFileKind, + op_kind: &RemoteOpKind, + ) -> IntCounter { + // XXX would be nice to have an upgradable RwLock + let mut guard = self.bytes_started_counter.lock().unwrap(); + let key = (file_kind.as_str(), op_kind.as_str()); + let metric = guard.entry(key).or_insert_with(move || { + REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER + .get_metric_with_label_values(&[ + &self.tenant_id.to_string(), + &self.timeline_id.to_string(), + key.0, + key.1, + ]) + .unwrap() + }); + metric.clone() + } + + fn bytes_finished_counter( + &self, + file_kind: &RemoteOpFileKind, + op_kind: &RemoteOpKind, + ) -> IntCounter { + // XXX would be nice to have an upgradable RwLock + let mut guard = 
self.bytes_finished_counter.lock().unwrap(); + let key = (file_kind.as_str(), op_kind.as_str()); + let metric = guard.entry(key).or_insert_with(move || { + REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER + .get_metric_with_label_values(&[ + &self.tenant_id.to_string(), + &self.timeline_id.to_string(), + key.0, + key.1, + ]) + .unwrap() + }); + metric.clone() + } +} + +#[cfg(test)] +impl RemoteTimelineClientMetrics { + pub fn get_bytes_started_counter_value( + &self, + file_kind: &RemoteOpFileKind, + op_kind: &RemoteOpKind, + ) -> Option { + let guard = self.bytes_started_counter.lock().unwrap(); + let key = (file_kind.as_str(), op_kind.as_str()); + guard.get(&key).map(|counter| counter.get()) + } + + pub fn get_bytes_finished_counter_value( + &self, + file_kind: &RemoteOpFileKind, + op_kind: &RemoteOpKind, + ) -> Option { + let guard = self.bytes_finished_counter.lock().unwrap(); + let key = (file_kind.as_str(), op_kind.as_str()); + guard.get(&key).map(|counter| counter.get()) + } } /// See [`RemoteTimelineClientMetrics::call_begin`]. #[must_use] -pub(crate) struct RemoteTimelineClientCallMetricGuard(Option); +pub(crate) struct RemoteTimelineClientCallMetricGuard { + /// Decremented on drop. + calls_unfinished_metric: Option, + /// If Some(), this references the bytes_finished metric, and we increment it by the given `u64` on drop. + bytes_finished: Option<(IntCounter, u64)>, +} impl RemoteTimelineClientCallMetricGuard { - /// Consume this guard object without decrementing the metric. - /// The caller vouches to do this manually, so that the prior increment of the gauge will cancel out. + /// Consume this guard object without performing the metric updates it would do on `drop()`. + /// The caller vouches to do the metric updates manually. 
pub fn will_decrement_manually(mut self) { - self.0 = None; // prevent drop() from decrementing + let RemoteTimelineClientCallMetricGuard { + calls_unfinished_metric, + bytes_finished, + } = &mut self; + calls_unfinished_metric.take(); + bytes_finished.take(); } } impl Drop for RemoteTimelineClientCallMetricGuard { fn drop(&mut self) { - if let RemoteTimelineClientCallMetricGuard(Some(guard)) = self { + let RemoteTimelineClientCallMetricGuard { + calls_unfinished_metric, + bytes_finished, + } = self; + if let Some(guard) = calls_unfinished_metric.take() { guard.dec(); } + if let Some((bytes_finished_metric, value)) = bytes_finished { + bytes_finished_metric.inc_by(*value); + } } } +/// The enum variants communicate to the [`RemoteTimelineClientMetrics`] whether to +/// track the byte size of this call in applicable metric(s). +pub(crate) enum RemoteTimelineClientMetricsCallTrackSize { + /// Do not account for this call's byte size in any metrics. + /// The `reason` field is there to make the call sites self-documenting + /// about why they don't need the metric. + DontTrackSize { reason: &'static str }, + /// Track the byte size of the call in applicable metric(s). + Bytes(u64), +} + impl RemoteTimelineClientMetrics { - /// Increment the metrics that track ongoing calls to the remote timeline client instance. + /// Update the metrics that change when a call to the remote timeline client instance starts. /// - /// Drop the returned guard object once the operation is finished to decrement the values. + /// Drop the returned guard object once the operation is finished to update the corresponding metrics that track completions. /// Or, use [`RemoteTimelineClientCallMetricGuard::will_decrement_manually`] and [`call_end`] if that /// is more suitable. /// Never do both. 
@@ -849,24 +983,51 @@ impl RemoteTimelineClientMetrics { &self, file_kind: &RemoteOpFileKind, op_kind: &RemoteOpKind, + size: RemoteTimelineClientMetricsCallTrackSize, ) -> RemoteTimelineClientCallMetricGuard { - let unfinished_metric = self.calls_unfinished_gauge(file_kind, op_kind); + let calls_unfinished_metric = self.calls_unfinished_gauge(file_kind, op_kind); self.calls_started_hist(file_kind, op_kind) - .observe(unfinished_metric.get() as f64); - unfinished_metric.inc(); - RemoteTimelineClientCallMetricGuard(Some(unfinished_metric)) + .observe(calls_unfinished_metric.get() as f64); + calls_unfinished_metric.inc(); // NB: inc after the histogram, see comment on underlying metric + + let bytes_finished = match size { + RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => { + // nothing to do + None + } + RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => { + self.bytes_started_counter(file_kind, op_kind).inc_by(size); + let finished_counter = self.bytes_finished_counter(file_kind, op_kind); + Some((finished_counter, size)) + } + }; + RemoteTimelineClientCallMetricGuard { + calls_unfinished_metric: Some(calls_unfinished_metric), + bytes_finished, + } } - /// Manually decrement the metric instead of using the guard object. + /// Manually update the metrics that track completions, instead of using the guard object. /// Using the guard object is generally preferable. /// See [`call_begin`] for more context. 
- pub(crate) fn call_end(&self, file_kind: &RemoteOpFileKind, op_kind: &RemoteOpKind) { - let unfinished_metric = self.calls_unfinished_gauge(file_kind, op_kind); + pub(crate) fn call_end( + &self, + file_kind: &RemoteOpFileKind, + op_kind: &RemoteOpKind, + size: RemoteTimelineClientMetricsCallTrackSize, + ) { + let calls_unfinished_metric = self.calls_unfinished_gauge(file_kind, op_kind); debug_assert!( - unfinished_metric.get() > 0, + calls_unfinished_metric.get() > 0, "begin and end should cancel out" ); - unfinished_metric.dec(); + calls_unfinished_metric.dec(); + match size { + RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {} + RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => { + self.bytes_finished_counter(file_kind, op_kind).inc_by(size); + } + } } } @@ -879,6 +1040,8 @@ impl Drop for RemoteTimelineClientMetrics { remote_operation_time, calls_unfinished_gauge, calls_started_hist, + bytes_started_counter, + bytes_finished_counter, } = self; for ((a, b, c), _) in remote_operation_time.get_mut().unwrap().drain() { let _ = REMOTE_OPERATION_TIME.remove_label_values(&[tenant_id, timeline_id, a, b, c]); @@ -899,6 +1062,22 @@ impl Drop for RemoteTimelineClientMetrics { b, ]); } + for ((a, b), _) in bytes_started_counter.get_mut().unwrap().drain() { + let _ = REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER.remove_label_values(&[ + tenant_id, + timeline_id, + a, + b, + ]); + } + for ((a, b), _) in bytes_finished_counter.get_mut().unwrap().drain() { + let _ = REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER.remove_label_values(&[ + tenant_id, + timeline_id, + a, + b, + ]); + } { let _ = remote_physical_size_gauge; // use to avoid 'unused' warning in desctructuring above let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]); @@ -962,3 +1141,10 @@ impl>, O, E> Future for MeasuredRemoteOp { poll_result } } + +pub fn preinitialize_metrics() { + // We want to alert on this metric increasing. 
+ // Initialize it eagerly, so that our alert rule can distinguish absence of the metric from metric value 0. + assert_eq!(UNEXPECTED_ONDEMAND_DOWNLOADS.get(), 0); + UNEXPECTED_ONDEMAND_DOWNLOADS.reset(); +} diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index c0e4a2a9cf..8b0795db3c 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -20,7 +20,6 @@ use pageserver_api::models::{ PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetPageResponse, PagestreamNblocksRequest, PagestreamNblocksResponse, }; -use postgres_backend::PostgresBackendTCP; use postgres_backend::{self, is_expected_io_error, AuthType, PostgresBackend, QueryError}; use pq_proto::framed::ConnectionError; use pq_proto::FeStartupPacket; @@ -32,6 +31,7 @@ use std::str; use std::str::FromStr; use std::sync::Arc; use std::time::Duration; +use tokio::io::{AsyncRead, AsyncWrite}; use tokio_util::io::StreamReader; use tracing::*; use utils::id::ConnectionId; @@ -57,7 +57,10 @@ use crate::trace::Tracer; use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID; use postgres_ffi::BLCKSZ; -fn copyin_stream(pgb: &mut PostgresBackendTCP) -> impl Stream> + '_ { +fn copyin_stream(pgb: &mut PostgresBackend) -> impl Stream> + '_ +where + IO: AsyncRead + AsyncWrite + Unpin, +{ async_stream::try_stream! { loop { let msg = tokio::select! { @@ -65,8 +68,8 @@ fn copyin_stream(pgb: &mut PostgresBackendTCP) -> impl Stream { // We were requested to shut down. 
- let msg = format!("pageserver is shutting down"); - let _ = pgb.write_message_noflush(&BeMessage::ErrorResponse(&msg, None)); + let msg = "pageserver is shutting down"; + let _ = pgb.write_message_noflush(&BeMessage::ErrorResponse(msg, None)); Err(QueryError::Other(anyhow::anyhow!(msg))) } @@ -125,7 +128,7 @@ fn copyin_stream(pgb: &mut PostgresBackendTCP) -> impl Stream anyhow::Result<()> { +async fn read_tar_eof(mut reader: (impl AsyncRead + Unpin)) -> anyhow::Result<()> { use tokio::io::AsyncReadExt; let mut buf = [0u8; 512]; @@ -245,12 +248,23 @@ async fn page_service_conn_main( .set_nodelay(true) .context("could not set TCP_NODELAY")?; + let peer_addr = socket.peer_addr().context("get peer address")?; + + // setup read timeout of 10 minutes. the timeout is rather arbitrary for requirements: + // - long enough for most valid compute connections + // - less than infinite to stop us from "leaking" connections to long-gone computes + // + // no write timeout is used, because the kernel is assumed to error writes after some time. + let mut socket = tokio_io_timeout::TimeoutReader::new(socket); + socket.set_timeout(Some(std::time::Duration::from_secs(60 * 10))); + let socket = std::pin::pin!(socket); + // XXX: pgbackend.run() should take the connection_ctx, // and create a child per-query context when it invokes process_query. // But it's in a shared crate, so, we store connection_ctx inside PageServerHandler // and create the per-query context in process_query ourselves. 
let mut conn_handler = PageServerHandler::new(conf, auth, connection_ctx); - let pgbackend = PostgresBackend::new(socket, auth_type, None)?; + let pgbackend = PostgresBackend::new_from_io(socket, peer_addr, auth_type, None)?; match pgbackend .run(&mut conn_handler, task_mgr::shutdown_watcher) @@ -332,13 +346,16 @@ impl PageServerHandler { } #[instrument(skip(self, pgb, ctx))] - async fn handle_pagerequests( + async fn handle_pagerequests( &self, - pgb: &mut PostgresBackendTCP, + pgb: &mut PostgresBackend, tenant_id: TenantId, timeline_id: TimelineId, ctx: RequestContext, - ) -> anyhow::Result<()> { + ) -> anyhow::Result<()> + where + IO: AsyncRead + AsyncWrite + Send + Sync + Unpin, + { // NOTE: pagerequests handler exits when connection is closed, // so there is no need to reset the association task_mgr::associate_with(Some(tenant_id), Some(timeline_id)); @@ -436,16 +453,19 @@ impl PageServerHandler { #[allow(clippy::too_many_arguments)] #[instrument(skip(self, pgb, ctx))] - async fn handle_import_basebackup( + async fn handle_import_basebackup( &self, - pgb: &mut PostgresBackendTCP, + pgb: &mut PostgresBackend, tenant_id: TenantId, timeline_id: TimelineId, base_lsn: Lsn, _end_lsn: Lsn, pg_version: u32, ctx: RequestContext, - ) -> Result<(), QueryError> { + ) -> Result<(), QueryError> + where + IO: AsyncRead + AsyncWrite + Send + Sync + Unpin, + { task_mgr::associate_with(Some(tenant_id), Some(timeline_id)); // Create empty timeline info!("creating new timeline"); @@ -486,15 +506,18 @@ impl PageServerHandler { } #[instrument(skip(self, pgb, ctx))] - async fn handle_import_wal( + async fn handle_import_wal( &self, - pgb: &mut PostgresBackendTCP, + pgb: &mut PostgresBackend, tenant_id: TenantId, timeline_id: TimelineId, start_lsn: Lsn, end_lsn: Lsn, ctx: RequestContext, - ) -> Result<(), QueryError> { + ) -> Result<(), QueryError> + where + IO: AsyncRead + AsyncWrite + Send + Sync + Unpin, + { task_mgr::associate_with(Some(tenant_id), Some(timeline_id)); let 
timeline = get_active_tenant_timeline(tenant_id, timeline_id, &ctx).await?; @@ -690,16 +713,21 @@ impl PageServerHandler { #[allow(clippy::too_many_arguments)] #[instrument(skip(self, pgb, ctx))] - async fn handle_basebackup_request( + async fn handle_basebackup_request( &mut self, - pgb: &mut PostgresBackendTCP, + pgb: &mut PostgresBackend, tenant_id: TenantId, timeline_id: TimelineId, lsn: Option, prev_lsn: Option, full_backup: bool, ctx: RequestContext, - ) -> anyhow::Result<()> { + ) -> anyhow::Result<()> + where + IO: AsyncRead + AsyncWrite + Send + Sync + Unpin, + { + let started = std::time::Instant::now(); + // check that the timeline exists let timeline = get_active_tenant_timeline(tenant_id, timeline_id, &ctx).await?; let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); @@ -712,6 +740,8 @@ impl PageServerHandler { .context("invalid basebackup lsn")?; } + let lsn_awaited_after = started.elapsed(); + // switch client to COPYOUT pgb.write_message_noflush(&BeMessage::CopyOutResponse)?; pgb.flush().await?; @@ -732,7 +762,17 @@ impl PageServerHandler { pgb.write_message_noflush(&BeMessage::CopyDone)?; pgb.flush().await?; - info!("basebackup complete"); + + let basebackup_after = started + .elapsed() + .checked_sub(lsn_awaited_after) + .unwrap_or(Duration::ZERO); + + info!( + lsn_await_millis = lsn_awaited_after.as_millis(), + basebackup_millis = basebackup_after.as_millis(), + "basebackup complete" + ); Ok(()) } @@ -756,10 +796,13 @@ impl PageServerHandler { } #[async_trait::async_trait] -impl postgres_backend::Handler for PageServerHandler { +impl postgres_backend::Handler for PageServerHandler +where + IO: AsyncRead + AsyncWrite + Send + Sync + Unpin, +{ fn check_auth_jwt( &mut self, - _pgb: &mut PostgresBackendTCP, + _pgb: &mut PostgresBackend, jwt_response: &[u8], ) -> Result<(), QueryError> { // this unwrap is never triggered, because check_auth_jwt only called when auth_type is NeonJWT @@ -787,7 +830,7 @@ impl postgres_backend::Handler for 
PageServerHandler { fn startup( &mut self, - _pgb: &mut PostgresBackendTCP, + _pgb: &mut PostgresBackend, _sm: &FeStartupPacket, ) -> Result<(), QueryError> { Ok(()) @@ -795,7 +838,7 @@ impl postgres_backend::Handler for PageServerHandler { async fn process_query( &mut self, - pgb: &mut PostgresBackendTCP, + pgb: &mut PostgresBackend, query_string: &str, ) -> Result<(), QueryError> { let ctx = self.connection_ctx.attached_child(); diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 7fac7d2ac0..5cfc466111 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -118,6 +118,10 @@ pub struct Tenant { // Global pageserver config parameters pub conf: &'static PageServerConf, + /// The value creation timestamp, used to measure activation delay, see: + /// + loading_started_at: Instant, + state: watch::Sender, // Overridden tenant-specific config parameters. @@ -177,9 +181,9 @@ impl UninitializedTimeline<'_> { /// /// The new timeline is initialized in Active state, and its background jobs are /// started - pub fn initialize(self, _ctx: &RequestContext) -> anyhow::Result> { + pub fn initialize(self, ctx: &RequestContext) -> anyhow::Result> { let mut timelines = self.owning_tenant.timelines.lock().unwrap(); - self.initialize_with_lock(&mut timelines, true, true) + self.initialize_with_lock(ctx, &mut timelines, true, true) } /// Like `initialize`, but the caller is already holding lock on Tenant::timelines. @@ -189,6 +193,7 @@ impl UninitializedTimeline<'_> { /// been initialized. 
fn initialize_with_lock( mut self, + ctx: &RequestContext, timelines: &mut HashMap>, load_layer_map: bool, activate: bool, @@ -229,7 +234,9 @@ impl UninitializedTimeline<'_> { new_timeline.maybe_spawn_flush_loop(); if activate { - new_timeline.activate(); + new_timeline + .activate(ctx) + .context("initializing timeline activation")?; } } } @@ -469,7 +476,7 @@ impl Tenant { local_metadata: Option, ancestor: Option>, first_save: bool, - _ctx: &RequestContext, + ctx: &RequestContext, ) -> anyhow::Result<()> { let tenant_id = self.tenant_id; @@ -504,7 +511,7 @@ impl Tenant { // Do not start walreceiver here. We do need loaded layer map for reconcile_with_remote // But we shouldnt start walreceiver before we have all the data locally, because working walreceiver // will ingest data which may require looking at the layers which are not yet available locally - match timeline.initialize_with_lock(&mut timelines_accessor, true, false) { + match timeline.initialize_with_lock(ctx, &mut timelines_accessor, true, false) { Ok(new_timeline) => new_timeline, Err(e) => { error!("Failed to initialize timeline {tenant_id}/{timeline_id}: {e:?}"); @@ -616,7 +623,7 @@ impl Tenant { match tenant_clone.attach(ctx).await { Ok(_) => {} Err(e) => { - tenant_clone.set_broken(&e.to_string()); + tenant_clone.set_broken(e.to_string()); error!("error attaching tenant: {:?}", e); } } @@ -629,7 +636,7 @@ impl Tenant { /// /// Background task that downloads all data for a tenant and brings it to Active state. /// - #[instrument(skip(self, ctx), fields(tenant_id=%self.tenant_id))] + #[instrument(skip_all, fields(tenant_id=%self.tenant_id))] async fn attach(self: &Arc, ctx: RequestContext) -> anyhow::Result<()> { // Create directory with marker file to indicate attaching state. // The load_local_tenants() function in tenant::mgr relies on the marker file @@ -750,7 +757,7 @@ impl Tenant { // Start background operations and open the tenant for business. 
// The loops will shut themselves down when they notice that the tenant is inactive. - self.activate()?; + self.activate(&ctx)?; info!("Done"); @@ -824,7 +831,10 @@ impl Tenant { pub fn create_broken_tenant(conf: &'static PageServerConf, tenant_id: TenantId) -> Arc { let wal_redo_manager = Arc::new(PostgresRedoManager::new(conf, tenant_id)); Arc::new(Tenant::new( - TenantState::Broken, + TenantState::Broken { + reason: "create_broken_tenant".into(), + backtrace: String::new(), + }, conf, TenantConfOpt::default(), wal_redo_manager, @@ -885,7 +895,7 @@ impl Tenant { match tenant_clone.load(&ctx).await { Ok(()) => {} Err(err) => { - tenant_clone.set_broken(&err.to_string()); + tenant_clone.set_broken(err.to_string()); error!("could not load tenant {tenant_id}: {err:?}"); } } @@ -1022,7 +1032,7 @@ impl Tenant { // Start background operations and open the tenant for business. // The loops will shut themselves down when they notice that the tenant is inactive. - self.activate()?; + self.activate(ctx)?; info!("Done"); @@ -1358,12 +1368,7 @@ impl Tenant { // Stop the walreceiver first. debug!("waiting for wal receiver to shutdown"); - task_mgr::shutdown_tasks( - Some(TaskKind::WalReceiverManager), - Some(self.tenant_id), - Some(timeline_id), - ) - .await; + timeline.walreceiver.stop().await; debug!("wal receiver shutdown confirmed"); info!("waiting for timeline tasks to shutdown"); @@ -1442,7 +1447,7 @@ impl Tenant { } pub fn current_state(&self) -> TenantState { - *self.state.borrow() + self.state.borrow().clone() } pub fn is_active(&self) -> bool { @@ -1450,18 +1455,18 @@ impl Tenant { } /// Changes tenant status to active, unless shutdown was already requested. - fn activate(&self) -> anyhow::Result<()> { + fn activate(&self, ctx: &RequestContext) -> anyhow::Result<()> { let mut result = Ok(()); self.state.send_modify(|current_state| { - match *current_state { + match &*current_state { TenantState::Active => { // activate() was called on an already Active tenant. 
Shouldn't happen. result = Err(anyhow::anyhow!("Tenant is already active")); } - TenantState::Broken => { + TenantState::Broken { reason, .. } => { // This shouldn't happen either result = Err(anyhow::anyhow!( - "Could not activate tenant because it is in broken state" + "Could not activate tenant because it is in broken state due to: {reason}", )); } TenantState::Stopping => { @@ -1472,7 +1477,7 @@ impl Tenant { TenantState::Loading | TenantState::Attaching => { *current_state = TenantState::Active; - info!("Activating tenant {}", self.tenant_id); + debug!(tenant_id = %self.tenant_id, "Activating tenant"); let timelines_accessor = self.timelines.lock().unwrap(); let not_broken_timelines = timelines_accessor @@ -1483,9 +1488,47 @@ impl Tenant { // down when they notice that the tenant is inactive. tasks::start_background_loops(self.tenant_id); + let mut activated_timelines = 0; + let mut timelines_broken_during_activation = 0; + for timeline in not_broken_timelines { - timeline.activate(); + match timeline + .activate(ctx) + .context("timeline activation for activating tenant") + { + Ok(()) => { + activated_timelines += 1; + } + Err(e) => { + error!( + "Failed to activate timeline {}: {:#}", + timeline.timeline_id, e + ); + timeline.set_state(TimelineState::Broken); + *current_state = TenantState::broken_from_reason(format!( + "failed to activate timeline {}: {}", + timeline.timeline_id, e + )); + + timelines_broken_during_activation += 1; + } + } } + + let elapsed = self.loading_started_at.elapsed(); + let total_timelines = timelines_accessor.len(); + + // log a lot of stuff, because some tenants sometimes suffer from user-visible + // times to activate. 
see https://github.com/neondatabase/neon/issues/4025 + info!( + since_creation_millis = elapsed.as_millis(), + tenant_id = %self.tenant_id, + activated_timelines, + timelines_broken_during_activation, + total_timelines, + post_state = <&'static str>::from(&*current_state), + "activation attempt finished" + ); } } }); @@ -1495,7 +1538,7 @@ impl Tenant { /// Change tenant status to Stopping, to mark that it is being shut down pub fn set_stopping(&self) { self.state.send_modify(|current_state| { - match *current_state { + match current_state { TenantState::Active | TenantState::Loading | TenantState::Attaching => { *current_state = TenantState::Stopping; @@ -1511,8 +1554,8 @@ impl Tenant { timeline.set_state(TimelineState::Stopping); } } - TenantState::Broken => { - info!("Cannot set tenant to Stopping state, it is already in Broken state"); + TenantState::Broken { reason, .. } => { + info!("Cannot set tenant to Stopping state, it is in Broken state due to: {reason}"); } TenantState::Stopping => { // The tenant was detached, or system shutdown was requested, while we were @@ -1523,7 +1566,7 @@ impl Tenant { }); } - pub fn set_broken(&self, reason: &str) { + pub fn set_broken(&self, reason: String) { self.state.send_modify(|current_state| { match *current_state { TenantState::Active => { @@ -1531,24 +1574,24 @@ impl Tenant { // while loading or attaching a tenant. A tenant that has already been // activated should never be marked as broken. We cope with it the best // we can, but it shouldn't happen. - *current_state = TenantState::Broken; warn!("Changing Active tenant to Broken state, reason: {}", reason); + *current_state = TenantState::broken_from_reason(reason); } - TenantState::Broken => { + TenantState::Broken { .. 
} => { // This shouldn't happen either warn!("Tenant is already in Broken state"); } TenantState::Stopping => { // This shouldn't happen either - *current_state = TenantState::Broken; warn!( "Marking Stopping tenant as Broken state, reason: {}", reason ); + *current_state = TenantState::broken_from_reason(reason); } TenantState::Loading | TenantState::Attaching => { info!("Setting tenant as Broken state, reason: {}", reason); - *current_state = TenantState::Broken; + *current_state = TenantState::broken_from_reason(reason); } } }); @@ -1561,7 +1604,7 @@ impl Tenant { pub async fn wait_to_become_active(&self) -> anyhow::Result<()> { let mut receiver = self.state.subscribe(); loop { - let current_state = *receiver.borrow_and_update(); + let current_state = receiver.borrow_and_update().clone(); match current_state { TenantState::Loading | TenantState::Attaching => { // in these states, there's a chance that we can reach ::Active @@ -1570,12 +1613,12 @@ impl Tenant { TenantState::Active { .. } => { return Ok(()); } - TenantState::Broken | TenantState::Stopping => { + TenantState::Broken { .. } | TenantState::Stopping => { // There's no chance the tenant can transition back into ::Active anyhow::bail!( "Tenant {} will not become active. Current state: {:?}", self.tenant_id, - current_state, + &current_state, ); } } @@ -1715,6 +1758,13 @@ impl Tenant { pub fn set_new_tenant_config(&self, new_tenant_conf: TenantConfOpt) { *self.tenant_conf.write().unwrap() = new_tenant_conf; + // Don't hold self.timelines.lock() during the notifies. + // There's no risk of deadlock right now, but there could be if we consolidate + // mutexes in struct Timeline in the future. 
+ let timelines = self.list_timelines(); + for timeline in timelines { + timeline.tenant_conf_updated(); + } } fn create_timeline_data( @@ -1756,21 +1806,23 @@ impl Tenant { let (state, mut rx) = watch::channel(state); tokio::spawn(async move { - let current_state = *rx.borrow_and_update(); + let mut current_state: &'static str = From::from(&*rx.borrow_and_update()); let tid = tenant_id.to_string(); TENANT_STATE_METRIC - .with_label_values(&[&tid, current_state.as_str()]) + .with_label_values(&[&tid, current_state]) .inc(); loop { match rx.changed().await { Ok(()) => { - let new_state = *rx.borrow(); + let new_state: &'static str = From::from(&*rx.borrow_and_update()); TENANT_STATE_METRIC - .with_label_values(&[&tid, current_state.as_str()]) + .with_label_values(&[&tid, current_state]) .dec(); TENANT_STATE_METRIC - .with_label_values(&[&tid, new_state.as_str()]) + .with_label_values(&[&tid, new_state]) .inc(); + + current_state = new_state; } Err(_sender_dropped_error) => { info!("Tenant dropped the state updates sender, quitting waiting for tenant state change"); @@ -1783,6 +1835,9 @@ impl Tenant { Tenant { tenant_id, conf, + // using now here is good enough approximation to catch tenants with really long + // activation times. + loading_started_at: Instant::now(), tenant_conf: Arc::new(RwLock::new(tenant_conf)), timelines: Mutex::new(HashMap::new()), gc_cs: tokio::sync::Mutex::new(()), @@ -1865,7 +1920,7 @@ impl Tenant { .to_string(); // Convert the config to a toml file. - conf_content += &toml_edit::easy::to_string(&tenant_conf)?; + conf_content += &toml_edit::ser::to_string(&tenant_conf)?; let mut target_config_file = VirtualFile::open_with_options( target_config_path, @@ -2093,7 +2148,7 @@ impl Tenant { src_timeline: &Arc, dst_id: TimelineId, start_lsn: Option, - _ctx: &RequestContext, + ctx: &RequestContext, ) -> anyhow::Result> { let src_id = src_timeline.timeline_id; @@ -2186,7 +2241,7 @@ impl Tenant { false, Some(Arc::clone(src_timeline)), )? 
- .initialize_with_lock(&mut timelines, true, true)?; + .initialize_with_lock(ctx, &mut timelines, true, true)?; drop(timelines); // Root timeline gets its layers during creation and uploads them along with the metadata. @@ -2299,7 +2354,7 @@ impl Tenant { let timeline = { let mut timelines = self.timelines.lock().unwrap(); - raw_timeline.initialize_with_lock(&mut timelines, false, true)? + raw_timeline.initialize_with_lock(ctx, &mut timelines, false, true)? }; info!( @@ -2791,6 +2846,9 @@ pub mod harness { trace_read_requests: Some(tenant_conf.trace_read_requests), eviction_policy: Some(tenant_conf.eviction_policy), min_resident_size_override: tenant_conf.min_resident_size_override, + evictions_low_residence_duration_metric_threshold: Some( + tenant_conf.evictions_low_residence_duration_metric_threshold, + ), } } } @@ -2823,7 +2881,13 @@ pub mod harness { }; LOG_HANDLE.get_or_init(|| { - logging::init(logging::LogFormat::Test).expect("Failed to init test logging") + logging::init( + logging::LogFormat::Test, + // enable it in case in case the tests exercise code paths that use + // debug_assert_current_span_has_tenant_and_timeline_id + logging::TracingErrorLayerEnablement::EnableWithRustLogFilter, + ) + .expect("Failed to init test logging") }); let repo_dir = PageServerConf::test_repo_dir(test_name); diff --git a/pageserver/src/tenant/config.rs b/pageserver/src/tenant/config.rs index cdabb23a7b..34f57840fb 100644 --- a/pageserver/src/tenant/config.rs +++ b/pageserver/src/tenant/config.rs @@ -39,6 +39,7 @@ pub mod defaults { pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "2 seconds"; pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "3 seconds"; pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10 * 1024 * 1024; + pub const DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD: &str = "24 hour"; } /// Per-tenant configuration options @@ -93,6 +94,9 @@ pub struct TenantConf { pub trace_read_requests: bool, pub eviction_policy: EvictionPolicy, 
pub min_resident_size_override: Option, + // See the corresponding metric's help string. + #[serde(with = "humantime_serde")] + pub evictions_low_residence_duration_metric_threshold: Duration, } /// Same as TenantConf, but this struct preserves the information about @@ -164,6 +168,11 @@ pub struct TenantConfOpt { #[serde(skip_serializing_if = "Option::is_none")] #[serde(default)] pub min_resident_size_override: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(with = "humantime_serde")] + #[serde(default)] + pub evictions_low_residence_duration_metric_threshold: Option, } #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] @@ -228,6 +237,9 @@ impl TenantConfOpt { min_resident_size_override: self .min_resident_size_override .or(global_conf.min_resident_size_override), + evictions_low_residence_duration_metric_threshold: self + .evictions_low_residence_duration_metric_threshold + .unwrap_or(global_conf.evictions_low_residence_duration_metric_threshold), } } } @@ -260,6 +272,10 @@ impl Default for TenantConf { trace_read_requests: false, eviction_policy: EvictionPolicy::NoEviction, min_resident_size_override: None, + evictions_low_residence_duration_metric_threshold: humantime::parse_duration( + DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD, + ) + .expect("cannot parse default evictions_low_residence_duration_metric_threshold"), } } } @@ -275,9 +291,9 @@ mod tests { ..TenantConfOpt::default() }; - let toml_form = toml_edit::easy::to_string(&small_conf).unwrap(); + let toml_form = toml_edit::ser::to_string(&small_conf).unwrap(); assert_eq!(toml_form, "gc_horizon = 42\n"); - assert_eq!(small_conf, toml_edit::easy::from_str(&toml_form).unwrap()); + assert_eq!(small_conf, toml_edit::de::from_str(&toml_form).unwrap()); let json_form = serde_json::to_string(&small_conf).unwrap(); assert_eq!(json_form, "{\"gc_horizon\":42}"); diff --git a/pageserver/src/tenant/layer_map.rs b/pageserver/src/tenant/layer_map.rs index 
4c659be9aa..8d06ccd565 100644 --- a/pageserver/src/tenant/layer_map.rs +++ b/pageserver/src/tenant/layer_map.rs @@ -48,7 +48,6 @@ mod layer_coverage; use crate::context::RequestContext; use crate::keyspace::KeyPartitioning; -use crate::metrics::NUM_ONDISK_LAYERS; use crate::repository::Key; use crate::tenant::storage_layer::InMemoryLayer; use crate::tenant::storage_layer::Layer; @@ -275,6 +274,7 @@ where /// Helper function for BatchedUpdates::insert_historic /// pub(self) fn insert_historic_noflush(&mut self, layer: Arc) { + // TODO: See #3869, resulting #4088, attempted fix and repro #4094 self.historic.insert( historic_layer_coverage::LayerKey::from(&*layer), Arc::clone(&layer), @@ -283,8 +283,6 @@ where if Self::is_l0(&layer) { self.l0_delta_layers.push(layer); } - - NUM_ONDISK_LAYERS.inc(); } /// @@ -309,8 +307,6 @@ where "failed to locate removed historic layer from l0_delta_layers" ); } - - NUM_ONDISK_LAYERS.dec(); } pub(self) fn replace_historic_noflush( diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 4971186206..754316b3cd 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -537,7 +537,7 @@ where Some(tenant) => match tenant.current_state() { TenantState::Attaching | TenantState::Loading - | TenantState::Broken + | TenantState::Broken { .. 
} | TenantState::Active => tenant.set_stopping(), TenantState::Stopping => return Err(TenantStateError::IsStopping(tenant_id)), }, @@ -565,7 +565,7 @@ where let tenants_accessor = TENANTS.read().await; match tenants_accessor.get(&tenant_id) { Some(tenant) => { - tenant.set_broken(&e.to_string()); + tenant.set_broken(e.to_string()); } None => { warn!("Tenant {tenant_id} got removed from memory"); diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 28c4943dbd..c42824a8b5 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -219,7 +219,8 @@ use utils::lsn::Lsn; use crate::metrics::{ MeasureRemoteOp, RemoteOpFileKind, RemoteOpKind, RemoteTimelineClientMetrics, - REMOTE_ONDEMAND_DOWNLOADED_BYTES, REMOTE_ONDEMAND_DOWNLOADED_LAYERS, + RemoteTimelineClientMetricsCallTrackSize, REMOTE_ONDEMAND_DOWNLOADED_BYTES, + REMOTE_ONDEMAND_DOWNLOADED_LAYERS, }; use crate::tenant::remote_timeline_client::index::LayerFileMetadata; use crate::{ @@ -367,9 +368,13 @@ impl RemoteTimelineClient { /// Download index file pub async fn download_index_file(&self) -> Result { - let _unfinished_gauge_guard = self - .metrics - .call_begin(&RemoteOpFileKind::Index, &RemoteOpKind::Download); + let _unfinished_gauge_guard = self.metrics.call_begin( + &RemoteOpFileKind::Index, + &RemoteOpKind::Download, + crate::metrics::RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { + reason: "no need for a downloads gauge", + }, + ); download::download_index_part( self.conf, @@ -398,9 +403,13 @@ impl RemoteTimelineClient { layer_metadata: &LayerFileMetadata, ) -> anyhow::Result { let downloaded_size = { - let _unfinished_gauge_guard = self - .metrics - .call_begin(&RemoteOpFileKind::Layer, &RemoteOpKind::Download); + let _unfinished_gauge_guard = self.metrics.call_begin( + &RemoteOpFileKind::Layer, + &RemoteOpKind::Download, + 
crate::metrics::RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { + reason: "no need for a downloads gauge", + }, + ); download::download_layer_file( self.conf, &self.storage_impl, @@ -886,11 +895,32 @@ impl RemoteTimelineClient { fn calls_unfinished_metric_impl( &self, op: &UploadOp, - ) -> Option<(RemoteOpFileKind, RemoteOpKind)> { + ) -> Option<( + RemoteOpFileKind, + RemoteOpKind, + RemoteTimelineClientMetricsCallTrackSize, + )> { + use RemoteTimelineClientMetricsCallTrackSize::DontTrackSize; let res = match op { - UploadOp::UploadLayer(_, _) => (RemoteOpFileKind::Layer, RemoteOpKind::Upload), - UploadOp::UploadMetadata(_, _) => (RemoteOpFileKind::Index, RemoteOpKind::Upload), - UploadOp::Delete(file_kind, _) => (*file_kind, RemoteOpKind::Delete), + UploadOp::UploadLayer(_, m) => ( + RemoteOpFileKind::Layer, + RemoteOpKind::Upload, + RemoteTimelineClientMetricsCallTrackSize::Bytes(m.file_size()), + ), + UploadOp::UploadMetadata(_, _) => ( + RemoteOpFileKind::Index, + RemoteOpKind::Upload, + DontTrackSize { + reason: "metadata uploads are tiny", + }, + ), + UploadOp::Delete(file_kind, _) => ( + *file_kind, + RemoteOpKind::Delete, + DontTrackSize { + reason: "should we track deletes? 
positive or negative sign?", + }, + ), UploadOp::Barrier(_) => { // we do not account these return None; @@ -900,20 +930,20 @@ impl RemoteTimelineClient { } fn calls_unfinished_metric_begin(&self, op: &UploadOp) { - let (file_kind, op_kind) = match self.calls_unfinished_metric_impl(op) { + let (file_kind, op_kind, track_bytes) = match self.calls_unfinished_metric_impl(op) { Some(x) => x, None => return, }; - let guard = self.metrics.call_begin(&file_kind, &op_kind); + let guard = self.metrics.call_begin(&file_kind, &op_kind, track_bytes); guard.will_decrement_manually(); // in unfinished_ops_metric_end() } fn calls_unfinished_metric_end(&self, op: &UploadOp) { - let (file_kind, op_kind) = match self.calls_unfinished_metric_impl(op) { + let (file_kind, op_kind, track_bytes) = match self.calls_unfinished_metric_impl(op) { Some(x) => x, None => return, }; - self.metrics.call_end(&file_kind, &op_kind); + self.metrics.call_end(&file_kind, &op_kind, track_bytes); } fn stop(&self) { @@ -981,11 +1011,19 @@ impl RemoteTimelineClient { mod tests { use super::*; use crate::{ - tenant::harness::{TenantHarness, TIMELINE_ID}, + context::RequestContext, + tenant::{ + harness::{TenantHarness, TIMELINE_ID}, + Tenant, + }, DEFAULT_PG_VERSION, }; use remote_storage::{RemoteStorageConfig, RemoteStorageKind}; - use std::{collections::HashSet, path::Path}; + use std::{ + collections::HashSet, + path::{Path, PathBuf}, + }; + use tokio::runtime::EnterGuard; use utils::lsn::Lsn; pub(super) fn dummy_contents(name: &str) -> Vec { @@ -1034,39 +1072,80 @@ mod tests { assert_eq!(found, expected); } + struct TestSetup { + runtime: &'static tokio::runtime::Runtime, + entered_runtime: EnterGuard<'static>, + harness: TenantHarness<'static>, + tenant: Arc, + tenant_ctx: RequestContext, + remote_fs_dir: PathBuf, + client: Arc, + } + + impl TestSetup { + fn new(test_name: &str) -> anyhow::Result { + // Use a current-thread runtime in the test + let runtime = Box::leak(Box::new( + 
tokio::runtime::Builder::new_current_thread() + .enable_all() + .build()?, + )); + let entered_runtime = runtime.enter(); + + let test_name = Box::leak(Box::new(format!("remote_timeline_client__{test_name}"))); + let harness = TenantHarness::create(test_name)?; + let (tenant, ctx) = runtime.block_on(harness.load()); + // create an empty timeline directory + let timeline = + tenant.create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)?; + let _ = timeline.initialize(&ctx).unwrap(); + + let remote_fs_dir = harness.conf.workdir.join("remote_fs"); + std::fs::create_dir_all(remote_fs_dir)?; + let remote_fs_dir = std::fs::canonicalize(harness.conf.workdir.join("remote_fs"))?; + + let storage_config = RemoteStorageConfig { + max_concurrent_syncs: std::num::NonZeroUsize::new( + remote_storage::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNCS, + ) + .unwrap(), + max_sync_errors: std::num::NonZeroU32::new( + remote_storage::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS, + ) + .unwrap(), + storage: RemoteStorageKind::LocalFs(remote_fs_dir.clone()), + }; + + let storage = GenericRemoteStorage::from_config(&storage_config).unwrap(); + + let client = Arc::new(RemoteTimelineClient { + conf: harness.conf, + runtime, + tenant_id: harness.tenant_id, + timeline_id: TIMELINE_ID, + storage_impl: storage, + upload_queue: Mutex::new(UploadQueue::Uninitialized), + metrics: Arc::new(RemoteTimelineClientMetrics::new( + &harness.tenant_id, + &TIMELINE_ID, + )), + }); + + Ok(Self { + runtime, + entered_runtime, + harness, + tenant, + tenant_ctx: ctx, + remote_fs_dir, + client, + }) + } + } + // Test scheduling #[test] fn upload_scheduling() -> anyhow::Result<()> { - // Use a current-thread runtime in the test - let runtime = Box::leak(Box::new( - tokio::runtime::Builder::new_current_thread() - .enable_all() - .build()?, - )); - let _entered = runtime.enter(); - - let harness = TenantHarness::create("upload_scheduling")?; - let (tenant, ctx) = runtime.block_on(harness.load()); - let 
_timeline = - tenant.create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)?; - let timeline_path = harness.timeline_path(&TIMELINE_ID); - - let remote_fs_dir = harness.conf.workdir.join("remote_fs"); - std::fs::create_dir_all(remote_fs_dir)?; - let remote_fs_dir = std::fs::canonicalize(harness.conf.workdir.join("remote_fs"))?; - - let storage_config = RemoteStorageConfig { - max_concurrent_syncs: std::num::NonZeroUsize::new( - remote_storage::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNCS, - ) - .unwrap(), - max_sync_errors: std::num::NonZeroU32::new( - remote_storage::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS, - ) - .unwrap(), - storage: RemoteStorageKind::LocalFs(remote_fs_dir.clone()), - }; - // Test outline: // // Schedule upload of a bunch of layers. Check that they are started immediately, not queued @@ -1081,21 +1160,19 @@ mod tests { // Schedule another deletion. Check that it's launched immediately. // Schedule index upload. Check that it's queued - println!("workdir: {}", harness.conf.workdir.display()); - - let storage_impl = GenericRemoteStorage::from_config(&storage_config)?; - let client = Arc::new(RemoteTimelineClient { - conf: harness.conf, + let TestSetup { runtime, - tenant_id: harness.tenant_id, - timeline_id: TIMELINE_ID, - storage_impl, - upload_queue: Mutex::new(UploadQueue::Uninitialized), - metrics: Arc::new(RemoteTimelineClientMetrics::new( - &harness.tenant_id, - &TIMELINE_ID, - )), - }); + entered_runtime: _entered_runtime, + harness, + tenant: _tenant, + tenant_ctx: _tenant_ctx, + remote_fs_dir, + client, + } = TestSetup::new("upload_scheduling").unwrap(); + + let timeline_path = harness.timeline_path(&TIMELINE_ID); + + println!("workdir: {}", harness.conf.workdir.display()); let remote_timeline_dir = remote_fs_dir.join(timeline_path.strip_prefix(&harness.conf.workdir)?); @@ -1216,4 +1293,90 @@ mod tests { Ok(()) } + + #[test] + fn bytes_unfinished_gauge_for_layer_file_uploads() -> anyhow::Result<()> { + // Setup + + let 
TestSetup { + runtime, + harness, + client, + .. + } = TestSetup::new("metrics")?; + + let metadata = dummy_metadata(Lsn(0x10)); + client.init_upload_queue_for_empty_remote(&metadata)?; + + let timeline_path = harness.timeline_path(&TIMELINE_ID); + + let layer_file_name_1: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(); + let content_1 = dummy_contents("foo"); + std::fs::write( + timeline_path.join(layer_file_name_1.file_name()), + &content_1, + )?; + + #[derive(Debug, PartialEq)] + struct BytesStartedFinished { + started: Option, + finished: Option, + } + let get_bytes_started_stopped = || { + let started = client + .metrics + .get_bytes_started_counter_value(&RemoteOpFileKind::Layer, &RemoteOpKind::Upload) + .map(|v| v.try_into().unwrap()); + let stopped = client + .metrics + .get_bytes_finished_counter_value(&RemoteOpFileKind::Layer, &RemoteOpKind::Upload) + .map(|v| v.try_into().unwrap()); + BytesStartedFinished { + started, + finished: stopped, + } + }; + + // Test + + let init = get_bytes_started_stopped(); + + client.schedule_layer_file_upload( + &layer_file_name_1, + &LayerFileMetadata::new(content_1.len() as u64), + )?; + + let pre = get_bytes_started_stopped(); + + runtime.block_on(client.wait_completion())?; + + let post = get_bytes_started_stopped(); + + // Validate + + assert_eq!( + init, + BytesStartedFinished { + started: None, + finished: None + } + ); + assert_eq!( + pre, + BytesStartedFinished { + started: Some(content_1.len()), + // assert that the _finished metric is created eagerly so that subtractions work on first sample + finished: Some(0), + } + ); + assert_eq!( + post, + BytesStartedFinished { + started: Some(content_1.len()), + finished: Some(content_1.len()) + } + ); + + Ok(()) + } } diff --git a/pageserver/src/tenant/remote_timeline_client/download.rs b/pageserver/src/tenant/remote_timeline_client/download.rs index bda095d850..a0d8c0193a 
100644 --- a/pageserver/src/tenant/remote_timeline_client/download.rs +++ b/pageserver/src/tenant/remote_timeline_client/download.rs @@ -16,6 +16,7 @@ use tracing::{info, warn}; use crate::config::PageServerConf; use crate::tenant::storage_layer::LayerFileName; +use crate::tenant::timeline::debug_assert_current_span_has_tenant_and_timeline_id; use crate::{exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS}; use remote_storage::{DownloadError, GenericRemoteStorage}; use utils::crashsafe::path_with_suffix_extension; @@ -43,6 +44,8 @@ pub async fn download_layer_file<'a>( layer_file_name: &'a LayerFileName, layer_metadata: &'a LayerFileMetadata, ) -> Result { + debug_assert_current_span_has_tenant_and_timeline_id(); + let timeline_path = conf.timeline_path(&timeline_id, &tenant_id); let local_path = timeline_path.join(layer_file_name.file_name()); @@ -154,7 +157,7 @@ pub async fn download_layer_file<'a>( .with_context(|| format!("Could not fsync layer file {}", local_path.display(),)) .map_err(DownloadError::Other)?; - tracing::info!("download complete: {}", local_path.display()); + tracing::debug!("download complete: {}", local_path.display()); Ok(bytes_amount) } diff --git a/pageserver/src/tenant/remote_timeline_client/upload.rs b/pageserver/src/tenant/remote_timeline_client/upload.rs index ce9f4d9bf8..699121ccd9 100644 --- a/pageserver/src/tenant/remote_timeline_client/upload.rs +++ b/pageserver/src/tenant/remote_timeline_client/upload.rs @@ -74,7 +74,7 @@ pub(super) async fn upload_timeline_layer<'a>( })?; storage - .upload(Box::new(source_file), fs_size, &storage_path, None) + .upload(source_file, fs_size, &storage_path, None) .await .with_context(|| { format!( diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs index 8aeacc12f5..7e7dbd3c5c 100644 --- a/pageserver/src/tenant/tasks.rs +++ b/pageserver/src/tenant/tasks.rs @@ -209,7 +209,7 @@ async fn wait_for_active_tenant( loop { match 
tenant_state_updates.changed().await { Ok(()) => { - let new_state = *tenant_state_updates.borrow(); + let new_state = &*tenant_state_updates.borrow(); match new_state { TenantState::Active => { debug!("Tenant state changed to active, continuing the task loop"); diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index e80e32644b..8768841d87 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -14,6 +14,7 @@ use pageserver_api::models::{ DownloadRemoteLayersTaskState, LayerMapInfo, LayerResidenceStatus, TimelineState, }; use remote_storage::GenericRemoteStorage; +use storage_broker::BrokerClientChannel; use tokio::sync::{oneshot, watch, Semaphore, TryAcquireError}; use tokio_util::sync::CancellationToken; use tracing::*; @@ -30,7 +31,7 @@ use std::sync::atomic::{AtomicI64, Ordering as AtomicOrdering}; use std::sync::{Arc, Mutex, MutexGuard, RwLock, Weak}; use std::time::{Duration, Instant, SystemTime}; -use crate::broker_client::is_broker_client_initialized; +use crate::broker_client::{get_broker_client, is_broker_client_initialized}; use crate::context::{DownloadBehavior, RequestContext}; use crate::tenant::remote_timeline_client::{self, index::LayerFileMetadata}; use crate::tenant::storage_layer::{ @@ -47,7 +48,7 @@ use crate::tenant::{ use crate::config::PageServerConf; use crate::keyspace::{KeyPartitioning, KeySpace}; -use crate::metrics::TimelineMetrics; +use crate::metrics::{TimelineMetrics, UNEXPECTED_ONDEMAND_DOWNLOADS}; use crate::pgdatadir_mapping::LsnForTimestamp; use crate::pgdatadir_mapping::{is_rel_fsm_block_key, is_rel_vm_block_key}; use crate::pgdatadir_mapping::{BlockNumber, CalculateLogicalSizeError}; @@ -71,11 +72,12 @@ use crate::walredo::WalRedoManager; use crate::METADATA_FILE_NAME; use crate::ZERO_PAGE; use crate::{is_temporary, task_mgr}; -use walreceiver::spawn_connection_manager_task; pub(super) use self::eviction_task::EvictionTaskTenantState; use 
self::eviction_task::EvictionTaskTimelineState; +use self::walreceiver::{WalReceiver, WalReceiverConf}; +use super::config::TenantConf; use super::layer_map::BatchedUpdates; use super::remote_timeline_client::index::IndexPart; use super::remote_timeline_client::RemoteTimelineClient; @@ -160,7 +162,7 @@ pub struct Timeline { ancestor_timeline: Option>, ancestor_lsn: Lsn, - metrics: TimelineMetrics, + pub(super) metrics: TimelineMetrics, /// Ensures layers aren't frozen by checkpointer between /// [`Timeline::get_layer_for_write`] and layer reads. @@ -214,6 +216,7 @@ pub struct Timeline { /// or None if WAL receiver has not received anything for this timeline /// yet. pub last_received_wal: Mutex>, + pub walreceiver: WalReceiver, /// Relation size cache pub rel_size_cache: RwLock>, @@ -866,10 +869,18 @@ impl Timeline { Ok(()) } - pub fn activate(self: &Arc) { + pub fn activate(self: &Arc, ctx: &RequestContext) -> anyhow::Result<()> { + if is_broker_client_initialized() { + self.launch_wal_receiver(ctx, get_broker_client().clone())?; + } else if cfg!(test) { + info!("not launching WAL receiver because broker client hasn't been initialized"); + } else { + anyhow::bail!("broker client not initialized"); + } + self.set_state(TimelineState::Active); - self.launch_wal_receiver(); self.launch_eviction_task(); + Ok(()) } pub fn set_state(&self, new_state: TimelineState) { @@ -925,6 +936,7 @@ impl Timeline { } } + #[instrument(skip_all, fields(tenant = %self.tenant_id, timeline = %self.timeline_id))] pub async fn download_layer(&self, layer_file_name: &str) -> anyhow::Result> { let Some(layer) = self.find_layer(layer_file_name) else { return Ok(None) }; let Some(remote_layer) = layer.downcast_remote_layer() else { return Ok(Some(false)) }; @@ -1126,6 +1138,8 @@ impl Timeline { if let Some(delta) = local_layer_residence_duration { self.metrics .evictions_with_low_residence_duration + .read() + .unwrap() .observe(delta); info!(layer=%local_layer.short_id(), 
residence_millis=delta.as_millis(), "evicted layer after known residence period"); } else { @@ -1199,6 +1213,35 @@ impl Timeline { .unwrap_or(self.conf.default_tenant_conf.eviction_policy) } + fn get_evictions_low_residence_duration_metric_threshold( + tenant_conf: &TenantConfOpt, + default_tenant_conf: &TenantConf, + ) -> Duration { + tenant_conf + .evictions_low_residence_duration_metric_threshold + .unwrap_or(default_tenant_conf.evictions_low_residence_duration_metric_threshold) + } + + pub(super) fn tenant_conf_updated(&self) { + // NB: Most tenant conf options are read by background loops, so, + // changes will automatically be picked up. + + // The threshold is embedded in the metric. So, we need to update it. + { + let new_threshold = Self::get_evictions_low_residence_duration_metric_threshold( + &self.tenant_conf.read().unwrap(), + &self.conf.default_tenant_conf, + ); + let tenant_id_str = self.tenant_id.to_string(); + let timeline_id_str = self.timeline_id.to_string(); + self.metrics + .evictions_with_low_residence_duration + .write() + .unwrap() + .change_threshold(&tenant_id_str, &timeline_id_str, new_threshold); + } + } + /// Open a Timeline handle. /// /// Loads the metadata for the timeline into memory, but not the layer map. 
@@ -1220,7 +1263,36 @@ impl Timeline { let (layer_flush_start_tx, _) = tokio::sync::watch::channel(0); let (layer_flush_done_tx, _) = tokio::sync::watch::channel((0, Ok(()))); + let tenant_conf_guard = tenant_conf.read().unwrap(); + let wal_connect_timeout = tenant_conf_guard + .walreceiver_connect_timeout + .unwrap_or(conf.default_tenant_conf.walreceiver_connect_timeout); + let lagging_wal_timeout = tenant_conf_guard + .lagging_wal_timeout + .unwrap_or(conf.default_tenant_conf.lagging_wal_timeout); + let max_lsn_wal_lag = tenant_conf_guard + .max_lsn_wal_lag + .unwrap_or(conf.default_tenant_conf.max_lsn_wal_lag); + let evictions_low_residence_duration_metric_threshold = + Self::get_evictions_low_residence_duration_metric_threshold( + &tenant_conf_guard, + &conf.default_tenant_conf, + ); + drop(tenant_conf_guard); + Arc::new_cyclic(|myself| { + let walreceiver = WalReceiver::new( + TenantTimelineId::new(tenant_id, timeline_id), + Weak::clone(myself), + WalReceiverConf { + wal_connect_timeout, + lagging_wal_timeout, + max_lsn_wal_lag, + auth_token: crate::config::SAFEKEEPER_AUTH_TOKEN.get().cloned(), + availability_zone: conf.availability_zone.clone(), + }, + ); + let mut result = Timeline { conf, tenant_conf, @@ -1231,6 +1303,7 @@ impl Timeline { layers: RwLock::new(LayerMap::default()), walredo_mgr, + walreceiver, remote_client: remote_client.map(Arc::new), @@ -1252,7 +1325,7 @@ impl Timeline { &timeline_id, crate::metrics::EvictionsWithLowResidenceDurationBuilder::new( "mtime", - conf.evictions_low_residence_duration_metric_threshold, + evictions_low_residence_duration_metric_threshold, ), ), @@ -1350,44 +1423,17 @@ impl Timeline { *flush_loop_state = FlushLoopState::Running; } - pub(super) fn launch_wal_receiver(self: &Arc) { - if !is_broker_client_initialized() { - if cfg!(test) { - info!("not launching WAL receiver because broker client hasn't been initialized"); - return; - } else { - panic!("broker client not initialized"); - } - } - + pub(super) fn 
launch_wal_receiver( + &self, + ctx: &RequestContext, + broker_client: BrokerClientChannel, + ) -> anyhow::Result<()> { info!( "launching WAL receiver for timeline {} of tenant {}", self.timeline_id, self.tenant_id ); - let tenant_conf_guard = self.tenant_conf.read().unwrap(); - let lagging_wal_timeout = tenant_conf_guard - .lagging_wal_timeout - .unwrap_or(self.conf.default_tenant_conf.lagging_wal_timeout); - let walreceiver_connect_timeout = tenant_conf_guard - .walreceiver_connect_timeout - .unwrap_or(self.conf.default_tenant_conf.walreceiver_connect_timeout); - let max_lsn_wal_lag = tenant_conf_guard - .max_lsn_wal_lag - .unwrap_or(self.conf.default_tenant_conf.max_lsn_wal_lag); - drop(tenant_conf_guard); - let self_clone = Arc::clone(self); - let background_ctx = - // XXX: this is a detached_child. Plumb through the ctx from call sites. - RequestContext::todo_child(TaskKind::WalReceiverManager, DownloadBehavior::Error); - spawn_connection_manager_task( - self_clone, - walreceiver_connect_timeout, - lagging_wal_timeout, - max_lsn_wal_lag, - crate::config::SAFEKEEPER_AUTH_TOKEN.get().cloned(), - self.conf.availability_zone.clone(), - background_ctx, - ); + self.walreceiver.start(ctx, broker_client)?; + Ok(()) } /// @@ -2303,6 +2349,7 @@ impl Timeline { id, ctx.task_kind() ); + UNEXPECTED_ONDEMAND_DOWNLOADS.inc(); timeline.download_remote_layer(remote_layer).await?; continue 'layer_map_search; } @@ -3766,11 +3813,13 @@ impl Timeline { /// If the caller has a deadline or needs a timeout, they can simply stop polling: /// we're **cancellation-safe** because the download happens in a separate task_mgr task. /// So, the current download attempt will run to completion even if we stop polling. 
- #[instrument(skip_all, fields(tenant_id=%self.tenant_id, timeline_id=%self.timeline_id, layer=%remote_layer.short_id()))] + #[instrument(skip_all, fields(layer=%remote_layer.short_id()))] pub async fn download_remote_layer( &self, remote_layer: Arc, ) -> anyhow::Result<()> { + debug_assert_current_span_has_tenant_and_timeline_id(); + use std::sync::atomic::Ordering::Relaxed; let permit = match Arc::clone(&remote_layer.ongoing_download) @@ -3814,6 +3863,8 @@ impl Timeline { .await; if let Ok(size) = &result { + info!("layer file download finished"); + // XXX the temp file is still around in Err() case // and consumes space until we clean up upon pageserver restart. self_clone.metrics.resident_physical_size_gauge.add(*size); @@ -3885,6 +3936,8 @@ impl Timeline { updates.flush(); drop(layers); + info!("on-demand download successful"); + // Now that we've inserted the download into the layer map, // close the semaphore. This will make other waiters for // this download return Ok(()). @@ -3892,7 +3945,7 @@ impl Timeline { remote_layer.ongoing_download.close(); } else { // Keep semaphore open. We'll drop the permit at the end of the function. 
- error!("on-demand download failed: {:?}", result.as_ref().unwrap_err()); + error!("layer file download failed: {:?}", result.as_ref().unwrap_err()); } // Don't treat it as an error if the task that triggered the download @@ -4203,3 +4256,36 @@ fn rename_to_backup(path: &Path) -> anyhow::Result<()> { bail!("couldn't find an unused backup number for {:?}", path) } + +#[cfg(not(debug_assertions))] +#[inline] +pub(crate) fn debug_assert_current_span_has_tenant_and_timeline_id() {} + +#[cfg(debug_assertions)] +#[inline] +pub(crate) fn debug_assert_current_span_has_tenant_and_timeline_id() { + use utils::tracing_span_assert; + + pub static TENANT_ID_EXTRACTOR: once_cell::sync::Lazy< + tracing_span_assert::MultiNameExtractor<2>, + > = once_cell::sync::Lazy::new(|| { + tracing_span_assert::MultiNameExtractor::new("TenantId", ["tenant_id", "tenant"]) + }); + + pub static TIMELINE_ID_EXTRACTOR: once_cell::sync::Lazy< + tracing_span_assert::MultiNameExtractor<2>, + > = once_cell::sync::Lazy::new(|| { + tracing_span_assert::MultiNameExtractor::new("TimelineId", ["timeline_id", "timeline"]) + }); + + match tracing_span_assert::check_fields_present([ + &*TENANT_ID_EXTRACTOR, + &*TIMELINE_ID_EXTRACTOR, + ]) { + Ok(()) => (), + Err(missing) => panic!( + "missing extractors: {:?}", + missing.into_iter().map(|e| e.name()).collect::>() + ), + } +} diff --git a/pageserver/src/tenant/timeline/walreceiver.rs b/pageserver/src/tenant/timeline/walreceiver.rs index f33a12c5cc..00f446af38 100644 --- a/pageserver/src/tenant/timeline/walreceiver.rs +++ b/pageserver/src/tenant/timeline/walreceiver.rs @@ -23,14 +23,133 @@ mod connection_manager; mod walreceiver_connection; -use crate::task_mgr::WALRECEIVER_RUNTIME; +use crate::context::{DownloadBehavior, RequestContext}; +use crate::task_mgr::{self, TaskKind, WALRECEIVER_RUNTIME}; +use crate::tenant::timeline::walreceiver::connection_manager::{ + connection_manager_loop_step, ConnectionManagerState, +}; +use anyhow::Context; use 
std::future::Future; +use std::num::NonZeroU64; +use std::ops::ControlFlow; +use std::sync::atomic::{self, AtomicBool}; +use std::sync::{Arc, Weak}; +use std::time::Duration; +use storage_broker::BrokerClientChannel; +use tokio::select; use tokio::sync::watch; use tokio_util::sync::CancellationToken; use tracing::*; -pub use connection_manager::spawn_connection_manager_task; +use utils::id::TenantTimelineId; + +use super::Timeline; + +#[derive(Clone)] +pub struct WalReceiverConf { + /// The timeout on the connection to safekeeper for WAL streaming. + pub wal_connect_timeout: Duration, + /// The timeout to use to determine when the current connection is "stale" and reconnect to the other one. + pub lagging_wal_timeout: Duration, + /// The Lsn lag to use to determine when the current connection is lagging too much behind and reconnect to the other one. + pub max_lsn_wal_lag: NonZeroU64, + pub auth_token: Option>, + pub availability_zone: Option, +} + +pub struct WalReceiver { + timeline: TenantTimelineId, + timeline_ref: Weak, + conf: WalReceiverConf, + started: AtomicBool, +} + +impl WalReceiver { + pub fn new( + timeline: TenantTimelineId, + timeline_ref: Weak, + conf: WalReceiverConf, + ) -> Self { + Self { + timeline, + timeline_ref, + conf, + started: AtomicBool::new(false), + } + } + + pub fn start( + &self, + ctx: &RequestContext, + mut broker_client: BrokerClientChannel, + ) -> anyhow::Result<()> { + if self.started.load(atomic::Ordering::Acquire) { + anyhow::bail!("Wal receiver is already started"); + } + + let timeline = self.timeline_ref.upgrade().with_context(|| { + format!("walreceiver start on a dropped timeline {}", self.timeline) + })?; + + let tenant_id = timeline.tenant_id; + let timeline_id = timeline.timeline_id; + let walreceiver_ctx = + ctx.detached_child(TaskKind::WalReceiverManager, DownloadBehavior::Error); + + let wal_receiver_conf = self.conf.clone(); + task_mgr::spawn( + WALRECEIVER_RUNTIME.handle(), + TaskKind::WalReceiverManager, +
Some(tenant_id), + Some(timeline_id), + &format!("walreceiver for timeline {tenant_id}/{timeline_id}"), + false, + async move { + info!("WAL receiver manager started, connecting to broker"); + let mut connection_manager_state = ConnectionManagerState::new( + timeline, + wal_receiver_conf, + ); + loop { + select! { + _ = task_mgr::shutdown_watcher() => { + info!("WAL receiver shutdown requested, shutting down"); + connection_manager_state.shutdown().await; + return Ok(()); + }, + loop_step_result = connection_manager_loop_step( + &mut broker_client, + &mut connection_manager_state, + &walreceiver_ctx, + ) => match loop_step_result { + ControlFlow::Continue(()) => continue, + ControlFlow::Break(()) => { + info!("Connection manager loop ended, shutting down"); + connection_manager_state.shutdown().await; + return Ok(()); + } + }, + } + } + }.instrument(info_span!(parent: None, "wal_connection_manager", tenant = %tenant_id, timeline = %timeline_id)) + ); + + self.started.store(true, atomic::Ordering::Release); + + Ok(()) + } + + pub async fn stop(&self) { + task_mgr::shutdown_tasks( + Some(TaskKind::WalReceiverManager), + Some(self.timeline.tenant_id), + Some(self.timeline.timeline_id), + ) + .await; + self.started.store(false, atomic::Ordering::Release); + } +} /// A handle of an asynchronous task. /// The task has a channel that it can use to communicate its lifecycle events in a certain form, see [`TaskEvent`] @@ -39,26 +158,26 @@ pub use connection_manager::spawn_connection_manager_task; /// Note that the communication happens via the `watch` channel, that does not accumulate the events, replacing the old one with the never one on submission. /// That may lead to certain events not being observed by the listener. 
#[derive(Debug)] -pub struct TaskHandle { +struct TaskHandle { join_handle: Option>>, events_receiver: watch::Receiver>, cancellation: CancellationToken, } -pub enum TaskEvent { +enum TaskEvent { Update(TaskStateUpdate), End(anyhow::Result<()>), } #[derive(Debug, Clone)] -pub enum TaskStateUpdate { +enum TaskStateUpdate { Started, Progress(E), } impl TaskHandle { /// Initializes the task, starting it immediately after the creation. - pub fn spawn( + fn spawn( task: impl FnOnce(watch::Sender>, CancellationToken) -> Fut + Send + 'static, ) -> Self where @@ -131,7 +250,7 @@ impl TaskHandle { } /// Aborts current task, waiting for it to finish. - pub async fn shutdown(self) { + async fn shutdown(self) { if let Some(jh) = self.join_handle { self.cancellation.cancel(); match jh.await { diff --git a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs index de07676ffe..731c5c4644 100644 --- a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs +++ b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs @@ -11,11 +11,9 @@ use std::{collections::HashMap, num::NonZeroU64, ops::ControlFlow, sync::Arc, time::Duration}; -use super::TaskStateUpdate; -use crate::broker_client::get_broker_client; +use super::{TaskStateUpdate, WalReceiverConf}; use crate::context::{DownloadBehavior, RequestContext}; -use crate::task_mgr::WALRECEIVER_RUNTIME; -use crate::task_mgr::{self, TaskKind}; +use crate::task_mgr::TaskKind; use crate::tenant::Timeline; use anyhow::Context; use chrono::{NaiveDateTime, Utc}; @@ -38,75 +36,17 @@ use utils::{ use super::{walreceiver_connection::WalConnectionStatus, TaskEvent, TaskHandle}; -/// Spawns the loop to take care of the timeline's WAL streaming connection. 
-pub fn spawn_connection_manager_task( - timeline: Arc, - wal_connect_timeout: Duration, - lagging_wal_timeout: Duration, - max_lsn_wal_lag: NonZeroU64, - auth_token: Option>, - availability_zone: Option, - ctx: RequestContext, -) { - let mut broker_client = get_broker_client().clone(); - - let tenant_id = timeline.tenant_id; - let timeline_id = timeline.timeline_id; - - task_mgr::spawn( - WALRECEIVER_RUNTIME.handle(), - TaskKind::WalReceiverManager, - Some(tenant_id), - Some(timeline_id), - &format!("walreceiver for timeline {tenant_id}/{timeline_id}"), - false, - async move { - info!("WAL receiver manager started, connecting to broker"); - let mut walreceiver_state = WalreceiverState::new( - timeline, - wal_connect_timeout, - lagging_wal_timeout, - max_lsn_wal_lag, - auth_token, - availability_zone, - ); - loop { - select! { - _ = task_mgr::shutdown_watcher() => { - info!("WAL receiver shutdown requested, shutting down"); - walreceiver_state.shutdown().await; - return Ok(()); - }, - loop_step_result = connection_manager_loop_step( - &mut broker_client, - &mut walreceiver_state, - &ctx, - ) => match loop_step_result { - ControlFlow::Continue(()) => continue, - ControlFlow::Break(()) => { - info!("Connection manager loop ended, shutting down"); - walreceiver_state.shutdown().await; - return Ok(()); - } - }, - } - } - } - .instrument( - info_span!(parent: None, "wal_connection_manager", tenant = %tenant_id, timeline = %timeline_id), - ), - ); -} - /// Attempts to subscribe for timeline updates, pushed by safekeepers into the broker. /// Based on the updates, desides whether to start, keep or stop a WAL receiver task. /// If storage broker subscription is cancelled, exits. 
-async fn connection_manager_loop_step( +pub(super) async fn connection_manager_loop_step( broker_client: &mut BrokerClientChannel, - walreceiver_state: &mut WalreceiverState, + connection_manager_state: &mut ConnectionManagerState, ctx: &RequestContext, ) -> ControlFlow<(), ()> { - let mut timeline_state_updates = walreceiver_state.timeline.subscribe_for_state_updates(); + let mut timeline_state_updates = connection_manager_state + .timeline + .subscribe_for_state_updates(); match wait_for_active_timeline(&mut timeline_state_updates).await { ControlFlow::Continue(()) => {} @@ -117,8 +57,8 @@ async fn connection_manager_loop_step( } let id = TenantTimelineId { - tenant_id: walreceiver_state.timeline.tenant_id, - timeline_id: walreceiver_state.timeline.timeline_id, + tenant_id: connection_manager_state.timeline.tenant_id, + timeline_id: connection_manager_state.timeline.timeline_id, }; // Subscribe to the broker updates. Stream shares underlying TCP connection @@ -128,7 +68,7 @@ async fn connection_manager_loop_step( info!("Subscribed for broker timeline updates"); loop { - let time_until_next_retry = walreceiver_state.time_until_next_retry(); + let time_until_next_retry = connection_manager_state.time_until_next_retry(); // These things are happening concurrently: // @@ -141,12 +81,12 @@ async fn connection_manager_loop_step( // - timeline state changes to something that does not allow walreceiver to run concurrently select! { Some(wal_connection_update) = async { - match walreceiver_state.wal_connection.as_mut() { + match connection_manager_state.wal_connection.as_mut() { Some(wal_connection) => Some(wal_connection.connection_task.next_task_event().await), None => None, } } => { - let wal_connection = walreceiver_state.wal_connection.as_mut() + let wal_connection = connection_manager_state.wal_connection.as_mut() .expect("Should have a connection, as checked by the corresponding select! 
guard"); match wal_connection_update { TaskEvent::Update(TaskStateUpdate::Started) => {}, @@ -156,7 +96,7 @@ async fn connection_manager_loop_step( // from this safekeeper. This is good enough to clean unsuccessful // retries history and allow reconnecting to this safekeeper without // sleeping for a long time. - walreceiver_state.wal_connection_retries.remove(&wal_connection.sk_id); + connection_manager_state.wal_connection_retries.remove(&wal_connection.sk_id); } wal_connection.status = new_status; } @@ -165,7 +105,7 @@ async fn connection_manager_loop_step( Ok(()) => debug!("WAL receiving task finished"), Err(e) => error!("wal receiver task finished with an error: {e:?}"), } - walreceiver_state.drop_old_connection(false).await; + connection_manager_state.drop_old_connection(false).await; }, } }, @@ -173,7 +113,7 @@ async fn connection_manager_loop_step( // Got a new update from the broker broker_update = broker_subscription.message() => { match broker_update { - Ok(Some(broker_update)) => walreceiver_state.register_timeline_update(broker_update), + Ok(Some(broker_update)) => connection_manager_state.register_timeline_update(broker_update), Err(e) => { error!("broker subscription failed: {e}"); return ControlFlow::Continue(()); @@ -187,12 +127,12 @@ async fn connection_manager_loop_step( new_event = async { loop { - if walreceiver_state.timeline.current_state() == TimelineState::Loading { + if connection_manager_state.timeline.current_state() == TimelineState::Loading { warn!("wal connection manager should only be launched after timeline has become active"); } match timeline_state_updates.changed().await { Ok(()) => { - let new_state = walreceiver_state.timeline.current_state(); + let new_state = connection_manager_state.timeline.current_state(); match new_state { // we're already active as walreceiver, no need to reactivate TimelineState::Active => continue, @@ -234,9 +174,9 @@ async fn connection_manager_loop_step( } => debug!("Waking up for the next retry 
after waiting for {time_until_next_retry:?}"), } - if let Some(new_candidate) = walreceiver_state.next_connection_candidate() { + if let Some(new_candidate) = connection_manager_state.next_connection_candidate() { info!("Switching to new connection candidate: {new_candidate:?}"); - walreceiver_state + connection_manager_state .change_connection(new_candidate, ctx) .await } @@ -314,25 +254,17 @@ const WALCONNECTION_RETRY_MAX_BACKOFF_SECONDS: f64 = 15.0; const WALCONNECTION_RETRY_BACKOFF_MULTIPLIER: f64 = 1.5; /// All data that's needed to run endless broker loop and keep the WAL streaming connection alive, if possible. -struct WalreceiverState { +pub(super) struct ConnectionManagerState { id: TenantTimelineId, - /// Use pageserver data about the timeline to filter out some of the safekeepers. timeline: Arc, - /// The timeout on the connection to safekeeper for WAL streaming. - wal_connect_timeout: Duration, - /// The timeout to use to determine when the current connection is "stale" and reconnect to the other one. - lagging_wal_timeout: Duration, - /// The Lsn lag to use to determine when the current connection is lagging to much behind and reconnect to the other one. - max_lsn_wal_lag: NonZeroU64, + conf: WalReceiverConf, /// Current connection to safekeeper for WAL streaming. wal_connection: Option, /// Info about retries and unsuccessful attempts to connect to safekeepers. wal_connection_retries: HashMap, /// Data about all timelines, available for connection, fetched from storage broker, grouped by their corresponding safekeeper node id. wal_stream_candidates: HashMap, - auth_token: Option>, - availability_zone: Option, } /// Current connection data. 
@@ -375,15 +307,8 @@ struct BrokerSkTimeline { latest_update: NaiveDateTime, } -impl WalreceiverState { - fn new( - timeline: Arc, - wal_connect_timeout: Duration, - lagging_wal_timeout: Duration, - max_lsn_wal_lag: NonZeroU64, - auth_token: Option>, - availability_zone: Option, - ) -> Self { +impl ConnectionManagerState { + pub(super) fn new(timeline: Arc, conf: WalReceiverConf) -> Self { let id = TenantTimelineId { tenant_id: timeline.tenant_id, timeline_id: timeline.timeline_id, @@ -391,14 +316,10 @@ impl WalreceiverState { Self { id, timeline, - wal_connect_timeout, - lagging_wal_timeout, - max_lsn_wal_lag, + conf, wal_connection: None, wal_stream_candidates: HashMap::new(), wal_connection_retries: HashMap::new(), - auth_token, - availability_zone, } } @@ -407,7 +328,7 @@ impl WalreceiverState { self.drop_old_connection(true).await; let id = self.id; - let connect_timeout = self.wal_connect_timeout; + let connect_timeout = self.conf.wal_connect_timeout; let timeline = Arc::clone(&self.timeline); let ctx = ctx.detached_child( TaskKind::WalReceiverConnectionHandler, @@ -427,7 +348,7 @@ impl WalreceiverState { .context("walreceiver connection handling failure") } .instrument( - info_span!("walreceiver_connection", id = %id, node_id = %new_sk.safekeeper_id), + info_span!("walreceiver_connection", tenant_id = %id.tenant_id, timeline_id = %id.timeline_id, node_id = %new_sk.safekeeper_id), ) }); @@ -563,7 +484,7 @@ impl WalreceiverState { (now - existing_wal_connection.status.latest_connection_update).to_std() { // Drop connection if we haven't received keepalive message for a while. 
- if latest_interaciton > self.wal_connect_timeout { + if latest_interaciton > self.conf.wal_connect_timeout { return Some(NewWalConnectionCandidate { safekeeper_id: new_sk_id, wal_source_connconf: new_wal_source_connconf, @@ -573,7 +494,7 @@ impl WalreceiverState { existing_wal_connection.status.latest_connection_update, ), check_time: now, - threshold: self.wal_connect_timeout, + threshold: self.conf.wal_connect_timeout, }, }); } @@ -589,7 +510,7 @@ impl WalreceiverState { // Check if the new candidate has much more WAL than the current one. match new_commit_lsn.0.checked_sub(current_commit_lsn.0) { Some(new_sk_lsn_advantage) => { - if new_sk_lsn_advantage >= self.max_lsn_wal_lag.get() { + if new_sk_lsn_advantage >= self.conf.max_lsn_wal_lag.get() { return Some(NewWalConnectionCandidate { safekeeper_id: new_sk_id, wal_source_connconf: new_wal_source_connconf, @@ -597,16 +518,16 @@ impl WalreceiverState { reason: ReconnectReason::LaggingWal { current_commit_lsn, new_commit_lsn, - threshold: self.max_lsn_wal_lag, + threshold: self.conf.max_lsn_wal_lag, }, }); } // If we have a candidate with the same commit_lsn as the current one, which is in the same AZ as pageserver, // and the current one is not, switch to the new one. 
- if self.availability_zone.is_some() + if self.conf.availability_zone.is_some() && existing_wal_connection.availability_zone - != self.availability_zone - && self.availability_zone == new_availability_zone + != self.conf.availability_zone + && self.conf.availability_zone == new_availability_zone { return Some(NewWalConnectionCandidate { safekeeper_id: new_sk_id, @@ -677,7 +598,7 @@ impl WalreceiverState { if let Some(waiting_for_new_lsn_since) = waiting_for_new_lsn_since { if let Ok(waiting_for_new_wal) = (now - waiting_for_new_lsn_since).to_std() { if candidate_commit_lsn > current_commit_lsn - && waiting_for_new_wal > self.lagging_wal_timeout + && waiting_for_new_wal > self.conf.lagging_wal_timeout { return Some(NewWalConnectionCandidate { safekeeper_id: new_sk_id, @@ -691,7 +612,7 @@ impl WalreceiverState { existing_wal_connection.status.latest_wal_update, ), check_time: now, - threshold: self.lagging_wal_timeout, + threshold: self.conf.lagging_wal_timeout, }, }); } @@ -757,11 +678,11 @@ impl WalreceiverState { match wal_stream_connection_config( self.id, info.safekeeper_connstr.as_ref(), - match &self.auth_token { + match &self.conf.auth_token { None => None, Some(x) => Some(x), }, - self.availability_zone.as_deref(), + self.conf.availability_zone.as_deref(), ) { Ok(connstr) => Some((*sk_id, info, connstr)), Err(e) => { @@ -775,7 +696,7 @@ impl WalreceiverState { /// Remove candidates which haven't sent broker updates for a while. 
fn cleanup_old_candidates(&mut self) { let mut node_ids_to_remove = Vec::with_capacity(self.wal_stream_candidates.len()); - let lagging_wal_timeout = self.lagging_wal_timeout; + let lagging_wal_timeout = self.conf.lagging_wal_timeout; self.wal_stream_candidates.retain(|node_id, broker_info| { if let Ok(time_since_latest_broker_update) = @@ -799,7 +720,7 @@ impl WalreceiverState { } } - async fn shutdown(mut self) { + pub(super) async fn shutdown(mut self) { if let Some(wal_connection) = self.wal_connection.take() { wal_connection.connection_task.shutdown().await; } @@ -903,7 +824,7 @@ mod tests { let mut state = dummy_state(&harness).await; let now = Utc::now().naive_utc(); - let lagging_wal_timeout = chrono::Duration::from_std(state.lagging_wal_timeout)?; + let lagging_wal_timeout = chrono::Duration::from_std(state.conf.lagging_wal_timeout)?; let delay_over_threshold = now - lagging_wal_timeout - lagging_wal_timeout; state.wal_connection = None; @@ -914,7 +835,7 @@ mod tests { ( NodeId(3), dummy_broker_sk_timeline( - 1 + state.max_lsn_wal_lag.get(), + 1 + state.conf.max_lsn_wal_lag.get(), "delay_over_threshold", delay_over_threshold, ), @@ -948,7 +869,7 @@ mod tests { streaming_lsn: Some(Lsn(current_lsn)), }; - state.max_lsn_wal_lag = NonZeroU64::new(100).unwrap(); + state.conf.max_lsn_wal_lag = NonZeroU64::new(100).unwrap(); state.wal_connection = Some(WalConnection { started_at: now, sk_id: connected_sk_id, @@ -966,7 +887,7 @@ mod tests { ( connected_sk_id, dummy_broker_sk_timeline( - current_lsn + state.max_lsn_wal_lag.get() * 2, + current_lsn + state.conf.max_lsn_wal_lag.get() * 2, DUMMY_SAFEKEEPER_HOST, now, ), @@ -978,7 +899,7 @@ mod tests { ( NodeId(2), dummy_broker_sk_timeline( - current_lsn + state.max_lsn_wal_lag.get() / 2, + current_lsn + state.conf.max_lsn_wal_lag.get() / 2, "not_enough_advanced_lsn", now, ), @@ -1003,7 +924,11 @@ mod tests { state.wal_connection = None; state.wal_stream_candidates = HashMap::from([( NodeId(0), - 
dummy_broker_sk_timeline(1 + state.max_lsn_wal_lag.get(), DUMMY_SAFEKEEPER_HOST, now), + dummy_broker_sk_timeline( + 1 + state.conf.max_lsn_wal_lag.get(), + DUMMY_SAFEKEEPER_HOST, + now, + ), )]); let only_candidate = state @@ -1101,7 +1026,7 @@ mod tests { let now = Utc::now().naive_utc(); let connected_sk_id = NodeId(0); - let new_lsn = Lsn(current_lsn.0 + state.max_lsn_wal_lag.get() + 1); + let new_lsn = Lsn(current_lsn.0 + state.conf.max_lsn_wal_lag.get() + 1); let connection_status = WalConnectionStatus { is_connected: true, @@ -1146,7 +1071,7 @@ mod tests { ReconnectReason::LaggingWal { current_commit_lsn: current_lsn, new_commit_lsn: new_lsn, - threshold: state.max_lsn_wal_lag + threshold: state.conf.max_lsn_wal_lag }, "Should select bigger WAL safekeeper if it starts to lag enough" ); @@ -1165,7 +1090,7 @@ mod tests { let current_lsn = Lsn(100_000).align(); let now = Utc::now().naive_utc(); - let wal_connect_timeout = chrono::Duration::from_std(state.wal_connect_timeout)?; + let wal_connect_timeout = chrono::Duration::from_std(state.conf.wal_connect_timeout)?; let time_over_threshold = Utc::now().naive_utc() - wal_connect_timeout - wal_connect_timeout; @@ -1208,7 +1133,7 @@ mod tests { .. 
} => { assert_eq!(last_keep_alive, Some(time_over_threshold)); - assert_eq!(threshold, state.lagging_wal_timeout); + assert_eq!(threshold, state.conf.lagging_wal_timeout); } unexpected => panic!("Unexpected reason: {unexpected:?}"), } @@ -1228,7 +1153,7 @@ mod tests { let new_lsn = Lsn(100_100).align(); let now = Utc::now().naive_utc(); - let lagging_wal_timeout = chrono::Duration::from_std(state.lagging_wal_timeout)?; + let lagging_wal_timeout = chrono::Duration::from_std(state.conf.lagging_wal_timeout)?; let time_over_threshold = Utc::now().naive_utc() - lagging_wal_timeout - lagging_wal_timeout; @@ -1275,7 +1200,7 @@ mod tests { assert_eq!(current_commit_lsn, current_lsn); assert_eq!(candidate_commit_lsn, new_lsn); assert_eq!(last_wal_interaction, Some(time_over_threshold)); - assert_eq!(threshold, state.lagging_wal_timeout); + assert_eq!(threshold, state.conf.lagging_wal_timeout); } unexpected => panic!("Unexpected reason: {unexpected:?}"), } @@ -1289,27 +1214,29 @@ mod tests { const DUMMY_SAFEKEEPER_HOST: &str = "safekeeper_connstr"; - async fn dummy_state(harness: &TenantHarness<'_>) -> WalreceiverState { + async fn dummy_state(harness: &TenantHarness<'_>) -> ConnectionManagerState { let (tenant, ctx) = harness.load().await; let timeline = tenant .create_empty_timeline(TIMELINE_ID, Lsn(0), crate::DEFAULT_PG_VERSION, &ctx) .expect("Failed to create an empty timeline for dummy wal connection manager"); let timeline = timeline.initialize(&ctx).unwrap(); - WalreceiverState { + ConnectionManagerState { id: TenantTimelineId { tenant_id: harness.tenant_id, timeline_id: TIMELINE_ID, }, timeline, - wal_connect_timeout: Duration::from_secs(1), - lagging_wal_timeout: Duration::from_secs(1), - max_lsn_wal_lag: NonZeroU64::new(1024 * 1024).unwrap(), + conf: WalReceiverConf { + wal_connect_timeout: Duration::from_secs(1), + lagging_wal_timeout: Duration::from_secs(1), + max_lsn_wal_lag: NonZeroU64::new(1024 * 1024).unwrap(), + auth_token: None, + availability_zone: None, + 
}, wal_connection: None, wal_stream_candidates: HashMap::new(), wal_connection_retries: HashMap::new(), - auth_token: None, - availability_zone: None, } } @@ -1321,7 +1248,7 @@ mod tests { let harness = TenantHarness::create("switch_to_same_availability_zone")?; let mut state = dummy_state(&harness).await; - state.availability_zone = test_az.clone(); + state.conf.availability_zone = test_az.clone(); let current_lsn = Lsn(100_000).align(); let now = Utc::now().naive_utc(); diff --git a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs index ea2f2392ea..d80c7c5673 100644 --- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs +++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs @@ -37,12 +37,12 @@ use crate::{ use postgres_backend::is_expected_io_error; use postgres_connection::PgConnectionConfig; use postgres_ffi::waldecoder::WalStreamDecoder; -use pq_proto::PageserverFeedback; use utils::lsn::Lsn; +use utils::pageserver_feedback::PageserverFeedback; /// Status of the connection. #[derive(Debug, Clone, Copy)] -pub struct WalConnectionStatus { +pub(super) struct WalConnectionStatus { /// If we were able to initiate a postgres connection, this means that safekeeper process is at least running. pub is_connected: bool, /// Defines a healthy connection as one on which pageserver received WAL from safekeeper @@ -60,7 +60,7 @@ pub struct WalConnectionStatus { /// Open a connection to the given safekeeper and receive WAL, sending back progress /// messages as we go. -pub async fn handle_walreceiver_connection( +pub(super) async fn handle_walreceiver_connection( timeline: Arc, wal_source_connconf: PgConnectionConfig, events_sender: watch::Sender>, @@ -319,12 +319,12 @@ pub async fn handle_walreceiver_connection( timeline.get_remote_consistent_lsn().unwrap_or(Lsn(0)); // The last LSN we processed. It is not guaranteed to survive pageserver crash. 
- let last_received_lsn = u64::from(last_lsn); + let last_received_lsn = last_lsn; // `disk_consistent_lsn` is the LSN at which page server guarantees local persistence of all received data - let disk_consistent_lsn = u64::from(timeline.get_disk_consistent_lsn()); + let disk_consistent_lsn = timeline.get_disk_consistent_lsn(); // The last LSN that is synced to remote storage and is guaranteed to survive pageserver crash // Used by safekeepers to remove WAL preceding `remote_consistent_lsn`. - let remote_consistent_lsn = u64::from(timeline_remote_consistent_lsn); + let remote_consistent_lsn = timeline_remote_consistent_lsn; let ts = SystemTime::now(); // Update the status about what we just received. This is shown in the mgmt API. diff --git a/pgxn/neon/file_cache.c b/pgxn/neon/file_cache.c index 8dff259f02..cc46fb5a25 100644 --- a/pgxn/neon/file_cache.c +++ b/pgxn/neon/file_cache.c @@ -370,6 +370,74 @@ lfc_cache_contains(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno) return found; } +/* + * Evict a page (if present) from the local file cache + */ +void +lfc_evict(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno) +{ + BufferTag tag; + FileCacheEntry* entry; + ssize_t rc; + bool found; + int chunk_offs = blkno & (BLOCKS_PER_CHUNK-1); + uint32 hash; + + if (lfc_size_limit == 0) /* fast exit if file cache is disabled */ + return; + + INIT_BUFFERTAG(tag, rnode, forkNum, (blkno & ~(BLOCKS_PER_CHUNK-1))); + + hash = get_hash_value(lfc_hash, &tag); + + LWLockAcquire(lfc_lock, LW_EXCLUSIVE); + entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, &found); + + if (!found) + { + /* nothing to do */ + LWLockRelease(lfc_lock); + return; + } + + /* remove the page from the cache */ + entry->bitmap[chunk_offs >> 5] &= ~(1 << (chunk_offs & (32 - 1))); + + /* + * If the chunk has no live entries, we can position the chunk to be + * recycled first. 
+ */ + if (entry->bitmap[chunk_offs >> 5] == 0) + { + bool has_remaining_pages = false; + + for (int i = 0; i < (BLOCKS_PER_CHUNK / 32); i++) { + if (entry->bitmap[i] != 0) + { + has_remaining_pages = true; + break; + } + } + + /* + * Put the entry at the position that is first to be reclaimed when + * we have no cached pages remaining in the chunk + */ + if (!has_remaining_pages) + { + dlist_delete(&entry->lru_node); + dlist_push_head(&lfc_ctl->lru, &entry->lru_node); + } + } + + /* + * Done: apart from empty chunks, we don't move chunks in the LRU when + * pages are evicted, because eviction isn't usage. + */ + + LWLockRelease(lfc_lock); +} + /* * Try to read page from local cache. * Returns true if page is found in local cache. @@ -528,7 +596,6 @@ lfc_write(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno, LWLockRelease(lfc_lock); } - /* * Record structure holding the to be exposed cache data. */ diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c index c44e8fcda5..21330c018f 100644 --- a/pgxn/neon/libpagestore.c +++ b/pgxn/neon/libpagestore.c @@ -17,6 +17,8 @@ #include "pagestore_client.h" #include "fmgr.h" #include "access/xlog.h" +#include "access/xlogutils.h" +#include "storage/buf_internals.h" #include "libpq-fe.h" #include "libpq/pqformat.h" @@ -57,6 +59,8 @@ int n_unflushed_requests = 0; int flush_every_n_requests = 8; int readahead_buffer_size = 128; +bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id) = NULL; + static void pageserver_flush(void); static bool @@ -467,6 +471,8 @@ pg_init_libpagestore(void) smgr_hook = smgr_neon; smgr_init_hook = smgr_init_neon; dbsize_hook = neon_dbsize; + old_redo_read_buffer_filter = redo_read_buffer_filter; + redo_read_buffer_filter = neon_redo_read_buffer_filter; } lfc_init(); } diff --git a/pgxn/neon/neon.c b/pgxn/neon/neon.c index 5c98902554..217c1974a0 100644 --- a/pgxn/neon/neon.c +++ b/pgxn/neon/neon.c @@ -24,6 +24,7 @@ #include "neon.h" #include "walproposer.h" +#include
"pagestore_client.h" PG_MODULE_MAGIC; void _PG_init(void); diff --git a/pgxn/neon/neon.h b/pgxn/neon/neon.h index 3eac8f4570..60d321a945 100644 --- a/pgxn/neon/neon.h +++ b/pgxn/neon/neon.h @@ -11,6 +11,7 @@ #ifndef NEON_H #define NEON_H +#include "access/xlogreader.h" /* GUCs */ extern char *neon_auth_token; @@ -20,4 +21,11 @@ extern char *neon_tenant; extern void pg_init_libpagestore(void); extern void pg_init_walproposer(void); +/* + * Returns true if we shouldn't do REDO on that block in record indicated by + * block_id; false otherwise. + */ +extern bool neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id); +extern bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id); + #endif /* NEON_H */ diff --git a/pgxn/neon/pagestore_client.h b/pgxn/neon/pagestore_client.h index a1f05ac685..22f5cdb73a 100644 --- a/pgxn/neon/pagestore_client.h +++ b/pgxn/neon/pagestore_client.h @@ -207,6 +207,7 @@ extern void forget_cached_relsize(RelFileNode rnode, ForkNumber forknum); extern void lfc_write(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno, char *buffer); extern bool lfc_read(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno, char *buffer); extern bool lfc_cache_contains(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno); +extern void lfc_evict(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno); extern void lfc_init(void); diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c index 5b30641856..528d4eb051 100644 --- a/pgxn/neon/pagestore_smgr.c +++ b/pgxn/neon/pagestore_smgr.c @@ -189,6 +189,7 @@ typedef struct PrfHashEntry { #define SH_DEFINE #define SH_DECLARE #include "lib/simplehash.h" +#include "neon.h" /* * PrefetchState maintains the state of (prefetch) getPage@LSN requests. @@ -1209,6 +1210,9 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, ch if (ShutdownRequestPending) return; + /* Don't log any pages if we're not allowed to do so. 
*/ + if (!XLogInsertAllowed()) + return; /* * Whenever a VM or FSM page is evicted, WAL-log it. FSM and (some) VM @@ -1375,8 +1379,18 @@ neon_get_request_lsn(bool *latest, RelFileNode rnode, ForkNumber forknum, BlockN if (RecoveryInProgress()) { + /* + * We don't know if WAL has been generated but not yet replayed, so + * we're conservative in our estimates about latest pages. + */ *latest = false; - lsn = GetXLogReplayRecPtr(NULL); + + /* + * Get the last written LSN of this page. + */ + lsn = GetLastWrittenLSN(rnode, forknum, blkno); + lsn = nm_adjust_lsn(lsn); + elog(DEBUG1, "neon_get_request_lsn GetXLogReplayRecPtr %X/%X request lsn 0 ", (uint32) ((lsn) >> 32), (uint32) (lsn)); } @@ -1559,6 +1573,15 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo) /* * Newly created relation is empty, remember that in the relsize cache. * + * Note that in REDO, this is called to make sure the relation fork exists, + * but it does not truncate the relation. So, we can only update the + * relsize if it didn't exist before. + * + * Also, in redo, we must make sure to update the cached size of the + * relation, as that is the primary source of truth for REDO's + * file length considerations, and as file extension isn't (perfectly) + * logged, we need to take care of that before we hit file size checks. + * * FIXME: This is currently not just an optimization, but required for * correctness. Postgres can call smgrnblocks() on the newly-created * relation. Currently, we don't call SetLastWrittenLSN() when a new @@ -1566,7 +1589,14 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo) * cache, we might call smgrnblocks() on the newly-created relation before * the creation WAL record hass been received by the page server. 
*/ - set_cached_relsize(reln->smgr_rnode.node, forkNum, 0); + if (isRedo) + { + update_cached_relsize(reln->smgr_rnode.node, forkNum, 0); + get_cached_relsize(reln->smgr_rnode.node, forkNum, + &reln->smgr_cached_nblocks[forkNum]); + } + else + set_cached_relsize(reln->smgr_rnode.node, forkNum, 0); #ifdef DEBUG_COMPARE_LOCAL if (IS_LOCAL_REL(reln)) @@ -1831,6 +1861,26 @@ neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno, .blockNum = blkno, }; + /* + * The redo process does not lock pages that it needs to replay but are + * not in the shared buffers, so a concurrent process may request the + * page after redo has decided it won't redo that page and updated the + * LwLSN for that page. + * If we're in hot standby we need to take care that we don't return + * until after REDO has finished replaying up to that LwLSN, as the page + * should have been locked up to that point. + * + * See also the description on neon_redo_read_buffer_filter below. + * + * NOTE: It is possible that the WAL redo process will still do IO due to + * concurrent failed read IOs. Those IOs should never have a request_lsn + * that is as large as the WAL record we're currently replaying, if it + * weren't for the behaviour of the LwLsn cache that uses the highest + * value of the LwLsn cache when the entry is not found. + */ + if (RecoveryInProgress() && !(MyBackendType == B_STARTUP)) + XLogWaitForReplayOf(request_lsn); + /* * Try to find prefetched page in the list of received pages. */ @@ -2584,3 +2634,143 @@ smgr_init_neon(void) smgr_init_standard(); neon_init(); } + + +/* + * Return whether we can skip the redo for this block. + * + * The conditions for skipping the IO are: + * + * - The block is not in the shared buffers, and + * - The block is not in the local file cache + * + * ... because any subsequent read of the page requires us to read + * the new version of the page from the PageServer. 
We do not + * check the local file cache; we instead evict the page from LFC: it + * is cheaper than going through the FS calls to read the page, and + * limits the number of lock operations used in the REDO process. + * + * We have one exception to the rules for skipping IO: We always apply + * changes to shared catalogs' pages. Although this is mostly out of caution, + * catalog updates usually result in backends rebuilding their catalog snapshot, + * which means it's quite likely the modified page is going to be used soon. + * + * It is important to note that skipping WAL redo for a page also means + * the page isn't locked by the redo process, as there is no Buffer + * being returned, nor is there a buffer descriptor to lock. + * This means that any IO that wants to read this block needs to wait + * for the WAL REDO process to finish processing the WAL record before + * it allows the system to start reading the block, as releasing the + * block early could lead to phantom reads. + * + * For example, REDO for a WAL record that modifies 3 blocks could skip + * the first block, wait for a lock on the second, and then modify the + * third block. Without skipping, all blocks would be locked and phantom + * reads would not occur, but with skipping, a concurrent process could + * read block 1 with post-REDO contents and read block 3 with pre-REDO + * contents, where with REDO locking it would wait on block 1 and see + * block 3 with post-REDO contents only. 
+ */ +bool +neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id) +{ + XLogRecPtr end_recptr = record->EndRecPtr; + XLogRecPtr prev_end_recptr = record->ReadRecPtr - 1; + RelFileNode rnode; + ForkNumber forknum; + BlockNumber blkno; + BufferTag tag; + uint32 hash; + LWLock *partitionLock; + Buffer buffer; + bool no_redo_needed; + BlockNumber relsize; + + if (old_redo_read_buffer_filter && old_redo_read_buffer_filter(record, block_id)) + return true; + +#if PG_VERSION_NUM < 150000 + if (!XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno)) + elog(PANIC, "failed to locate backup block with ID %d", block_id); +#else + XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno); +#endif + + /* + * Out of an abundance of caution, we always run redo on shared catalogs, + * regardless of whether the block is stored in shared buffers. + * See also this function's top comment. + */ + if (!OidIsValid(rnode.dbNode)) + return false; + + INIT_BUFFERTAG(tag, rnode, forknum, blkno); + hash = BufTableHashCode(&tag); + partitionLock = BufMappingPartitionLock(hash); + + /* + * Lock the partition of shared_buffers so that it can't be updated + * concurrently. + */ + LWLockAcquire(partitionLock, LW_SHARED); + + /* Try to find the relevant buffer */ + buffer = BufTableLookup(&tag, hash); + + no_redo_needed = buffer < 0; + + /* we don't have the buffer in memory, update lwLsn past this record */ + if (no_redo_needed) + { + SetLastWrittenLSNForBlock(end_recptr, rnode, forknum, blkno); + lfc_evict(rnode, forknum, blkno); + } + else + { + SetLastWrittenLSNForBlock(prev_end_recptr, rnode, forknum, blkno); + } + + LWLockRelease(partitionLock); + + /* Extend the relation if we know its size */ + if (get_cached_relsize(rnode, forknum, &relsize)) + { + if (relsize < blkno + 1) + update_cached_relsize(rnode, forknum, blkno + 1); + } + else + { + /* + * Size was not cached. 
We populate the cache now, with the size of the + * relation measured after this WAL record is applied. + * + * This length is later reused when we open the smgr to read the block, + * which is fine and expected. + */ + + NeonResponse *response; + NeonNblocksResponse *nbresponse; + NeonNblocksRequest request = { + .req = (NeonRequest) { + .lsn = end_recptr, + .latest = false, + .tag = T_NeonNblocksRequest, + }, + .rnode = rnode, + .forknum = forknum, + }; + + response = page_server_request(&request); + + Assert(response->tag == T_NeonNblocksResponse); + nbresponse = (NeonNblocksResponse *) response; + + Assert(nbresponse->n_blocks > blkno); + + set_cached_relsize(rnode, forknum, nbresponse->n_blocks); + + elog(SmgrTrace, "Set length to %d", nbresponse->n_blocks); + } + + return no_redo_needed; +} diff --git a/pgxn/neon/walproposer.c b/pgxn/neon/walproposer.c index 45037a8c01..a99be40955 100644 --- a/pgxn/neon/walproposer.c +++ b/pgxn/neon/walproposer.c @@ -1964,18 +1964,26 @@ CombineHotStanbyFeedbacks(HotStandbyFeedback * hs) { if (safekeeper[i].appendResponse.hs.ts != 0) { - if (FullTransactionIdPrecedes(safekeeper[i].appendResponse.hs.xmin, hs->xmin)) + HotStandbyFeedback *skhs = &safekeeper[i].appendResponse.hs; + if (FullTransactionIdIsNormal(skhs->xmin) + && FullTransactionIdPrecedes(skhs->xmin, hs->xmin)) { - hs->xmin = safekeeper[i].appendResponse.hs.xmin; - hs->ts = safekeeper[i].appendResponse.hs.ts; + hs->xmin = skhs->xmin; + hs->ts = skhs->ts; } - if (FullTransactionIdPrecedes(safekeeper[i].appendResponse.hs.catalog_xmin, hs->catalog_xmin)) + if (FullTransactionIdIsNormal(skhs->catalog_xmin) + && FullTransactionIdPrecedes(skhs->catalog_xmin, hs->catalog_xmin)) { - hs->catalog_xmin = safekeeper[i].appendResponse.hs.catalog_xmin; - hs->ts = safekeeper[i].appendResponse.hs.ts; + hs->catalog_xmin = skhs->catalog_xmin; + hs->ts = skhs->ts; } } } + + if (hs->xmin.value == ~0) + hs->xmin = InvalidFullTransactionId; + if (hs->catalog_xmin.value == ~0) + hs->catalog_xmin
= InvalidFullTransactionId; } /* diff --git a/pgxn/neon_walredo/seccomp.c b/pgxn/neon_walredo/seccomp.c index 5d5ba549ef..1e8f6682a2 100644 --- a/pgxn/neon_walredo/seccomp.c +++ b/pgxn/neon_walredo/seccomp.c @@ -9,6 +9,14 @@ * To prevent this, it has been decided to limit possible interactions * with the outside world using the Secure Computing BPF mode. * + * This code is intended to support both x86_64 and aarch64. The latter + * doesn't implement some syscalls like open and select. We allow both + * select (absent on aarch64) and pselect6 (present on both architectures) + * We call select(2) through libc, and the libc wrapper calls select or pselect6 + * depending on the architecture. You can check which syscalls are present on + * different architectures with the `scmp_sys_resolver` tool from the + * seccomp package. + * * We use this mode to disable all syscalls not in the allowlist. This * approach has its pros & cons: * @@ -73,8 +81,6 @@ * I suspect that certain libc functions might involve slightly * different syscalls, e.g. select/pselect6/pselect6_time64/whatever. * - * - Test on any arch other than amd64 to see if it works there. - * *------------------------------------------------------------------------- */ @@ -122,9 +128,10 @@ seccomp_load_rules(PgSeccompRule *rules, int count) /* * First, check that open of a well-known file works. - * XXX: We use raw syscall() to call the very open(). + * XXX: We use raw syscall() to call the very openat() which is + * present both on x86_64 and on aarch64. 
*/ - fd = syscall(SCMP_SYS(open), "/dev/null", O_RDONLY, 0); + fd = syscall(SCMP_SYS(openat), AT_FDCWD, "/dev/null", O_RDONLY, 0); if (seccomp_test_sighandler_done) ereport(FATAL, (errcode(ERRCODE_SYSTEM_ERROR), @@ -135,15 +142,15 @@ seccomp_load_rules(PgSeccompRule *rules, int count) errmsg("seccomp: could not open /dev/null for seccomp testing: %m"))); close((int) fd); - /* Set a trap on open() to test seccomp bpf */ - rule = PG_SCMP(open, SCMP_ACT_TRAP); + /* Set a trap on openat() to test seccomp bpf */ + rule = PG_SCMP(openat, SCMP_ACT_TRAP); if (do_seccomp_load_rules(&rule, 1, SCMP_ACT_ALLOW) != 0) ereport(FATAL, (errcode(ERRCODE_SYSTEM_ERROR), errmsg("seccomp: could not load test trap"))); - /* Finally, check that open() now raises SIGSYS */ - (void) syscall(SCMP_SYS(open), "/dev/null", O_RDONLY, 0); + /* Finally, check that openat() now raises SIGSYS */ + (void) syscall(SCMP_SYS(openat), AT_FDCWD, "/dev/null", O_RDONLY, 0); if (!seccomp_test_sighandler_done) ereport(FATAL, (errcode(ERRCODE_SYSTEM_ERROR), @@ -224,7 +231,7 @@ seccomp_test_sighandler(int signum, siginfo_t *info, void *cxt pg_attribute_unus die(1, DIE_PREFIX "bad signal number\n"); /* TODO: maybe somehow extract the hardcoded syscall number */ - if (info->si_syscall != SCMP_SYS(open)) + if (info->si_syscall != SCMP_SYS(openat)) die(1, DIE_PREFIX "bad syscall number\n"); #undef DIE_PREFIX diff --git a/proxy/Cargo.toml b/proxy/Cargo.toml index add8b14c95..9d702b29c3 100644 --- a/proxy/Cargo.toml +++ b/proxy/Cargo.toml @@ -64,6 +64,7 @@ webpki-roots.workspace = true x509-parser.workspace = true workspace_hack.workspace = true +tokio-util.workspace = true [dev-dependencies] rcgen.workspace = true diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index b5efc72803..0465703ae6 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -1,8 +1,8 @@ use crate::{cancellation::CancelClosure, error::UserFacingError}; -use futures::TryFutureExt; +use futures::{FutureExt, TryFutureExt}; use 
itertools::Itertools; use pq_proto::StartupMessageParams; -use std::{io, net::SocketAddr}; +use std::{io, net::SocketAddr, time::Duration}; use thiserror::Error; use tokio::net::TcpStream; use tokio_postgres::NoTls; @@ -130,9 +130,23 @@ impl ConnCfg { async fn connect_raw(&self) -> io::Result<(SocketAddr, TcpStream)> { use tokio_postgres::config::Host; + // wrap TcpStream::connect with timeout + let connect_with_timeout = |host, port| { + let connection_timeout = Duration::from_millis(10000); + tokio::time::timeout(connection_timeout, TcpStream::connect((host, port))).map( + move |res| match res { + Ok(tcpstream_connect_res) => tcpstream_connect_res, + Err(_) => Err(io::Error::new( + io::ErrorKind::TimedOut, + format!("exceeded connection timeout {connection_timeout:?}"), + )), + }, + ) + }; + let connect_once = |host, port| { info!("trying to connect to compute node at {host}:{port}"); - TcpStream::connect((host, port)).and_then(|socket| async { + connect_with_timeout(host, port).and_then(|socket| async { let socket_addr = socket.peer_addr()?; // This prevents load balancer from severing the connection. socket2::SockRef::from(&socket).set_keepalive(true)?; @@ -165,7 +179,6 @@ impl ConnCfg { Host::Unix(_) => continue, // unix sockets are not welcome here }; - // TODO: maybe we should add a timeout. 
match connect_once(host, *port).await { Ok(socket) => return Ok(socket), Err(err) => { diff --git a/proxy/src/config.rs b/proxy/src/config.rs index ad51502b49..0ceb556ca1 100644 --- a/proxy/src/config.rs +++ b/proxy/src/config.rs @@ -40,7 +40,7 @@ pub fn configure_tls( let mut cert_resolver = CertResolver::new(); // add default certificate - cert_resolver.add_cert(key_path, cert_path)?; + cert_resolver.add_cert(key_path, cert_path, true)?; // add extra certificates if let Some(certs_dir) = certs_dir { @@ -52,8 +52,11 @@ pub fn configure_tls( let key_path = path.join("tls.key"); let cert_path = path.join("tls.crt"); if key_path.exists() && cert_path.exists() { - cert_resolver - .add_cert(&key_path.to_string_lossy(), &cert_path.to_string_lossy())?; + cert_resolver.add_cert( + &key_path.to_string_lossy(), + &cert_path.to_string_lossy(), + false, + )?; } } } @@ -78,16 +81,23 @@ pub fn configure_tls( struct CertResolver { certs: HashMap>, + default: Option>, } impl CertResolver { fn new() -> Self { Self { certs: HashMap::new(), + default: None, } } - fn add_cert(&mut self, key_path: &str, cert_path: &str) -> anyhow::Result<()> { + fn add_cert( + &mut self, + key_path: &str, + cert_path: &str, + is_default: bool, + ) -> anyhow::Result<()> { let priv_key = { let key_bytes = std::fs::read(key_path).context("TLS key file")?; let mut keys = rustls_pemfile::pkcs8_private_keys(&mut &key_bytes[..]) @@ -136,10 +146,13 @@ impl CertResolver { "Failed to parse common name from certificate at '{cert_path}'." 
))?; - self.certs.insert( - common_name, - Arc::new(rustls::sign::CertifiedKey::new(cert_chain, key)), - ); + let cert = Arc::new(rustls::sign::CertifiedKey::new(cert_chain, key)); + + if is_default { + self.default = Some(cert.clone()); + } + + self.certs.insert(common_name, cert); Ok(()) } @@ -172,7 +185,17 @@ impl rustls::server::ResolvesServerCert for CertResolver { } } } else { - None + // No SNI, use the default certificate, otherwise we can't get to + // options parameter which can be used to set endpoint name too. + // That means that non-SNI flow will not work for CNAME domains in + // verify-full mode. + // + // If that will be a problem we can: + // + // a) Instead of multi-cert approach use single cert with extra + // domains listed in Subject Alternative Name (SAN). + // b) Deploy separate proxy instances for extra domains. + self.default.as_ref().cloned() } } } diff --git a/proxy/src/http/websocket.rs b/proxy/src/http/websocket.rs index 1757652a90..c7676e8e14 100644 --- a/proxy/src/http/websocket.rs +++ b/proxy/src/http/websocket.rs @@ -22,6 +22,7 @@ use tokio::{ io::{self, AsyncBufRead, AsyncRead, AsyncWrite, ReadBuf}, net::TcpListener, }; +use tokio_util::sync::CancellationToken; use tracing::{error, info, info_span, warn, Instrument}; use utils::http::{error::ApiError, json::json_response}; @@ -188,6 +189,7 @@ async fn ws_handler( pub async fn task_main( config: &'static ProxyConfig, ws_listener: TcpListener, + cancellation_token: CancellationToken, ) -> anyhow::Result<()> { scopeguard::defer! 
{ info!("websocket server has shut down"); @@ -231,6 +233,7 @@ pub async fn task_main( hyper::Server::builder(accept::from_stream(tls_listener)) .serve(make_svc) + .with_graceful_shutdown(cancellation_token.cancelled()) .await?; Ok(()) diff --git a/proxy/src/main.rs b/proxy/src/main.rs index c6526e9aff..1fd13c9f68 100644 --- a/proxy/src/main.rs +++ b/proxy/src/main.rs @@ -28,6 +28,7 @@ use config::ProxyConfig; use futures::FutureExt; use std::{borrow::Cow, future::Future, net::SocketAddr}; use tokio::{net::TcpListener, task::JoinError}; +use tokio_util::sync::CancellationToken; use tracing::{info, warn}; use utils::{project_git_version, sentry_init::init_sentry}; @@ -66,39 +67,48 @@ async fn main() -> anyhow::Result<()> { let proxy_address: SocketAddr = args.get_one::("proxy").unwrap().parse()?; info!("Starting proxy on {proxy_address}"); let proxy_listener = TcpListener::bind(proxy_address).await?; + let cancellation_token = CancellationToken::new(); - let mut tasks = vec![ - tokio::spawn(handle_signals()), - tokio::spawn(http::server::task_main(http_listener)), - tokio::spawn(proxy::task_main(config, proxy_listener)), - tokio::spawn(console::mgmt::task_main(mgmt_listener)), - ]; + let mut client_tasks = vec![tokio::spawn(proxy::task_main( + config, + proxy_listener, + cancellation_token.clone(), + ))]; if let Some(wss_address) = args.get_one::("wss") { let wss_address: SocketAddr = wss_address.parse()?; info!("Starting wss on {wss_address}"); let wss_listener = TcpListener::bind(wss_address).await?; - tasks.push(tokio::spawn(http::websocket::task_main( + client_tasks.push(tokio::spawn(http::websocket::task_main( config, wss_listener, + cancellation_token.clone(), ))); } + let mut tasks = vec![ + tokio::spawn(handle_signals(cancellation_token)), + tokio::spawn(http::server::task_main(http_listener)), + tokio::spawn(console::mgmt::task_main(mgmt_listener)), + ]; + if let Some(metrics_config) = &config.metric_collection { 
tasks.push(tokio::spawn(metrics::task_main(metrics_config))); } - // This combinator will block until either all tasks complete or - // one of them finishes with an error (others will be cancelled). - let tasks = tasks.into_iter().map(flatten_err); - let _: Vec<()> = futures::future::try_join_all(tasks).await?; - + let tasks = futures::future::try_join_all(tasks.into_iter().map(flatten_err)); + let client_tasks = futures::future::try_join_all(client_tasks.into_iter().map(flatten_err)); + tokio::select! { + // We are only expecting an error from these forever tasks + res = tasks => { res?; }, + res = client_tasks => { res?; }, + } Ok(()) } /// Handle unix signals appropriately. -async fn handle_signals() -> anyhow::Result<()> { +async fn handle_signals(token: CancellationToken) -> anyhow::Result<()> { use tokio::signal::unix::{signal, SignalKind}; let mut hangup = signal(SignalKind::hangup())?; @@ -116,11 +126,9 @@ async fn handle_signals() -> anyhow::Result<()> { warn!("received SIGINT, exiting immediately"); bail!("interrupted"); } - // TODO: Don't accept new proxy connections. - // TODO: Shut down once all exisiting connections have been closed. 
_ = terminate.recv() => { - warn!("received SIGTERM, exiting immediately"); - bail!("terminated"); + warn!("received SIGTERM, shutting down once all existing connections have closed"); + token.cancel(); } } } diff --git a/proxy/src/metrics.rs b/proxy/src/metrics.rs index 445c2e930c..6ae1e3a447 100644 --- a/proxy/src/metrics.rs +++ b/proxy/src/metrics.rs @@ -95,7 +95,7 @@ fn gather_proxy_io_bytes_per_client() -> Vec<(Ids, (u64, DateTime))> { current_metrics.push(( Ids { endpoint_id: endpoint_id.to_string(), - branch_id: "".to_string(), + branch_id: branch_id.to_string(), }, (value, Utc::now()), )); diff --git a/proxy/src/proxy.rs b/proxy/src/proxy.rs index 70fb25474e..1169d76160 100644 --- a/proxy/src/proxy.rs +++ b/proxy/src/proxy.rs @@ -17,6 +17,7 @@ use once_cell::sync::Lazy; use pq_proto::{BeMessage as Be, FeStartupPacket, StartupMessageParams}; use std::sync::Arc; use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; +use tokio_util::sync::CancellationToken; use tracing::{error, info, warn}; use utils::measured_stream::MeasuredStream; @@ -63,6 +64,7 @@ static NUM_BYTES_PROXIED_COUNTER: Lazy = Lazy::new(|| { pub async fn task_main( config: &'static ProxyConfig, listener: tokio::net::TcpListener, + cancellation_token: CancellationToken, ) -> anyhow::Result<()> { scopeguard::defer! { info!("proxy has shut down"); @@ -72,29 +74,48 @@ pub async fn task_main( // will be inherited by all accepted client sockets. socket2::SockRef::from(&listener).set_keepalive(true)?; + let mut connections = tokio::task::JoinSet::new(); let cancel_map = Arc::new(CancelMap::default()); + loop { - let (socket, peer_addr) = listener.accept().await?; - info!("accepted postgres client connection from {peer_addr}"); + tokio::select! 
{ + accept_result = listener.accept() => { + let (socket, peer_addr) = accept_result?; + info!("accepted postgres client connection from {peer_addr}"); - let session_id = uuid::Uuid::new_v4(); - let cancel_map = Arc::clone(&cancel_map); - tokio::spawn( - async move { - info!("spawned a task for {peer_addr}"); + let session_id = uuid::Uuid::new_v4(); + let cancel_map = Arc::clone(&cancel_map); + connections.spawn( + async move { + info!("spawned a task for {peer_addr}"); - socket - .set_nodelay(true) - .context("failed to set socket option")?; + socket + .set_nodelay(true) + .context("failed to set socket option")?; - handle_client(config, &cancel_map, session_id, socket).await + handle_client(config, &cancel_map, session_id, socket).await + } + .unwrap_or_else(move |e| { + // Acknowledge that the task has finished with an error. + error!(?session_id, "per-client task finished with an error: {e:#}"); + }), + ); } - .unwrap_or_else(|e| { - // Acknowledge that the task has finished with an error. - error!("per-client task finished with an error: {e:#}"); - }), - ); + _ = cancellation_token.cancelled() => { + drop(listener); + break; + } + } } + // Drain connections + while let Some(res) = connections.join_next().await { + if let Err(e) = res { + if !e.is_panic() && !e.is_cancelled() { + warn!("unexpected error from joined connection task: {e:?}"); + } + } + } + Ok(()) } // TODO(tech debt): unite this with its twin below. diff --git a/run_clippy.sh b/run_clippy.sh index 9adfddedc2..ae2a17ec0c 100755 --- a/run_clippy.sh +++ b/run_clippy.sh @@ -1,4 +1,5 @@ -#!/bin/bash +#!/usr/bin/env bash +set -euo pipefail # If you save this in your path under the name "cargo-zclippy" (or whatever # name you like), then you can run it as "cargo zclippy" from the shell prompt. @@ -8,7 +9,11 @@ # warnings and errors right in the editor. 
# In vscode, this setting is Rust-analyzer>Check On Save:Command -# * `-A unknown_lints` – do not warn about unknown lint suppressions -# that people with newer toolchains might use -# * `-D warnings` - fail on any warnings (`cargo` returns non-zero exit status) -cargo clippy --locked --all --all-targets --all-features -- -A unknown_lints -D warnings +# NB: the CI runs the full feature powerset, so, it catches slightly more errors +# at the expense of longer runtime. This script is used by developers, so, don't +# do that here. + +thisscript="${BASH_SOURCE[0]}" +thisscript_dir="$(dirname "$thisscript")" +CLIPPY_COMMON_ARGS="$( source .neon_clippy_args; echo "$CLIPPY_COMMON_ARGS")" +exec cargo clippy --all-features $CLIPPY_COMMON_ARGS diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs index ace921a26d..3699a2a74c 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -134,7 +134,10 @@ fn main() -> anyhow::Result<()> { // 1. init logging // 2. tracing panic hook // 3. sentry - logging::init(LogFormat::from_config(&args.log_format)?)?; + logging::init( + LogFormat::from_config(&args.log_format)?, + logging::TracingErrorLayerEnablement::Disabled, + )?; logging::replace_panic_hook_with_tracing_panic_hook().forget(); info!("version: {GIT_VERSION}"); diff --git a/safekeeper/src/broker.rs b/safekeeper/src/broker.rs index 92f35bf51f..6a98d8fd84 100644 --- a/safekeeper/src/broker.rs +++ b/safekeeper/src/broker.rs @@ -91,7 +91,7 @@ async fn pull_loop(conf: SafeKeeperConf) -> Result<()> { // connection to the broker. // note: there are blocking operations below, but it's considered fine for now - tli.record_safekeeper_info(&msg).await? + tli.record_safekeeper_info(msg).await? 
} } bail!("end of stream"); diff --git a/safekeeper/src/debug_dump.rs b/safekeeper/src/debug_dump.rs index 674cf9f6eb..f711c4429d 100644 --- a/safekeeper/src/debug_dump.rs +++ b/safekeeper/src/debug_dump.rs @@ -9,9 +9,10 @@ use std::path::PathBuf; use anyhow::Result; use chrono::{DateTime, Utc}; use postgres_ffi::XLogSegNo; +use serde::Deserialize; use serde::Serialize; -use utils::http::json::display_serialize; +use serde_with::{serde_as, DisplayFromStr}; use utils::id::NodeId; use utils::id::TenantTimelineId; use utils::id::{TenantId, TimelineId}; @@ -22,11 +23,11 @@ use crate::safekeeper::SafekeeperMemState; use crate::safekeeper::TermHistory; use crate::SafeKeeperConf; -use crate::timeline::ReplicaState; +use crate::send_wal::WalSenderState; use crate::GlobalTimelines; /// Various filters that influence the resulting JSON output. -#[derive(Debug, Serialize)] +#[derive(Debug, Serialize, Deserialize)] pub struct Args { /// Dump all available safekeeper state. False by default. pub dump_all: bool, @@ -51,7 +52,7 @@ pub struct Args { } /// Response for debug dump request. -#[derive(Debug, Serialize)] +#[derive(Debug, Serialize, Deserialize)] pub struct Response { pub start_time: DateTime, pub finish_time: DateTime, @@ -61,7 +62,7 @@ pub struct Response { } /// Safekeeper configuration. 
-#[derive(Debug, Serialize)] +#[derive(Debug, Serialize, Deserialize)] pub struct Config { pub id: NodeId, pub workdir: PathBuf, @@ -72,22 +73,23 @@ pub struct Config { pub wal_backup_enabled: bool, } -#[derive(Debug, Serialize)] +#[serde_as] +#[derive(Debug, Serialize, Deserialize)] pub struct Timeline { - #[serde(serialize_with = "display_serialize")] + #[serde_as(as = "DisplayFromStr")] pub tenant_id: TenantId, - #[serde(serialize_with = "display_serialize")] + #[serde_as(as = "DisplayFromStr")] pub timeline_id: TimelineId, pub control_file: Option, pub memory: Option, pub disk_content: Option, } -#[derive(Debug, Serialize)] +#[derive(Debug, Serialize, Deserialize)] pub struct Memory { pub is_cancelled: bool, pub peers_info_len: usize, - pub replicas: Vec>, + pub walsenders: Vec, pub wal_backup_active: bool, pub active: bool, pub num_computes: u32, @@ -102,12 +104,12 @@ pub struct Memory { pub file_open: bool, } -#[derive(Debug, Serialize)] +#[derive(Debug, Serialize, Deserialize)] pub struct DiskContent { pub files: Vec, } -#[derive(Debug, Serialize)] +#[derive(Debug, Serialize, Deserialize)] pub struct FileInfo { pub name: String, pub size: u64, diff --git a/safekeeper/src/handler.rs b/safekeeper/src/handler.rs index a589fe1869..2c3d1cea0e 100644 --- a/safekeeper/src/handler.rs +++ b/safekeeper/src/handler.rs @@ -3,6 +3,7 @@ use anyhow::Context; use std::str; +use std::str::FromStr; use tokio::io::{AsyncRead, AsyncWrite}; use tracing::{info, info_span, Instrument}; @@ -49,12 +50,14 @@ fn parse_cmd(cmd: &str) -> anyhow::Result { if cmd.starts_with("START_WAL_PUSH") { Ok(SafekeeperPostgresCommand::StartWalPush) } else if cmd.starts_with("START_REPLICATION") { - let re = - Regex::new(r"START_REPLICATION(?: PHYSICAL)? ([[:xdigit:]]+/[[:xdigit:]]+)").unwrap(); + let re = Regex::new( + r"START_REPLICATION(?: SLOT [^ ]+)?(?: PHYSICAL)? 
([[:xdigit:]]+/[[:xdigit:]]+)", + ) + .unwrap(); let mut caps = re.captures_iter(cmd); let start_lsn = caps .next() - .map(|cap| cap[1].parse::()) + .map(|cap| Lsn::from_str(&cap[1])) .context("parse start LSN from START_REPLICATION command")??; Ok(SafekeeperPostgresCommand::StartReplication { start_lsn }) } else if cmd.starts_with("IDENTIFY_SYSTEM") { diff --git a/safekeeper/src/http/routes.rs b/safekeeper/src/http/routes.rs index cdec45c148..eeb08d2733 100644 --- a/safekeeper/src/http/routes.rs +++ b/safekeeper/src/http/routes.rs @@ -3,7 +3,8 @@ use hyper::{Body, Request, Response, StatusCode, Uri}; use once_cell::sync::Lazy; use postgres_ffi::WAL_SEGMENT_SIZE; use safekeeper_api::models::SkTimelineInfo; -use serde::Serialize; +use serde::{Deserialize, Serialize}; +use serde_with::{serde_as, DisplayFromStr}; use std::collections::{HashMap, HashSet}; use std::fmt; use std::str::FromStr; @@ -11,7 +12,6 @@ use std::sync::Arc; use storage_broker::proto::SafekeeperTimelineInfo; use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId; use tokio::task::JoinError; -use utils::http::json::display_serialize; use crate::debug_dump; use crate::safekeeper::ServerInfo; @@ -57,44 +57,46 @@ fn get_conf(request: &Request) -> &SafeKeeperConf { /// Same as TermSwitchEntry, but serializes LSN using display serializer /// in Postgres format, i.e. 0/FFFFFFFF. Used only for the API response. 
-#[derive(Debug, Serialize)] -struct TermSwitchApiEntry { +#[serde_as] +#[derive(Debug, Serialize, Deserialize)] +pub struct TermSwitchApiEntry { pub term: Term, - #[serde(serialize_with = "display_serialize")] + #[serde_as(as = "DisplayFromStr")] pub lsn: Lsn, } /// Augment AcceptorState with epoch for convenience -#[derive(Debug, Serialize)] -struct AcceptorStateStatus { - term: Term, - epoch: Term, - term_history: Vec, +#[derive(Debug, Serialize, Deserialize)] +pub struct AcceptorStateStatus { + pub term: Term, + pub epoch: Term, + pub term_history: Vec, } /// Info about timeline on safekeeper ready for reporting. -#[derive(Debug, Serialize)] -struct TimelineStatus { - #[serde(serialize_with = "display_serialize")] - tenant_id: TenantId, - #[serde(serialize_with = "display_serialize")] - timeline_id: TimelineId, - acceptor_state: AcceptorStateStatus, - pg_info: ServerInfo, - #[serde(serialize_with = "display_serialize")] - flush_lsn: Lsn, - #[serde(serialize_with = "display_serialize")] - timeline_start_lsn: Lsn, - #[serde(serialize_with = "display_serialize")] - local_start_lsn: Lsn, - #[serde(serialize_with = "display_serialize")] - commit_lsn: Lsn, - #[serde(serialize_with = "display_serialize")] - backup_lsn: Lsn, - #[serde(serialize_with = "display_serialize")] - peer_horizon_lsn: Lsn, - #[serde(serialize_with = "display_serialize")] - remote_consistent_lsn: Lsn, +#[serde_as] +#[derive(Debug, Serialize, Deserialize)] +pub struct TimelineStatus { + #[serde_as(as = "DisplayFromStr")] + pub tenant_id: TenantId, + #[serde_as(as = "DisplayFromStr")] + pub timeline_id: TimelineId, + pub acceptor_state: AcceptorStateStatus, + pub pg_info: ServerInfo, + #[serde_as(as = "DisplayFromStr")] + pub flush_lsn: Lsn, + #[serde_as(as = "DisplayFromStr")] + pub timeline_start_lsn: Lsn, + #[serde_as(as = "DisplayFromStr")] + pub local_start_lsn: Lsn, + #[serde_as(as = "DisplayFromStr")] + pub commit_lsn: Lsn, + #[serde_as(as = "DisplayFromStr")] + pub backup_lsn: Lsn, + 
#[serde_as(as = "DisplayFromStr")] + pub peer_horizon_lsn: Lsn, + #[serde_as(as = "DisplayFromStr")] + pub remote_consistent_lsn: Lsn, } fn check_permission(request: &Request, tenant_id: Option) -> Result<(), ApiError> { @@ -144,7 +146,7 @@ async fn timeline_status_handler(request: Request) -> Result) -> Result, term: Term, lsn: Lsn) -> anyhow::R Ok(()) } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Serialize)] pub struct InsertedWAL { begin_lsn: Lsn, pub end_lsn: Lsn, diff --git a/safekeeper/src/metrics.rs b/safekeeper/src/metrics.rs index 2aaa17bfc5..eafee557d7 100644 --- a/safekeeper/src/metrics.rs +++ b/safekeeper/src/metrics.rs @@ -15,11 +15,11 @@ use metrics::{ use once_cell::sync::Lazy; use postgres_ffi::XLogSegNo; +use utils::pageserver_feedback::PageserverFeedback; use utils::{id::TenantTimelineId, lsn::Lsn}; use crate::{ safekeeper::{SafeKeeperState, SafekeeperMemState}, - timeline::ReplicaState, GlobalTimelines, }; @@ -231,7 +231,7 @@ pub fn time_io_closure(closure: impl FnOnce() -> Result<()>) -> Result { /// Metrics for a single timeline. 
pub struct FullTimelineInfo { pub ttid: TenantTimelineId, - pub replicas: Vec, + pub ps_feedback: PageserverFeedback, pub wal_backup_active: bool, pub timeline_is_active: bool, pub num_computes: u32, @@ -242,6 +242,7 @@ pub struct FullTimelineInfo { pub persisted_state: SafeKeeperState, pub flush_lsn: Lsn, + pub remote_consistent_lsn: Lsn, pub wal_storage: WalStorageMetrics, } @@ -514,19 +515,6 @@ impl Collector for TimelineCollector { let timeline_id = tli.ttid.timeline_id.to_string(); let labels = &[tenant_id.as_str(), timeline_id.as_str()]; - let mut most_advanced: Option = None; - for replica in tli.replicas.iter() { - if let Some(replica_feedback) = replica.pageserver_feedback { - if let Some(current) = most_advanced { - if current.last_received_lsn < replica_feedback.last_received_lsn { - most_advanced = Some(replica_feedback); - } - } else { - most_advanced = Some(replica_feedback); - } - } - } - self.commit_lsn .with_label_values(labels) .set(tli.mem_state.commit_lsn.into()); @@ -544,7 +532,7 @@ impl Collector for TimelineCollector { .set(tli.mem_state.peer_horizon_lsn.into()); self.remote_consistent_lsn .with_label_values(labels) - .set(tli.mem_state.remote_consistent_lsn.into()); + .set(tli.remote_consistent_lsn.into()); self.timeline_active .with_label_values(labels) .set(tli.timeline_is_active as u64); @@ -567,15 +555,17 @@ impl Collector for TimelineCollector { .with_label_values(labels) .set(tli.wal_storage.flush_wal_seconds); - if let Some(feedback) = most_advanced { - self.ps_last_received_lsn + self.ps_last_received_lsn + .with_label_values(labels) + .set(tli.ps_feedback.last_received_lsn.0); + if let Ok(unix_time) = tli + .ps_feedback + .replytime + .duration_since(SystemTime::UNIX_EPOCH) + { + self.feedback_last_time_seconds .with_label_values(labels) - .set(feedback.last_received_lsn); - if let Ok(unix_time) = feedback.replytime.duration_since(SystemTime::UNIX_EPOCH) { - self.feedback_last_time_seconds - .with_label_values(labels) - 
.set(unix_time.as_secs()); - } + .set(unix_time.as_secs()); } if tli.last_removed_segno != 0 { diff --git a/safekeeper/src/receive_wal.rs b/safekeeper/src/receive_wal.rs index 61e4c5f0fa..195470e3ca 100644 --- a/safekeeper/src/receive_wal.rs +++ b/safekeeper/src/receive_wal.rs @@ -27,6 +27,8 @@ use tokio::sync::mpsc::error::TryRecvError; use tokio::sync::mpsc::Receiver; use tokio::sync::mpsc::Sender; use tokio::task::spawn_blocking; +use tokio::time::Duration; +use tokio::time::Instant; use tracing::*; use utils::id::TenantTimelineId; use utils::lsn::Lsn; @@ -206,6 +208,10 @@ async fn network_write( } } +// Send keepalive messages to walproposer, to make sure it receives updates +// even when it writes a steady stream of messages. +const KEEPALIVE_INTERVAL: Duration = Duration::from_secs(1); + /// Takes messages from msg_rx, processes and pushes replies to reply_tx. struct WalAcceptor { tli: Arc, @@ -253,18 +259,25 @@ impl WalAcceptor { timeline: Arc::clone(&self.tli), }; - let mut next_msg: ProposerAcceptorMessage; + // After this timestamp we will stop processing AppendRequests and send a response + // to the walproposer. walproposer sends at least one AppendRequest per second, + // we will send keepalives by replying to these requests once per second. + let mut next_keepalive = Instant::now(); loop { let opt_msg = self.msg_rx.recv().await; if opt_msg.is_none() { return Ok(()); // chan closed, streaming terminated } - next_msg = opt_msg.unwrap(); + let mut next_msg = opt_msg.unwrap(); - if matches!(next_msg, ProposerAcceptorMessage::AppendRequest(_)) { + let reply_msg = if matches!(next_msg, ProposerAcceptorMessage::AppendRequest(_)) { // loop through AppendRequest's while it's readily available to // write as many WAL as possible without fsyncing + // + // Note: this will need to be rewritten if we want to read non-AppendRequest messages here. + // Otherwise, we might end up in a situation where we read a message, but don't + // process it. 
while let ProposerAcceptorMessage::AppendRequest(append_request) = next_msg { let noflush_msg = ProposerAcceptorMessage::NoFlushAppendRequest(append_request); @@ -274,6 +287,11 @@ impl WalAcceptor { } } + // get out of this loop if keepalive time is reached + if Instant::now() >= next_keepalive { + break; + } + match self.msg_rx.try_recv() { Ok(msg) => next_msg = msg, Err(TryRecvError::Empty) => break, @@ -282,18 +300,18 @@ impl WalAcceptor { } // flush all written WAL to the disk - if let Some(reply) = self.tli.process_msg(&ProposerAcceptorMessage::FlushWAL)? { - if self.reply_tx.send(reply).await.is_err() { - return Ok(()); // chan closed, streaming terminated - } - } + self.tli.process_msg(&ProposerAcceptorMessage::FlushWAL)? } else { // process message other than AppendRequest - if let Some(reply) = self.tli.process_msg(&next_msg)? { - if self.reply_tx.send(reply).await.is_err() { - return Ok(()); // chan closed, streaming terminated - } + self.tli.process_msg(&next_msg)? + }; + + if let Some(reply) = reply_msg { + if self.reply_tx.send(reply).await.is_err() { + return Ok(()); // chan closed, streaming terminated } + // reset keepalive time + next_keepalive = Instant::now() + KEEPALIVE_INTERVAL; } } } diff --git a/safekeeper/src/safekeeper.rs b/safekeeper/src/safekeeper.rs index 10b4842cbd..33da0c8e5a 100644 --- a/safekeeper/src/safekeeper.rs +++ b/safekeeper/src/safekeeper.rs @@ -18,7 +18,8 @@ use crate::control_file; use crate::send_wal::HotStandbyFeedback; use crate::wal_storage; -use pq_proto::{PageserverFeedback, SystemId}; +use pq_proto::SystemId; +use utils::pageserver_feedback::PageserverFeedback; use utils::{ bin_ser::LeSer, id::{NodeId, TenantId, TenantTimelineId, TimelineId}, @@ -205,14 +206,13 @@ pub struct SafeKeeperState { pub peers: PersistedPeers, } -#[derive(Debug, Clone, Serialize)] +#[derive(Debug, Clone, Serialize, Deserialize)] // In memory safekeeper state. Fields mirror ones in `SafeKeeperState`; values // are not flushed yet. 
pub struct SafekeeperMemState { pub commit_lsn: Lsn, pub backup_lsn: Lsn, pub peer_horizon_lsn: Lsn, - pub remote_consistent_lsn: Lsn, #[serde(with = "hex")] pub proposer_uuid: PgUuid, } @@ -347,7 +347,7 @@ pub struct AppendRequestHeader { } /// Report safekeeper state to proposer -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Serialize)] pub struct AppendResponse { // Current term of the safekeeper; if it is higher than proposer's, the // compute is out of date. @@ -540,7 +540,6 @@ where commit_lsn: state.commit_lsn, backup_lsn: state.backup_lsn, peer_horizon_lsn: state.peer_horizon_lsn, - remote_consistent_lsn: state.remote_consistent_lsn, proposer_uuid: state.proposer_uuid, }, state, @@ -781,10 +780,6 @@ where // Initializing backup_lsn is useful to avoid making backup think it should upload 0 segment. self.inmem.backup_lsn = max(self.inmem.backup_lsn, state.timeline_start_lsn); - // Initializing remote_consistent_lsn sets that we have nothing to - // stream to pageserver(s) immediately after creation. - self.inmem.remote_consistent_lsn = - max(self.inmem.remote_consistent_lsn, state.timeline_start_lsn); state.acceptor_state.term_history = msg.term_history.clone(); self.persist_control_file(state)?; @@ -837,7 +832,6 @@ where state.commit_lsn = self.inmem.commit_lsn; state.backup_lsn = self.inmem.backup_lsn; state.peer_horizon_lsn = self.inmem.peer_horizon_lsn; - state.remote_consistent_lsn = self.inmem.remote_consistent_lsn; state.proposer_uuid = self.inmem.proposer_uuid; self.state.persist(&state) } @@ -940,14 +934,12 @@ where self.state.backup_lsn + (self.state.server.wal_seg_size as u64) < new_backup_lsn; self.inmem.backup_lsn = new_backup_lsn; - let new_remote_consistent_lsn = max( - Lsn(sk_info.remote_consistent_lsn), - self.inmem.remote_consistent_lsn, - ); + // value in sk_info should be maximized over our local in memory value. 
+ let new_remote_consistent_lsn = Lsn(sk_info.remote_consistent_lsn); + assert!(self.state.remote_consistent_lsn <= new_remote_consistent_lsn); sync_control_file |= self.state.remote_consistent_lsn + (self.state.server.wal_seg_size as u64) < new_remote_consistent_lsn; - self.inmem.remote_consistent_lsn = new_remote_consistent_lsn; let new_peer_horizon_lsn = max(Lsn(sk_info.peer_horizon_lsn), self.inmem.peer_horizon_lsn); sync_control_file |= self.state.peer_horizon_lsn + (self.state.server.wal_seg_size as u64) @@ -955,7 +947,12 @@ where self.inmem.peer_horizon_lsn = new_peer_horizon_lsn; if sync_control_file { - self.persist_control_file(self.state.clone())?; + let mut state = self.state.clone(); + // Note: we do not persist remote_consistent_lsn in other paths of + // persisting cf -- that is not much needed currently. We could do + // that by storing Arc to walsenders in Safekeeper. + state.remote_consistent_lsn = new_remote_consistent_lsn; + self.persist_control_file(state)?; } Ok(()) } diff --git a/safekeeper/src/send_wal.rs b/safekeeper/src/send_wal.rs index a6ca89efa4..6b303eb0fe 100644 --- a/safekeeper/src/send_wal.rs +++ b/safekeeper/src/send_wal.rs @@ -1,21 +1,28 @@ //! This module implements the streaming side of replication protocol, starting -//! with the "START_REPLICATION" message. +//! with the "START_REPLICATION" message, and registry of walsenders. 
use crate::handler::SafekeeperPostgresHandler; -use crate::timeline::{ReplicaState, Timeline}; +use crate::timeline::Timeline; +use crate::wal_service::ConnectionId; use crate::wal_storage::WalReader; use crate::GlobalTimelines; use anyhow::Context as AnyhowContext; use bytes::Bytes; +use parking_lot::Mutex; use postgres_backend::PostgresBackend; use postgres_backend::{CopyStreamHandlerEnd, PostgresBackendReader, QueryError}; use postgres_ffi::get_current_timestamp; use postgres_ffi::{TimestampTz, MAX_SEND_SIZE}; -use pq_proto::{BeMessage, PageserverFeedback, WalSndKeepAlive, XLogDataBody}; +use pq_proto::{BeMessage, WalSndKeepAlive, XLogDataBody}; use serde::{Deserialize, Serialize}; +use serde_with::{serde_as, DisplayFromStr}; use tokio::io::{AsyncRead, AsyncWrite}; +use utils::id::TenantTimelineId; +use utils::lsn::AtomicLsn; +use utils::pageserver_feedback::PageserverFeedback; -use std::cmp::min; +use std::cmp::{max, min}; +use std::net::SocketAddr; use std::str; use std::sync::Arc; use std::time::Duration; @@ -40,6 +47,8 @@ pub struct HotStandbyFeedback { pub catalog_xmin: FullTransactionId, } +const INVALID_FULL_TRANSACTION_ID: FullTransactionId = 0; + impl HotStandbyFeedback { pub fn empty() -> HotStandbyFeedback { HotStandbyFeedback { @@ -51,24 +60,294 @@ impl HotStandbyFeedback { } /// Standby status update -#[derive(Debug, Clone, Deserialize)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] pub struct StandbyReply { - pub write_lsn: Lsn, // last lsn received by pageserver - pub flush_lsn: Lsn, // pageserver's disk consistent lSN - pub apply_lsn: Lsn, // pageserver's remote consistent lSN - pub reply_ts: TimestampTz, + pub write_lsn: Lsn, // The location of the last WAL byte + 1 received and written to disk in the standby. + pub flush_lsn: Lsn, // The location of the last WAL byte + 1 flushed to disk in the standby. + pub apply_lsn: Lsn, // The location of the last WAL byte + 1 applied in the standby. 
+ pub reply_ts: TimestampTz, // The client's system clock at the time of transmission, as microseconds since midnight on 2000-01-01. pub reply_requested: bool, } -/// Scope guard to unregister replication connection from timeline -struct ReplicationConnGuard { - replica: usize, // replica internal ID assigned by timeline - timeline: Arc, +impl StandbyReply { + fn empty() -> Self { + StandbyReply { + write_lsn: Lsn::INVALID, + flush_lsn: Lsn::INVALID, + apply_lsn: Lsn::INVALID, + reply_ts: 0, + reply_requested: false, + } + } } -impl Drop for ReplicationConnGuard { +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub struct StandbyFeedback { + reply: StandbyReply, + hs_feedback: HotStandbyFeedback, +} + +/// WalSenders registry. Timeline holds it (wrapped in Arc). +pub struct WalSenders { + /// Lsn maximized over all walsenders *and* peer data, so might be higher + /// than what we receive from replicas. + remote_consistent_lsn: AtomicLsn, + mutex: Mutex, +} + +impl WalSenders { + pub fn new(remote_consistent_lsn: Lsn) -> Arc { + Arc::new(WalSenders { + remote_consistent_lsn: AtomicLsn::from(remote_consistent_lsn), + mutex: Mutex::new(WalSendersShared::new()), + }) + } + + /// Register new walsender. Returned guard provides access to the slot and + /// automatically deregisters in Drop. 
+ fn register( + self: &Arc, + ttid: TenantTimelineId, + addr: SocketAddr, + conn_id: ConnectionId, + appname: Option, + ) -> WalSenderGuard { + let slots = &mut self.mutex.lock().slots; + let walsender_state = WalSenderState { + ttid, + addr, + conn_id, + appname, + feedback: ReplicationFeedback::Pageserver(PageserverFeedback::empty()), + }; + // find empty slot or create new one + let pos = if let Some(pos) = slots.iter().position(|s| s.is_none()) { + slots[pos] = Some(walsender_state); + pos + } else { + let pos = slots.len(); + slots.push(Some(walsender_state)); + pos + }; + WalSenderGuard { + id: pos, + walsenders: self.clone(), + } + } + + /// Get state of all walsenders. + pub fn get_all(self: &Arc) -> Vec { + self.mutex.lock().slots.iter().flatten().cloned().collect() + } + + /// Get aggregated pageserver feedback. + pub fn get_ps_feedback(self: &Arc) -> PageserverFeedback { + self.mutex.lock().agg_ps_feedback + } + + /// Get aggregated pageserver and hot standby feedback (we send them to compute). + pub fn get_feedbacks(self: &Arc) -> (PageserverFeedback, HotStandbyFeedback) { + let shared = self.mutex.lock(); + (shared.agg_ps_feedback, shared.agg_hs_feedback) + } + + /// Record new pageserver feedback, update aggregated values. + fn record_ps_feedback(self: &Arc, id: WalSenderId, feedback: &PageserverFeedback) { + let mut shared = self.mutex.lock(); + shared.get_slot_mut(id).feedback = ReplicationFeedback::Pageserver(*feedback); + shared.update_ps_feedback(); + self.update_remote_consistent_lsn(shared.agg_ps_feedback.remote_consistent_lsn); + } + + /// Record standby reply. 
+ fn record_standby_reply(self: &Arc, id: WalSenderId, reply: &StandbyReply) { + let mut shared = self.mutex.lock(); + let slot = shared.get_slot_mut(id); + match &mut slot.feedback { + ReplicationFeedback::Standby(sf) => sf.reply = *reply, + ReplicationFeedback::Pageserver(_) => { + slot.feedback = ReplicationFeedback::Standby(StandbyFeedback { + reply: *reply, + hs_feedback: HotStandbyFeedback::empty(), + }) + } + } + } + + /// Record hot standby feedback, update aggregated value. + fn record_hs_feedback(self: &Arc, id: WalSenderId, feedback: &HotStandbyFeedback) { + let mut shared = self.mutex.lock(); + let slot = shared.get_slot_mut(id); + match &mut slot.feedback { + ReplicationFeedback::Standby(sf) => sf.hs_feedback = *feedback, + ReplicationFeedback::Pageserver(_) => { + slot.feedback = ReplicationFeedback::Standby(StandbyFeedback { + reply: StandbyReply::empty(), + hs_feedback: *feedback, + }) + } + } + shared.update_hs_feedback(); + } + + /// Get remote_consistent_lsn reported by the pageserver. Returns None if + /// client is not pageserver. + fn get_ws_remote_consistent_lsn(self: &Arc, id: WalSenderId) -> Option { + let shared = self.mutex.lock(); + let slot = shared.get_slot(id); + match slot.feedback { + ReplicationFeedback::Pageserver(feedback) => Some(feedback.remote_consistent_lsn), + _ => None, + } + } + + /// Get remote_consistent_lsn maximized across all walsenders and peers. + pub fn get_remote_consistent_lsn(self: &Arc) -> Lsn { + self.remote_consistent_lsn.load() + } + + /// Update maximized remote_consistent_lsn, return new (potentially) value. + pub fn update_remote_consistent_lsn(self: &Arc, candidate: Lsn) -> Lsn { + self.remote_consistent_lsn + .fetch_max(candidate) + .max(candidate) + } + + /// Unregister walsender. 
+ fn unregister(self: &Arc, id: WalSenderId) { + let mut shared = self.mutex.lock(); + shared.slots[id] = None; + shared.update_hs_feedback(); + } +} + +struct WalSendersShared { + // aggregated over all walsenders value + agg_hs_feedback: HotStandbyFeedback, + // aggregated over all walsenders value + agg_ps_feedback: PageserverFeedback, + slots: Vec>, +} + +impl WalSendersShared { + fn new() -> Self { + WalSendersShared { + agg_hs_feedback: HotStandbyFeedback::empty(), + agg_ps_feedback: PageserverFeedback::empty(), + slots: Vec::new(), + } + } + + /// Get content of provided id slot, it must exist. + fn get_slot(&self, id: WalSenderId) -> &WalSenderState { + self.slots[id].as_ref().expect("walsender doesn't exist") + } + + /// Get mut content of provided id slot, it must exist. + fn get_slot_mut(&mut self, id: WalSenderId) -> &mut WalSenderState { + self.slots[id].as_mut().expect("walsender doesn't exist") + } + + /// Update aggregated hot standy feedback. We just take min of valid xmins + /// and ts. 
+ fn update_hs_feedback(&mut self) { + let mut agg = HotStandbyFeedback::empty(); + for ws_state in self.slots.iter().flatten() { + if let ReplicationFeedback::Standby(standby_feedback) = ws_state.feedback { + let hs_feedback = standby_feedback.hs_feedback; + // doing Option math like op1.iter().chain(op2.iter()).min() + // would be nicer, but we serialize/deserialize this struct + // directly, so leave as is for now + if hs_feedback.xmin != INVALID_FULL_TRANSACTION_ID { + if agg.xmin != INVALID_FULL_TRANSACTION_ID { + agg.xmin = min(agg.xmin, hs_feedback.xmin); + } else { + agg.xmin = hs_feedback.xmin; + } + agg.ts = min(agg.ts, hs_feedback.ts); + } + if hs_feedback.catalog_xmin != INVALID_FULL_TRANSACTION_ID { + if agg.catalog_xmin != INVALID_FULL_TRANSACTION_ID { + agg.catalog_xmin = min(agg.catalog_xmin, hs_feedback.catalog_xmin); + } else { + agg.catalog_xmin = hs_feedback.catalog_xmin; + } + agg.ts = min(agg.ts, hs_feedback.ts); + } + } + } + self.agg_hs_feedback = agg; + } + + /// Update aggregated pageserver feedback. LSNs (last_received, + /// disk_consistent, remote_consistent) and reply timestamp are just + /// maximized; timeline_size if taken from feedback with highest + /// last_received lsn. This is generally reasonable, but we might want to + /// implement other policies once multiple pageservers start to be actively + /// used. 
+ fn update_ps_feedback(&mut self) { + let init = PageserverFeedback::empty(); + let acc = + self.slots + .iter() + .flatten() + .fold(init, |mut acc, ws_state| match ws_state.feedback { + ReplicationFeedback::Pageserver(feedback) => { + if feedback.last_received_lsn > acc.last_received_lsn { + acc.current_timeline_size = feedback.current_timeline_size; + } + acc.last_received_lsn = + max(feedback.last_received_lsn, acc.last_received_lsn); + acc.disk_consistent_lsn = + max(feedback.disk_consistent_lsn, acc.disk_consistent_lsn); + acc.remote_consistent_lsn = + max(feedback.remote_consistent_lsn, acc.remote_consistent_lsn); + acc.replytime = max(feedback.replytime, acc.replytime); + acc + } + ReplicationFeedback::Standby(_) => acc, + }); + self.agg_ps_feedback = acc; + } +} + +// Serialized is used only for pretty printing in json. +#[serde_as] +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct WalSenderState { + #[serde_as(as = "DisplayFromStr")] + ttid: TenantTimelineId, + addr: SocketAddr, + conn_id: ConnectionId, + // postgres application_name + appname: Option, + feedback: ReplicationFeedback, +} + +// Receiver is either pageserver or regular standby, which have different +// feedbacks. +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +enum ReplicationFeedback { + Pageserver(PageserverFeedback), + Standby(StandbyFeedback), +} + +// id of the occupied slot in WalSenders to access it (and save in the +// WalSenderGuard). We could give Arc directly to the slot, but there is not +// much sense in that as values aggregation which is performed on each feedback +// receival iterates over all walsenders. +pub type WalSenderId = usize; + +/// Scope guard to access slot in WalSenders registry and unregister from it in +/// Drop. 
+pub struct WalSenderGuard { + id: WalSenderId, + walsenders: Arc, +} + +impl Drop for WalSenderGuard { fn drop(&mut self) { - self.timeline.remove_replica(self.replica); + self.walsenders.unregister(self.id); } } @@ -97,16 +376,13 @@ impl SafekeeperPostgresHandler { let tli = GlobalTimelines::get(self.ttid).map_err(|e| CopyStreamHandlerEnd::Other(e.into()))?; - let state = ReplicaState::new(); - // This replica_id is used below to check if it's time to stop replication. - let replica_id = tli.add_replica(state); - - // Use a guard object to remove our entry from the timeline, when the background - // thread and us have both finished using it. - let _guard = Arc::new(ReplicationConnGuard { - replica: replica_id, - timeline: tli.clone(), - }); + // Use a guard object to remove our entry from the timeline when we are done. + let ws_guard = Arc::new(tli.get_walsenders().register( + self.ttid, + *pgb.get_peer_addr(), + self.conn_id, + self.appname.clone(), + )); // Walproposer gets special handling: safekeeper must give proposer all // local WAL till the end, whether committed or not (walproposer will @@ -154,16 +430,11 @@ impl SafekeeperPostgresHandler { end_pos, stop_pos, commit_lsn_watch_rx: tli.get_commit_lsn_watch_rx(), - replica_id, + ws_guard: ws_guard.clone(), wal_reader, send_buf: [0; MAX_SEND_SIZE], }; - let mut reply_reader = ReplyReader { - reader, - tli, - replica_id, - feedback: ReplicaState::new(), - }; + let mut reply_reader = ReplyReader { reader, ws_guard }; let res = tokio::select! { // todo: add read|write .context to these errors @@ -190,7 +461,7 @@ struct WalSender<'a, IO> { // in recovery. 
stop_pos: Option, commit_lsn_watch_rx: Receiver, - replica_id: usize, + ws_guard: Arc, wal_reader: WalReader, // buffer for readling WAL into to send it send_buf: [u8; MAX_SEND_SIZE], @@ -264,14 +535,20 @@ impl WalSender<'_, IO> { return Ok(()); } // Timed out waiting for WAL, check for termination and send KA - if self.tli.should_walsender_stop(self.replica_id) { - // Terminate if there is nothing more to send. - // TODO close the stream properly - return Err(CopyStreamHandlerEnd::ServerInitiated(format!( - "ending streaming to {:?} at {}, receiver is caughtup and there is no computes", - self.appname, self.start_pos, - ))); + if let Some(remote_consistent_lsn) = self + .ws_guard + .walsenders + .get_ws_remote_consistent_lsn(self.ws_guard.id) + { + if self.tli.should_walsender_stop(remote_consistent_lsn) { + // Terminate if there is nothing more to send. + return Err(CopyStreamHandlerEnd::ServerInitiated(format!( + "ending streaming to {:?} at {}, receiver is caughtup and there is no computes", + self.appname, self.start_pos, + ))); + } } + self.pgb .write_message(&BeMessage::KeepAlive(WalSndKeepAlive { sent_ptr: self.end_pos.0, @@ -286,9 +563,7 @@ impl WalSender<'_, IO> { /// A half driving receiving replies. struct ReplyReader { reader: PostgresBackendReader, - tli: Arc, - replica_id: usize, - feedback: ReplicaState, + ws_guard: Arc, } impl ReplyReader { @@ -303,29 +578,32 @@ impl ReplyReader { match msg.first().cloned() { Some(HOT_STANDBY_FEEDBACK_TAG_BYTE) => { // Note: deserializing is on m[1..] because we skip the tag byte. 
- self.feedback.hs_feedback = HotStandbyFeedback::des(&msg[1..]) + let hs_feedback = HotStandbyFeedback::des(&msg[1..]) .context("failed to deserialize HotStandbyFeedback")?; - self.tli - .update_replica_state(self.replica_id, self.feedback); + self.ws_guard + .walsenders + .record_hs_feedback(self.ws_guard.id, &hs_feedback); } Some(STANDBY_STATUS_UPDATE_TAG_BYTE) => { - let _reply = + let reply = StandbyReply::des(&msg[1..]).context("failed to deserialize StandbyReply")?; - // This must be a regular postgres replica, - // because pageserver doesn't send this type of messages to safekeeper. - // Currently we just ignore this, tracking progress for them is not supported. + self.ws_guard + .walsenders + .record_standby_reply(self.ws_guard.id, &reply); } Some(NEON_STATUS_UPDATE_TAG_BYTE) => { // pageserver sends this. // Note: deserializing is on m[9..] because we skip the tag byte and len bytes. let buf = Bytes::copy_from_slice(&msg[9..]); - let reply = PageserverFeedback::parse(buf); + let ps_feedback = PageserverFeedback::parse(buf); - trace!("PageserverFeedback is {:?}", reply); - self.feedback.pageserver_feedback = Some(reply); - - self.tli - .update_replica_state(self.replica_id, self.feedback); + trace!("PageserverFeedback is {:?}", ps_feedback); + self.ws_guard + .walsenders + .record_ps_feedback(self.ws_guard.id, &ps_feedback); + // in principle new remote_consistent_lsn could allow to + // deactivate the timeline, but we check that regularly through + // broker updated, not need to do it here } _ => warn!("unexpected message {:?}", msg), } @@ -368,3 +646,89 @@ async fn wait_for_lsn(rx: &mut Receiver, lsn: Lsn) -> anyhow::Result