mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-26 14:50:36 +00:00
Compare commits
1 Commits
prefetch_s
...
debug-port
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fb4b6ce8dc |
40
.github/actions/neon-branch-create/action.yml
vendored
40
.github/actions/neon-branch-create/action.yml
vendored
@@ -5,12 +5,12 @@ inputs:
|
||||
api_key:
|
||||
desctiption: 'Neon API key'
|
||||
required: true
|
||||
environment:
|
||||
desctiption: 'dev (aka captest) or staging'
|
||||
required: true
|
||||
project_id:
|
||||
desctiption: 'ID of the Project to create Branch in'
|
||||
required: true
|
||||
api_host:
|
||||
desctiption: 'Neon API host'
|
||||
default: console.stage.neon.tech
|
||||
outputs:
|
||||
dsn:
|
||||
description: 'Created Branch DSN (for main database)'
|
||||
@@ -22,6 +22,27 @@ outputs:
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Parse Input
|
||||
id: parse-input
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
case "${ENVIRONMENT}" in
|
||||
dev)
|
||||
API_HOST=console.dev.neon.tech
|
||||
;;
|
||||
staging)
|
||||
API_HOST=console.stage.neon.tech
|
||||
;;
|
||||
*)
|
||||
echo 2>&1 "Unknown environment=${ENVIRONMENT}. Allowed 'dev' or 'staging' only"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "api_host=${API_HOST}" >> $GITHUB_OUTPUT
|
||||
env:
|
||||
ENVIRONMENT: ${{ inputs.environment }}
|
||||
|
||||
- name: Create New Branch
|
||||
id: create-branch
|
||||
shell: bash -euxo pipefail {0}
|
||||
@@ -35,12 +56,7 @@ runs:
|
||||
--data "{
|
||||
\"branch\": {
|
||||
\"name\": \"Created by actions/neon-branch-create; GITHUB_RUN_ID=${GITHUB_RUN_ID} at $(date +%s)\"
|
||||
},
|
||||
\"endpoints\": [
|
||||
{
|
||||
\"type\": \"read_write\"
|
||||
}
|
||||
]
|
||||
}
|
||||
}")
|
||||
|
||||
if [ -z "${branch}" ]; then
|
||||
@@ -68,8 +84,8 @@ runs:
|
||||
host=$(echo $branch | jq --raw-output '.endpoints[0].host')
|
||||
echo "host=${host}" >> $GITHUB_OUTPUT
|
||||
env:
|
||||
API_HOST: ${{ inputs.api_host }}
|
||||
API_KEY: ${{ inputs.api_key }}
|
||||
API_HOST: ${{ steps.parse-input.outputs.api_host }}
|
||||
PROJECT_ID: ${{ inputs.project_id }}
|
||||
|
||||
- name: Get Role name
|
||||
@@ -87,8 +103,8 @@ runs:
|
||||
role_name=$(echo $roles | jq --raw-output '.roles[] | select(.protected == false) | .name')
|
||||
echo "role_name=${role_name}" >> $GITHUB_OUTPUT
|
||||
env:
|
||||
API_HOST: ${{ inputs.api_host }}
|
||||
API_KEY: ${{ inputs.api_key }}
|
||||
API_HOST: ${{ steps.parse-input.outputs.api_host }}
|
||||
PROJECT_ID: ${{ inputs.project_id }}
|
||||
BRANCH_ID: ${{ steps.create-branch.outputs.branch_id }}
|
||||
|
||||
@@ -130,8 +146,8 @@ runs:
|
||||
echo "::add-mask::${dsn}"
|
||||
echo "dsn=${dsn}" >> $GITHUB_OUTPUT
|
||||
env:
|
||||
API_HOST: ${{ inputs.api_host }}
|
||||
API_KEY: ${{ inputs.api_key }}
|
||||
API_HOST: ${{ steps.parse-input.outputs.api_host }}
|
||||
PROJECT_ID: ${{ inputs.project_id }}
|
||||
BRANCH_ID: ${{ steps.create-branch.outputs.branch_id }}
|
||||
ROLE_NAME: ${{ steps.role-name.outputs.role_name }}
|
||||
|
||||
29
.github/actions/neon-branch-delete/action.yml
vendored
29
.github/actions/neon-branch-delete/action.yml
vendored
@@ -5,19 +5,40 @@ inputs:
|
||||
api_key:
|
||||
desctiption: 'Neon API key'
|
||||
required: true
|
||||
environment:
|
||||
desctiption: 'dev (aka captest) or staging'
|
||||
required: true
|
||||
project_id:
|
||||
desctiption: 'ID of the Project which should be deleted'
|
||||
required: true
|
||||
branch_id:
|
||||
desctiption: 'ID of the branch to delete'
|
||||
required: true
|
||||
api_host:
|
||||
desctiption: 'Neon API host'
|
||||
default: console.stage.neon.tech
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Parse Input
|
||||
id: parse-input
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
case "${ENVIRONMENT}" in
|
||||
dev)
|
||||
API_HOST=console.dev.neon.tech
|
||||
;;
|
||||
staging)
|
||||
API_HOST=console.stage.neon.tech
|
||||
;;
|
||||
*)
|
||||
echo 2>&1 "Unknown environment=${ENVIRONMENT}. Allowed 'dev' or 'staging' only"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "api_host=${API_HOST}" >> $GITHUB_OUTPUT
|
||||
env:
|
||||
ENVIRONMENT: ${{ inputs.environment }}
|
||||
|
||||
- name: Delete Branch
|
||||
# Do not try to delete a branch if .github/actions/neon-project-create
|
||||
# or .github/actions/neon-branch-create failed before
|
||||
@@ -52,7 +73,7 @@ runs:
|
||||
exit 1
|
||||
fi
|
||||
env:
|
||||
API_HOST: ${{ inputs.api_host }}
|
||||
API_KEY: ${{ inputs.api_key }}
|
||||
PROJECT_ID: ${{ inputs.project_id }}
|
||||
BRANCH_ID: ${{ inputs.branch_id }}
|
||||
API_HOST: ${{ steps.parse-input.outputs.api_host }}
|
||||
|
||||
43
.github/actions/neon-project-create/action.yml
vendored
43
.github/actions/neon-project-create/action.yml
vendored
@@ -5,16 +5,12 @@ inputs:
|
||||
api_key:
|
||||
desctiption: 'Neon API key'
|
||||
required: true
|
||||
environment:
|
||||
desctiption: 'dev (aka captest) or staging'
|
||||
required: true
|
||||
region_id:
|
||||
desctiption: 'Region ID, if not set the project will be created in the default region'
|
||||
default: aws-us-east-2
|
||||
postgres_version:
|
||||
desctiption: 'Postgres version; default is 15'
|
||||
default: 15
|
||||
api_host:
|
||||
desctiption: 'Neon API host'
|
||||
default: console.stage.neon.tech
|
||||
|
||||
required: false
|
||||
outputs:
|
||||
dsn:
|
||||
description: 'Created Project DSN (for main database)'
|
||||
@@ -26,6 +22,31 @@ outputs:
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Parse Input
|
||||
id: parse-input
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
case "${ENVIRONMENT}" in
|
||||
dev)
|
||||
API_HOST=console.dev.neon.tech
|
||||
REGION_ID=${REGION_ID:-aws-eu-west-1}
|
||||
;;
|
||||
staging)
|
||||
API_HOST=console.stage.neon.tech
|
||||
REGION_ID=${REGION_ID:-aws-us-east-2}
|
||||
;;
|
||||
*)
|
||||
echo 2>&1 "Unknown environment=${ENVIRONMENT}. Allowed 'dev' or 'staging' only"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "api_host=${API_HOST}" >> $GITHUB_OUTPUT
|
||||
echo "region_id=${REGION_ID}" >> $GITHUB_OUTPUT
|
||||
env:
|
||||
ENVIRONMENT: ${{ inputs.environment }}
|
||||
REGION_ID: ${{ inputs.region_id }}
|
||||
|
||||
- name: Create Neon Project
|
||||
id: create-neon-project
|
||||
# A shell without `set -x` to not to expose password/dsn in logs
|
||||
@@ -40,7 +61,6 @@ runs:
|
||||
--data "{
|
||||
\"project\": {
|
||||
\"name\": \"Created by actions/neon-project-create; GITHUB_RUN_ID=${GITHUB_RUN_ID}\",
|
||||
\"pg_version\": ${POSTGRES_VERSION},
|
||||
\"region_id\": \"${REGION_ID}\",
|
||||
\"settings\": { }
|
||||
}
|
||||
@@ -56,7 +76,6 @@ runs:
|
||||
project_id=$(echo $project | jq --raw-output '.project.id')
|
||||
echo "project_id=${project_id}" >> $GITHUB_OUTPUT
|
||||
env:
|
||||
API_HOST: ${{ inputs.api_host }}
|
||||
API_KEY: ${{ inputs.api_key }}
|
||||
REGION_ID: ${{ inputs.region_id }}
|
||||
POSTGRES_VERSION: ${{ inputs.postgres_version }}
|
||||
API_HOST: ${{ steps.parse-input.outputs.api_host }}
|
||||
REGION_ID: ${{ steps.parse-input.outputs.region_id }}
|
||||
|
||||
29
.github/actions/neon-project-delete/action.yml
vendored
29
.github/actions/neon-project-delete/action.yml
vendored
@@ -5,16 +5,37 @@ inputs:
|
||||
api_key:
|
||||
desctiption: 'Neon API key'
|
||||
required: true
|
||||
environment:
|
||||
desctiption: 'dev (aka captest) or staging'
|
||||
required: true
|
||||
project_id:
|
||||
desctiption: 'ID of the Project to delete'
|
||||
required: true
|
||||
api_host:
|
||||
desctiption: 'Neon API host'
|
||||
default: console.stage.neon.tech
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Parse Input
|
||||
id: parse-input
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: |
|
||||
case "${ENVIRONMENT}" in
|
||||
dev)
|
||||
API_HOST=console.dev.neon.tech
|
||||
;;
|
||||
staging)
|
||||
API_HOST=console.stage.neon.tech
|
||||
;;
|
||||
*)
|
||||
echo 2>&1 "Unknown environment=${ENVIRONMENT}. Allowed 'dev' or 'staging' only"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "api_host=${API_HOST}" >> $GITHUB_OUTPUT
|
||||
env:
|
||||
ENVIRONMENT: ${{ inputs.environment }}
|
||||
|
||||
- name: Delete Neon Project
|
||||
# Do not try to delete a project if .github/actions/neon-project-create failed before
|
||||
if: ${{ inputs.project_id != '' }}
|
||||
@@ -28,6 +49,6 @@ runs:
|
||||
--header "Content-Type: application/json" \
|
||||
--header "Authorization: Bearer ${API_KEY}"
|
||||
env:
|
||||
API_HOST: ${{ inputs.api_host }}
|
||||
API_KEY: ${{ inputs.api_key }}
|
||||
PROJECT_ID: ${{ inputs.project_id }}
|
||||
API_HOST: ${{ steps.parse-input.outputs.api_host }}
|
||||
|
||||
6
.github/ansible/scripts/init_pageserver.sh
vendored
6
.github/ansible/scripts/init_pageserver.sh
vendored
@@ -1,8 +1,7 @@
|
||||
#!/bin/sh
|
||||
|
||||
# fetch params from meta-data service
|
||||
# get instance id from meta-data service
|
||||
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
|
||||
AZ_ID=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone)
|
||||
|
||||
# store fqdn hostname in var
|
||||
HOST=$(hostname -f)
|
||||
@@ -17,8 +16,7 @@ cat <<EOF | tee /tmp/payload
|
||||
"instance_id": "${INSTANCE_ID}",
|
||||
"http_host": "${HOST}",
|
||||
"http_port": 9898,
|
||||
"active": false,
|
||||
"availability_zone_id": "${AZ_ID}"
|
||||
"active": false
|
||||
}
|
||||
EOF
|
||||
|
||||
|
||||
2
.github/ansible/systemd/pageserver.service
vendored
2
.github/ansible/systemd/pageserver.service
vendored
@@ -5,7 +5,7 @@ After=network.target auditd.service
|
||||
[Service]
|
||||
Type=simple
|
||||
User=pageserver
|
||||
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/v14/lib SENTRY_DSN={{ SENTRY_URL_PAGESERVER }}
|
||||
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/v14/lib
|
||||
ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -c "broker_endpoints=['{{ etcd_endpoints }}']" -D /storage/pageserver/data
|
||||
ExecReload=/bin/kill -HUP $MAINPID
|
||||
KillMode=mixed
|
||||
|
||||
2
.github/ansible/systemd/safekeeper.service
vendored
2
.github/ansible/systemd/safekeeper.service
vendored
@@ -5,7 +5,7 @@ After=network.target auditd.service
|
||||
[Service]
|
||||
Type=simple
|
||||
User=safekeeper
|
||||
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/v14/lib SENTRY_DSN={{ SENTRY_URL_SAFEKEEPER }}
|
||||
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/v14/lib
|
||||
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}{{ hostname_suffix }}:6500 --listen-http {{ inventory_hostname }}{{ hostname_suffix }}:7676 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ safekeeper_s3_prefix }}"}'
|
||||
ExecReload=/bin/kill -HUP $MAINPID
|
||||
KillMode=mixed
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
# Helm chart values for neon-storage-broker
|
||||
podLabels:
|
||||
neon_env: staging
|
||||
neon_service: storage-broker
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: nginx-internal
|
||||
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
|
||||
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
|
||||
|
||||
hosts:
|
||||
- host: storage-broker.zeta.eu-west-1.internal.aws.neon.build
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- hosts:
|
||||
- storage-broker.zeta.eu-west-1.internal.aws.neon.build
|
||||
secretName: storage-broker-tls
|
||||
|
||||
|
||||
metrics:
|
||||
enabled: false
|
||||
|
||||
extraManifests:
|
||||
- apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMServiceScrape
|
||||
metadata:
|
||||
name: "{{ include \"neon-storage-broker.fullname\" . }}"
|
||||
labels:
|
||||
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
|
||||
app.kubernetes.io/name: neon-storage-broker
|
||||
app.kubernetes.io/instance: neon-storage-broker
|
||||
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
namespace: "{{ .Release.Namespace }}"
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: "neon-storage-broker"
|
||||
endpoints:
|
||||
- port: broker
|
||||
path: /metrics
|
||||
interval: 10s
|
||||
scrapeTimeout: 10s
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- "{{ .Release.Namespace }}"
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
# Helm chart values for neon-storage-broker
|
||||
podLabels:
|
||||
neon_env: staging
|
||||
neon_service: storage-broker
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: nginx-internal
|
||||
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
|
||||
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
|
||||
|
||||
hosts:
|
||||
- host: storage-broker.beta.us-east-2.internal.aws.neon.build
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- hosts:
|
||||
- storage-broker.beta.us-east-2.internal.aws.neon.build
|
||||
secretName: storage-broker-tls
|
||||
|
||||
|
||||
metrics:
|
||||
enabled: false
|
||||
|
||||
extraManifests:
|
||||
- apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMServiceScrape
|
||||
metadata:
|
||||
name: "{{ include \"neon-storage-broker.fullname\" . }}"
|
||||
labels:
|
||||
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
|
||||
app.kubernetes.io/name: neon-storage-broker
|
||||
app.kubernetes.io/instance: neon-storage-broker
|
||||
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
namespace: "{{ .Release.Namespace }}"
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: "neon-storage-broker"
|
||||
endpoints:
|
||||
- port: broker
|
||||
path: /metrics
|
||||
interval: 10s
|
||||
scrapeTimeout: 10s
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- "{{ .Release.Namespace }}"
|
||||
|
||||
@@ -1,54 +0,0 @@
|
||||
# Helm chart values for neon-storage-broker
|
||||
podLabels:
|
||||
neon_env: neon-stress
|
||||
neon_service: storage-broker
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: alb
|
||||
alb.ingress.kubernetes.io/healthcheck-path: /status
|
||||
alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]'
|
||||
alb.ingress.kubernetes.io/scheme: "internal"
|
||||
alb.ingress.kubernetes.io/target-type: "ip"
|
||||
alb.ingress.kubernetes.io/ssl-redirect: "443"
|
||||
alb.ingress.kubernetes.io/backend-protocol-version: "GRPC"
|
||||
|
||||
hosts:
|
||||
- host: storage-broker-stress.stage.neon.tech
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
|
||||
metrics:
|
||||
enabled: true
|
||||
serviceMonitor:
|
||||
enabled: true
|
||||
selector:
|
||||
release: kube-prometheus-stack
|
||||
|
||||
extraManifests:
|
||||
- apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMServiceScrape
|
||||
metadata:
|
||||
name: "{{ include \"neon-storage-broker.fullname\" . }}"
|
||||
labels:
|
||||
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
|
||||
app.kubernetes.io/name: neon-storage-broker
|
||||
app.kubernetes.io/instance: neon-storage-broker
|
||||
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
namespace: "{{ .Release.Namespace }}"
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: "neon-storage-broker"
|
||||
endpoints:
|
||||
- port: broker
|
||||
path: /metrics
|
||||
interval: 10s
|
||||
scrapeTimeout: 10s
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- "{{ .Release.Namespace }}"
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
# Helm chart values for neon-storage-broker
|
||||
podLabels:
|
||||
neon_env: production
|
||||
neon_service: storage-broker
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: nginx-internal
|
||||
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
|
||||
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
|
||||
|
||||
hosts:
|
||||
- host: storage-broker.epsilon.ap-southeast-1.internal.aws.neon.tech
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- hosts:
|
||||
- storage-broker.epsilon.ap-southeast-1.internal.aws.neon.tech
|
||||
secretName: storage-broker-tls
|
||||
|
||||
|
||||
metrics:
|
||||
enabled: false
|
||||
|
||||
extraManifests:
|
||||
- apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMServiceScrape
|
||||
metadata:
|
||||
name: "{{ include \"neon-storage-broker.fullname\" . }}"
|
||||
labels:
|
||||
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
|
||||
app.kubernetes.io/name: neon-storage-broker
|
||||
app.kubernetes.io/instance: neon-storage-broker
|
||||
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
namespace: "{{ .Release.Namespace }}"
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: "neon-storage-broker"
|
||||
endpoints:
|
||||
- port: broker
|
||||
path: /metrics
|
||||
interval: 10s
|
||||
scrapeTimeout: 10s
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- "{{ .Release.Namespace }}"
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
# Helm chart values for neon-storage-broker
|
||||
podLabels:
|
||||
neon_env: production
|
||||
neon_service: storage-broker
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: nginx-internal
|
||||
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
|
||||
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
|
||||
|
||||
hosts:
|
||||
- host: storage-broker.gamma.eu-central-1.internal.aws.neon.tech
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- hosts:
|
||||
- storage-broker.gamma.eu-central-1.internal.aws.neon.tech
|
||||
secretName: storage-broker-tls
|
||||
|
||||
|
||||
metrics:
|
||||
enabled: false
|
||||
|
||||
extraManifests:
|
||||
- apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMServiceScrape
|
||||
metadata:
|
||||
name: "{{ include \"neon-storage-broker.fullname\" . }}"
|
||||
labels:
|
||||
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
|
||||
app.kubernetes.io/name: neon-storage-broker
|
||||
app.kubernetes.io/instance: neon-storage-broker
|
||||
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
namespace: "{{ .Release.Namespace }}"
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: "neon-storage-broker"
|
||||
endpoints:
|
||||
- port: broker
|
||||
path: /metrics
|
||||
interval: 10s
|
||||
scrapeTimeout: 10s
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- "{{ .Release.Namespace }}"
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
# Helm chart values for neon-storage-broker
|
||||
podLabels:
|
||||
neon_env: production
|
||||
neon_service: storage-broker
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: nginx-internal
|
||||
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
|
||||
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
|
||||
|
||||
hosts:
|
||||
- host: storage-broker.delta.us-east-2.internal.aws.neon.tech
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- hosts:
|
||||
- storage-broker.delta.us-east-2.internal.aws.neon.tech
|
||||
secretName: storage-broker-tls
|
||||
|
||||
|
||||
metrics:
|
||||
enabled: false
|
||||
|
||||
extraManifests:
|
||||
- apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMServiceScrape
|
||||
metadata:
|
||||
name: "{{ include \"neon-storage-broker.fullname\" . }}"
|
||||
labels:
|
||||
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
|
||||
app.kubernetes.io/name: neon-storage-broker
|
||||
app.kubernetes.io/instance: neon-storage-broker
|
||||
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
namespace: "{{ .Release.Namespace }}"
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: "neon-storage-broker"
|
||||
endpoints:
|
||||
- port: broker
|
||||
path: /metrics
|
||||
interval: 10s
|
||||
scrapeTimeout: 10s
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- "{{ .Release.Namespace }}"
|
||||
|
||||
@@ -1,31 +0,0 @@
|
||||
# Helm chart values for neon-proxy-scram.
|
||||
# This is a YAML-formatted file.
|
||||
|
||||
image:
|
||||
repository: neondatabase/neon
|
||||
|
||||
settings:
|
||||
authBackend: "console"
|
||||
authEndpoint: "http://console-release.local/management/api/v2"
|
||||
domain: "*.us-west-2.aws.neon.tech"
|
||||
|
||||
# -- Additional labels for neon-proxy pods
|
||||
podLabels:
|
||||
zenith_service: proxy-scram
|
||||
zenith_env: prod
|
||||
zenith_region: us-west-2
|
||||
zenith_region_slug: us-west-2
|
||||
|
||||
exposedService:
|
||||
annotations:
|
||||
service.beta.kubernetes.io/aws-load-balancer-type: external
|
||||
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
|
||||
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
|
||||
external-dns.alpha.kubernetes.io/hostname: us-west-2.aws.neon.tech
|
||||
|
||||
#metrics:
|
||||
# enabled: true
|
||||
# serviceMonitor:
|
||||
# enabled: true
|
||||
# selector:
|
||||
# release: kube-prometheus-stack
|
||||
@@ -1,53 +0,0 @@
|
||||
# Helm chart values for neon-storage-broker
|
||||
podLabels:
|
||||
neon_env: production
|
||||
neon_service: storage-broker
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: nginx-internal
|
||||
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
|
||||
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
|
||||
|
||||
hosts:
|
||||
- host: storage-broker.eta.us-west-2.internal.aws.neon.tech
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- hosts:
|
||||
- storage-broker.eta.us-west-2.internal.aws.neon.tech
|
||||
secretName: storage-broker-tls
|
||||
|
||||
|
||||
metrics:
|
||||
enabled: false
|
||||
|
||||
extraManifests:
|
||||
- apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMServiceScrape
|
||||
metadata:
|
||||
name: "{{ include \"neon-storage-broker.fullname\" . }}"
|
||||
labels:
|
||||
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
|
||||
app.kubernetes.io/name: neon-storage-broker
|
||||
app.kubernetes.io/instance: neon-storage-broker
|
||||
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
namespace: "{{ .Release.Namespace }}"
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: "neon-storage-broker"
|
||||
endpoints:
|
||||
- port: broker
|
||||
path: /metrics
|
||||
interval: 10s
|
||||
scrapeTimeout: 10s
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- "{{ .Release.Namespace }}"
|
||||
|
||||
@@ -1,54 +0,0 @@
|
||||
# Helm chart values for neon-storage-broker
|
||||
podLabels:
|
||||
neon_env: production
|
||||
neon_service: storage-broker
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: alb
|
||||
alb.ingress.kubernetes.io/healthcheck-path: /status
|
||||
alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]'
|
||||
alb.ingress.kubernetes.io/scheme: "internal"
|
||||
alb.ingress.kubernetes.io/target-type: "ip"
|
||||
alb.ingress.kubernetes.io/ssl-redirect: "443"
|
||||
alb.ingress.kubernetes.io/backend-protocol-version: "GRPC"
|
||||
|
||||
hosts:
|
||||
- host: storage-broker.neon.tech
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
|
||||
metrics:
|
||||
enabled: true
|
||||
serviceMonitor:
|
||||
enabled: true
|
||||
selector:
|
||||
release: kube-prometheus-stack
|
||||
|
||||
extraManifests:
|
||||
- apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMServiceScrape
|
||||
metadata:
|
||||
name: "{{ include \"neon-storage-broker.fullname\" . }}"
|
||||
labels:
|
||||
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
|
||||
app.kubernetes.io/name: neon-storage-broker
|
||||
app.kubernetes.io/instance: neon-storage-broker
|
||||
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
namespace: "{{ .Release.Namespace }}"
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: "neon-storage-broker"
|
||||
endpoints:
|
||||
- port: broker
|
||||
path: /metrics
|
||||
interval: 10s
|
||||
scrapeTimeout: 10s
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- "{{ .Release.Namespace }}"
|
||||
|
||||
@@ -1,54 +0,0 @@
|
||||
# Helm chart values for neon-storage-broker
|
||||
podLabels:
|
||||
neon_env: staging
|
||||
neon_service: storage-broker
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: alb
|
||||
alb.ingress.kubernetes.io/healthcheck-path: /status
|
||||
alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]'
|
||||
alb.ingress.kubernetes.io/scheme: "internal"
|
||||
alb.ingress.kubernetes.io/target-type: "ip"
|
||||
alb.ingress.kubernetes.io/ssl-redirect: "443"
|
||||
alb.ingress.kubernetes.io/backend-protocol-version: "GRPC"
|
||||
|
||||
hosts:
|
||||
- host: storage-broker.stage.neon.tech
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
|
||||
metrics:
|
||||
enabled: true
|
||||
serviceMonitor:
|
||||
enabled: true
|
||||
selector:
|
||||
release: kube-prometheus-stack
|
||||
|
||||
extraManifests:
|
||||
- apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMServiceScrape
|
||||
metadata:
|
||||
name: "{{ include \"neon-storage-broker.fullname\" . }}"
|
||||
labels:
|
||||
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
|
||||
app.kubernetes.io/name: neon-storage-broker
|
||||
app.kubernetes.io/instance: neon-storage-broker
|
||||
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
namespace: "{{ .Release.Namespace }}"
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: "neon-storage-broker"
|
||||
endpoints:
|
||||
- port: broker
|
||||
path: /metrics
|
||||
interval: 10s
|
||||
scrapeTimeout: 10s
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- "{{ .Release.Namespace }}"
|
||||
|
||||
281
.github/workflows/benchmarking.yml
vendored
281
.github/workflows/benchmarking.yml
vendored
@@ -15,6 +15,9 @@ on:
|
||||
|
||||
workflow_dispatch: # adds ability to run this manually
|
||||
inputs:
|
||||
environment:
|
||||
description: 'Environment to run remote tests on (dev or staging)'
|
||||
required: false
|
||||
region_id:
|
||||
description: 'Use a particular region. If not set the default region will be used'
|
||||
required: false
|
||||
@@ -34,69 +37,103 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
bench:
|
||||
env:
|
||||
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
|
||||
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
DEFAULT_PG_VERSION: 14
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
BUILD_TYPE: remote
|
||||
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
|
||||
PLATFORM: "neon-staging"
|
||||
# this workflow runs on self hosteed runner
|
||||
# it's environment is quite different from usual guthub runner
|
||||
# probably the most important difference is that it doesn't start from clean workspace each time
|
||||
# e g if you install system packages they are not cleaned up since you install them directly in host machine
|
||||
# not a container or something
|
||||
# See documentation for more info: https://docs.github.com/en/actions/hosting-your-own-runners/about-self-hosted-runners
|
||||
runs-on: [self-hosted, zenith-benchmarker]
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
env:
|
||||
POSTGRES_DISTRIB_DIR: /usr/pgsql
|
||||
DEFAULT_PG_VERSION: 14
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Checkout zenith repo
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
# actions/setup-python@v2 is not working correctly on self-hosted runners
|
||||
# see https://github.com/actions/setup-python/issues/162
|
||||
# and probably https://github.com/actions/setup-python/issues/162#issuecomment-865387976 in particular
|
||||
# so the simplest solution to me is to use already installed system python and spin virtualenvs for job runs.
|
||||
# there is Python 3.7.10 already installed on the machine so use it to install poetry and then use poetry's virtuealenvs
|
||||
- name: Install poetry & deps
|
||||
run: |
|
||||
python3 -m pip install --upgrade poetry wheel
|
||||
# since pip/poetry caches are reused there shouldn't be any troubles with install every time
|
||||
./scripts/pysync
|
||||
|
||||
- name: Show versions
|
||||
run: |
|
||||
echo Python
|
||||
python3 --version
|
||||
poetry run python3 --version
|
||||
echo Poetry
|
||||
poetry --version
|
||||
echo Pgbench
|
||||
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
|
||||
|
||||
- name: Create Neon Project
|
||||
id: create-neon-project
|
||||
uses: ./.github/actions/neon-project-create
|
||||
with:
|
||||
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
|
||||
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
environment: ${{ github.event.inputs.environment || 'staging' }}
|
||||
api_key: ${{ ( github.event.inputs.environment || 'staging' ) == 'staging' && secrets.NEON_STAGING_API_KEY || secrets.NEON_CAPTEST_API_KEY }}
|
||||
|
||||
- name: Run benchmark
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
test_selection: performance
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
# Set --sparse-ordering option of pytest-order plugin
|
||||
# to ensure tests are running in order of appears in the file.
|
||||
# It's important for test_perf_pgbench.py::test_pgbench_remote_* tests
|
||||
extra_params: -m remote_cluster --sparse-ordering --timeout 5400 --ignore test_runner/performance/test_perf_olap.py
|
||||
# pgbench is installed system wide from official repo
|
||||
# https://download.postgresql.org/pub/repos/yum/13/redhat/rhel-7-x86_64/
|
||||
# via
|
||||
# sudo tee /etc/yum.repos.d/pgdg.repo<<EOF
|
||||
# [pgdg13]
|
||||
# name=PostgreSQL 13 for RHEL/CentOS 7 - x86_64
|
||||
# baseurl=https://download.postgresql.org/pub/repos/yum/13/redhat/rhel-7-x86_64/
|
||||
# enabled=1
|
||||
# gpgcheck=0
|
||||
# EOF
|
||||
# sudo yum makecache
|
||||
# sudo yum install postgresql13-contrib
|
||||
# actual binaries are located in /usr/pgsql-13/bin/
|
||||
env:
|
||||
# The pgbench test runs two tests of given duration against each scale.
|
||||
# So the total runtime with these parameters is 2 * 2 * 300 = 1200, or 20 minutes.
|
||||
# Plus time needed to initialize the test databases.
|
||||
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
|
||||
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
|
||||
PLATFORM: "neon-staging"
|
||||
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
|
||||
REMOTE_ENV: "1" # indicate to test harness that we do not have zenith binaries locally
|
||||
run: |
|
||||
# just to be sure that no data was cached on self hosted runner
|
||||
# since it might generate duplicates when calling ingest_perf_test_result.py
|
||||
rm -rf perf-report-staging
|
||||
mkdir -p perf-report-staging
|
||||
# Set --sparse-ordering option of pytest-order plugin to ensure tests are running in order of appears in the file,
|
||||
# it's important for test_perf_pgbench.py::test_pgbench_remote_* tests.
|
||||
# Do not run tests from test_runner/performance/test_perf_olap.py because they require a prepared DB. We run them separately in `clickbench-compare` job.
|
||||
./scripts/pytest test_runner/performance/ -v \
|
||||
-m "remote_cluster" \
|
||||
--sparse-ordering \
|
||||
--out-dir perf-report-staging \
|
||||
--timeout 5400 \
|
||||
--ignore test_runner/performance/test_perf_olap.py
|
||||
|
||||
- name: Submit result
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
run: |
|
||||
REPORT_FROM=$(realpath perf-report-staging) REPORT_TO=staging scripts/generate_and_push_perf_report.sh
|
||||
|
||||
- name: Delete Neon Project
|
||||
if: ${{ always() }}
|
||||
uses: ./.github/actions/neon-project-delete
|
||||
with:
|
||||
environment: staging
|
||||
project_id: ${{ steps.create-neon-project.outputs.project_id }}
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
|
||||
- name: Create Allure report
|
||||
if: success() || failure()
|
||||
uses: ./.github/actions/allure-report
|
||||
with:
|
||||
action: generate
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
uses: slackapi/slack-github-action@v1
|
||||
@@ -115,22 +152,15 @@ jobs:
|
||||
# neon-captest-prefetch: Same, with prefetching enabled (new project)
|
||||
# rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
|
||||
# rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
|
||||
platform: [ neon-captest-new, neon-captest-prefetch, rds-postgres ]
|
||||
platform: [ neon-captest-new, neon-captest-reuse, neon-captest-prefetch, rds-postgres ]
|
||||
db_size: [ 10gb ]
|
||||
runner: [ us-east-2 ]
|
||||
include:
|
||||
- platform: neon-captest-reuse
|
||||
db_size: 10gb
|
||||
runner: dev # TODO: Switch to us-east-2 after dry-bonus-223539 migration to staging
|
||||
- platform: neon-captest-new
|
||||
db_size: 50gb
|
||||
runner: us-east-2
|
||||
- platform: neon-captest-prefetch
|
||||
db_size: 50gb
|
||||
runner: us-east-2
|
||||
- platform: rds-aurora
|
||||
db_size: 50gb
|
||||
runner: us-east-2
|
||||
|
||||
env:
|
||||
TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
|
||||
@@ -142,9 +172,9 @@ jobs:
|
||||
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
|
||||
PLATFORM: ${{ matrix.platform }}
|
||||
|
||||
runs-on: [ self-hosted, "${{ matrix.runner }}", x64 ]
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:pinned
|
||||
options: --init
|
||||
|
||||
timeout-minutes: 360 # 6h
|
||||
@@ -169,9 +199,8 @@ jobs:
|
||||
id: create-neon-project
|
||||
uses: ./.github/actions/neon-project-create
|
||||
with:
|
||||
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
|
||||
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
environment: ${{ github.event.inputs.environment || 'dev' }}
|
||||
api_key: ${{ ( github.event.inputs.environment || 'dev' ) == 'staging' && secrets.NEON_STAGING_API_KEY || secrets.NEON_CAPTEST_API_KEY }}
|
||||
|
||||
- name: Set up Connection String
|
||||
id: set-up-connstr
|
||||
@@ -202,11 +231,8 @@ jobs:
|
||||
- name: Set database options
|
||||
if: matrix.platform == 'neon-captest-prefetch'
|
||||
run: |
|
||||
DB_NAME=$(psql ${BENCHMARK_CONNSTR} --no-align --quiet -t -c "SELECT current_database()")
|
||||
|
||||
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET enable_seqscan_prefetch=on"
|
||||
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET effective_io_concurrency=32"
|
||||
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET maintenance_io_concurrency=32"
|
||||
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE neondb SET enable_seqscan_prefetch=on"
|
||||
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE neondb SET seqscan_prefetch_buffers=10"
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
|
||||
@@ -249,13 +275,6 @@ jobs:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
|
||||
- name: Delete Neon Project
|
||||
if: ${{ steps.create-neon-project.outputs.project_id && always() }}
|
||||
uses: ./.github/actions/neon-project-delete
|
||||
with:
|
||||
project_id: ${{ steps.create-neon-project.outputs.project_id }}
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
|
||||
- name: Create Allure report
|
||||
if: success() || failure()
|
||||
uses: ./.github/actions/allure-report
|
||||
@@ -263,6 +282,14 @@ jobs:
|
||||
action: generate
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
|
||||
- name: Delete Neon Project
|
||||
if: ${{ steps.create-neon-project.outputs.project_id && always() }}
|
||||
uses: ./.github/actions/neon-project-delete
|
||||
with:
|
||||
environment: dev
|
||||
project_id: ${{ steps.create-neon-project.outputs.project_id }}
|
||||
api_key: ${{ secrets.NEON_CAPTEST_API_KEY }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
uses: slackapi/slack-github-action@v1
|
||||
@@ -299,9 +326,9 @@ jobs:
|
||||
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
|
||||
PLATFORM: ${{ matrix.platform }}
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:pinned
|
||||
options: --init
|
||||
|
||||
timeout-minutes: 360 # 6h
|
||||
@@ -347,15 +374,12 @@ jobs:
|
||||
- name: Set database options
|
||||
if: matrix.platform == 'neon-captest-prefetch'
|
||||
run: |
|
||||
DB_NAME=$(psql ${BENCHMARK_CONNSTR} --no-align --quiet -t -c "SELECT current_database()")
|
||||
|
||||
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET enable_seqscan_prefetch=on"
|
||||
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET effective_io_concurrency=32"
|
||||
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET maintenance_io_concurrency=32"
|
||||
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE main SET enable_seqscan_prefetch=on"
|
||||
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE main SET seqscan_prefetch_buffers=10"
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
|
||||
- name: ClickBench benchmark
|
||||
- name: Benchmark clickbench
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
@@ -383,114 +407,3 @@ jobs:
|
||||
slack-message: "Periodic OLAP perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
tpch-compare:
|
||||
# TCP-H DB for rds-aurora and rds-Postgres deployed to the same clusters
|
||||
# we use for performance testing in pgbench-compare & clickbench-compare.
|
||||
# Run this job only when clickbench-compare is finished to avoid the intersection.
|
||||
# We might change it after https://github.com/neondatabase/neon/issues/2900.
|
||||
#
|
||||
# *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB)
|
||||
if: success() || failure()
|
||||
needs: [ clickbench-compare ]
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
# neon-captest-prefetch: We have pre-created projects with prefetch enabled
|
||||
# rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
|
||||
# rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
|
||||
platform: [ neon-captest-prefetch, rds-postgres, rds-aurora ]
|
||||
|
||||
env:
|
||||
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
|
||||
DEFAULT_PG_VERSION: 14
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
BUILD_TYPE: remote
|
||||
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
|
||||
PLATFORM: ${{ matrix.platform }}
|
||||
|
||||
runs-on: [ self-hosted, us-east-2, x64 ]
|
||||
container:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rustlegacy:pinned
|
||||
options: --init
|
||||
|
||||
timeout-minutes: 360 # 6h
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Download Neon artifact
|
||||
uses: ./.github/actions/download
|
||||
with:
|
||||
name: neon-${{ runner.os }}-release-artifact
|
||||
path: /tmp/neon/
|
||||
prefix: latest
|
||||
|
||||
- name: Add Postgres binaries to PATH
|
||||
run: |
|
||||
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
|
||||
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
|
||||
|
||||
- name: Set up Connection String
|
||||
id: set-up-connstr
|
||||
run: |
|
||||
case "${PLATFORM}" in
|
||||
neon-captest-prefetch)
|
||||
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_TPCH_S10_CONNSTR }}
|
||||
;;
|
||||
rds-aurora)
|
||||
CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_TPCH_S10_CONNSTR }}
|
||||
;;
|
||||
rds-postgres)
|
||||
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_TPCH_S10_CONNSTR }}
|
||||
;;
|
||||
*)
|
||||
echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-prefetch', 'rds-aurora', or 'rds-postgres'"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
|
||||
|
||||
psql ${CONNSTR} -c "SELECT version();"
|
||||
|
||||
- name: Set database options
|
||||
if: matrix.platform == 'neon-captest-prefetch'
|
||||
run: |
|
||||
DB_NAME=$(psql ${BENCHMARK_CONNSTR} --no-align --quiet -t -c "SELECT current_database()")
|
||||
|
||||
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET enable_seqscan_prefetch=on"
|
||||
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET effective_io_concurrency=32"
|
||||
psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET maintenance_io_concurrency=32"
|
||||
env:
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
|
||||
- name: Run TPC-H benchmark
|
||||
uses: ./.github/actions/run-python-test-set
|
||||
with:
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
test_selection: performance/test_perf_olap.py
|
||||
run_in_parallel: false
|
||||
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
|
||||
extra_params: -m remote_cluster --timeout 21600 -k test_tpch
|
||||
env:
|
||||
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
|
||||
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
|
||||
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
|
||||
|
||||
- name: Create Allure report
|
||||
if: success() || failure()
|
||||
uses: ./.github/actions/allure-report
|
||||
with:
|
||||
action: generate
|
||||
build_type: ${{ env.BUILD_TYPE }}
|
||||
|
||||
- name: Post to a Slack channel
|
||||
if: ${{ github.event.schedule && failure() }}
|
||||
uses: slackapi/slack-github-action@v1
|
||||
with:
|
||||
channel-id: "C033QLM5P7D" # dev-staging-stream
|
||||
slack-message: "Periodic TPC-H perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
|
||||
env:
|
||||
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
|
||||
|
||||
142
.github/workflows/build_and_test.yml
vendored
142
.github/workflows/build_and_test.yml
vendored
@@ -305,7 +305,7 @@ jobs:
|
||||
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
|
||||
options: --init
|
||||
needs: [ regress-tests, benchmarks ]
|
||||
if: ${{ !cancelled() }}
|
||||
if: success() || failure()
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -668,11 +668,11 @@ jobs:
|
||||
- id: set-matrix
|
||||
run: |
|
||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
||||
STAGING='{"env_name": "staging", "proxy_job": "neon-proxy", "proxy_config": "staging.proxy", "storage_broker_ns": "neon-storage-broker", "storage_broker_config": "staging.neon-storage-broker", "kubeconfig_secret": "STAGING_KUBECONFIG_DATA", "console_api_key_secret": "NEON_STAGING_API_KEY"}'
|
||||
NEON_STRESS='{"env_name": "neon-stress", "proxy_job": "neon-stress-proxy", "proxy_config": "neon-stress.proxy", "storage_broker_ns": "neon-stress-storage-broker", "storage_broker_config": "neon-stress.neon-storage-broker", "kubeconfig_secret": "NEON_STRESS_KUBECONFIG_DATA", "console_api_key_secret": "NEON_CAPTEST_API_KEY", storage_broker_config: }'
|
||||
STAGING='{"env_name": "staging", "proxy_job": "neon-proxy", "proxy_config": "staging.proxy", "kubeconfig_secret": "STAGING_KUBECONFIG_DATA", "console_api_key_secret": "NEON_STAGING_API_KEY"}'
|
||||
NEON_STRESS='{"env_name": "neon-stress", "proxy_job": "neon-stress-proxy", "proxy_config": "neon-stress.proxy", "kubeconfig_secret": "NEON_STRESS_KUBECONFIG_DATA", "console_api_key_secret": "NEON_CAPTEST_API_KEY"}'
|
||||
echo "include=[$STAGING, $NEON_STRESS]" >> $GITHUB_OUTPUT
|
||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
||||
PRODUCTION='{"env_name": "production", "proxy_job": "neon-proxy", "proxy_config": "production.proxy", "storage_broker_ns": "neon-storage-broker", "storage_broker_config": "production.neon-storage-broker", "kubeconfig_secret": "PRODUCTION_KUBECONFIG_DATA", "console_api_key_secret": "NEON_PRODUCTION_API_KEY"}'
|
||||
PRODUCTION='{"env_name": "production", "proxy_job": "neon-proxy", "proxy_config": "production.proxy", "kubeconfig_secret": "PRODUCTION_KUBECONFIG_DATA", "console_api_key_secret": "NEON_PRODUCTION_API_KEY"}'
|
||||
echo "include=[$PRODUCTION]" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
|
||||
@@ -732,7 +732,7 @@ jobs:
|
||||
ssh-add ssh-key
|
||||
rm -f ssh-key ssh-key-cert.pub
|
||||
ansible-galaxy collection install sivel.toiletwater
|
||||
ansible-playbook deploy.yaml -i ${{ matrix.env_name }}.hosts.yaml -e CONSOLE_API_TOKEN=${{ secrets[matrix.console_api_key_secret] }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
|
||||
ansible-playbook deploy.yaml -i ${{ matrix.env_name }}.hosts.yaml -e CONSOLE_API_TOKEN=${{ secrets[matrix.console_api_key_secret] }}
|
||||
rm -f neon_install.tar.gz .neon_current_version
|
||||
|
||||
deploy-new:
|
||||
@@ -770,7 +770,7 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
ansible-galaxy collection install sivel.toiletwater
|
||||
ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_STAGING_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
|
||||
ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}}
|
||||
rm -f neon_install.tar.gz .neon_current_version
|
||||
|
||||
deploy-pr-test-new:
|
||||
@@ -803,7 +803,7 @@ jobs:
|
||||
./get_binaries.sh
|
||||
|
||||
ansible-galaxy collection install sivel.toiletwater
|
||||
ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_STAGING_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
|
||||
ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}}
|
||||
rm -f neon_install.tar.gz .neon_current_version
|
||||
|
||||
deploy-prod-new:
|
||||
@@ -843,7 +843,7 @@ jobs:
|
||||
fi
|
||||
|
||||
ansible-galaxy collection install sivel.toiletwater
|
||||
ansible-playbook deploy.yaml -i prod.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_PRODUCTION_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
|
||||
ansible-playbook deploy.yaml -i prod.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_PRODUCTION_API_KEY}}
|
||||
rm -f neon_install.tar.gz .neon_current_version
|
||||
|
||||
deploy-proxy:
|
||||
@@ -885,48 +885,8 @@ jobs:
|
||||
- name: Re-deploy proxy
|
||||
run: |
|
||||
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
|
||||
helm upgrade ${{ matrix.proxy_job }} neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
|
||||
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
|
||||
|
||||
deploy-storage-broker-staging:
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
|
||||
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
|
||||
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
|
||||
if: |
|
||||
(github.ref_name == 'main' || github.ref_name == 'release') &&
|
||||
github.event_name != 'workflow_dispatch'
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
strategy:
|
||||
matrix:
|
||||
include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
|
||||
env:
|
||||
KUBECONFIG: .kubeconfig
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Add curl
|
||||
run: apt update && apt install curl -y
|
||||
|
||||
- name: Store kubeconfig file
|
||||
run: |
|
||||
echo "${{ secrets[matrix.kubeconfig_secret] }}" | base64 --decode > ${KUBECONFIG}
|
||||
chmod 0600 ${KUBECONFIG}
|
||||
|
||||
- name: Setup helm v3
|
||||
run: |
|
||||
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
|
||||
helm repo add neondatabase https://neondatabase.github.io/helm-charts
|
||||
|
||||
- name: Deploy storage-broker
|
||||
run:
|
||||
helm upgrade neon-storage-broker neondatabase/neon-storage-broker --namespace ${{ matrix.storage_broker_ns }} --create-namespace --install --atomic -f .github/helm-values/${{ matrix.storage_broker_config }}.yaml --set image.tag=${{ needs.tag.outputs.build-tag }} --wait --timeout 5m0s
|
||||
helm upgrade ${{ matrix.proxy_job }} neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
|
||||
deploy-proxy-new:
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
@@ -965,53 +925,19 @@ jobs:
|
||||
- name: Re-deploy scram proxy
|
||||
run: |
|
||||
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
|
||||
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
|
||||
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
|
||||
- name: Re-deploy link proxy
|
||||
if: matrix.deploy_link_proxy
|
||||
run: |
|
||||
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
|
||||
helm upgrade neon-proxy-link neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-link.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
|
||||
helm upgrade neon-proxy-link neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-link.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
|
||||
- name: Re-deploy legacy scram proxy
|
||||
if: matrix.deploy_legacy_scram_proxy
|
||||
run: |
|
||||
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
|
||||
helm upgrade neon-proxy-scram-legacy neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram-legacy.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
|
||||
|
||||
deploy-storage-broker-dev-new:
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
|
||||
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
|
||||
needs: [ push-docker-hub, tag, regress-tests ]
|
||||
if: |
|
||||
(github.ref_name == 'main') &&
|
||||
github.event_name != 'workflow_dispatch'
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- target_region: us-east-2
|
||||
target_cluster: dev-us-east-2-beta
|
||||
- target_region: eu-west-1
|
||||
target_cluster: dev-eu-west-1-zeta
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Configure environment
|
||||
run: |
|
||||
helm repo add neondatabase https://neondatabase.github.io/helm-charts
|
||||
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
|
||||
|
||||
- name: Deploy storage-broker
|
||||
run:
|
||||
helm upgrade neon-storage-broker neondatabase/neon-storage-broker --namespace neon-storage-broker --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${{ needs.tag.outputs.build-tag }} --wait --timeout 5m0s
|
||||
helm upgrade neon-proxy-scram-legacy neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram-legacy.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
|
||||
deploy-proxy-prod-new:
|
||||
runs-on: prod
|
||||
@@ -1029,8 +955,6 @@ jobs:
|
||||
include:
|
||||
- target_region: us-east-2
|
||||
target_cluster: prod-us-east-2-delta
|
||||
- target_region: us-west-2
|
||||
target_cluster: prod-us-west-2-eta
|
||||
- target_region: eu-central-1
|
||||
target_cluster: prod-eu-central-1-gamma
|
||||
- target_region: ap-southeast-1
|
||||
@@ -1050,45 +974,7 @@ jobs:
|
||||
- name: Re-deploy proxy
|
||||
run: |
|
||||
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
|
||||
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
|
||||
|
||||
deploy-storage-broker-prod-new:
|
||||
runs-on: prod
|
||||
container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
|
||||
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
|
||||
needs: [ push-docker-hub, tag, regress-tests ]
|
||||
if: |
|
||||
(github.ref_name == 'release') &&
|
||||
github.event_name != 'workflow_dispatch'
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- target_region: us-east-2
|
||||
target_cluster: prod-us-east-2-delta
|
||||
- target_region: us-west-2
|
||||
target_cluster: prod-us-west-2-eta
|
||||
- target_region: eu-central-1
|
||||
target_cluster: prod-eu-central-1-gamma
|
||||
- target_region: ap-southeast-1
|
||||
target_cluster: prod-ap-southeast-1-epsilon
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Configure environment
|
||||
run: |
|
||||
helm repo add neondatabase https://neondatabase.github.io/helm-charts
|
||||
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
|
||||
|
||||
- name: Deploy storage-broker
|
||||
run:
|
||||
helm upgrade neon-storage-broker neondatabase/neon-storage-broker --namespace neon-storage-broker --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${{ needs.tag.outputs.build-tag }} --wait --timeout 5m0s
|
||||
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
|
||||
promote-compatibility-data:
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
|
||||
.github/workflows/pg_clients.yml (6 lines changed, vendored)
@@ -23,7 +23,6 @@ jobs:
|
||||
runs-on: [ ubuntu-latest ]
|
||||
|
||||
env:
|
||||
DEFAULT_PG_VERSION: 14
|
||||
TEST_OUTPUT: /tmp/test_output
|
||||
|
||||
steps:
|
||||
@@ -52,8 +51,8 @@ jobs:
|
||||
id: create-neon-project
|
||||
uses: ./.github/actions/neon-project-create
|
||||
with:
|
||||
environment: staging
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
|
||||
|
||||
- name: Run pytest
|
||||
env:
|
||||
@@ -64,7 +63,7 @@ jobs:
|
||||
run: |
|
||||
# Test framework expects we have psql binary;
|
||||
# but since we don't really need it in this test, let's mock it
|
||||
mkdir -p "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin" && touch "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin/psql";
|
||||
mkdir -p "$POSTGRES_DISTRIB_DIR/v14/bin" && touch "$POSTGRES_DISTRIB_DIR/v14/bin/psql";
|
||||
./scripts/pytest \
|
||||
--junitxml=$TEST_OUTPUT/junit.xml \
|
||||
--tb=short \
|
||||
@@ -76,6 +75,7 @@ jobs:
|
||||
if: ${{ always() }}
|
||||
uses: ./.github/actions/neon-project-delete
|
||||
with:
|
||||
environment: staging
|
||||
project_id: ${{ steps.create-neon-project.outputs.project_id }}
|
||||
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
|
||||
|
||||
|
||||
Cargo.lock (786 lines changed, generated): file diff suppressed because it is too large.
README.md (27 lines changed)
@@ -2,20 +2,29 @@

Neon is a serverless open-source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes the PostgreSQL storage layer by redistributing data across a cluster of nodes.

The project used to be called "Zenith". Many of the commands and code comments
still refer to "zenith", but we are in the process of renaming things.

## Quick start
Try the [Neon Free Tier](https://neon.tech/docs/introduction/technical-preview-free-tier/) to create a serverless Postgres instance. Then connect to it with your preferred Postgres client (psql, dbeaver, etc) or use the online [SQL Editor](https://neon.tech/docs/get-started-with-neon/query-with-neon-sql-editor/). See [Connect from any application](https://neon.tech/docs/connect/connect-from-any-app/) for connection instructions.
[Join the waitlist](https://neon.tech/) for our free tier to receive your serverless Postgres instance. Then connect to it with your preferred Postgres client (psql, dbeaver, etc) or use the online SQL editor.

Alternatively, compile and run the project [locally](#running-local-installation).

## Architecture overview

A Neon installation consists of compute nodes and the Neon storage engine. Compute nodes are stateless PostgreSQL nodes backed by the Neon storage engine.
A Neon installation consists of compute nodes and a Neon storage engine.

Compute nodes are stateless PostgreSQL nodes backed by the Neon storage engine.

The Neon storage engine consists of two major components:
- Pageserver. Scalable storage backend for the compute nodes.
- Safekeepers. The safekeepers form a redundant WAL service that receives WAL from the compute node and stores it durably until it has been processed by the pageserver and uploaded to cloud storage.
- WAL service. The service receives WAL from the compute node and ensures that it is stored durably.

See developer documentation in [/docs/SUMMARY.md](/docs/SUMMARY.md) for more information.
Pageserver consists of:
- Repository - Neon storage implementation.
- WAL receiver - service that receives WAL from the WAL service and stores it in the repository.
- Page service - service that communicates with compute nodes and responds with pages from the repository.
- WAL redo - service that builds pages from base images and WAL records on Page service request.

## Running local installation

@@ -220,20 +229,12 @@ CARGO_BUILD_FLAGS="--features=testing" make

## Documentation

[/docs/](/docs/) contains a top-level overview of all available markdown documentation.
Now we use README files to cover design ideas and overall architecture for each module, plus `rustdoc`-style documentation comments. See also [/docs/](/docs/) for a top-level overview of all available markdown documentation.

- [/docs/sourcetree.md](/docs/sourcetree.md) contains an overview of the source tree layout.

To view your `rustdoc` documentation in a browser, try running `cargo doc --no-deps --open`.

See also README files in some source directories, and `rustdoc`-style documentation comments.

Other resources:

- [SELECT 'Hello, World'](https://neon.tech/blog/hello-world/): Blog post by Nikita Shamgunov on the high-level architecture
- [Architecture decisions in Neon](https://neon.tech/blog/architecture-decisions-in-neon/): Blog post by Heikki Linnakangas
- [Neon: Serverless PostgreSQL!](https://www.youtube.com/watch?v=rES0yzeERns): Presentation on the storage system by Heikki Linnakangas in the CMU Database Group seminar series

### Postgres-specific terms

Due to Neon's very close relation with PostgreSQL internals, numerous specific terms are used.
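The quick start above suggests connecting with any Postgres client. A minimal Rust-flavoured sketch, using the `postgres` crate that already appears elsewhere in this tree; this is illustrative only, the DSN is a placeholder, and a real Neon endpoint normally requires TLS rather than `NoTls`:

```rust
// Hedged sketch, not part of the repository: open a connection and run a query.
use postgres::{Client, NoTls};

fn main() -> Result<(), postgres::Error> {
    // Placeholder DSN; substitute the connection string from your Neon project.
    let mut client = Client::connect("host=localhost user=postgres dbname=postgres", NoTls)?;
    let row = client.query_one("SELECT 1::int", &[])?;
    let one: i32 = row.get(0);
    println!("got {one}");
    Ok(())
}
```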
@@ -5,7 +5,7 @@ edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
chrono = { version = "0.4", default-features = false, features = ["clock"] }
|
||||
chrono = "0.4"
|
||||
clap = "4.0"
|
||||
env_logger = "0.9"
|
||||
futures = "0.3.13"
|
||||
|
||||
@@ -14,19 +14,17 @@
|
||||
|
||||
use std::ffi::OsStr;
|
||||
use std::io::Write;
|
||||
use std::os::unix::prelude::AsRawFd;
|
||||
use std::os::unix::process::CommandExt;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::path::Path;
|
||||
use std::process::{Child, Command};
|
||||
use std::time::Duration;
|
||||
use std::{fs, io, thread};
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use nix::errno::Errno;
|
||||
use nix::fcntl::{FcntlArg, FdFlag};
|
||||
use nix::sys::signal::{kill, Signal};
|
||||
use nix::unistd::Pid;
|
||||
use utils::pid_file::{self, PidFileRead};
|
||||
|
||||
use utils::lock_file;
|
||||
|
||||
// These constants control the loop used to poll for process start / stop.
|
||||
//
|
||||
@@ -88,14 +86,6 @@ where
|
||||
let filled_cmd = fill_aws_secrets_vars(fill_rust_env_vars(background_command));
|
||||
filled_cmd.envs(envs);
|
||||
|
||||
let pid_file_to_check = match initial_pid_file {
|
||||
InitialPidFile::Create(path) => {
|
||||
pre_exec_create_pidfile(filled_cmd, path);
|
||||
path
|
||||
}
|
||||
InitialPidFile::Expect(path) => path,
|
||||
};
|
||||
|
||||
let mut spawned_process = filled_cmd.spawn().with_context(|| {
|
||||
format!("Could not spawn {process_name}, see console output and log files for details.")
|
||||
})?;
|
||||
@@ -105,8 +95,29 @@ where
|
||||
.with_context(|| format!("Subprocess {process_name} has invalid pid {pid}"))?,
|
||||
);
|
||||
|
||||
let pid_file_to_check = match initial_pid_file {
|
||||
InitialPidFile::Create(target_pid_file_path) => {
|
||||
match lock_file::create_lock_file(target_pid_file_path, pid.to_string()) {
|
||||
lock_file::LockCreationResult::Created { .. } => {
|
||||
// We use "lock" file here only to create the pid file. The lock on the pidfile will be dropped as soon
|
||||
// as this CLI invocation exits, so it's a bit useless, but doesn't do any harm either.
|
||||
}
|
||||
lock_file::LockCreationResult::AlreadyLocked { .. } => {
|
||||
anyhow::bail!("Cannot write pid file for {process_name} at path {target_pid_file_path:?}: file is already locked by another process")
|
||||
}
|
||||
lock_file::LockCreationResult::CreationFailed(e) => {
|
||||
return Err(e.context(format!(
|
||||
"Failed to create pid file for {process_name} at path {target_pid_file_path:?}"
|
||||
)))
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
InitialPidFile::Expect(pid_file_path) => Some(pid_file_path),
|
||||
};
|
||||
|
||||
for retries in 0..RETRIES {
|
||||
match process_started(pid, Some(pid_file_to_check), &process_status_check) {
|
||||
match process_started(pid, pid_file_to_check, &process_status_check) {
|
||||
Ok(true) => {
|
||||
println!("\n{process_name} started, pid: {pid}");
|
||||
return Ok(spawned_process);
|
||||
@@ -136,45 +147,14 @@ where
|
||||
anyhow::bail!("{process_name} did not start in {RETRY_UNTIL_SECS} seconds");
|
||||
}
|
||||
|
||||
/// Send SIGTERM to child process
|
||||
pub fn send_stop_child_process(child: &std::process::Child) -> anyhow::Result<()> {
|
||||
let pid = child.id();
|
||||
match kill(
|
||||
nix::unistd::Pid::from_raw(pid.try_into().unwrap()),
|
||||
Signal::SIGTERM,
|
||||
) {
|
||||
Ok(()) => Ok(()),
|
||||
Err(Errno::ESRCH) => {
|
||||
println!("child process with pid {pid} does not exist");
|
||||
Ok(())
|
||||
}
|
||||
Err(e) => anyhow::bail!("Failed to send signal to child process with pid {pid}: {e}"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Stops the process, using the pid file given. Returns Ok also if the process is already not running.
|
||||
pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> anyhow::Result<()> {
|
||||
let pid = match pid_file::read(pid_file)
|
||||
.with_context(|| format!("read pid_file {pid_file:?}"))?
|
||||
{
|
||||
PidFileRead::NotExist => {
|
||||
println!("{process_name} is already stopped: no pid file present at {pid_file:?}");
|
||||
return Ok(());
|
||||
}
|
||||
PidFileRead::NotHeldByAnyProcess(_) => {
|
||||
// Don't try to kill according to file contents because the pid might have been re-used by another process.
|
||||
// Don't delete the file either, it can race with new pid file creation.
|
||||
// Read `pid_file` module comment for details.
|
||||
println!(
|
||||
"No process is holding the pidfile. The process must have already exited. Leave in place to avoid race conditions: {pid_file:?}"
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
PidFileRead::LockedByOtherProcess(pid) => pid,
|
||||
};
|
||||
// XXX the pid could become invalid (and recycled) at any time before the kill() below.
|
||||
if !pid_file.exists() {
|
||||
println!("{process_name} is already stopped: no pid file {pid_file:?} is present");
|
||||
return Ok(());
|
||||
}
|
||||
let pid = read_pidfile(pid_file)?;
|
||||
|
||||
// send signal
|
||||
let sig = if immediate {
|
||||
print!("Stopping {process_name} with pid {pid} immediately..");
|
||||
Signal::SIGQUIT
|
||||
@@ -186,9 +166,8 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
|
||||
match kill(pid, sig) {
|
||||
Ok(()) => (),
|
||||
Err(Errno::ESRCH) => {
|
||||
// Again, don't delete the pid file. The unlink can race with a new pid file being created.
|
||||
println!(
|
||||
"{process_name} with pid {pid} does not exist, but a pid file {pid_file:?} was found. Likely the pid got recycled. Lucky we didn't harm anyone."
|
||||
"{process_name} with pid {pid} does not exist, but a pid file {pid_file:?} was found"
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
@@ -200,6 +179,11 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
|
||||
match process_has_stopped(pid) {
|
||||
Ok(true) => {
|
||||
println!("\n{process_name} stopped");
|
||||
if let Err(e) = fs::remove_file(pid_file) {
|
||||
if e.kind() != io::ErrorKind::NotFound {
|
||||
eprintln!("Failed to remove pid file {pid_file:?} after stopping the process: {e:#}");
|
||||
}
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
Ok(false) => {
|
||||
@@ -257,69 +241,6 @@ fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
|
||||
cmd
|
||||
}
|
||||
|
||||
/// Add a `pre_exec` to the cmd that, in between fork() and exec(),
|
||||
/// 1. Claims a pidfile with a fcntl lock on it and
|
||||
/// 2. Sets up the pidfile's file descriptor so that it (and the lock)
|
||||
/// will remain held until the cmd exits.
|
||||
fn pre_exec_create_pidfile<P>(cmd: &mut Command, path: P) -> &mut Command
|
||||
where
|
||||
P: Into<PathBuf>,
|
||||
{
|
||||
let path: PathBuf = path.into();
|
||||
// SAFETY
|
||||
// pre_exec is marked unsafe because it runs between fork and exec.
|
||||
// Why is that dangerous in various ways?
|
||||
// Long answer: https://github.com/rust-lang/rust/issues/39575
|
||||
// Short answer: in a multi-threaded program, other threads may have
|
||||
// been inside of critical sections at the time of fork. In the
|
||||
// original process, that was all right, assuming they protected
|
||||
// the critical sections appropriately, e.g., through locks.
|
||||
// Fork adds another process to the mix that
|
||||
// 1. Has a single thread T
|
||||
// 2. In an exact copy of the address space at the time of fork.
|
||||
// A variety of problems can occur now:
|
||||
// 1. T tries to grab a lock that was locked at the time of fork.
|
||||
// It will wait forever since in its address space, the lock
|
||||
// is in state 'taken' but the thread that would unlock it is
|
||||
// not there.
|
||||
// 2. A rust object that represented some external resource in the
|
||||
// parent now got implicitly copied by the fork, even though
|
||||
// the object's type is not `Copy`. The parent program may use
|
||||
// non-copyability as way to enforce unique ownership of an
|
||||
// external resource in the typesystem. The fork breaks that
|
||||
// assumption, as now both parent and child process have an
|
||||
// owned instance of the object that represents the same
|
||||
// underlying resource.
|
||||
// While these seem like niche problems, (1) in particular is
|
||||
// highly relevant. For example, `malloc()` may grab a mutex internally,
|
||||
// and so, if we forked while another thread was malloc'ing and our
|
||||
// pre_exec closure allocates as well, it will block on the malloc
|
||||
// mutex forever
|
||||
//
|
||||
// The proper solution is to only use C library functions that are marked
|
||||
// "async-signal-safe": https://man7.org/linux/man-pages/man7/signal-safety.7.html
|
||||
//
|
||||
// With this specific pre_exec() closure, the non-error path doesn't allocate.
|
||||
// The error path uses `anyhow`, and hence does allocate.
|
||||
// We take our chances there, hoping that any potential disaster is constrained
|
||||
// to the child process (e.g., malloc has no state outside of the child process).
|
||||
// Last, `expect` prints to stderr, and stdio is not async-signal-safe.
|
||||
// Again, we take our chances, making the same assumptions as for malloc.
|
||||
unsafe {
|
||||
cmd.pre_exec(move || {
|
||||
let file = pid_file::claim_for_current_process(&path).expect("claim pid file");
|
||||
// Remove the FD_CLOEXEC flag on the pidfile descriptor so that the pidfile
|
||||
// remains locked after exec.
|
||||
nix::fcntl::fcntl(file.as_raw_fd(), FcntlArg::F_SETFD(FdFlag::empty()))
|
||||
.expect("remove FD_CLOEXEC");
|
||||
// Don't run drop(file), it would close the file before we actually exec.
|
||||
std::mem::forget(file);
|
||||
Ok(())
|
||||
});
|
||||
}
|
||||
cmd
|
||||
}
|
||||
|
||||
fn process_started<F>(
|
||||
pid: Pid,
|
||||
pid_file_to_check: Option<&Path>,
|
||||
@@ -330,11 +251,14 @@ where
|
||||
{
|
||||
match status_check() {
|
||||
Ok(true) => match pid_file_to_check {
|
||||
Some(pid_file_path) => match pid_file::read(pid_file_path)? {
|
||||
PidFileRead::NotExist => Ok(false),
|
||||
PidFileRead::LockedByOtherProcess(pid_in_file) => Ok(pid_in_file == pid),
|
||||
PidFileRead::NotHeldByAnyProcess(_) => Ok(false),
|
||||
},
|
||||
Some(pid_file_path) => {
|
||||
if pid_file_path.exists() {
|
||||
let pid_in_file = read_pidfile(pid_file_path)?;
|
||||
Ok(pid_in_file == pid)
|
||||
} else {
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
None => Ok(true),
|
||||
},
|
||||
Ok(false) => Ok(false),
|
||||
@@ -342,6 +266,21 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
/// Read a PID file
|
||||
///
|
||||
/// We expect a file that contains a single integer.
|
||||
fn read_pidfile(pidfile: &Path) -> Result<Pid> {
|
||||
let pid_str = fs::read_to_string(pidfile)
|
||||
.with_context(|| format!("failed to read pidfile {pidfile:?}"))?;
|
||||
let pid: i32 = pid_str
|
||||
.parse()
|
||||
.map_err(|_| anyhow!("failed to parse pidfile {pidfile:?}"))?;
|
||||
if pid < 1 {
|
||||
bail!("pidfile {pidfile:?} contained bad value '{pid}'");
|
||||
}
|
||||
Ok(Pid::from_raw(pid))
|
||||
}
|
||||
|
||||
fn process_has_stopped(pid: Pid) -> anyhow::Result<bool> {
|
||||
match kill(pid, None) {
|
||||
// Process exists, keep waiting
|
||||
|
||||
@@ -324,7 +324,7 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
|
||||
pg_version,
|
||||
)
|
||||
.unwrap_or_else(|e| {
|
||||
eprintln!("pageserver init failed: {e:?}");
|
||||
eprintln!("pageserver init failed: {e}");
|
||||
exit(1);
|
||||
});
|
||||
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::fs::{self, File};
|
||||
use std::io::{BufReader, Write};
|
||||
use std::num::NonZeroU64;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::Child;
|
||||
use std::{io, result};
|
||||
|
||||
use anyhow::{bail, ensure, Context};
|
||||
use anyhow::{bail, Context};
|
||||
use pageserver_api::models::{
|
||||
TenantConfigRequest, TenantCreateRequest, TenantInfo, TimelineCreateRequest, TimelineInfo,
|
||||
};
|
||||
@@ -168,21 +168,29 @@ impl PageServerNode {
|
||||
}
|
||||
Err(e) => eprintln!("{e:#}"),
|
||||
}
|
||||
background_process::send_stop_child_process(&pageserver_process)?;
|
||||
|
||||
let exit_code = pageserver_process.wait()?;
|
||||
ensure!(
|
||||
exit_code.success(),
|
||||
format!(
|
||||
"pageserver init failed with exit code {:?}",
|
||||
exit_code.code()
|
||||
)
|
||||
);
|
||||
println!(
|
||||
"Stopped pageserver {} process with pid {}",
|
||||
self.env.pageserver.id,
|
||||
pageserver_process.id(),
|
||||
);
|
||||
match pageserver_process.kill() {
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"Failed to stop pageserver {} process with pid {}: {e:#}",
|
||||
self.env.pageserver.id,
|
||||
pageserver_process.id(),
|
||||
)
|
||||
}
|
||||
Ok(()) => {
|
||||
println!(
|
||||
"Stopped pageserver {} process with pid {}",
|
||||
self.env.pageserver.id,
|
||||
pageserver_process.id(),
|
||||
);
|
||||
// cleanup after pageserver startup, since we do not call regular `stop_process` during init
|
||||
let pid_file = self.pid_file();
|
||||
if let Err(e) = fs::remove_file(&pid_file) {
|
||||
if e.kind() != io::ErrorKind::NotFound {
|
||||
eprintln!("Failed to remove pid file {pid_file:?} after stopping the process: {e:#}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
init_result
|
||||
}
|
||||
|
||||
|
||||
@@ -45,9 +45,9 @@ and create new databases and accounts (control plane API in our case).
|
||||
|
||||
Integration tests, written in Python using the `pytest` framework.
|
||||
|
||||
`/vendor/postgres-v14` and `/vendor/postgres-v15`:
|
||||
`/vendor/postgres-v14`:
|
||||
|
||||
PostgreSQL source tree per version, with the modifications needed for Neon.
|
||||
PostgreSQL source tree, with the modifications needed for Neon.
|
||||
|
||||
`/pgxn/neon`:
|
||||
|
||||
|
||||
@@ -201,6 +201,8 @@ pub struct TimelineInfo {
|
||||
pub last_received_msg_ts: Option<u128>,
|
||||
pub pg_version: u32,
|
||||
|
||||
pub awaits_download: bool,
|
||||
|
||||
pub state: TimelineState,
|
||||
|
||||
// Some of the above fields are duplicated in 'local' and 'remote', for backwards-
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use anyhow::*;
|
||||
use core::time::Duration;
|
||||
use log::*;
|
||||
use once_cell::sync::Lazy;
|
||||
use postgres::types::PgLsn;
|
||||
use postgres::Client;
|
||||
use postgres_ffi::{WAL_SEGMENT_SIZE, XLOG_BLCKSZ};
|
||||
@@ -25,13 +26,15 @@ pub struct PostgresServer {
|
||||
client_config: postgres::Config,
|
||||
}
|
||||
|
||||
pub static REQUIRED_POSTGRES_CONFIG: [&str; 4] = [
|
||||
"wal_keep_size=50MB", // Ensure old WAL is not removed
|
||||
"shared_preload_libraries=neon", // can only be loaded at startup
|
||||
// Disable background processes as much as possible
|
||||
"wal_writer_delay=10s",
|
||||
"autovacuum=off",
|
||||
];
|
||||
pub static REQUIRED_POSTGRES_CONFIG: Lazy<Vec<&'static str>> = Lazy::new(|| {
|
||||
vec![
|
||||
"wal_keep_size=50MB", // Ensure old WAL is not removed
|
||||
"shared_preload_libraries=neon", // can only be loaded at startup
|
||||
// Disable background processes as much as possible
|
||||
"wal_writer_delay=10s",
|
||||
"autovacuum=off",
|
||||
]
|
||||
});
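The hunk above replaces a const array with a `once_cell::sync::Lazy<Vec<&'static str>>`. A minimal sketch of how `Lazy` behaves, assuming only the `once_cell` crate that this file already imports; the names below are illustrative, not the repo's:

```rust
// once_cell::sync::Lazy runs the closure on first access and caches the result,
// which allows non-const initializers (such as building a Vec) in a static.
use once_cell::sync::Lazy;

static REQUIRED_CONFIG: Lazy<Vec<&'static str>> =
    Lazy::new(|| vec!["wal_keep_size=50MB", "autovacuum=off"]);

fn main() {
    // The Vec is built exactly once, on this first dereference.
    for setting in REQUIRED_CONFIG.iter() {
        println!("{setting}");
    }
}
```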
|
||||
|
||||
impl Conf {
|
||||
pub fn pg_distrib_dir(&self) -> anyhow::Result<PathBuf> {
|
||||
|
||||
@@ -9,11 +9,8 @@ async-trait = "0.1"
|
||||
metrics = { version = "0.1", path = "../metrics" }
|
||||
utils = { version = "0.1", path = "../utils" }
|
||||
once_cell = "1.13.0"
|
||||
aws-smithy-http = "0.51.0"
|
||||
aws-types = "0.51.0"
|
||||
aws-config = { version = "0.51.0", default-features = false, features=["rustls"] }
|
||||
aws-sdk-s3 = "0.21.0"
|
||||
hyper = { version = "0.14", features = ["stream"] }
|
||||
rusoto_core = "0.48"
|
||||
rusoto_s3 = "0.48"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
tokio = { version = "1.17", features = ["sync", "macros", "fs", "io-util"] }
|
||||
|
||||
@@ -10,7 +10,7 @@ mod s3_bucket;
|
||||
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
fmt::Debug,
|
||||
fmt::{Debug, Display},
|
||||
num::{NonZeroU32, NonZeroUsize},
|
||||
ops::Deref,
|
||||
path::{Path, PathBuf},
|
||||
@@ -41,27 +41,44 @@ pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;
|
||||
|
||||
const REMOTE_STORAGE_PREFIX_SEPARATOR: char = '/';
|
||||
|
||||
/// Path on the remote storage, relative to some inner prefix.
|
||||
/// The prefix is an implementation detail, that allows representing local paths
|
||||
/// as the remote ones, stripping the local storage prefix away.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct RemotePath(PathBuf);
|
||||
|
||||
impl RemotePath {
|
||||
pub fn new(relative_path: &Path) -> anyhow::Result<Self> {
|
||||
anyhow::ensure!(
|
||||
relative_path.is_relative(),
|
||||
"Path {relative_path:?} is not relative"
|
||||
);
|
||||
Ok(Self(relative_path.to_path_buf()))
|
||||
}
|
||||
|
||||
pub fn with_base(&self, base_path: &Path) -> PathBuf {
|
||||
base_path.join(&self.0)
|
||||
}
|
||||
#[derive(Clone, PartialEq, Eq)]
|
||||
pub struct RemoteObjectId(String);
|
||||
|
||||
///
|
||||
/// A key that refers to an object in remote storage. It works much like a Path,
|
||||
/// but it's a separate datatype so that you don't accidentally mix local paths
|
||||
/// and remote keys.
|
||||
///
|
||||
impl RemoteObjectId {
|
||||
// Needed to retrieve last component for RemoteObjectId.
|
||||
// In other words a file name
|
||||
/// Turn a/b/c or a/b/c/ into c
|
||||
pub fn object_name(&self) -> Option<&str> {
|
||||
self.0.file_name().and_then(|os_str| os_str.to_str())
|
||||
// corner case, char::to_string is not const, that's why this is more verbose than it needs to be
|
||||
// see https://github.com/rust-lang/rust/issues/88674
|
||||
if self.0.len() == 1 && self.0.chars().next().unwrap() == REMOTE_STORAGE_PREFIX_SEPARATOR {
|
||||
return None;
|
||||
}
|
||||
|
||||
if self.0.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {
|
||||
self.0.rsplit(REMOTE_STORAGE_PREFIX_SEPARATOR).nth(1)
|
||||
} else {
|
||||
self.0
|
||||
.rsplit_once(REMOTE_STORAGE_PREFIX_SEPARATOR)
|
||||
.map(|(_, last)| last)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for RemoteObjectId {
|
||||
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
|
||||
Debug::fmt(&self.0, fmt)
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for RemoteObjectId {
|
||||
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
Display::fmt(&self.0, fmt)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -70,40 +87,49 @@ impl RemotePath {
|
||||
/// providing basic CRUD operations for storage files.
|
||||
#[async_trait::async_trait]
|
||||
pub trait RemoteStorage: Send + Sync + 'static {
|
||||
/// Attempts to derive the storage path out of the local path, if the latter is correct.
|
||||
fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<RemoteObjectId>;
|
||||
|
||||
/// Gets the download path of the given storage file.
|
||||
fn local_path(&self, remote_object_id: &RemoteObjectId) -> anyhow::Result<PathBuf>;
|
||||
|
||||
/// Lists all items the storage has right now.
|
||||
async fn list(&self) -> anyhow::Result<Vec<RemotePath>>;
|
||||
async fn list(&self) -> anyhow::Result<Vec<RemoteObjectId>>;
|
||||
|
||||
/// Lists all top level subdirectories for a given prefix
|
||||
/// Note: here we assume that if the prefix is passed it was obtained via remote_object_id
|
||||
/// which already takes into account any kind of global prefix (prefix_in_bucket for S3 or storage_root for LocalFS)
|
||||
/// so this method doesn't need to.
|
||||
async fn list_prefixes(&self, prefix: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>>;
|
||||
async fn list_prefixes(
|
||||
&self,
|
||||
prefix: Option<&RemoteObjectId>,
|
||||
) -> anyhow::Result<Vec<RemoteObjectId>>;
|
||||
|
||||
/// Streams the local file contents into remote into the remote storage entry.
|
||||
async fn upload(
|
||||
&self,
|
||||
data: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
|
||||
from: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
|
||||
// S3 PUT request requires the content length to be specified,
|
||||
// otherwise it starts to fail with the concurrent connection count increasing.
|
||||
data_size_bytes: usize,
|
||||
to: &RemotePath,
|
||||
from_size_bytes: usize,
|
||||
to: &RemoteObjectId,
|
||||
metadata: Option<StorageMetadata>,
|
||||
) -> anyhow::Result<()>;
|
||||
|
||||
/// Streams the remote storage entry contents into the buffered writer given, returns the filled writer.
|
||||
/// Returns the metadata, if any was stored with the file previously.
|
||||
async fn download(&self, from: &RemotePath) -> Result<Download, DownloadError>;
|
||||
async fn download(&self, from: &RemoteObjectId) -> Result<Download, DownloadError>;
|
||||
|
||||
/// Streams a given byte range of the remote storage entry contents into the buffered writer given, returns the filled writer.
|
||||
/// Returns the metadata, if any was stored with the file previously.
|
||||
async fn download_byte_range(
|
||||
&self,
|
||||
from: &RemotePath,
|
||||
from: &RemoteObjectId,
|
||||
start_inclusive: u64,
|
||||
end_exclusive: Option<u64>,
|
||||
) -> Result<Download, DownloadError>;
|
||||
|
||||
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()>;
|
||||
async fn delete(&self, path: &RemoteObjectId) -> anyhow::Result<()>;
|
||||
|
||||
/// Downcast to LocalFs implementation. For tests.
|
||||
fn as_local(&self) -> Option<&LocalFs> {
|
||||
@@ -152,35 +178,34 @@ impl std::error::Error for DownloadError {}
|
||||
/// Every storage, currently supported.
|
||||
/// Serves as a simple way to pass around the [`RemoteStorage`] without dealing with generics.
|
||||
#[derive(Clone)]
|
||||
pub enum GenericRemoteStorage {
|
||||
LocalFs(LocalFs),
|
||||
AwsS3(Arc<S3Bucket>),
|
||||
}
|
||||
pub struct GenericRemoteStorage(Arc<dyn RemoteStorage>);
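This hunk swaps an enum of concrete backends for a newtype around `Arc<dyn RemoteStorage>`. A minimal sketch of that trait-object pattern, with illustrative names rather than the repo's API:

```rust
// Wrap a trait object in Arc so one concrete wrapper type can stand in for any
// backend, and implement Deref so callers use trait methods directly on it.
use std::{ops::Deref, sync::Arc};

trait Storage: Send + Sync + 'static {
    fn name(&self) -> &str;
}

struct LocalBackend;
impl Storage for LocalBackend {
    fn name(&self) -> &str {
        "local"
    }
}

#[derive(Clone)]
struct GenericStorage(Arc<dyn Storage>);

impl Deref for GenericStorage {
    type Target = dyn Storage;
    fn deref(&self) -> &Self::Target {
        self.0.as_ref()
    }
}

fn main() {
    let storage = GenericStorage(Arc::new(LocalBackend));
    println!("{}", storage.name()); // trait method resolved through Deref
}
```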
|
||||
|
||||
impl Deref for GenericRemoteStorage {
|
||||
type Target = dyn RemoteStorage;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
match self {
|
||||
GenericRemoteStorage::LocalFs(local_fs) => local_fs,
|
||||
GenericRemoteStorage::AwsS3(s3_bucket) => s3_bucket.as_ref(),
|
||||
}
|
||||
self.0.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
impl GenericRemoteStorage {
|
||||
pub fn new(storage: impl RemoteStorage) -> Self {
|
||||
Self(Arc::new(storage))
|
||||
}
|
||||
|
||||
pub fn from_config(
|
||||
working_directory: PathBuf,
|
||||
storage_config: &RemoteStorageConfig,
|
||||
) -> anyhow::Result<GenericRemoteStorage> {
|
||||
Ok(match &storage_config.storage {
|
||||
RemoteStorageKind::LocalFs(root) => {
|
||||
info!("Using fs root '{}' as a remote storage", root.display());
|
||||
GenericRemoteStorage::LocalFs(LocalFs::new(root.clone())?)
|
||||
GenericRemoteStorage::new(LocalFs::new(root.clone(), working_directory)?)
|
||||
}
|
||||
RemoteStorageKind::AwsS3(s3_config) => {
|
||||
info!("Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}'",
|
||||
s3_config.bucket_name, s3_config.bucket_region, s3_config.prefix_in_bucket, s3_config.endpoint);
|
||||
GenericRemoteStorage::AwsS3(Arc::new(S3Bucket::new(s3_config)?))
|
||||
GenericRemoteStorage::new(S3Bucket::new(s3_config, working_directory)?)
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -194,12 +219,23 @@ impl GenericRemoteStorage {
|
||||
&self,
|
||||
from: Box<dyn tokio::io::AsyncRead + Unpin + Send + Sync + 'static>,
|
||||
from_size_bytes: usize,
|
||||
to: &RemotePath,
|
||||
from_path: &Path,
|
||||
) -> anyhow::Result<()> {
|
||||
self.upload(from, from_size_bytes, to, None)
|
||||
let target_storage_path = self.remote_object_id(from_path).with_context(|| {
|
||||
format!(
|
||||
"Failed to get the storage path for source local path '{}'",
|
||||
from_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
self.upload(from, from_size_bytes, &target_storage_path, None)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("Failed to upload data of length {from_size_bytes} to storage path {to:?}")
|
||||
format!(
|
||||
"Failed to upload from '{}' to storage path '{:?}'",
|
||||
from_path.display(),
|
||||
target_storage_path
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -208,11 +244,24 @@ impl GenericRemoteStorage {
|
||||
pub async fn download_storage_object(
|
||||
&self,
|
||||
byte_range: Option<(u64, Option<u64>)>,
|
||||
from: &RemotePath,
|
||||
to_path: &Path,
|
||||
) -> Result<Download, DownloadError> {
|
||||
let remote_object_path = self
|
||||
.remote_object_id(to_path)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to get the storage path for target local path '{}'",
|
||||
to_path.display()
|
||||
)
|
||||
})
|
||||
.map_err(DownloadError::BadInput)?;
|
||||
|
||||
match byte_range {
|
||||
Some((start, end)) => self.download_byte_range(from, start, end).await,
|
||||
None => self.download(from).await,
|
||||
Some((start, end)) => {
|
||||
self.download_byte_range(&remote_object_path, start, end)
|
||||
.await
|
||||
}
|
||||
None => self.download(&remote_object_path).await,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -222,6 +271,23 @@ impl GenericRemoteStorage {
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct StorageMetadata(HashMap<String, String>);
|
||||
|
||||
fn strip_path_prefix<'a>(prefix: &'a Path, path: &'a Path) -> anyhow::Result<&'a Path> {
|
||||
if prefix == path {
|
||||
anyhow::bail!(
|
||||
"Prefix and the path are equal, cannot strip: '{}'",
|
||||
prefix.display()
|
||||
)
|
||||
} else {
|
||||
path.strip_prefix(prefix).with_context(|| {
|
||||
format!(
|
||||
"Path '{}' is not prefixed with '{}'",
|
||||
path.display(),
|
||||
prefix.display(),
|
||||
)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// External backup storage configuration, enough for creating a client for that storage.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct RemoteStorageConfig {
|
||||
@@ -365,24 +431,21 @@ mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_object_name() {
|
||||
let k = RemotePath::new(Path::new("a/b/c")).unwrap();
|
||||
fn object_name() {
|
||||
let k = RemoteObjectId("a/b/c".to_owned());
|
||||
assert_eq!(k.object_name(), Some("c"));
|
||||
|
||||
let k = RemotePath::new(Path::new("a/b/c/")).unwrap();
|
||||
let k = RemoteObjectId("a/b/c/".to_owned());
|
||||
assert_eq!(k.object_name(), Some("c"));
|
||||
|
||||
let k = RemotePath::new(Path::new("a/")).unwrap();
|
||||
let k = RemoteObjectId("a/".to_owned());
|
||||
assert_eq!(k.object_name(), Some("a"));
|
||||
|
||||
// XXX is it impossible to have an empty key?
|
||||
let k = RemotePath::new(Path::new("")).unwrap();
|
||||
let k = RemoteObjectId("".to_owned());
|
||||
assert_eq!(k.object_name(), None);
|
||||
|
||||
let k = RemoteObjectId("/".to_owned());
|
||||
assert_eq!(k.object_name(), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rempte_path_cannot_be_created_from_absolute_ones() {
|
||||
let err = RemotePath::new(Path::new("/")).expect_err("Should fail on absolute paths");
|
||||
assert_eq!(err.to_string(), "Path \"/\" is not relative");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
//! volume is mounted to the local FS.
|
||||
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
future::Future,
|
||||
path::{Path, PathBuf},
|
||||
pin::Pin,
|
||||
@@ -19,33 +18,60 @@ use tokio::{
|
||||
use tracing::*;
|
||||
use utils::crashsafe::path_with_suffix_extension;
|
||||
|
||||
use crate::{Download, DownloadError, RemotePath};
|
||||
use crate::{Download, DownloadError, RemoteObjectId};
|
||||
|
||||
use super::{RemoteStorage, StorageMetadata};
|
||||
use super::{strip_path_prefix, RemoteStorage, StorageMetadata};
|
||||
|
||||
const LOCAL_FS_TEMP_FILE_SUFFIX: &str = "___temp";
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
/// Convert a Path in the remote storage into a RemoteObjectId
|
||||
fn remote_object_id_from_path(path: &Path) -> anyhow::Result<RemoteObjectId> {
|
||||
Ok(RemoteObjectId(
|
||||
path.to_str()
|
||||
.ok_or_else(|| anyhow::anyhow!("unexpected characters found in path"))?
|
||||
.to_string(),
|
||||
))
|
||||
}
|
||||
|
||||
pub struct LocalFs {
|
||||
working_directory: PathBuf,
|
||||
storage_root: PathBuf,
|
||||
}
|
||||
|
||||
impl LocalFs {
|
||||
/// Attempts to create local FS storage, along with its root directory.
|
||||
/// Storage root will be created (if does not exist) and transformed into an absolute path (if passed as relative).
|
||||
pub fn new(mut storage_root: PathBuf) -> anyhow::Result<Self> {
|
||||
if !storage_root.exists() {
|
||||
std::fs::create_dir_all(&storage_root).with_context(|| {
|
||||
format!("Failed to create all directories in the given root path {storage_root:?}")
|
||||
})?;
|
||||
}
|
||||
if !storage_root.is_absolute() {
|
||||
storage_root = storage_root.canonicalize().with_context(|| {
|
||||
format!("Failed to represent path {storage_root:?} as an absolute path")
|
||||
pub fn new(root: PathBuf, working_directory: PathBuf) -> anyhow::Result<Self> {
|
||||
if !root.exists() {
|
||||
std::fs::create_dir_all(&root).with_context(|| {
|
||||
format!(
|
||||
"Failed to create all directories in the given root path '{}'",
|
||||
root.display(),
|
||||
)
|
||||
})?;
|
||||
}
|
||||
Ok(Self {
|
||||
working_directory,
|
||||
storage_root: root,
|
||||
})
|
||||
}
|
||||
|
||||
Ok(Self { storage_root })
|
||||
///
|
||||
/// Get the absolute path in the local filesystem to given remote object.
|
||||
///
|
||||
/// This is public so that it can be used in tests. Should not be used elsewhere.
|
||||
///
|
||||
pub fn resolve_in_storage(&self, remote_object_id: &RemoteObjectId) -> anyhow::Result<PathBuf> {
|
||||
let path = PathBuf::from(&remote_object_id.0);
|
||||
if path.is_relative() {
|
||||
Ok(self.storage_root.join(path))
|
||||
} else if path.starts_with(&self.storage_root) {
|
||||
Ok(path)
|
||||
} else {
|
||||
bail!(
|
||||
"Path '{}' does not belong to the current storage",
|
||||
path.display()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
async fn read_storage_metadata(
|
||||
@@ -77,48 +103,45 @@ impl LocalFs {
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl RemoteStorage for LocalFs {
|
||||
async fn list(&self) -> anyhow::Result<Vec<RemotePath>> {
|
||||
Ok(get_all_files(&self.storage_root, true)
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|path| {
|
||||
path.strip_prefix(&self.storage_root)
|
||||
.context("Failed to strip storage root prefix")
|
||||
.and_then(RemotePath::new)
|
||||
.expect(
|
||||
"We list files for storage root, hence should be able to remote the prefix",
|
||||
)
|
||||
})
|
||||
.collect())
|
||||
/// Convert a "local" path into a "remote path"
|
||||
fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<RemoteObjectId> {
|
||||
let path = self.storage_root.join(
|
||||
strip_path_prefix(&self.working_directory, local_path)
|
||||
.context("local path does not belong to this storage")?,
|
||||
);
|
||||
remote_object_id_from_path(&path)
|
||||
}
|
||||
|
||||
async fn list_prefixes(&self, prefix: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>> {
|
||||
fn local_path(&self, remote_object_id: &RemoteObjectId) -> anyhow::Result<PathBuf> {
|
||||
let storage_path = PathBuf::from(&remote_object_id.0);
|
||||
let relative_path = strip_path_prefix(&self.storage_root, &storage_path)
|
||||
.context("local path does not belong to this storage")?;
|
||||
Ok(self.working_directory.join(relative_path))
|
||||
}
|
||||
|
||||
async fn list(&self) -> anyhow::Result<Vec<RemoteObjectId>> {
|
||||
get_all_files(&self.storage_root, true).await
|
||||
}
|
||||
|
||||
async fn list_prefixes(
|
||||
&self,
|
||||
prefix: Option<&RemoteObjectId>,
|
||||
) -> anyhow::Result<Vec<RemoteObjectId>> {
|
||||
let path = match prefix {
|
||||
Some(prefix) => Cow::Owned(prefix.with_base(&self.storage_root)),
|
||||
None => Cow::Borrowed(&self.storage_root),
|
||||
Some(prefix) => Path::new(&prefix.0),
|
||||
None => &self.storage_root,
|
||||
};
|
||||
Ok(get_all_files(path.as_ref(), false)
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|path| {
|
||||
path.strip_prefix(&self.storage_root)
|
||||
.context("Failed to strip preifix")
|
||||
.and_then(RemotePath::new)
|
||||
.expect(
|
||||
"We list files for storage root, hence should be able to remote the prefix",
|
||||
)
|
||||
})
|
||||
.collect())
|
||||
get_all_files(path, false).await
|
||||
}
|
||||
|
||||
async fn upload(
|
||||
&self,
|
||||
data: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
|
||||
data_size_bytes: usize,
|
||||
to: &RemotePath,
|
||||
from: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
|
||||
from_size_bytes: usize,
|
||||
to: &RemoteObjectId,
|
||||
metadata: Option<StorageMetadata>,
|
||||
) -> anyhow::Result<()> {
|
||||
let target_file_path = to.with_base(&self.storage_root);
|
||||
let target_file_path = self.resolve_in_storage(to)?;
|
||||
create_target_directory(&target_file_path).await?;
|
||||
// We need this dance with sort of durable rename (without fsyncs)
|
||||
// to prevent partial uploads. This was really hit when pageserver shutdown
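The comment above refers to writing into a temporary sibling file and renaming it into place, so readers never observe a partially uploaded file. A hedged sketch of that pattern (not the repo's code), assuming `tokio` and `anyhow`, which this crate already uses; the `___temp` suffix mirrors the constant defined earlier in this file:

```rust
use std::path::{Path, PathBuf};
use tokio::{fs, io::AsyncWriteExt};

async fn write_atomically(target: &Path, data: &[u8]) -> anyhow::Result<()> {
    // Derive a sibling temp path (illustrative; the real code appends a
    // "___temp" suffix via a helper instead of replacing the extension).
    let temp: PathBuf = target.with_extension("___temp");
    let mut file = fs::File::create(&temp).await?;
    file.write_all(data).await?;
    file.flush().await?;
    // Renaming over the destination is atomic on POSIX filesystems, so readers
    // see either the old file or the complete new one, never a partial write.
    // A fully durable variant would also fsync the file and its directory.
    fs::rename(&temp, target).await?;
    Ok(())
}
```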
|
||||
@@ -139,8 +162,8 @@ impl RemoteStorage for LocalFs {
|
||||
})?,
|
||||
);
|
||||
|
||||
let from_size_bytes = data_size_bytes as u64;
|
||||
let mut buffer_to_read = data.take(from_size_bytes);
|
||||
let from_size_bytes = from_size_bytes as u64;
|
||||
let mut buffer_to_read = from.take(from_size_bytes);
|
||||
|
||||
let bytes_read = io::copy(&mut buffer_to_read, &mut destination)
|
||||
.await
|
||||
@@ -197,22 +220,27 @@ impl RemoteStorage for LocalFs {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn download(&self, from: &RemotePath) -> Result<Download, DownloadError> {
|
||||
let target_path = from.with_base(&self.storage_root);
|
||||
if file_exists(&target_path).map_err(DownloadError::BadInput)? {
|
||||
async fn download(&self, from: &RemoteObjectId) -> Result<Download, DownloadError> {
|
||||
let file_path = self
|
||||
.resolve_in_storage(from)
|
||||
.map_err(DownloadError::BadInput)?;
|
||||
if file_exists(&file_path).map_err(DownloadError::BadInput)? {
|
||||
let source = io::BufReader::new(
|
||||
fs::OpenOptions::new()
|
||||
.read(true)
|
||||
.open(&target_path)
|
||||
.open(&file_path)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("Failed to open source file {target_path:?} to use in the download")
|
||||
format!(
|
||||
"Failed to open source file '{}' to use in the download",
|
||||
file_path.display()
|
||||
)
|
||||
})
|
||||
.map_err(DownloadError::Other)?,
|
||||
);
|
||||
|
||||
let metadata = self
|
||||
.read_storage_metadata(&target_path)
|
||||
.read_storage_metadata(&file_path)
|
||||
.await
|
||||
.map_err(DownloadError::Other)?;
|
||||
Ok(Download {
|
||||
@@ -226,7 +254,7 @@ impl RemoteStorage for LocalFs {
|
||||
|
||||
async fn download_byte_range(
|
||||
&self,
|
||||
from: &RemotePath,
|
||||
from: &RemoteObjectId,
|
||||
start_inclusive: u64,
|
||||
end_exclusive: Option<u64>,
|
||||
) -> Result<Download, DownloadError> {
|
||||
@@ -238,15 +266,20 @@ impl RemoteStorage for LocalFs {
|
||||
return Err(DownloadError::Other(anyhow::anyhow!("Invalid range, start ({start_inclusive}) and end_exclusive ({end_exclusive:?}) difference is zero bytes")));
|
||||
}
|
||||
}
|
||||
let target_path = from.with_base(&self.storage_root);
|
||||
if file_exists(&target_path).map_err(DownloadError::BadInput)? {
|
||||
let file_path = self
|
||||
.resolve_in_storage(from)
|
||||
.map_err(DownloadError::BadInput)?;
|
||||
if file_exists(&file_path).map_err(DownloadError::BadInput)? {
|
||||
let mut source = io::BufReader::new(
|
||||
fs::OpenOptions::new()
|
||||
.read(true)
|
||||
.open(&target_path)
|
||||
.open(&file_path)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("Failed to open source file {target_path:?} to use in the download")
|
||||
format!(
|
||||
"Failed to open source file '{}' to use in the download",
|
||||
file_path.display()
|
||||
)
|
||||
})
|
||||
.map_err(DownloadError::Other)?,
|
||||
);
|
||||
@@ -256,7 +289,7 @@ impl RemoteStorage for LocalFs {
|
||||
.context("Failed to seek to the range start in a local storage file")
|
||||
.map_err(DownloadError::Other)?;
|
||||
let metadata = self
|
||||
.read_storage_metadata(&target_path)
|
||||
.read_storage_metadata(&file_path)
|
||||
.await
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
@@ -275,12 +308,15 @@ impl RemoteStorage for LocalFs {
|
||||
}
|
||||
}
|
||||
|
||||
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()> {
|
||||
let file_path = path.with_base(&self.storage_root);
|
||||
async fn delete(&self, path: &RemoteObjectId) -> anyhow::Result<()> {
|
||||
let file_path = self.resolve_in_storage(path)?;
|
||||
if file_path.exists() && file_path.is_file() {
|
||||
Ok(fs::remove_file(file_path).await?)
|
||||
} else {
|
||||
bail!("File {file_path:?} either does not exist or is not a file")
|
||||
bail!(
|
||||
"File '{}' either does not exist or is not a file",
|
||||
file_path.display()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -296,7 +332,7 @@ fn storage_metadata_path(original_path: &Path) -> PathBuf {
|
||||
fn get_all_files<'a, P>(
|
||||
directory_path: P,
|
||||
recursive: bool,
|
||||
) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<PathBuf>>> + Send + Sync + 'a>>
|
||||
) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<RemoteObjectId>>> + Send + Sync + 'a>>
|
||||
where
|
||||
P: AsRef<Path> + Send + Sync + 'a,
|
||||
{
|
||||
@@ -310,20 +346,20 @@ where
|
||||
let file_type = dir_entry.file_type().await?;
|
||||
let entry_path = dir_entry.path();
|
||||
if file_type.is_symlink() {
|
||||
debug!("{entry_path:?} us a symlink, skipping")
|
||||
debug!("{:?} us a symlink, skipping", entry_path)
|
||||
} else if file_type.is_dir() {
|
||||
if recursive {
|
||||
paths.extend(get_all_files(&entry_path, true).await?.into_iter())
|
||||
} else {
|
||||
paths.push(entry_path)
|
||||
paths.push(remote_object_id_from_path(&dir_entry.path())?)
|
||||
}
|
||||
} else {
|
||||
paths.push(entry_path);
|
||||
paths.push(remote_object_id_from_path(&dir_entry.path())?);
|
||||
}
|
||||
}
|
||||
Ok(paths)
|
||||
} else {
|
||||
bail!("Path {directory_path:?} is not a directory")
|
||||
bail!("Path '{}' is not a directory", directory_path.display())
|
||||
}
|
||||
} else {
|
||||
Ok(Vec::new())
|
||||
@@ -358,6 +394,173 @@ fn file_exists(file_path: &Path) -> anyhow::Result<bool> {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod pure_tests {
|
||||
use tempfile::tempdir;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn storage_path_positive() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
|
||||
let storage_root = PathBuf::from("somewhere").join("else");
|
||||
let storage = LocalFs {
|
||||
working_directory: workdir.clone(),
|
||||
storage_root: storage_root.clone(),
|
||||
};
|
||||
|
||||
let local_path = workdir
|
||||
.join("timelines")
|
||||
.join("some_timeline")
|
||||
.join("file_name");
|
||||
let expected_path = storage_root.join(local_path.strip_prefix(&workdir)?);
|
||||
|
||||
let actual_path = PathBuf::from(
|
||||
storage
|
||||
.remote_object_id(&local_path)
|
||||
.expect("Matching path should map to storage path normally")
|
||||
.0,
|
||||
);
|
||||
assert_eq!(
|
||||
expected_path,
|
||||
actual_path,
|
||||
"File paths from workdir should be stored in local fs storage with the same path they have relative to the workdir"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn storage_path_negatives() -> anyhow::Result<()> {
|
||||
#[track_caller]
|
||||
fn storage_path_error(storage: &LocalFs, mismatching_path: &Path) -> String {
|
||||
match storage.remote_object_id(mismatching_path) {
|
||||
Ok(wrong_path) => panic!(
|
||||
"Expected path '{}' to error, but got storage path: {:?}",
|
||||
mismatching_path.display(),
|
||||
wrong_path,
|
||||
),
|
||||
Err(e) => format!("{:?}", e),
|
||||
}
|
||||
}
|
||||
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
let storage_root = PathBuf::from("somewhere").join("else");
|
||||
let storage = LocalFs {
|
||||
working_directory: workdir.clone(),
|
||||
storage_root,
|
||||
};
|
||||
|
||||
let error_string = storage_path_error(&storage, &workdir);
|
||||
assert!(error_string.contains("does not belong to this storage"));
|
||||
assert!(error_string.contains(workdir.to_str().unwrap()));
|
||||
|
||||
let mismatching_path_str = "/something/else";
|
||||
let error_message = storage_path_error(&storage, Path::new(mismatching_path_str));
|
||||
assert!(
|
||||
error_message.contains(mismatching_path_str),
|
||||
"Error should mention wrong path"
|
||||
);
|
||||
assert!(
|
||||
error_message.contains(workdir.to_str().unwrap()),
|
||||
"Error should mention server workdir"
|
||||
);
|
||||
assert!(error_message.contains("does not belong to this storage"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn local_path_positive() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
let storage_root = PathBuf::from("somewhere").join("else");
|
||||
let storage = LocalFs {
|
||||
working_directory: workdir.clone(),
|
||||
storage_root: storage_root.clone(),
|
||||
};
|
||||
|
||||
let name = "not a metadata";
|
||||
let local_path = workdir.join("timelines").join("some_timeline").join(name);
|
||||
assert_eq!(
|
||||
local_path,
|
||||
storage
|
||||
.local_path(&remote_object_id_from_path(
|
||||
&storage_root.join(local_path.strip_prefix(&workdir)?)
|
||||
)?)
|
||||
.expect("For a valid input, valid local path should be parsed"),
|
||||
"Should be able to parse metadata out of the correctly named remote delta file"
|
||||
);
|
||||
|
||||
let local_metadata_path = workdir
|
||||
.join("timelines")
|
||||
.join("some_timeline")
|
||||
.join("metadata");
|
||||
let remote_metadata_path = storage.remote_object_id(&local_metadata_path)?;
|
||||
assert_eq!(
|
||||
local_metadata_path,
|
||||
storage
|
||||
.local_path(&remote_metadata_path)
|
||||
.expect("For a valid input, valid local path should be parsed"),
|
||||
"Should be able to parse metadata out of the correctly named remote metadata file"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn local_path_negatives() -> anyhow::Result<()> {
|
||||
#[track_caller]
|
||||
fn local_path_error(storage: &LocalFs, storage_path: &RemoteObjectId) -> String {
|
||||
match storage.local_path(storage_path) {
|
||||
Ok(wrong_path) => panic!(
|
||||
"Expected local path input {:?} to cause an error, but got file path: {:?}",
|
||||
storage_path, wrong_path,
|
||||
),
|
||||
Err(e) => format!("{:?}", e),
|
||||
}
|
||||
}
|
||||
|
||||
let storage_root = PathBuf::from("somewhere").join("else");
|
||||
let storage = LocalFs {
|
||||
working_directory: tempdir()?.path().to_owned(),
|
||||
storage_root,
|
||||
};
|
||||
|
||||
let totally_wrong_path = "wrong_wrong_wrong";
|
||||
let error_message =
|
||||
local_path_error(&storage, &RemoteObjectId(totally_wrong_path.to_string()));
|
||||
assert!(error_message.contains(totally_wrong_path));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn download_destination_matches_original_path() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
let original_path = workdir
|
||||
.join("timelines")
|
||||
.join("some_timeline")
|
||||
.join("some name");
|
||||
|
||||
let storage_root = PathBuf::from("somewhere").join("else");
|
||||
let dummy_storage = LocalFs {
|
||||
working_directory: workdir,
|
||||
storage_root,
|
||||
};
|
||||
|
||||
let storage_path = dummy_storage.remote_object_id(&original_path)?;
|
||||
let download_destination = dummy_storage.local_path(&storage_path)?;
|
||||
|
||||
assert_eq!(
|
||||
original_path, download_destination,
|
||||
"'original path -> storage path -> matching fs path' transformation should produce the same path as the input one for the correct path"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod fs_tests {
|
||||
use super::*;
|
||||
@@ -369,7 +572,7 @@ mod fs_tests {
|
||||
storage: &LocalFs,
|
||||
#[allow(clippy::ptr_arg)]
|
||||
// have to use &PathBuf due to `storage.local_path` parameter requirements
|
||||
remote_storage_path: &RemotePath,
|
||||
remote_storage_path: &RemoteObjectId,
|
||||
expected_metadata: Option<&StorageMetadata>,
|
||||
) -> anyhow::Result<String> {
|
||||
let mut download = storage
|
||||
@@ -392,16 +595,41 @@ mod fs_tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn upload_file() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
let storage = create_storage()?;
|
||||
|
||||
let target_path_1 = upload_dummy_file(&storage, "upload_1", None).await?;
|
||||
let (file, size) = create_file_for_upload(
|
||||
&storage.working_directory.join("whatever"),
|
||||
"whatever_contents",
|
||||
)
|
||||
.await?;
|
||||
let target_path = "/somewhere/else";
|
||||
match storage
|
||||
.upload(
|
||||
Box::new(file),
|
||||
size,
|
||||
&RemoteObjectId(target_path.to_string()),
|
||||
None,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(()) => panic!("Should not allow storing files with wrong target path"),
|
||||
Err(e) => {
|
||||
let message = format!("{:?}", e);
|
||||
assert!(message.contains(target_path));
|
||||
assert!(message.contains("does not belong to the current storage"));
|
||||
}
|
||||
}
|
||||
assert!(storage.list().await?.is_empty());
|
||||
|
||||
let target_path_1 = upload_dummy_file(&workdir, &storage, "upload_1", None).await?;
|
||||
assert_eq!(
|
||||
storage.list().await?,
|
||||
vec![target_path_1.clone()],
|
||||
"Should list a single file after first upload"
|
||||
);
|
||||
|
||||
let target_path_2 = upload_dummy_file(&storage, "upload_2", None).await?;
|
||||
let target_path_2 = upload_dummy_file(&workdir, &storage, "upload_2", None).await?;
|
||||
assert_eq!(
|
||||
list_files_sorted(&storage).await?,
|
||||
vec![target_path_1.clone(), target_path_2.clone()],
|
||||
@@ -415,7 +643,7 @@ mod fs_tests {
|
||||
async fn upload_file_negatives() -> anyhow::Result<()> {
|
||||
let storage = create_storage()?;
|
||||
|
||||
let id = RemotePath::new(Path::new("dummy"))?;
|
||||
let id = storage.remote_object_id(&storage.working_directory.join("dummy"))?;
|
||||
let content = std::io::Cursor::new(b"12345");
|
||||
|
||||
// Check that you get an error if the size parameter doesn't match the actual
|
||||
@@ -440,14 +668,16 @@ mod fs_tests {
|
||||
}
|
||||
|
||||
fn create_storage() -> anyhow::Result<LocalFs> {
|
||||
LocalFs::new(tempdir()?.path().to_owned())
|
||||
LocalFs::new(tempdir()?.path().to_owned(), tempdir()?.path().to_owned())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn download_file() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
|
||||
let storage = create_storage()?;
|
||||
let upload_name = "upload_1";
|
||||
let upload_target = upload_dummy_file(&storage, upload_name, None).await?;
|
||||
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
|
||||
|
||||
let contents = read_and_assert_remote_file_contents(&storage, &upload_target, None).await?;
|
||||
assert_eq!(
|
||||
@@ -457,7 +687,7 @@ mod fs_tests {
|
||||
);
|
||||
|
||||
let non_existing_path = "somewhere/else";
|
||||
match storage.download(&RemotePath::new(Path::new(non_existing_path))?).await {
|
||||
match storage.download(&RemoteObjectId(non_existing_path.to_string())).await {
|
||||
Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys
|
||||
other => panic!("Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"),
|
||||
}
|
||||
@@ -466,9 +696,11 @@ mod fs_tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn download_file_range_positive() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
|
||||
let storage = create_storage()?;
|
||||
let upload_name = "upload_1";
|
||||
let upload_target = upload_dummy_file(&storage, upload_name, None).await?;
|
||||
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
|
||||
|
||||
let full_range_download_contents =
|
||||
read_and_assert_remote_file_contents(&storage, &upload_target, None).await?;
|
||||
@@ -534,9 +766,11 @@ mod fs_tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn download_file_range_negative() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
|
||||
let storage = create_storage()?;
|
||||
let upload_name = "upload_1";
|
||||
let upload_target = upload_dummy_file(&storage, upload_name, None).await?;
|
||||
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
|
||||
|
||||
let start = 1_000_000_000;
|
||||
let end = start + 1;
|
||||
@@ -578,9 +812,11 @@ mod fs_tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn delete_file() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
|
||||
let storage = create_storage()?;
|
||||
let upload_name = "upload_1";
|
||||
let upload_target = upload_dummy_file(&storage, upload_name, None).await?;
|
||||
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
|
||||
|
||||
storage.delete(&upload_target).await?;
|
||||
assert!(storage.list().await?.is_empty());
|
||||
@@ -590,8 +826,7 @@ mod fs_tests {
|
||||
Err(e) => {
|
||||
let error_string = e.to_string();
|
||||
assert!(error_string.contains("does not exist"));
|
||||
let expected_path = upload_target.with_base(&storage.storage_root);
|
||||
assert!(error_string.contains(expected_path.to_str().unwrap()));
|
||||
assert!(error_string.contains(&upload_target.0));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
@@ -599,6 +834,8 @@ mod fs_tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn file_with_metadata() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
|
||||
let storage = create_storage()?;
|
||||
let upload_name = "upload_1";
|
||||
let metadata = StorageMetadata(HashMap::from([
|
||||
@@ -606,7 +843,7 @@ mod fs_tests {
|
||||
("two".to_string(), "2".to_string()),
|
||||
]));
|
||||
let upload_target =
|
||||
upload_dummy_file(&storage, upload_name, Some(metadata.clone())).await?;
|
||||
upload_dummy_file(&workdir, &storage, upload_name, Some(metadata.clone())).await?;
|
||||
|
||||
let full_range_download_contents =
|
||||
read_and_assert_remote_file_contents(&storage, &upload_target, Some(&metadata)).await?;
|
||||
@@ -646,32 +883,23 @@ mod fs_tests {
|
||||
}
|
||||
|
||||
async fn upload_dummy_file(
|
||||
workdir: &Path,
|
||||
storage: &LocalFs,
|
||||
name: &str,
|
||||
metadata: Option<StorageMetadata>,
|
||||
) -> anyhow::Result<RemotePath> {
|
||||
let from_path = storage
|
||||
.storage_root
|
||||
.join("timelines")
|
||||
.join("some_timeline")
|
||||
.join(name);
|
||||
) -> anyhow::Result<RemoteObjectId> {
|
||||
let timeline_path = workdir.join("timelines").join("some_timeline");
|
||||
let relative_timeline_path = timeline_path.strip_prefix(&workdir)?;
|
||||
let storage_path = storage.storage_root.join(relative_timeline_path).join(name);
|
||||
let remote_object_id = RemoteObjectId(storage_path.to_str().unwrap().to_string());
|
||||
|
||||
let from_path = storage.working_directory.join(name);
|
||||
let (file, size) = create_file_for_upload(&from_path, &dummy_contents(name)).await?;
|
||||
|
||||
let relative_path = from_path
|
||||
.strip_prefix(&storage.storage_root)
|
||||
.context("Failed to strip storage root prefix")
|
||||
.and_then(RemotePath::new)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to resolve remote part of path {:?} for base {:?}",
|
||||
from_path, storage.storage_root
|
||||
)
|
||||
})?;
|
||||
|
||||
storage
|
||||
.upload(Box::new(file), size, &relative_path, metadata)
|
||||
.upload(Box::new(file), size, &remote_object_id, metadata)
|
||||
.await?;
|
||||
Ok(relative_path)
|
||||
remote_object_id_from_path(&storage_path)
|
||||
}
|
||||
|
||||
async fn create_file_for_upload(
|
||||
@@ -696,7 +924,7 @@ mod fs_tests {
|
||||
format!("contents for {name}")
|
||||
}
|
||||
|
||||
async fn list_files_sorted(storage: &LocalFs) -> anyhow::Result<Vec<RemotePath>> {
|
||||
async fn list_files_sorted(storage: &LocalFs) -> anyhow::Result<Vec<RemoteObjectId>> {
|
||||
let mut files = storage.list().await?;
|
||||
files.sort_by(|a, b| a.0.cmp(&b.0));
|
||||
Ok(files)
|
||||
|
||||
@@ -4,34 +4,27 @@
|
||||
//! allowing multiple api users to independently work with the same S3 bucket, if
|
||||
//! their bucket prefixes are both specified and different.
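// A minimal, self-contained sketch of that isolation (not code from this crate;
// it assumes the prefix separator is '/', as REMOTE_STORAGE_PREFIX_SEPARATOR is
// used below): the same workdir-relative path maps to two distinct keys when
// two users configure different bucket prefixes.

use std::path::Path;

const PREFIX_SEPARATOR: char = '/';

// Build a bucket key the way `remote_object_id` below does: start from the
// configured prefix, then append every path segment separated by '/'.
fn key_for(prefix_in_bucket: &str, relative_path: &Path) -> String {
    let mut key = prefix_in_bucket.to_string();
    for segment in relative_path {
        key.push(PREFIX_SEPARATOR);
        key.push_str(&segment.to_string_lossy());
    }
    key
}

fn main() {
    let relative = Path::new("timelines").join("some_timeline").join("layer_file");
    // Two API users sharing one bucket, isolated only by their prefixes.
    assert_eq!(key_for("user_a", &relative), "user_a/timelines/some_timeline/layer_file");
    assert_eq!(key_for("user_b", &relative), "user_b/timelines/some_timeline/layer_file");
}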
|
||||
|
||||
use std::env::var;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use anyhow::Context;
|
||||
use aws_config::{
|
||||
environment::credentials::EnvironmentVariableCredentialsProvider, imds,
|
||||
imds::credentials::ImdsCredentialsProvider, meta::credentials::provide_credentials_fn,
|
||||
use rusoto_core::{
|
||||
credential::{InstanceMetadataProvider, StaticProvider},
|
||||
HttpClient, Region, RusotoError,
|
||||
};
|
||||
use aws_sdk_s3::{
|
||||
config::Config,
|
||||
error::{GetObjectError, GetObjectErrorKind},
|
||||
types::{ByteStream, SdkError},
|
||||
Client, Endpoint, Region,
|
||||
use rusoto_s3::{
|
||||
DeleteObjectRequest, GetObjectError, GetObjectRequest, ListObjectsV2Request, PutObjectRequest,
|
||||
S3Client, StreamingBody, S3,
|
||||
};
|
||||
use aws_smithy_http::body::SdkBody;
|
||||
use aws_types::credentials::{CredentialsError, ProvideCredentials};
|
||||
use hyper::Body;
|
||||
use tokio::{io, sync::Semaphore};
|
||||
use tokio_util::io::ReaderStream;
|
||||
use tracing::debug;
|
||||
|
||||
use super::StorageMetadata;
|
||||
use crate::{
|
||||
Download, DownloadError, RemotePath, RemoteStorage, S3Config, REMOTE_STORAGE_PREFIX_SEPARATOR,
|
||||
strip_path_prefix, Download, DownloadError, RemoteObjectId, RemoteStorage, S3Config,
|
||||
REMOTE_STORAGE_PREFIX_SEPARATOR,
|
||||
};
|
||||
|
||||
const DEFAULT_IMDS_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
use super::StorageMetadata;
|
||||
|
||||
pub(super) mod metrics {
|
||||
use metrics::{register_int_counter_vec, IntCounterVec};
|
||||
@@ -98,9 +91,32 @@ pub(super) mod metrics {
|
||||
}
|
||||
}
|
||||
|
||||
fn download_destination(
|
||||
id: &RemoteObjectId,
|
||||
workdir: &Path,
|
||||
prefix_to_strip: Option<&str>,
|
||||
) -> PathBuf {
|
||||
let path_without_prefix = match prefix_to_strip {
|
||||
Some(prefix) => id.0.strip_prefix(prefix).unwrap_or_else(|| {
|
||||
panic!(
|
||||
"Could not strip prefix '{}' from S3 object key '{}'",
|
||||
prefix, id.0
|
||||
)
|
||||
}),
|
||||
None => &id.0,
|
||||
};
|
||||
|
||||
workdir.join(
|
||||
path_without_prefix
|
||||
.split(REMOTE_STORAGE_PREFIX_SEPARATOR)
|
||||
.collect::<PathBuf>(),
|
||||
)
|
||||
}
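// Worked example (cf. `test_download_destination` below, assuming the prefix
// separator is '/'): with no prefix to strip, the key "/one/two/test_name" is
// split on '/' and joined onto the workdir, producing workdir/one/two/test_name.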
|
||||
|
||||
/// AWS S3 storage.
|
||||
pub struct S3Bucket {
|
||||
client: Client,
|
||||
workdir: PathBuf,
|
||||
client: S3Client,
|
||||
bucket_name: String,
|
||||
prefix_in_bucket: Option<String>,
|
||||
// Every request to S3 can be throttled or cancelled, if a certain number of requests per second is exceeded.
|
||||
@@ -109,53 +125,50 @@ pub struct S3Bucket {
|
||||
concurrency_limiter: Semaphore,
|
||||
}
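// The `concurrency_limiter` field above is what enforces that cap. A minimal
// sketch of the pattern, assuming the tokio crate with the "full" feature set;
// the print is a stand-in for the real S3 request.

use std::sync::Arc;

use tokio::sync::Semaphore;

#[tokio::main]
async fn main() {
    // Allow at most 2 requests in flight, like `concurrency_limit` in S3Config.
    let limiter = Arc::new(Semaphore::new(2));

    let mut tasks = Vec::new();
    for i in 0..5 {
        let limiter = Arc::clone(&limiter);
        tasks.push(tokio::spawn(async move {
            // Once 2 permits are out, further acquires wait here.
            let _permit = limiter.acquire().await.expect("semaphore closed");
            // ... issue the actual S3 request in place of this print ...
            println!("request {i} running");
            // The permit is released when `_permit` goes out of scope.
        }));
    }
    for task in tasks {
        task.await.expect("task panicked");
    }
}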
|
||||
|
||||
#[derive(Default)]
|
||||
struct GetObjectRequest {
|
||||
bucket: String,
|
||||
key: String,
|
||||
range: Option<String>,
|
||||
}
|
||||
impl S3Bucket {
|
||||
/// Creates the S3 storage, errors if incorrect AWS S3 configuration provided.
|
||||
pub fn new(aws_config: &S3Config) -> anyhow::Result<Self> {
|
||||
pub fn new(aws_config: &S3Config, workdir: PathBuf) -> anyhow::Result<Self> {
|
||||
debug!(
|
||||
"Creating s3 remote storage for S3 bucket {}",
|
||||
aws_config.bucket_name
|
||||
);
|
||||
let mut config_builder = Config::builder()
|
||||
.region(Region::new(aws_config.bucket_region.clone()))
|
||||
.credentials_provider(provide_credentials_fn(|| async {
|
||||
match var("AWS_ACCESS_KEY_ID").is_ok() && var("AWS_SECRET_ACCESS_KEY").is_ok() {
|
||||
true => {
|
||||
EnvironmentVariableCredentialsProvider::new()
|
||||
.provide_credentials()
|
||||
.await
|
||||
}
|
||||
false => {
|
||||
let imds_client = imds::Client::builder()
|
||||
.connect_timeout(DEFAULT_IMDS_TIMEOUT)
|
||||
.read_timeout(DEFAULT_IMDS_TIMEOUT)
|
||||
.build()
|
||||
.await
|
||||
.map_err(CredentialsError::unhandled)?;
|
||||
ImdsCredentialsProvider::builder()
|
||||
.imds_client(imds_client)
|
||||
.build()
|
||||
.provide_credentials()
|
||||
.await
|
||||
}
|
||||
}
|
||||
}));
|
||||
let region = match aws_config.endpoint.clone() {
|
||||
Some(custom_endpoint) => Region::Custom {
|
||||
name: aws_config.bucket_region.clone(),
|
||||
endpoint: custom_endpoint,
|
||||
},
|
||||
None => aws_config
|
||||
.bucket_region
|
||||
.parse::<Region>()
|
||||
.context("Failed to parse the s3 region from config")?,
|
||||
};
|
||||
let request_dispatcher = HttpClient::new().context("Failed to create S3 http client")?;
|
||||
|
||||
if let Some(custom_endpoint) = aws_config.endpoint.clone() {
|
||||
let endpoint = Endpoint::immutable(
|
||||
custom_endpoint
|
||||
.parse()
|
||||
.expect("Failed to parse S3 custom endpoint"),
|
||||
let access_key_id = std::env::var("AWS_ACCESS_KEY_ID").ok();
|
||||
let secret_access_key = std::env::var("AWS_SECRET_ACCESS_KEY").ok();
|
||||
// session token is used when authorizing through sso
|
||||
// which is typically the case when testing locally on a developer machine
|
||||
let session_token = std::env::var("AWS_SESSION_TOKEN").ok();
|
||||
|
||||
let client = if access_key_id.is_none() && secret_access_key.is_none() {
|
||||
debug!("Using IAM-based AWS access");
|
||||
S3Client::new_with(request_dispatcher, InstanceMetadataProvider::new(), region)
|
||||
} else {
|
||||
debug!(
|
||||
"Using credentials-based AWS access. Session token is set: {}",
|
||||
session_token.is_some()
|
||||
);
|
||||
config_builder.set_endpoint_resolver(Some(Arc::new(endpoint)));
|
||||
}
|
||||
let client = Client::from_conf(config_builder.build());
|
||||
S3Client::new_with(
|
||||
request_dispatcher,
|
||||
StaticProvider::new(
|
||||
access_key_id.unwrap_or_default(),
|
||||
secret_access_key.unwrap_or_default(),
|
||||
session_token,
|
||||
None,
|
||||
),
|
||||
region,
|
||||
)
|
||||
};
|
||||
|
||||
let prefix_in_bucket = aws_config.prefix_in_bucket.as_deref().map(|prefix| {
|
||||
let mut prefix = prefix;
|
||||
@@ -169,41 +182,16 @@ impl S3Bucket {
|
||||
}
|
||||
prefix
|
||||
});
|
||||
|
||||
Ok(Self {
|
||||
client,
|
||||
workdir,
|
||||
bucket_name: aws_config.bucket_name.clone(),
|
||||
prefix_in_bucket,
|
||||
concurrency_limiter: Semaphore::new(aws_config.concurrency_limit.get()),
|
||||
})
|
||||
}
|
||||
|
||||
fn s3_object_to_relative_path(&self, key: &str) -> RemotePath {
|
||||
let relative_path =
|
||||
match key.strip_prefix(self.prefix_in_bucket.as_deref().unwrap_or_default()) {
|
||||
Some(stripped) => stripped,
|
||||
// we rely on AWS to return properly prefixed paths
|
||||
// for requests with a certain prefix
|
||||
None => panic!(
|
||||
"Key {} does not start with bucket prefix {:?}",
|
||||
key, self.prefix_in_bucket
|
||||
),
|
||||
};
|
||||
RemotePath(
|
||||
relative_path
|
||||
.split(REMOTE_STORAGE_PREFIX_SEPARATOR)
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
|
||||
fn relative_path_to_s3_object(&self, path: &RemotePath) -> String {
|
||||
let mut full_path = self.prefix_in_bucket.clone().unwrap_or_default();
|
||||
for segment in path.0.iter() {
|
||||
full_path.push(REMOTE_STORAGE_PREFIX_SEPARATOR);
|
||||
full_path.push_str(segment.to_str().unwrap_or_default());
|
||||
}
|
||||
full_path
|
||||
}
|
||||
|
||||
async fn download_object(&self, request: GetObjectRequest) -> Result<Download, DownloadError> {
|
||||
let _guard = self
|
||||
.concurrency_limiter
|
||||
@@ -214,33 +202,20 @@ impl S3Bucket {
|
||||
|
||||
metrics::inc_get_object();
|
||||
|
||||
let get_object = self
|
||||
.client
|
||||
.get_object()
|
||||
.bucket(request.bucket)
|
||||
.key(request.key)
|
||||
.set_range(request.range)
|
||||
.send()
|
||||
.await;
|
||||
|
||||
match get_object {
|
||||
Ok(object_output) => {
|
||||
let metadata = object_output.metadata().cloned().map(StorageMetadata);
|
||||
Ok(Download {
|
||||
metadata,
|
||||
download_stream: Box::pin(io::BufReader::new(
|
||||
object_output.body.into_async_read(),
|
||||
)),
|
||||
})
|
||||
}
|
||||
Err(SdkError::ServiceError {
|
||||
err:
|
||||
GetObjectError {
|
||||
kind: GetObjectErrorKind::NoSuchKey(..),
|
||||
..
|
||||
},
|
||||
..
|
||||
}) => Err(DownloadError::NotFound),
|
||||
match self.client.get_object(request).await {
|
||||
Ok(object_output) => match object_output.body {
|
||||
None => {
|
||||
metrics::inc_get_object_fail();
|
||||
Err(DownloadError::Other(anyhow::anyhow!(
|
||||
"Got no body for the S3 object given"
|
||||
)))
|
||||
}
|
||||
Some(body) => Ok(Download {
|
||||
metadata: object_output.metadata.map(StorageMetadata),
|
||||
download_stream: Box::pin(io::BufReader::new(body.into_async_read())),
|
||||
}),
|
||||
},
|
||||
Err(RusotoError::Service(GetObjectError::NoSuchKey(_))) => Err(DownloadError::NotFound),
|
||||
Err(e) => {
|
||||
metrics::inc_get_object_fail();
|
||||
Err(DownloadError::Other(anyhow::anyhow!(
|
||||
@@ -253,7 +228,25 @@ impl S3Bucket {
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl RemoteStorage for S3Bucket {
|
||||
async fn list(&self) -> anyhow::Result<Vec<RemotePath>> {
|
||||
fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<RemoteObjectId> {
|
||||
let relative_path = strip_path_prefix(&self.workdir, local_path)?;
|
||||
let mut key = self.prefix_in_bucket.clone().unwrap_or_default();
|
||||
for segment in relative_path {
|
||||
key.push(REMOTE_STORAGE_PREFIX_SEPARATOR);
|
||||
key.push_str(&segment.to_string_lossy());
|
||||
}
|
||||
Ok(RemoteObjectId(key))
|
||||
}
|
||||
|
||||
fn local_path(&self, storage_path: &RemoteObjectId) -> anyhow::Result<PathBuf> {
|
||||
Ok(download_destination(
|
||||
storage_path,
|
||||
&self.workdir,
|
||||
self.prefix_in_bucket.as_deref(),
|
||||
))
|
||||
}
|
||||
|
||||
async fn list(&self) -> anyhow::Result<Vec<RemoteObjectId>> {
|
||||
let mut document_keys = Vec::new();
|
||||
|
||||
let mut continuation_token = None;
|
||||
@@ -268,11 +261,12 @@ impl RemoteStorage for S3Bucket {
|
||||
|
||||
let fetch_response = self
|
||||
.client
|
||||
.list_objects_v2()
|
||||
.bucket(self.bucket_name.clone())
|
||||
.set_prefix(self.prefix_in_bucket.clone())
|
||||
.set_continuation_token(continuation_token)
|
||||
.send()
|
||||
.list_objects_v2(ListObjectsV2Request {
|
||||
bucket: self.bucket_name.clone(),
|
||||
prefix: self.prefix_in_bucket.clone(),
|
||||
continuation_token,
|
||||
..ListObjectsV2Request::default()
|
||||
})
|
||||
.await
|
||||
.map_err(|e| {
|
||||
metrics::inc_list_objects_fail();
|
||||
@@ -283,7 +277,7 @@ impl RemoteStorage for S3Bucket {
|
||||
.contents
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.filter_map(|o| Some(self.s3_object_to_relative_path(o.key()?))),
|
||||
.filter_map(|o| Some(RemoteObjectId(o.key?))),
|
||||
);
|
||||
|
||||
match fetch_response.continuation_token {
|
||||
@@ -297,10 +291,13 @@ impl RemoteStorage for S3Bucket {
|
||||
|
||||
/// See the doc for `RemoteStorage::list_prefixes`
|
||||
/// Note: it won't include empty "directories"
|
||||
async fn list_prefixes(&self, prefix: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>> {
|
||||
async fn list_prefixes(
|
||||
&self,
|
||||
prefix: Option<&RemoteObjectId>,
|
||||
) -> anyhow::Result<Vec<RemoteObjectId>> {
|
||||
// get the passed prefix or if it is not set use prefix_in_bucket value
|
||||
let list_prefix = prefix
|
||||
.map(|p| self.relative_path_to_s3_object(p))
|
||||
.map(|p| p.0.clone())
|
||||
.or_else(|| self.prefix_in_bucket.clone())
|
||||
.map(|mut p| {
|
||||
// required to end with a separator
|
||||
@@ -325,12 +322,13 @@ impl RemoteStorage for S3Bucket {
|
||||
|
||||
let fetch_response = self
|
||||
.client
|
||||
.list_objects_v2()
|
||||
.bucket(self.bucket_name.clone())
|
||||
.set_prefix(list_prefix.clone())
|
||||
.set_continuation_token(continuation_token)
|
||||
.delimiter(REMOTE_STORAGE_PREFIX_SEPARATOR.to_string())
|
||||
.send()
|
||||
.list_objects_v2(ListObjectsV2Request {
|
||||
bucket: self.bucket_name.clone(),
|
||||
prefix: list_prefix.clone(),
|
||||
continuation_token,
|
||||
delimiter: Some(REMOTE_STORAGE_PREFIX_SEPARATOR.to_string()),
|
||||
..ListObjectsV2Request::default()
|
||||
})
|
||||
.await
|
||||
.map_err(|e| {
|
||||
metrics::inc_list_objects_fail();
|
||||
@@ -342,7 +340,7 @@ impl RemoteStorage for S3Bucket {
|
||||
.common_prefixes
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.filter_map(|o| Some(self.s3_object_to_relative_path(o.prefix()?))),
|
||||
.filter_map(|o| Some(RemoteObjectId(o.prefix?))),
|
||||
);
|
||||
|
||||
match fetch_response.continuation_token {
|
||||
@@ -358,7 +356,7 @@ impl RemoteStorage for S3Bucket {
|
||||
&self,
|
||||
from: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
|
||||
from_size_bytes: usize,
|
||||
to: &RemotePath,
|
||||
to: &RemoteObjectId,
|
||||
metadata: Option<StorageMetadata>,
|
||||
) -> anyhow::Result<()> {
|
||||
let _guard = self
|
||||
@@ -368,18 +366,17 @@ impl RemoteStorage for S3Bucket {
|
||||
.context("Concurrency limiter semaphore got closed during S3 upload")?;
|
||||
|
||||
metrics::inc_put_object();
|
||||
|
||||
let body = Body::wrap_stream(ReaderStream::new(from));
|
||||
let bytes_stream = ByteStream::new(SdkBody::from(body));
|
||||
|
||||
self.client
|
||||
.put_object()
|
||||
.bucket(self.bucket_name.clone())
|
||||
.key(self.relative_path_to_s3_object(to))
|
||||
.set_metadata(metadata.map(|m| m.0))
|
||||
.content_length(from_size_bytes.try_into()?)
|
||||
.body(bytes_stream)
|
||||
.send()
|
||||
.put_object(PutObjectRequest {
|
||||
body: Some(StreamingBody::new_with_size(
|
||||
ReaderStream::new(from),
|
||||
from_size_bytes,
|
||||
)),
|
||||
bucket: self.bucket_name.clone(),
|
||||
key: to.0.to_owned(),
|
||||
metadata: metadata.map(|m| m.0),
|
||||
..PutObjectRequest::default()
|
||||
})
|
||||
.await
|
||||
.map_err(|e| {
|
||||
metrics::inc_put_object_fail();
|
||||
@@ -388,10 +385,10 @@ impl RemoteStorage for S3Bucket {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn download(&self, from: &RemotePath) -> Result<Download, DownloadError> {
|
||||
async fn download(&self, from: &RemoteObjectId) -> Result<Download, DownloadError> {
|
||||
self.download_object(GetObjectRequest {
|
||||
bucket: self.bucket_name.clone(),
|
||||
key: self.relative_path_to_s3_object(from),
|
||||
key: from.0.to_owned(),
|
||||
..GetObjectRequest::default()
|
||||
})
|
||||
.await
|
||||
@@ -399,7 +396,7 @@ impl RemoteStorage for S3Bucket {
|
||||
|
||||
async fn download_byte_range(
|
||||
&self,
|
||||
from: &RemotePath,
|
||||
from: &RemoteObjectId,
|
||||
start_inclusive: u64,
|
||||
end_exclusive: Option<u64>,
|
||||
) -> Result<Download, DownloadError> {
|
||||
@@ -407,19 +404,20 @@ impl RemoteStorage for S3Bucket {
|
||||
// and needs both ends to be exclusive
|
||||
let end_inclusive = end_exclusive.map(|end| end.saturating_sub(1));
|
||||
let range = Some(match end_inclusive {
|
||||
Some(end_inclusive) => format!("bytes={start_inclusive}-{end_inclusive}"),
|
||||
None => format!("bytes={start_inclusive}-"),
|
||||
Some(end_inclusive) => format!("bytes={}-{}", start_inclusive, end_inclusive),
|
||||
None => format!("bytes={}-", start_inclusive),
|
||||
});
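// Worked example of the conversion above: start_inclusive = 0 with
// end_exclusive = Some(1024) yields the header value "bytes=0-1023", while
// end_exclusive = None yields the open-ended "bytes=0-".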
|
||||
|
||||
self.download_object(GetObjectRequest {
|
||||
bucket: self.bucket_name.clone(),
|
||||
key: self.relative_path_to_s3_object(from),
|
||||
key: from.0.to_owned(),
|
||||
range,
|
||||
..GetObjectRequest::default()
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()> {
|
||||
async fn delete(&self, remote_object_id: &RemoteObjectId) -> anyhow::Result<()> {
|
||||
let _guard = self
|
||||
.concurrency_limiter
|
||||
.acquire()
|
||||
@@ -429,10 +427,11 @@ impl RemoteStorage for S3Bucket {
|
||||
metrics::inc_delete_object();
|
||||
|
||||
self.client
|
||||
.delete_object()
|
||||
.bucket(self.bucket_name.clone())
|
||||
.key(self.relative_path_to_s3_object(path))
|
||||
.send()
|
||||
.delete_object(DeleteObjectRequest {
|
||||
bucket: self.bucket_name.clone(),
|
||||
key: remote_object_id.0.to_owned(),
|
||||
..DeleteObjectRequest::default()
|
||||
})
|
||||
.await
|
||||
.map_err(|e| {
|
||||
metrics::inc_delete_object_fail();
|
||||
@@ -441,3 +440,181 @@ impl RemoteStorage for S3Bucket {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use tempfile::tempdir;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_download_destination() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
let local_path = workdir.join("one").join("two").join("test_name");
|
||||
let relative_path = local_path.strip_prefix(&workdir)?;
|
||||
|
||||
let key = RemoteObjectId(format!(
|
||||
"{}{}",
|
||||
REMOTE_STORAGE_PREFIX_SEPARATOR,
|
||||
relative_path
|
||||
.iter()
|
||||
.map(|segment| segment.to_str().unwrap())
|
||||
.collect::<Vec<_>>()
|
||||
.join(&REMOTE_STORAGE_PREFIX_SEPARATOR.to_string()),
|
||||
));
|
||||
|
||||
assert_eq!(
|
||||
local_path,
|
||||
download_destination(&key, &workdir, None),
|
||||
"Download destination should consist of s3 path joined with the workdir prefix"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn storage_path_positive() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
|
||||
let segment_1 = "matching";
|
||||
let segment_2 = "file";
|
||||
let local_path = &workdir.join(segment_1).join(segment_2);
|
||||
|
||||
let storage = dummy_storage(workdir);
|
||||
|
||||
let expected_key = RemoteObjectId(format!(
|
||||
"{}{REMOTE_STORAGE_PREFIX_SEPARATOR}{segment_1}{REMOTE_STORAGE_PREFIX_SEPARATOR}{segment_2}",
|
||||
storage.prefix_in_bucket.as_deref().unwrap_or_default(),
|
||||
));
|
||||
|
||||
let actual_key = storage
|
||||
.remote_object_id(local_path)
|
||||
.expect("Matching path should map to S3 path normally");
|
||||
assert_eq!(
|
||||
expected_key,
|
||||
actual_key,
|
||||
"S3 key from the matching path should contain all segments after the workspace prefix, separated with S3 separator"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn storage_path_negatives() -> anyhow::Result<()> {
|
||||
#[track_caller]
|
||||
fn storage_path_error(storage: &S3Bucket, mismatching_path: &Path) -> String {
|
||||
match storage.remote_object_id(mismatching_path) {
|
||||
Ok(wrong_key) => panic!(
|
||||
"Expected path '{}' to error, but got S3 key: {:?}",
|
||||
mismatching_path.display(),
|
||||
wrong_key,
|
||||
),
|
||||
Err(e) => e.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
let storage = dummy_storage(workdir.clone());
|
||||
|
||||
let error_message = storage_path_error(&storage, &workdir);
|
||||
assert!(
|
||||
error_message.contains("Prefix and the path are equal"),
|
||||
"Message '{}' does not contain the required string",
|
||||
error_message
|
||||
);
|
||||
|
||||
let mismatching_path = PathBuf::from("somewhere").join("else");
|
||||
let error_message = storage_path_error(&storage, &mismatching_path);
|
||||
assert!(
|
||||
error_message.contains(mismatching_path.to_str().unwrap()),
|
||||
"Error should mention wrong path"
|
||||
);
|
||||
assert!(
|
||||
error_message.contains(workdir.to_str().unwrap()),
|
||||
"Error should mention server workdir"
|
||||
);
|
||||
assert!(
|
||||
error_message.contains("is not prefixed with"),
|
||||
"Message '{}' does not contain a required string",
|
||||
error_message
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn local_path_positive() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
let storage = dummy_storage(workdir.clone());
|
||||
let timeline_dir = workdir.join("timelines").join("test_timeline");
|
||||
let relative_timeline_path = timeline_dir.strip_prefix(&workdir)?;
|
||||
|
||||
let s3_key = create_s3_key(
|
||||
&relative_timeline_path.join("not a metadata"),
|
||||
storage.prefix_in_bucket.as_deref(),
|
||||
);
|
||||
assert_eq!(
|
||||
download_destination(&s3_key, &workdir, storage.prefix_in_bucket.as_deref()),
|
||||
storage
|
||||
.local_path(&s3_key)
|
||||
.expect("For a valid input, valid S3 info should be parsed"),
|
||||
"Should be able to parse metadata out of the correctly named remote delta file"
|
||||
);
|
||||
|
||||
let s3_key = create_s3_key(
|
||||
&relative_timeline_path.join("metadata"),
|
||||
storage.prefix_in_bucket.as_deref(),
|
||||
);
|
||||
assert_eq!(
|
||||
download_destination(&s3_key, &workdir, storage.prefix_in_bucket.as_deref()),
|
||||
storage
|
||||
.local_path(&s3_key)
|
||||
.expect("For a valid input, valid S3 info should be parsed"),
|
||||
"Should be able to parse metadata out of the correctly named remote metadata file"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn download_destination_matches_original_path() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
let original_path = workdir
|
||||
.join("timelines")
|
||||
.join("some_timeline")
|
||||
.join("some name");
|
||||
|
||||
let dummy_storage = dummy_storage(workdir);
|
||||
|
||||
let key = dummy_storage.remote_object_id(&original_path)?;
|
||||
let download_destination = dummy_storage.local_path(&key)?;
|
||||
|
||||
assert_eq!(
|
||||
original_path, download_destination,
|
||||
"'original path -> storage key -> matching fs path' transformation should produce the same path as the input one for the correct path"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn dummy_storage(workdir: PathBuf) -> S3Bucket {
|
||||
S3Bucket {
|
||||
workdir,
|
||||
client: S3Client::new("us-east-1".parse().unwrap()),
|
||||
bucket_name: "dummy-bucket".to_string(),
|
||||
prefix_in_bucket: Some("dummy_prefix/".to_string()),
|
||||
concurrency_limiter: Semaphore::new(1),
|
||||
}
|
||||
}
|
||||
|
||||
fn create_s3_key(relative_file_path: &Path, prefix: Option<&str>) -> RemoteObjectId {
|
||||
RemoteObjectId(relative_file_path.iter().fold(
|
||||
prefix.unwrap_or_default().to_string(),
|
||||
|mut path_string, segment| {
|
||||
path_string.push(REMOTE_STORAGE_PREFIX_SEPARATOR);
|
||||
path_string.push_str(segment.to_str().unwrap());
|
||||
path_string
|
||||
},
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,7 +4,6 @@ version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
sentry = "0.29.0"
|
||||
async-trait = "0.1"
|
||||
anyhow = "1.0"
|
||||
bincode = "1.3"
|
||||
|
||||
@@ -34,7 +34,6 @@ pub mod sock_split;
|
||||
pub mod logging;
|
||||
|
||||
pub mod lock_file;
|
||||
pub mod pid_file;
|
||||
|
||||
// Misc
|
||||
pub mod accum;
|
||||
@@ -47,7 +46,6 @@ pub mod tcp_listener;
|
||||
pub mod nonblock;
|
||||
|
||||
// Default signal handling
|
||||
pub mod sentry_init;
|
||||
pub mod signals;
|
||||
|
||||
pub mod fs_ext;
|
||||
|
||||
@@ -1,133 +1,81 @@
|
||||
//! A module to create and read lock files.
|
||||
//! A module to create and read lock files. A lock file ensures that only one
|
||||
//! process is running at a time, in a particular directory.
|
||||
//!
|
||||
//! File locking is done using [`fcntl::flock`] exclusive locks.
|
||||
//! The only consumer of this module is currently [`pid_file`].
|
||||
//! See the module-level comment there for potential pitfalls
|
||||
//! with lock files that are used to store PIDs (pidfiles).
|
||||
//! File locking is done using [`fcntl::flock`], which means that holding the
|
||||
//! lock on a file only prevents acquiring another lock on it; all other
//! operations are still possible on the file. Other processes can still open, read,
|
||||
//! write, or remove the file, for example.
|
||||
//! If the file is removed while a process is holding a lock on it,
|
||||
//! the process that holds the lock does not get any error or notification.
|
||||
//! Furthermore, you can create a new file with the same name and lock the new file,
|
||||
//! while the old process is still running.
|
||||
//! Deleting the lock file while the locking process is still running is a bad idea!
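// A minimal, self-contained sketch of the behaviour described above, assuming
// the anyhow, tempfile and nix crates (a nix version that still exposes
// `fcntl::flock`, as the code below does): the second non-blocking exclusive
// lock fails with EAGAIN while the first descriptor is open, yet nothing
// prevents the locked file from being removed.

use std::{fs, os::unix::prelude::AsRawFd};

use nix::{errno::Errno, fcntl};

fn main() -> anyhow::Result<()> {
    let dir = tempfile::tempdir()?;
    let path = dir.path().join("demo.lock");

    // The first descriptor takes the exclusive lock.
    let first = fs::OpenOptions::new().create(true).write(true).open(&path)?;
    fcntl::flock(first.as_raw_fd(), fcntl::FlockArg::LockExclusiveNonblock)?;

    // A second exclusive lock attempt on the same file fails immediately.
    let second = fs::OpenOptions::new().write(true).open(&path)?;
    let err = fcntl::flock(second.as_raw_fd(), fcntl::FlockArg::LockExclusiveNonblock)
        .expect_err("the file is already locked");
    assert_eq!(err, Errno::EAGAIN);

    // The lock does not protect the file itself: it can still be removed,
    // and the lock holder gets no notification.
    fs::remove_file(&path)?;
    Ok(())
}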
|
||||
|
||||
use std::{
|
||||
fs,
|
||||
io::{Read, Write},
|
||||
ops::Deref,
|
||||
os::unix::prelude::AsRawFd,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
use std::{fs, os::unix::prelude::AsRawFd, path::Path};
|
||||
|
||||
use anyhow::Context;
|
||||
use nix::{errno::Errno::EAGAIN, fcntl};
|
||||
use nix::fcntl;
|
||||
|
||||
use crate::crashsafe;
|
||||
|
||||
/// A handle to an open and unlocked, but not-yet-written lock file.
|
||||
/// Returned by [`create_exclusive`].
|
||||
#[must_use]
|
||||
pub struct UnwrittenLockFile {
|
||||
path: PathBuf,
|
||||
file: fs::File,
|
||||
pub enum LockCreationResult {
|
||||
Created {
|
||||
new_lock_contents: String,
|
||||
file: fs::File,
|
||||
},
|
||||
AlreadyLocked {
|
||||
existing_lock_contents: String,
|
||||
},
|
||||
CreationFailed(anyhow::Error),
|
||||
}
|
||||
|
||||
/// Returned by [`UnwrittenLockFile::write_content`].
|
||||
#[must_use]
|
||||
pub struct LockFileGuard(fs::File);
|
||||
|
||||
impl Deref for LockFileGuard {
|
||||
type Target = fs::File;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl UnwrittenLockFile {
|
||||
/// Replace the content of this lock file with the byte representation of `contents`.
|
||||
pub fn write_content(mut self, contents: String) -> anyhow::Result<LockFileGuard> {
|
||||
self.file
|
||||
.set_len(0)
|
||||
.context("Failed to truncate lockfile")?;
|
||||
self.file
|
||||
.write_all(contents.as_bytes())
|
||||
.with_context(|| format!("Failed to write '{contents}' contents into lockfile"))?;
|
||||
crashsafe::fsync_file_and_parent(&self.path).context("fsync lockfile")?;
|
||||
Ok(LockFileGuard(self.file))
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates and opens a lock file in the path, grabs an exclusive flock on it, and returns
|
||||
/// a handle that allows overwriting the locked file's content.
|
||||
///
|
||||
/// The exclusive lock is released when dropping the returned handle.
|
||||
///
|
||||
/// It is not an error if the file already exists.
|
||||
/// It is an error if the file is already locked.
|
||||
pub fn create_exclusive(lock_file_path: &Path) -> anyhow::Result<UnwrittenLockFile> {
|
||||
let lock_file = fs::OpenOptions::new()
|
||||
/// Creates a lock file in the path given and writes the given contents into the file.
|
||||
/// Note: The lock is automatically released when the file is closed. You might want to use Box::leak to make sure it lives until the end of the program.
|
||||
pub fn create_lock_file(lock_file_path: &Path, contents: String) -> LockCreationResult {
|
||||
let lock_file = match fs::OpenOptions::new()
|
||||
.create(true) // O_CREAT
|
||||
.write(true)
|
||||
.open(lock_file_path)
|
||||
.context("open lock file")?;
|
||||
|
||||
let res = fcntl::flock(
|
||||
lock_file.as_raw_fd(),
|
||||
fcntl::FlockArg::LockExclusiveNonblock,
|
||||
);
|
||||
match res {
|
||||
Ok(()) => Ok(UnwrittenLockFile {
|
||||
path: lock_file_path.to_owned(),
|
||||
file: lock_file,
|
||||
}),
|
||||
Err(EAGAIN) => anyhow::bail!("file is already locked"),
|
||||
Err(e) => Err(e).context("flock error"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returned by [`read_and_hold_lock_file`].
|
||||
/// Check out the [`pid_file`] module for what the variants mean
|
||||
/// and potential caveats of lock files that are used to store PIDs.
|
||||
pub enum LockFileRead {
|
||||
/// No file exists at the given path.
|
||||
NotExist,
|
||||
/// No other process held the lock file, so we grabbed an flock
|
||||
/// on it and read its contents.
|
||||
/// Release the flock by dropping the [`LockFileGuard`].
|
||||
NotHeldByAnyProcess(LockFileGuard, String),
|
||||
/// The file exists but another process was holding an flock on it.
|
||||
LockedByOtherProcess {
|
||||
not_locked_file: fs::File,
|
||||
content: String,
|
||||
},
|
||||
}
|
||||
|
||||
/// Open & try to lock the lock file at the given `path`, returning a [handle][`LockFileRead`] to
|
||||
/// inspect its content. It is not an `Err(...)` if the file does not exist or is already locked.
|
||||
/// Check the [`LockFileRead`] variants for details.
|
||||
pub fn read_and_hold_lock_file(path: &Path) -> anyhow::Result<LockFileRead> {
|
||||
let res = fs::OpenOptions::new().read(true).open(path);
|
||||
let mut lock_file = match res {
|
||||
Ok(f) => f,
|
||||
Err(e) => match e.kind() {
|
||||
std::io::ErrorKind::NotFound => return Ok(LockFileRead::NotExist),
|
||||
_ => return Err(e).context("open lock file"),
|
||||
},
|
||||
.context("Failed to open lock file")
|
||||
{
|
||||
Ok(file) => file,
|
||||
Err(e) => return LockCreationResult::CreationFailed(e),
|
||||
};
|
||||
let res = fcntl::flock(
|
||||
|
||||
match fcntl::flock(
|
||||
lock_file.as_raw_fd(),
|
||||
fcntl::FlockArg::LockExclusiveNonblock,
|
||||
);
|
||||
// We need the content regardless of lock success / failure.
|
||||
// But, read it after flock so that, if it succeeded, the content is consistent.
|
||||
let mut content = String::new();
|
||||
lock_file
|
||||
.read_to_string(&mut content)
|
||||
.context("read lock file")?;
|
||||
match res {
|
||||
Ok(()) => Ok(LockFileRead::NotHeldByAnyProcess(
|
||||
LockFileGuard(lock_file),
|
||||
content,
|
||||
)),
|
||||
Err(EAGAIN) => Ok(LockFileRead::LockedByOtherProcess {
|
||||
not_locked_file: lock_file,
|
||||
content,
|
||||
}),
|
||||
Err(e) => Err(e).context("flock error"),
|
||||
) {
|
||||
Ok(()) => {
|
||||
match lock_file
|
||||
.set_len(0)
|
||||
.context("Failed to truncate lockfile")
|
||||
.and_then(|()| {
|
||||
fs::write(lock_file_path, &contents).with_context(|| {
|
||||
format!("Failed to write '{contents}' contents into lockfile")
|
||||
})
|
||||
})
|
||||
.and_then(|()| {
|
||||
crashsafe::fsync_file_and_parent(lock_file_path)
|
||||
.context("Failed to fsync lockfile")
|
||||
}) {
|
||||
Ok(()) => LockCreationResult::Created {
|
||||
new_lock_contents: contents,
|
||||
file: lock_file,
|
||||
},
|
||||
Err(e) => LockCreationResult::CreationFailed(e),
|
||||
}
|
||||
}
|
||||
Err(nix::errno::Errno::EAGAIN) => {
|
||||
match fs::read_to_string(lock_file_path).context("Failed to read lockfile contents") {
|
||||
Ok(existing_lock_contents) => LockCreationResult::AlreadyLocked {
|
||||
existing_lock_contents,
|
||||
},
|
||||
Err(e) => LockCreationResult::CreationFailed(e),
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
LockCreationResult::CreationFailed(anyhow::anyhow!("Failed to lock lockfile: {e}"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,165 +0,0 @@
|
||||
//! Abstraction to create & read pidfiles.
|
||||
//!
|
||||
//! A pidfile is a file in the filesystem that stores a process's PID.
|
||||
//! Its purpose is to implement a singleton behavior where only
|
||||
//! one process of some "kind" is supposed to be running at a given time.
|
||||
//! The "kind" is identified by the pidfile.
|
||||
//!
|
||||
//! During process startup, the process that is supposed to be a singleton
|
||||
//! must [claim][`claim_for_current_process`] the pidfile first.
|
||||
//! If that is unsuccessful, the process must not act as the singleton, i.e.,
|
||||
//! it must not access any of the resources that only the singleton may access.
|
||||
//!
|
||||
//! A common need is to signal a running singleton process, e.g., to make
|
||||
//! it shut down and exit.
|
||||
//! For that, we have to [`read`] the pidfile. The result of the `read` operation
|
||||
//! tells us if there is any singleton process, and if so, what PID it has.
|
||||
//! We can then proceed to signal it, although some caveats still apply.
|
||||
//! Read the function-level documentation of [`read`] for that.
|
||||
//!
|
||||
//! ## Never Remove Pidfiles
|
||||
//!
|
||||
//! It would be natural to assume that the process who claimed the pidfile
|
||||
//! should remove it upon exit to avoid leaving a stale pidfile in place.
|
||||
//! However, we already have a reliable way to detect staleness of the pidfile,
|
||||
//! i.e., the `flock` that [claiming][`claim_for_current_process`] puts on it.
|
||||
//!
|
||||
//! And further, removing pidfiles would introduce a **catastrophic race condition**
|
||||
//! where two processes are running that are supposed to be singletons.
|
||||
//! Suppose we were to remove our pidfile during process shutdown.
|
||||
//! Here is how the race plays out:
|
||||
//! - Suppose we have a service called `myservice` with pidfile `myservice.pidfile`.
|
||||
//! - Process `A` starts to shut down.
|
||||
//! - Process `B` is just starting up
|
||||
//! - It `open("myservice.pid", O_WRONLY|O_CREAT)` the file
|
||||
//! - It blocks on `flock`
|
||||
//! - Process `A` removes the pidfile as the last step of its shutdown procedure
|
||||
//! - `unlink("myservice.pid")`
|
||||
//! - Process `A` exits
|
||||
//! - This releases its `flock` and unblocks `B`
|
||||
//! - Process `B` still has the file descriptor for `myservice.pid` open
|
||||
//! - Process `B` writes its PID into `myservice.pid`.
|
||||
//! - But the `myservice.pid` file has been unlinked, so, there is no `myservice.pid`
|
||||
//! in the directory.
|
||||
//! - Process `C` starts
|
||||
//! - It `open("myservice.pid", O_WRONLY|O_CREAT)` which creates a new file (new inode)
|
||||
//! - It `flock`s the file, which, since it's a different file, does not block
|
||||
//! - It writes its PID into the file
|
||||
//!
|
||||
//! At this point, `B` and `C` are running, which is hazardous.
|
||||
//! Morale of the story: don't unlink pidfiles, ever.
|
||||
|
||||
use std::{ops::Deref, path::Path};
|
||||
|
||||
use anyhow::Context;
|
||||
use nix::unistd::Pid;
|
||||
|
||||
use crate::lock_file::{self, LockFileRead};
|
||||
|
||||
/// Keeps a claim on a pidfile alive until it is dropped.
|
||||
/// Returned by [`claim_for_current_process`].
|
||||
#[must_use]
|
||||
pub struct PidFileGuard(lock_file::LockFileGuard);
|
||||
|
||||
impl Deref for PidFileGuard {
|
||||
type Target = lock_file::LockFileGuard;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
/// Try to claim `path` as a pidfile for the current process.
|
||||
///
|
||||
/// If another process has already claimed the pidfile, and it is still running,
|
||||
/// this function returns an error.
|
||||
/// Otherwise, the function `flock`s the file and updates its contents to the
|
||||
/// current process's PID.
|
||||
/// If the update fails, the flock is released and an error returned.
|
||||
/// On success, the function returns a [`PidFileGuard`] to keep the flock alive.
|
||||
///
|
||||
/// ### Maintaining A Claim
|
||||
///
|
||||
/// It is the caller's responsibility to maintain the claim.
|
||||
/// The claim ends as soon as the returned guard object is dropped.
|
||||
/// To maintain the claim for the remaining lifetime of the current process,
|
||||
/// use [`std::mem::forget`] or similar.
|
||||
pub fn claim_for_current_process(path: &Path) -> anyhow::Result<PidFileGuard> {
|
||||
let unwritten_lock_file = lock_file::create_exclusive(path).context("lock file")?;
|
||||
// if any of the next steps fail, we drop the file descriptor and thereby release the lock
|
||||
let guard = unwritten_lock_file
|
||||
.write_content(Pid::this().to_string())
|
||||
.context("write pid to lock file")?;
|
||||
Ok(PidFileGuard(guard))
|
||||
}
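// A sketch of the intended call pattern, written as if inside this module; the
// `workdir` argument and the "my_daemon.pid" file name are hypothetical. The
// deliberate leak keeps the claim for the rest of the process lifetime.
fn become_singleton(workdir: &Path) -> anyhow::Result<()> {
    let pid_file_path = workdir.join("my_daemon.pid");
    let guard = claim_for_current_process(&pid_file_path).context("claim pid file")?;
    // Dropping the guard would release the claim, so deliberately leak it.
    std::mem::forget(guard);
    Ok(())
}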
|
||||
|
||||
/// Returned by [`read`].
|
||||
pub enum PidFileRead {
|
||||
/// No file exists at the given path.
|
||||
NotExist,
|
||||
/// The given pidfile is currently not claimed by any process.
|
||||
/// To determine this, the [`read`] operation acquired
|
||||
/// an exclusive flock on the file. The lock is still held and responsibility
|
||||
/// to release it is returned through the guard object.
|
||||
/// Before releasing it, other [`claim_for_current_process`] or [`read`] calls
|
||||
/// will fail.
|
||||
///
|
||||
/// ### Caveats
|
||||
///
|
||||
/// Do not unlink the pidfile from the filesystem. See module-comment for why.
|
||||
NotHeldByAnyProcess(PidFileGuard),
|
||||
/// The given pidfile is still claimed by another process whose PID is given
|
||||
/// as part of this variant.
|
||||
///
|
||||
/// ### Caveats
|
||||
///
|
||||
/// 1. The other process might exit at any time, turning the given PID stale.
|
||||
/// 2. There is a small window in which `claim_for_current_process` has already
|
||||
/// locked the file but not yet updated its contents. [`read`] will return
|
||||
/// this variant here, but with the old file contents, i.e., a stale PID.
|
||||
///
|
||||
/// The kernel is free to recycle PID once it has been `wait(2)`ed upon by
|
||||
/// its creator. Thus, acting upon a stale PID, e.g., by issuing a `kill`
|
||||
/// system call on it, bears the risk of killing an unrelated process.
|
||||
/// This is an inherent limitation of using pidfiles.
|
||||
/// The only race-free solution is to have a supervisor-process with a lifetime
|
||||
/// that exceeds that of all of its child-processes (e.g., `runit`, `supervisord`).
|
||||
LockedByOtherProcess(Pid),
|
||||
}
|
||||
|
||||
/// Try to read the file at the given path as a pidfile that was previously created
|
||||
/// through [`claim_for_current_process`].
|
||||
///
|
||||
/// On success, this function returns a [`PidFileRead`].
|
||||
/// Check its docs for a description of the meaning of its different variants.
|
||||
pub fn read(pidfile: &Path) -> anyhow::Result<PidFileRead> {
|
||||
let res = lock_file::read_and_hold_lock_file(pidfile).context("read and hold pid file")?;
|
||||
let ret = match res {
|
||||
LockFileRead::NotExist => PidFileRead::NotExist,
|
||||
LockFileRead::NotHeldByAnyProcess(guard, _) => {
|
||||
PidFileRead::NotHeldByAnyProcess(PidFileGuard(guard))
|
||||
}
|
||||
LockFileRead::LockedByOtherProcess {
|
||||
not_locked_file: _not_locked_file,
|
||||
content,
|
||||
} => {
|
||||
// XXX the read races with the write in claim_pid_file_for_pid().
|
||||
// But pids are smaller than a page, so the kernel page cache will lock for us.
|
||||
// The only problem is that we might get the old contents here.
|
||||
// Can only fix that by implementing some scheme that downgrades the
|
||||
// exclusive lock to shared lock in claim_pid_file_for_pid().
|
||||
PidFileRead::LockedByOtherProcess(parse_pidfile_content(&content)?)
|
||||
}
|
||||
};
|
||||
Ok(ret)
|
||||
}
|
||||
|
||||
fn parse_pidfile_content(content: &str) -> anyhow::Result<Pid> {
|
||||
let pid: i32 = content
|
||||
.parse()
|
||||
.map_err(|_| anyhow::anyhow!("parse pidfile content to PID"))?;
|
||||
if pid < 1 {
|
||||
anyhow::bail!("bad value in pidfile '{pid}'");
|
||||
}
|
||||
Ok(Pid::from_raw(pid))
|
||||
}
|
||||
@@ -1,27 +0,0 @@
|
||||
use sentry::ClientInitGuard;
|
||||
use std::borrow::Cow;
|
||||
use std::env;
|
||||
|
||||
pub use sentry::release_name;
|
||||
|
||||
#[must_use]
|
||||
pub fn init_sentry(
|
||||
release_name: Option<Cow<'static, str>>,
|
||||
extra_options: &[(&str, &str)],
|
||||
) -> Option<ClientInitGuard> {
|
||||
let dsn = env::var("SENTRY_DSN").ok()?;
|
||||
|
||||
let guard = sentry::init((
|
||||
dsn,
|
||||
sentry::ClientOptions {
|
||||
release: release_name,
|
||||
..Default::default()
|
||||
},
|
||||
));
|
||||
sentry::configure_scope(|scope| {
|
||||
for &(key, value) in extra_options {
|
||||
scope.set_extra(key, value.into());
|
||||
}
|
||||
});
|
||||
Some(guard)
|
||||
}
|
||||
@@ -18,7 +18,7 @@ async-stream = "0.3"
|
||||
async-trait = "0.1"
|
||||
byteorder = "1.4.3"
|
||||
bytes = "1.0.1"
|
||||
chrono = { version = "0.4.23", default-features = false, features = ["clock"] }
|
||||
chrono = "0.4.19"
|
||||
clap = { version = "4.0", features = ["string"] }
|
||||
close_fds = "0.3.2"
|
||||
const_format = "0.2.21"
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
## Pageserver Benchmarks
|
||||
|
||||
# How to run
|
||||
|
||||
To run all benchmarks:
|
||||
`cargo bench`
|
||||
|
||||
To run a specific file:
|
||||
`cargo bench --bench bench_layer_map`
|
||||
|
||||
To run a specific function:
|
||||
`cargo bench --bench bench_layer_map -- real_map_uniform_queries`
|
||||
File diff suppressed because it is too large
@@ -431,7 +431,7 @@ fn pg_record(will_init: bool, bytes: &'static [u8]) -> NeonWalRecord {
|
||||
struct Request {
|
||||
key: Key,
|
||||
lsn: Lsn,
|
||||
base_img: Option<(Lsn, Bytes)>,
|
||||
base_img: Option<Bytes>,
|
||||
records: Vec<(Lsn, NeonWalRecord)>,
|
||||
pg_version: u32,
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -11,8 +11,8 @@
|
||||
//!
|
||||
//! Example use:
|
||||
//! ```
|
||||
//! $ ls test_output/test_pgbench\[neon-45-684\]/repo/tenants/$TENANT/timelines/$TIMELINE | \
|
||||
//! $ grep "__" | cargo run --release --bin draw_timeline_dir > out.svg
|
||||
//! $ cd test_output/test_pgbench\[neon-45-684\]/repo/tenants/$TENANT/timelines/$TIMELINE
|
||||
//! $ ls | grep "__" | cargo run --release --bin draw_timeline_dir > out.svg
|
||||
//! $ firefox out.svg
|
||||
//! ```
|
||||
//!
|
||||
@@ -25,8 +25,6 @@ use anyhow::Result;
|
||||
use pageserver::repository::Key;
|
||||
use std::cmp::Ordering;
|
||||
use std::io::{self, BufRead};
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
use std::{
|
||||
collections::{BTreeMap, BTreeSet},
|
||||
ops::Range,
|
||||
@@ -67,11 +65,7 @@ fn main() -> Result<()> {
|
||||
let mut ranges: Vec<(Range<Key>, Range<Lsn>)> = vec![];
|
||||
let stdin = io::stdin();
|
||||
for line in stdin.lock().lines() {
|
||||
let line = line.unwrap();
|
||||
let line = PathBuf::from_str(&line).unwrap();
|
||||
let filename = line.file_name().unwrap();
|
||||
let filename = filename.to_str().unwrap();
|
||||
let range = parse_filename(filename);
|
||||
let range = parse_filename(&line.unwrap());
|
||||
ranges.push(range);
|
||||
}
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ use std::{env, ops::ControlFlow, path::Path, str::FromStr};
|
||||
use anyhow::{anyhow, Context};
|
||||
use clap::{Arg, ArgAction, Command};
|
||||
use fail::FailScenario;
|
||||
use nix::unistd::Pid;
|
||||
use tracing::*;
|
||||
|
||||
use metrics::set_build_info_metric;
|
||||
@@ -22,10 +23,9 @@ use pageserver::{
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use utils::{
|
||||
auth::JwtAuth,
|
||||
logging,
|
||||
lock_file, logging,
|
||||
postgres_backend::AuthType,
|
||||
project_git_version,
|
||||
sentry_init::{init_sentry, release_name},
|
||||
signals::{self, Signal},
|
||||
tcp_listener,
|
||||
};
|
||||
@@ -85,9 +85,6 @@ fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
};
|
||||
|
||||
// initialize sentry if SENTRY_DSN is provided
|
||||
let _sentry_guard = init_sentry(release_name!(), &[("node_id", &conf.id.to_string())]);
|
||||
|
||||
let tenants_path = conf.tenants_path();
|
||||
if !tenants_path.exists() {
|
||||
utils::crashsafe::create_dir_all(conf.tenants_path()).with_context(|| {
|
||||
@@ -219,13 +216,30 @@ fn start_pageserver(conf: &'static PageServerConf) -> anyhow::Result<()> {
|
||||
}
|
||||
|
||||
let lock_file_path = conf.workdir.join(PID_FILE_NAME);
|
||||
let lock_file =
|
||||
utils::pid_file::claim_for_current_process(&lock_file_path).context("claim pid file")?;
|
||||
info!("Claimed pid file at {lock_file_path:?}");
|
||||
|
||||
let lock_file = match lock_file::create_lock_file(&lock_file_path, Pid::this().to_string()) {
|
||||
lock_file::LockCreationResult::Created {
|
||||
new_lock_contents,
|
||||
file,
|
||||
} => {
|
||||
info!("Created lock file at {lock_file_path:?} with contenst {new_lock_contents}");
|
||||
file
|
||||
}
|
||||
lock_file::LockCreationResult::AlreadyLocked {
|
||||
existing_lock_contents,
|
||||
} => anyhow::bail!(
|
||||
"Could not lock pid file; pageserver is already running in {:?} with PID {}",
|
||||
conf.workdir,
|
||||
existing_lock_contents
|
||||
),
|
||||
lock_file::LockCreationResult::CreationFailed(e) => {
|
||||
return Err(e.context(format!("Failed to create lock file at {lock_file_path:?}")))
|
||||
}
|
||||
};
|
||||
// ensure that the lock file is held even if the main thread of the process panics
|
||||
// we need to release the lock file only when the current process is gone
|
||||
std::mem::forget(lock_file);
|
||||
let _ = Box::leak(Box::new(lock_file));
|
||||
|
||||
info!("Created PID file with PID {}", Pid::this().to_string());
|
||||
|
||||
// TODO: Check that it looks like a valid repository before going further
|
||||
|
||||
@@ -280,23 +294,15 @@ fn start_pageserver(conf: &'static PageServerConf) -> anyhow::Result<()> {
|
||||
let remote_storage = conf
|
||||
.remote_storage_config
|
||||
.as_ref()
|
||||
.map(GenericRemoteStorage::from_config)
|
||||
.map(|storage_config| {
|
||||
GenericRemoteStorage::from_config(conf.workdir.clone(), storage_config)
|
||||
})
|
||||
.transpose()
|
||||
.context("Failed to init generic remote storage")?;
|
||||
|
||||
let (init_result_sender, init_result_receiver) =
|
||||
std::sync::mpsc::channel::<anyhow::Result<()>>();
|
||||
let storage_for_spawn = remote_storage.clone();
|
||||
let _handler = BACKGROUND_RUNTIME.spawn(async move {
|
||||
let result = tenant_mgr::init_tenant_mgr(conf, storage_for_spawn).await;
|
||||
init_result_sender.send(result)
|
||||
});
|
||||
match init_result_receiver.recv() {
|
||||
Ok(init_result) => init_result.context("Failed to init tenant_mgr")?,
|
||||
Err(_sender_dropped_err) => {
|
||||
anyhow::bail!("Failed to init tenant_mgr: no init status was returned");
|
||||
}
|
||||
}
|
||||
{
|
||||
let _rt_guard = BACKGROUND_RUNTIME.enter();
|
||||
tenant_mgr::init_tenant_mgr(conf, remote_storage.clone())?
|
||||
};
|
||||
|
||||
// Spawn all HTTP related tasks in the MGMT_REQUEST_RUNTIME.
|
||||
// bind before launching separate thread so the error reported before startup exits
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
//! See also `settings.md` for better description on every parameter.
|
||||
|
||||
use anyhow::{anyhow, bail, ensure, Context, Result};
|
||||
use remote_storage::{RemotePath, RemoteStorageConfig};
|
||||
use remote_storage::RemoteStorageConfig;
|
||||
use std::env;
|
||||
use utils::crashsafe::path_with_suffix_extension;
|
||||
use utils::id::ConnectionId;
|
||||
@@ -27,9 +27,7 @@ use utils::{
|
||||
|
||||
use crate::tenant::{TENANT_ATTACHING_MARKER_FILENAME, TIMELINES_SEGMENT_NAME};
|
||||
use crate::tenant_config::{TenantConf, TenantConfOpt};
|
||||
use crate::{
|
||||
IGNORED_TENANT_FILE_NAME, METADATA_FILE_NAME, TENANT_CONFIG_NAME, TIMELINE_UNINIT_MARK_SUFFIX,
|
||||
};
|
||||
use crate::{METADATA_FILE_NAME, TENANT_CONFIG_NAME, TIMELINE_UNINIT_MARK_SUFFIX};
|
||||
|
||||
pub mod defaults {
|
||||
use crate::tenant_config::defaults::*;
|
||||
@@ -333,6 +331,10 @@ impl PageServerConfigBuilder {
|
||||
}
|
||||
|
||||
pub fn build(self) -> anyhow::Result<PageServerConf> {
|
||||
let broker_endpoints = self
|
||||
.broker_endpoints
|
||||
.ok_or(anyhow!("No broker endpoints provided"))?;
|
||||
|
||||
Ok(PageServerConf {
|
||||
listen_pg_addr: self
|
||||
.listen_pg_addr
|
||||
@@ -368,9 +370,7 @@ impl PageServerConfigBuilder {
|
||||
profiling: self.profiling.ok_or(anyhow!("missing profiling"))?,
|
||||
// TenantConf is handled separately
|
||||
default_tenant_conf: TenantConf::default(),
|
||||
broker_endpoints: self
|
||||
.broker_endpoints
|
||||
.ok_or(anyhow!("No broker endpoints provided"))?,
|
||||
broker_endpoints,
|
||||
broker_etcd_prefix: self
|
||||
.broker_etcd_prefix
|
||||
.ok_or(anyhow!("missing broker_etcd_prefix"))?,
|
||||
@@ -402,10 +402,6 @@ impl PageServerConf {
|
||||
.join(TENANT_ATTACHING_MARKER_FILENAME)
|
||||
}
|
||||
|
||||
pub fn tenant_ignore_mark_file_path(&self, tenant_id: TenantId) -> PathBuf {
|
||||
self.tenant_path(&tenant_id).join(IGNORED_TENANT_FILE_NAME)
|
||||
}
|
||||
|
||||
/// Points to a place in pageserver's local directory,
|
||||
/// where certain tenant's tenantconf file should be located.
|
||||
pub fn tenant_config_path(&self, tenant_id: TenantId) -> PathBuf {
|
||||
@@ -454,28 +450,6 @@ impl PageServerConf {
|
||||
.join(METADATA_FILE_NAME)
|
||||
}
|
||||
|
||||
/// Files on the remote storage are stored with paths, relative to the workdir.
|
||||
/// That path includes both the tenant and timeline ids, which makes the remote storage path unique.
|
||||
///
|
||||
/// Errors if the path provided does not start from pageserver's workdir.
|
||||
pub fn remote_path(&self, local_path: &Path) -> anyhow::Result<RemotePath> {
|
||||
local_path
|
||||
.strip_prefix(&self.workdir)
|
||||
.context("Failed to strip workdir prefix")
|
||||
.and_then(RemotePath::new)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to resolve remote part of path {:?} for base {:?}",
|
||||
local_path, self.workdir
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
/// Turns storage remote path of a file into its local path.
|
||||
pub fn local_path(&self, remote_path: &RemotePath) -> PathBuf {
|
||||
remote_path.with_base(&self.workdir)
|
||||
}
|
||||
|
||||
//
|
||||
// Postgres distribution paths
|
||||
//
|
||||
@@ -512,7 +486,7 @@ impl PageServerConf {
|
||||
let mut builder = PageServerConfigBuilder::default();
|
||||
builder.workdir(workdir.to_owned());
|
||||
|
||||
let mut t_conf = TenantConfOpt::default();
|
||||
let mut t_conf: TenantConfOpt = Default::default();
|
||||
|
||||
for (key, item) in toml.iter() {
|
||||
match key {
|
||||
@@ -643,12 +617,6 @@ impl PageServerConf {
|
||||
if let Some(max_lsn_wal_lag) = item.get("max_lsn_wal_lag") {
|
||||
t_conf.max_lsn_wal_lag = Some(parse_toml_from_str("max_lsn_wal_lag", max_lsn_wal_lag)?);
|
||||
}
|
||||
if let Some(trace_read_requests) = item.get("trace_read_requests") {
|
||||
t_conf.trace_read_requests =
|
||||
Some(trace_read_requests.as_bool().with_context(|| {
|
||||
"configure option trace_read_requests is not a bool".to_string()
|
||||
})?);
|
||||
}
|
||||
|
||||
Ok(t_conf)
|
||||
}
|
||||
@@ -1048,35 +1016,6 @@ broker_endpoints = ['{broker_endpoint}']
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_tenant_config() -> anyhow::Result<()> {
|
||||
let tempdir = tempdir()?;
|
||||
let (workdir, pg_distrib_dir) = prepare_fs(&tempdir)?;
|
||||
|
||||
let broker_endpoint = "http://127.0.0.1:7777";
|
||||
let trace_read_requests = true;
|
||||
|
||||
let config_string = format!(
|
||||
r#"{ALL_BASE_VALUES_TOML}
|
||||
pg_distrib_dir='{}'
|
||||
broker_endpoints = ['{broker_endpoint}']
|
||||
|
||||
[tenant_config]
|
||||
trace_read_requests = {trace_read_requests}"#,
|
||||
pg_distrib_dir.display(),
|
||||
);
|
||||
|
||||
let toml = config_string.parse()?;
|
||||
|
||||
let conf = PageServerConf::parse_and_validate(&toml, &workdir)?;
|
||||
assert_eq!(
|
||||
conf.default_tenant_conf.trace_read_requests, trace_read_requests,
|
||||
"Tenant config from pageserver config file should be parsed and udpated values used as defaults for all tenants",
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn prepare_fs(tempdir: &TempDir) -> anyhow::Result<(PathBuf, PathBuf)> {
|
||||
let tempdir_path = tempdir.path();
|
||||
|
||||
|
||||
@@ -274,7 +274,6 @@ paths:
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
|
||||
post:
|
||||
description: Schedules attach operation to happen in the background for given tenant
|
||||
responses:
|
||||
@@ -326,9 +325,7 @@ paths:
|
||||
type: string
|
||||
format: hex
|
||||
post:
|
||||
description: |
|
||||
Remove tenant data (including all corresponding timelines) from pageserver's memory and file system.
|
||||
Files on the remote storage are not affected.
|
||||
description: Detach local tenant
|
||||
responses:
|
||||
"200":
|
||||
description: Tenant detached
|
||||
@@ -357,92 +354,6 @@ paths:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
|
||||
/v1/tenant/{tenant_id}/ignore:
|
||||
parameters:
|
||||
- name: tenant_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
post:
|
||||
description: |
|
||||
Remove tenant data (including all corresponding timelines) from pageserver's memory.
|
||||
Files on local disk and remote storage are not affected.
|
||||
|
||||
Future pageserver restarts won't load the data back until `load` is called on such tenant.
|
||||
responses:
|
||||
"200":
|
||||
description: Tenant ignored
|
||||
"400":
|
||||
description: Error when no tenant id found in path parameters
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
"401":
|
||||
description: Unauthorized Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/UnauthorizedError"
|
||||
"403":
|
||||
description: Forbidden Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ForbiddenError"
|
||||
"500":
|
||||
description: Generic operation error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
|
||||
/v1/tenant/{tenant_id}/load:
|
||||
parameters:
|
||||
- name: tenant_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
post:
|
||||
description: |
|
||||
Schedules an operation that attempts to load a tenant from the local disk and
|
||||
synchronise it with the remote storage (if enabled), repeating pageserver's restart logic for tenant load.
|
||||
If the tenant was ignored before, removes the ignore mark and continues with load scheduling.
|
||||
|
||||
Errors if the tenant is absent on disk, already present in memory or fails to schedule its load.
|
||||
Scheduling a load does not mean that the tenant will load successfully; check tenant status to ensure load correctness.
|
||||
responses:
|
||||
"202":
|
||||
description: Tenant scheduled to load successfully
|
||||
"400":
|
||||
description: Error when no tenant id found in path parameters
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
"401":
|
||||
description: Unauthorized Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/UnauthorizedError"
|
||||
"403":
|
||||
description: Forbidden Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ForbiddenError"
|
||||
"500":
|
||||
description: Generic operation error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
|
||||
/v1/tenant/{tenant_id}/size:
|
||||
parameters:
|
||||
- name: tenant_id
|
||||
@@ -748,6 +659,7 @@ components:
|
||||
- tenant_id
|
||||
- last_record_lsn
|
||||
- disk_consistent_lsn
|
||||
- awaits_download
|
||||
- state
|
||||
- latest_gc_cutoff_lsn
|
||||
properties:
|
||||
@@ -790,6 +702,8 @@ components:
|
||||
format: hex
|
||||
last_received_msg_ts:
|
||||
type: integer
|
||||
awaits_download:
|
||||
type: boolean
|
||||
state:
|
||||
type: string
|
||||
latest_gc_cutoff_lsn:
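The `/ignore` and `/load` endpoints described in the spec fragment above are plain POSTs against the pageserver management API. Below is a minimal sketch of driving them from Rust with reqwest; the listen address http://127.0.0.1:9898 and the absence of an auth token are illustrative assumptions, not something the spec defines.

// Sketch: ignore a tenant, then schedule it to be loaded again.
// Assumes a pageserver management API at 127.0.0.1:9898 with auth disabled.
use anyhow::ensure;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let base = "http://127.0.0.1:9898/v1/tenant";
    let tenant_id = std::env::args().nth(1).expect("tenant id argument");
    let client = reqwest::Client::new();

    // POST /v1/tenant/{tenant_id}/ignore -> 200; tenant is dropped from memory only
    let resp = client.post(format!("{base}/{tenant_id}/ignore")).send().await?;
    ensure!(resp.status().is_success(), "ignore failed: {}", resp.status());

    // POST /v1/tenant/{tenant_id}/load -> 202; the load is scheduled, not completed
    let resp = client.post(format!("{base}/{tenant_id}/load")).send().await?;
    ensure!(
        resp.status() == reqwest::StatusCode::ACCEPTED,
        "load failed: {}",
        resp.status()
    );

    Ok(())
}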
@@ -3,7 +3,9 @@ use std::sync::Arc;
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use hyper::StatusCode;
|
||||
use hyper::{Body, Request, Response, Uri};
|
||||
use pageserver_api::models::TenantState;
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use tokio::task::JoinError;
|
||||
use tracing::*;
|
||||
|
||||
use super::models::{
|
||||
@@ -80,11 +82,12 @@ fn check_permission(request: &Request<Body>, tenant_id: Option<TenantId>) -> Res
|
||||
|
||||
// Helper function to construct a TimelineInfo struct for a timeline
|
||||
fn build_timeline_info(
|
||||
tenant_state: TenantState,
|
||||
timeline: &Arc<Timeline>,
|
||||
include_non_incremental_logical_size: bool,
|
||||
include_non_incremental_physical_size: bool,
|
||||
) -> anyhow::Result<TimelineInfo> {
|
||||
let mut info = build_timeline_info_common(timeline)?;
|
||||
let mut info = build_timeline_info_common(tenant_state, timeline)?;
|
||||
if include_non_incremental_logical_size {
|
||||
info.current_logical_size_non_incremental =
|
||||
Some(timeline.get_current_logical_size_non_incremental(info.last_record_lsn)?);
|
||||
@@ -96,7 +99,10 @@ fn build_timeline_info(
|
||||
Ok(info)
|
||||
}
|
||||
|
||||
fn build_timeline_info_common(timeline: &Arc<Timeline>) -> anyhow::Result<TimelineInfo> {
|
||||
fn build_timeline_info_common(
|
||||
tenant_state: TenantState,
|
||||
timeline: &Arc<Timeline>,
|
||||
) -> anyhow::Result<TimelineInfo> {
|
||||
let last_record_lsn = timeline.get_last_record_lsn();
|
||||
let (wal_source_connstr, last_received_msg_lsn, last_received_msg_ts) = {
|
||||
let guard = timeline.last_received_wal.lock().unwrap();
|
||||
@@ -148,6 +154,10 @@ fn build_timeline_info_common(timeline: &Arc<Timeline>) -> anyhow::Result<Timeli
|
||||
|
||||
state,
|
||||
|
||||
// XXX bring back tracking of downloads per timeline, or, introduce
|
||||
// an 'Attaching' state for the timeline and get rid of this field.
|
||||
awaits_download: tenant_state == TenantState::Attaching,
|
||||
|
||||
// Duplicate some fields in 'local' and 'remote' fields, for backwards-compatibility
|
||||
// with the control plane.
|
||||
local: LocalTimelineInfo {
|
||||
@@ -179,9 +189,7 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
|
||||
.new_timeline_id
|
||||
.unwrap_or_else(TimelineId::generate);
|
||||
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true)
|
||||
.await
|
||||
.map_err(ApiError::NotFound)?;
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
|
||||
match tenant.create_timeline(
|
||||
new_timeline_id,
|
||||
request_data.ancestor_timeline_id.map(TimelineId::from),
|
||||
@@ -192,7 +200,7 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
|
||||
.await {
|
||||
Ok(Some(new_timeline)) => {
|
||||
// Created. Construct a TimelineInfo for it.
|
||||
let timeline_info = build_timeline_info_common(&new_timeline)
|
||||
let timeline_info = build_timeline_info_common(tenant.current_state(), &new_timeline)
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
json_response(StatusCode::CREATED, timeline_info)
|
||||
}
|
||||
@@ -209,29 +217,26 @@ async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>,
|
||||
query_param_present(&request, "include-non-incremental-physical-size");
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let response_data = async {
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true)
|
||||
.await
|
||||
.map_err(ApiError::NotFound)?;
|
||||
let timelines = tenant.list_timelines();
|
||||
let _entered = info_span!("timeline_list", tenant = %tenant_id).entered();
|
||||
|
||||
let mut response_data = Vec::with_capacity(timelines.len());
|
||||
for timeline in timelines {
|
||||
let timeline_info = build_timeline_info(
|
||||
&timeline,
|
||||
include_non_incremental_logical_size,
|
||||
include_non_incremental_physical_size,
|
||||
)
|
||||
.context("Failed to convert tenant timeline {timeline_id} into the local one: {e:?}")
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
let (tenant_state, timelines) = {
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
|
||||
(tenant.current_state(), tenant.list_timelines())
|
||||
};
|
||||
|
||||
response_data.push(timeline_info);
|
||||
}
|
||||
let mut response_data = Vec::with_capacity(timelines.len());
|
||||
for timeline in timelines {
|
||||
let timeline_info = build_timeline_info(
|
||||
tenant_state,
|
||||
&timeline,
|
||||
include_non_incremental_logical_size,
|
||||
include_non_incremental_physical_size,
|
||||
)
|
||||
.context("Failed to convert tenant timeline {timeline_id} into the local one: {e:?}")
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
|
||||
Ok(response_data)
|
||||
response_data.push(timeline_info);
|
||||
}
|
||||
.instrument(info_span!("timeline_list", tenant = %tenant_id))
|
||||
.await?;
|
||||
|
||||
json_response(StatusCode::OK, response_data)
|
||||
}
|
||||
@@ -276,15 +281,20 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let timeline_info = async {
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true)
|
||||
.await
|
||||
.map_err(ApiError::NotFound)?;
|
||||
let (tenant_state, timeline) = tokio::task::spawn_blocking(move || {
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
|
||||
Ok((
|
||||
tenant.current_state(),
|
||||
tenant.get_timeline(timeline_id, false),
|
||||
))
|
||||
})
|
||||
.await
|
||||
.map_err(|e: JoinError| ApiError::InternalServerError(e.into()))??;
|
||||
|
||||
let timeline = tenant
|
||||
.get_timeline(timeline_id, false)
|
||||
.map_err(ApiError::NotFound)?;
|
||||
let timeline = timeline.map_err(ApiError::NotFound)?;
|
||||
|
||||
let timeline_info = build_timeline_info(
|
||||
tenant_state,
|
||||
&timeline,
|
||||
include_non_incremental_logical_size,
|
||||
include_non_incremental_physical_size,
|
||||
@@ -312,7 +322,6 @@ async fn get_lsn_by_timestamp_handler(request: Request<Body>) -> Result<Response
|
||||
let timestamp_pg = postgres_ffi::to_pg_timestamp(timestamp);
|
||||
|
||||
let timeline = tenant_mgr::get_tenant(tenant_id, true)
|
||||
.await
|
||||
.and_then(|tenant| tenant.get_timeline(timeline_id, true))
|
||||
.map_err(ApiError::NotFound)?;
|
||||
let result = match timeline
|
||||
@@ -338,13 +347,13 @@ async fn tenant_attach_handler(request: Request<Body>) -> Result<Response<Body>,
|
||||
|
||||
if let Some(remote_storage) = &state.remote_storage {
|
||||
// FIXME: distinguish between "Tenant already exists" and other errors
|
||||
tenant_mgr::attach_tenant(state.conf, tenant_id, remote_storage.clone())
|
||||
tenant_mgr::attach_tenant(state.conf, tenant_id, remote_storage)
|
||||
.instrument(info_span!("tenant_attach", tenant = %tenant_id))
|
||||
.await
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
} else {
|
||||
return Err(ApiError::BadRequest(anyhow!(
|
||||
"attach_tenant is not possible because pageserver was configured without remote storage"
|
||||
"attach_tenant is possible because pageserver was configured without remote storage"
|
||||
)));
|
||||
}
|
||||
|
||||
@@ -383,49 +392,23 @@ async fn tenant_detach_handler(request: Request<Body>) -> Result<Response<Body>,
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
|
||||
async fn tenant_load_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let state = get_state(&request);
|
||||
tenant_mgr::load_tenant(state.conf, tenant_id, state.remote_storage.clone())
|
||||
.instrument(info_span!("load", tenant = %tenant_id))
|
||||
.await
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
|
||||
json_response(StatusCode::ACCEPTED, ())
|
||||
}
|
||||
|
||||
async fn tenant_ignore_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let state = get_state(&request);
|
||||
let conf = state.conf;
|
||||
tenant_mgr::ignore_tenant(conf, tenant_id)
|
||||
.instrument(info_span!("ignore_tenant", tenant = %tenant_id))
|
||||
.await
|
||||
// FIXME: Errors from `ignore_tenant` can be caused by both user and internal errors.
|
||||
// Replace this with better handling once the error type permits it.
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
|
||||
async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
check_permission(&request, None)?;
|
||||
|
||||
let response_data = tenant_mgr::list_tenants()
|
||||
.instrument(info_span!("tenant_list"))
|
||||
.await
|
||||
.iter()
|
||||
.map(|(id, state)| TenantInfo {
|
||||
id: *id,
|
||||
state: *state,
|
||||
current_physical_size: None,
|
||||
has_in_progress_downloads: Some(state.has_in_progress_downloads()),
|
||||
})
|
||||
.collect::<Vec<TenantInfo>>();
|
||||
let response_data = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("tenant_list").entered();
|
||||
tenant_mgr::list_tenants()
|
||||
.iter()
|
||||
.map(|(id, state)| TenantInfo {
|
||||
id: *id,
|
||||
state: *state,
|
||||
current_physical_size: None,
|
||||
has_in_progress_downloads: Some(state.has_in_progress_downloads()),
|
||||
})
|
||||
.collect::<Vec<TenantInfo>>()
|
||||
})
|
||||
.await
|
||||
.map_err(|e: JoinError| ApiError::InternalServerError(e.into()))?;
|
||||
|
||||
json_response(StatusCode::OK, response_data)
|
||||
}
|
||||
@@ -434,8 +417,9 @@ async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiErro
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let tenant_info = async {
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, false).await?;
|
||||
let tenant_info = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("tenant_status_handler", tenant = %tenant_id).entered();
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, false)?;
|
||||
|
||||
// Calculate total physical size of all timelines
|
||||
let mut current_physical_size = 0;
|
||||
@@ -444,15 +428,17 @@ async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiErro
|
||||
}
|
||||
|
||||
let state = tenant.current_state();
|
||||
Ok(TenantInfo {
|
||||
let tenant_info = TenantInfo {
|
||||
id: tenant_id,
|
||||
state,
|
||||
current_physical_size: Some(current_physical_size),
|
||||
has_in_progress_downloads: Some(state.has_in_progress_downloads()),
|
||||
})
|
||||
}
|
||||
.instrument(info_span!("tenant_status_handler", tenant = %tenant_id))
|
||||
};
|
||||
|
||||
Ok::<_, anyhow::Error>(tenant_info)
|
||||
})
|
||||
.await
|
||||
.map_err(|e: JoinError| ApiError::InternalServerError(e.into()))?
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
|
||||
json_response(StatusCode::OK, tenant_info)
|
||||
@@ -462,9 +448,7 @@ async fn tenant_size_handler(request: Request<Body>) -> Result<Response<Body>, A
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true)
|
||||
.await
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::InternalServerError)?;
|
||||
|
||||
// this can be long operation, it currently is not backed by any request coalescing or similar
|
||||
let inputs = tenant
|
||||
@@ -581,19 +565,22 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
|
||||
.map(TenantId::from)
|
||||
.unwrap_or_else(TenantId::generate);
|
||||
|
||||
let state = get_state(&request);
|
||||
let new_tenant = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("tenant_create", tenant = ?target_tenant_id).entered();
|
||||
let state = get_state(&request);
|
||||
|
||||
let new_tenant = tenant_mgr::create_tenant(
|
||||
state.conf,
|
||||
tenant_conf,
|
||||
target_tenant_id,
|
||||
state.remote_storage.clone(),
|
||||
)
|
||||
.instrument(info_span!("tenant_create", tenant = ?target_tenant_id))
|
||||
tenant_mgr::create_tenant(
|
||||
state.conf,
|
||||
tenant_conf,
|
||||
target_tenant_id,
|
||||
state.remote_storage.clone(),
|
||||
)
|
||||
// FIXME: `create_tenant` can fail from both user and internal errors. Replace this
|
||||
// with better error handling once the type permits it
|
||||
.map_err(ApiError::InternalServerError)
|
||||
})
|
||||
.await
|
||||
// FIXME: `create_tenant` can fail from both user and internal errors. Replace this
|
||||
// with better error handling once the type permits it
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
.map_err(|e: JoinError| ApiError::InternalServerError(e.into()))??;
|
||||
|
||||
Ok(match new_tenant {
|
||||
Some(tenant) => {
|
||||
@@ -684,13 +671,17 @@ async fn tenant_config_handler(mut request: Request<Body>) -> Result<Response<Bo
|
||||
);
|
||||
}
|
||||
|
||||
let state = get_state(&request);
|
||||
tenant_mgr::update_tenant_config(state.conf, tenant_conf, tenant_id)
|
||||
.instrument(info_span!("tenant_config", tenant = ?tenant_id))
|
||||
.await
|
||||
// FIXME: `update_tenant_config` can fail because of both user and internal errors.
|
||||
// Replace this `map_err` with better error handling once the type permits it
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("tenant_config", tenant = ?tenant_id).entered();
|
||||
|
||||
let state = get_state(&request);
|
||||
tenant_mgr::update_tenant_config(state.conf, tenant_conf, tenant_id)
|
||||
// FIXME: `update_tenant_config` can fail because of both user and internal errors.
|
||||
// Replace this `map_err` with better error handling once the type permits it
|
||||
.map_err(ApiError::InternalServerError)
|
||||
})
|
||||
.await
|
||||
.map_err(|e: JoinError| ApiError::InternalServerError(e.into()))??;
|
||||
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
@@ -737,7 +728,7 @@ async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body
|
||||
|
||||
let gc_req: TimelineGcRequest = json_request(&mut request).await?;
|
||||
|
||||
let wait_task_done = tenant_mgr::immediate_gc(tenant_id, timeline_id, gc_req).await?;
|
||||
let wait_task_done = tenant_mgr::immediate_gc(tenant_id, timeline_id, gc_req)?;
|
||||
let gc_result = wait_task_done
|
||||
.await
|
||||
.context("wait for gc task")
|
||||
@@ -754,9 +745,7 @@ async fn timeline_compact_handler(request: Request<Body>) -> Result<Response<Bod
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true)
|
||||
.await
|
||||
.map_err(ApiError::NotFound)?;
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
|
||||
let timeline = tenant
|
||||
.get_timeline(timeline_id, true)
|
||||
.map_err(ApiError::NotFound)?;
|
||||
@@ -775,9 +764,7 @@ async fn timeline_checkpoint_handler(request: Request<Body>) -> Result<Response<
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true)
|
||||
.await
|
||||
.map_err(ApiError::NotFound)?;
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
|
||||
let timeline = tenant
|
||||
.get_timeline(timeline_id, true)
|
||||
.map_err(ApiError::NotFound)?;
|
||||
@@ -851,8 +838,6 @@ pub fn make_router(
|
||||
.post("/v1/tenant/:tenant_id/timeline", timeline_create_handler)
|
||||
.post("/v1/tenant/:tenant_id/attach", tenant_attach_handler)
|
||||
.post("/v1/tenant/:tenant_id/detach", tenant_detach_handler)
|
||||
.post("/v1/tenant/:tenant_id/load", tenant_load_handler)
|
||||
.post("/v1/tenant/:tenant_id/ignore", tenant_ignore_handler)
|
||||
.get(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id",
|
||||
timeline_detail_handler,
|
||||
|
||||
@@ -125,13 +125,6 @@ pub const TEMP_FILE_SUFFIX: &str = "___temp";
/// Full path: `tenants/<tenant_id>/timelines/<timeline_id>___uninit`.
pub const TIMELINE_UNINIT_MARK_SUFFIX: &str = "___uninit";

/// A marker file to prevent pageserver from loading a certain tenant on restart.
/// Different from [`TIMELINE_UNINIT_MARK_SUFFIX`] due to semantics of the corresponding
/// `ignore` management API command, which expects the ignored tenant to be properly loaded
/// into pageserver's memory before being ignored.
/// Full path: `tenants/<tenant_id>/___ignored_tenant`.
pub const IGNORED_TENANT_FILE_NAME: &str = "___ignored_tenant";
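For illustration only, a tiny hypothetical helper that composes and checks the ignore mark path described above; `tenants_dir` is an assumed input, not a real PageServerConf accessor.

use std::path::{Path, PathBuf};

// Hypothetical sketch: where the ignore mark for a tenant lives and how startup
// code could skip marked tenants. Uses the IGNORED_TENANT_FILE_NAME const above.
fn ignore_mark_path(tenants_dir: &Path, tenant_id: &str) -> PathBuf {
    tenants_dir.join(tenant_id).join(IGNORED_TENANT_FILE_NAME)
}

fn tenant_is_ignored(tenants_dir: &Path, tenant_id: &str) -> bool {
    ignore_mark_path(tenants_dir, tenant_id).exists()
}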
pub fn is_temporary(path: &Path) -> bool {
|
||||
match path.file_name() {
|
||||
Some(name) => name.to_string_lossy().ends_with(TEMP_FILE_SUFFIX),
|
||||
|
||||
@@ -941,7 +941,7 @@ impl postgres_backend_async::Handler for PageServerHandler {
/// ensures that queries don't fail immediately after pageserver startup, because
/// all tenants are still loading.
async fn get_active_tenant_with_timeout(tenant_id: TenantId) -> Result<Arc<Tenant>> {
    let tenant = tenant_mgr::get_tenant(tenant_id, false).await?;
    let tenant = tenant_mgr::get_tenant(tenant_id, false)?;
    match tokio::time::timeout(Duration::from_secs(30), tenant.wait_to_become_active()).await {
        Ok(wait_result) => wait_result
        // no .context(), the error message is good enough and some tests depend on it
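The 30-second bound above is the standard tokio::time::timeout pattern; a self-contained sketch, with a placeholder future standing in for wait_to_become_active(), might look like this:

use std::time::Duration;
use tokio::time::timeout;

// Sketch of the wait-with-deadline pattern used above. The sleep stands in for
// tenant.wait_to_become_active(); any future can be bounded the same way.
async fn wait_until_active_or_give_up() -> anyhow::Result<()> {
    let becomes_active = tokio::time::sleep(Duration::from_secs(1)); // placeholder work
    match timeout(Duration::from_secs(30), becomes_active).await {
        Ok(()) => Ok(()),
        Err(_elapsed) => anyhow::bail!("timed out waiting for the tenant to become active"),
    }
}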
@@ -202,9 +202,9 @@ use std::sync::atomic::{AtomicU32, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use anyhow::ensure;
|
||||
use remote_storage::{DownloadError, GenericRemoteStorage, RemotePath};
|
||||
use remote_storage::{DownloadError, GenericRemoteStorage};
|
||||
use tokio::runtime::Runtime;
|
||||
use tracing::{info, warn};
|
||||
use tracing::{error, info, warn};
|
||||
use tracing::{info_span, Instrument};
|
||||
|
||||
use utils::lsn::Lsn;
|
||||
@@ -217,7 +217,7 @@ use crate::metrics::RemoteOpKind;
|
||||
use crate::metrics::REMOTE_UPLOAD_QUEUE_UNFINISHED_TASKS;
|
||||
use crate::{
|
||||
config::PageServerConf,
|
||||
storage_sync::index::LayerFileMetadata,
|
||||
storage_sync::index::{LayerFileMetadata, RelativePath},
|
||||
task_mgr,
|
||||
task_mgr::TaskKind,
|
||||
task_mgr::BACKGROUND_RUNTIME,
|
||||
@@ -287,7 +287,7 @@ struct UploadQueueInitialized {
|
||||
|
||||
/// All layer files stored in the remote storage, taking into account all
|
||||
/// in-progress and queued operations
|
||||
latest_files: HashMap<RemotePath, LayerFileMetadata>,
|
||||
latest_files: HashMap<RelativePath, LayerFileMetadata>,
|
||||
|
||||
/// Metadata stored in the remote storage, taking into account all
|
||||
/// in-progress and queued operations.
|
||||
@@ -337,18 +337,18 @@ impl UploadQueue {
|
||||
|
||||
let state = UploadQueueInitialized {
|
||||
// As described in the doc comment, it's ok for `latest_files` and `latest_metadata` to be ahead.
|
||||
latest_files: HashMap::new(),
|
||||
latest_files: Default::default(),
|
||||
latest_metadata: metadata.clone(),
|
||||
// We haven't uploaded anything yet, so, `last_uploaded_consistent_lsn` must be 0 to prevent
|
||||
// safekeepers from garbage-collecting anything.
|
||||
last_uploaded_consistent_lsn: Lsn(0),
|
||||
// what follows are boring default initializations
|
||||
task_counter: 0,
|
||||
task_counter: Default::default(),
|
||||
num_inprogress_layer_uploads: 0,
|
||||
num_inprogress_metadata_uploads: 0,
|
||||
num_inprogress_deletions: 0,
|
||||
inprogress_tasks: HashMap::new(),
|
||||
queued_operations: VecDeque::new(),
|
||||
inprogress_tasks: Default::default(),
|
||||
queued_operations: Default::default(),
|
||||
};
|
||||
|
||||
*self = UploadQueue::Initialized(state);
|
||||
@@ -357,10 +357,6 @@ impl UploadQueue {
|
||||
|
||||
fn initialize_with_current_remote_index_part(
|
||||
&mut self,
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
|
||||
index_part: &IndexPart,
|
||||
) -> anyhow::Result<&mut UploadQueueInitialized> {
|
||||
match self {
|
||||
@@ -370,19 +366,14 @@ impl UploadQueue {
|
||||
}
|
||||
}
|
||||
|
||||
let mut files = HashMap::with_capacity(index_part.timeline_layers.len());
|
||||
let timeline_path = conf.timeline_path(&timeline_id, &tenant_id);
|
||||
for timeline_name in &index_part.timeline_layers {
|
||||
let local_path = timeline_path.join(timeline_name);
|
||||
let remote_timeline_path = conf.remote_path(&local_path).expect(
|
||||
"Remote timeline path and local timeline path were constructed form the same conf",
|
||||
);
|
||||
let mut files = HashMap::new();
|
||||
for path in &index_part.timeline_layers {
|
||||
let layer_metadata = index_part
|
||||
.layer_metadata
|
||||
.get(timeline_name)
|
||||
.get(path)
|
||||
.map(LayerFileMetadata::from)
|
||||
.unwrap_or(LayerFileMetadata::MISSING);
|
||||
files.insert(remote_timeline_path, layer_metadata);
|
||||
files.insert(path.clone(), layer_metadata);
|
||||
}
|
||||
|
||||
let index_part_metadata = index_part.parse_metadata()?;
|
||||
@@ -400,8 +391,8 @@ impl UploadQueue {
|
||||
num_inprogress_layer_uploads: 0,
|
||||
num_inprogress_metadata_uploads: 0,
|
||||
num_inprogress_deletions: 0,
|
||||
inprogress_tasks: HashMap::new(),
|
||||
queued_operations: VecDeque::new(),
|
||||
inprogress_tasks: Default::default(),
|
||||
queued_operations: Default::default(),
|
||||
};
|
||||
|
||||
*self = UploadQueue::Initialized(state);
|
||||
@@ -465,12 +456,7 @@ impl RemoteTimelineClient {
|
||||
/// The given `index_part` must be the one on the remote.
|
||||
pub fn init_upload_queue(&self, index_part: &IndexPart) -> anyhow::Result<()> {
|
||||
let mut upload_queue = self.upload_queue.lock().unwrap();
|
||||
upload_queue.initialize_with_current_remote_index_part(
|
||||
self.conf,
|
||||
self.tenant_id,
|
||||
self.timeline_id,
|
||||
index_part,
|
||||
)?;
|
||||
upload_queue.initialize_with_current_remote_index_part(index_part)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -524,13 +510,15 @@ impl RemoteTimelineClient {
|
||||
/// On success, returns the size of the downloaded file.
|
||||
pub async fn download_layer_file(
|
||||
&self,
|
||||
remote_path: &RemotePath,
|
||||
path: &RelativePath,
|
||||
layer_metadata: &LayerFileMetadata,
|
||||
) -> anyhow::Result<u64> {
|
||||
let downloaded_size = download::download_layer_file(
|
||||
self.conf,
|
||||
&self.storage_impl,
|
||||
remote_path,
|
||||
self.tenant_id,
|
||||
self.timeline_id,
|
||||
path,
|
||||
layer_metadata,
|
||||
)
|
||||
.measure_remote_op(
|
||||
@@ -548,13 +536,13 @@ impl RemoteTimelineClient {
|
||||
let new_metadata = LayerFileMetadata::new(downloaded_size);
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
if let Some(upgraded) = upload_queue.latest_files.get_mut(remote_path) {
|
||||
if let Some(upgraded) = upload_queue.latest_files.get_mut(path) {
|
||||
upgraded.merge(&new_metadata);
|
||||
} else {
|
||||
// The file should exist, since we just downloaded it.
|
||||
warn!(
|
||||
"downloaded file {:?} not found in local copy of the index file",
|
||||
remote_path
|
||||
path
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -624,9 +612,14 @@ impl RemoteTimelineClient {
|
||||
"file size not initialized in metadata"
|
||||
);
|
||||
|
||||
let relative_path = RelativePath::from_local_path(
|
||||
&self.conf.timeline_path(&self.timeline_id, &self.tenant_id),
|
||||
path,
|
||||
)?;
|
||||
|
||||
upload_queue
|
||||
.latest_files
|
||||
.insert(self.conf.remote_path(path)?, layer_metadata.clone());
|
||||
.insert(relative_path, layer_metadata.clone());
|
||||
|
||||
let op = UploadOp::UploadLayer(PathBuf::from(path), layer_metadata.clone());
|
||||
self.update_upload_queue_unfinished_metric(1, &op);
|
||||
@@ -648,10 +641,13 @@ impl RemoteTimelineClient {
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
|
||||
// Convert the paths into RemotePaths, and gather other information we need.
|
||||
let mut remote_paths = Vec::with_capacity(paths.len());
|
||||
// Convert the paths into RelativePaths, and gather other information we need.
|
||||
let mut relative_paths = Vec::with_capacity(paths.len());
|
||||
for path in paths {
|
||||
remote_paths.push(self.conf.remote_path(path)?);
|
||||
relative_paths.push(RelativePath::from_local_path(
|
||||
&self.conf.timeline_path(&self.timeline_id, &self.tenant_id),
|
||||
path,
|
||||
)?);
|
||||
}
|
||||
|
||||
// Deleting layers doesn't affect the values stored in TimelineMetadata,
|
||||
@@ -667,8 +663,8 @@ impl RemoteTimelineClient {
|
||||
// from latest_files, but not yet scheduled for deletion. Use a closure
|
||||
// to syntactically forbid ? or bail! calls here.
|
||||
let no_bail_here = || {
|
||||
for remote_path in remote_paths {
|
||||
upload_queue.latest_files.remove(&remote_path);
|
||||
for relative_path in relative_paths {
|
||||
upload_queue.latest_files.remove(&relative_path);
|
||||
}
|
||||
|
||||
let index_part = IndexPart::new(
|
||||
@@ -842,19 +838,14 @@ impl RemoteTimelineClient {
|
||||
|
||||
let upload_result: anyhow::Result<()> = match &task.op {
|
||||
UploadOp::UploadLayer(ref path, ref layer_metadata) => {
|
||||
upload::upload_timeline_layer(
|
||||
self.conf,
|
||||
&self.storage_impl,
|
||||
path,
|
||||
layer_metadata,
|
||||
)
|
||||
.measure_remote_op(
|
||||
self.tenant_id,
|
||||
self.timeline_id,
|
||||
RemoteOpFileKind::Layer,
|
||||
RemoteOpKind::Upload,
|
||||
)
|
||||
.await
|
||||
upload::upload_timeline_layer(&self.storage_impl, path, layer_metadata)
|
||||
.measure_remote_op(
|
||||
self.tenant_id,
|
||||
self.timeline_id,
|
||||
RemoteOpFileKind::Layer,
|
||||
RemoteOpKind::Upload,
|
||||
)
|
||||
.await
|
||||
}
|
||||
UploadOp::UploadMetadata(ref index_part, _lsn) => {
|
||||
upload::upload_index_part(
|
||||
@@ -873,7 +864,7 @@ impl RemoteTimelineClient {
|
||||
.await
|
||||
}
|
||||
UploadOp::Delete(metric_file_kind, ref path) => {
|
||||
delete::delete_layer(self.conf, &self.storage_impl, path)
|
||||
delete::delete_layer(&self.storage_impl, path)
|
||||
.measure_remote_op(
|
||||
self.tenant_id,
|
||||
self.timeline_id,
|
||||
@@ -897,20 +888,10 @@
            Err(e) => {
                let retries = task.retries.fetch_add(1, Ordering::SeqCst);

                // uploads may fail due to rate limits (IAM, S3) or spurious network and external errors
                // such issues are relatively regular, so don't use WARN or ERROR to avoid alerting
                // people and tests until the retries are definitely causing delays.
                if retries < 3 {
                    info!(
                        "failed to perform remote task {}, will retry (attempt {}): {:?}",
                        task.op, retries, e
                    );
                } else {
                    warn!(
                        "failed to perform remote task {}, will retry (attempt {}): {:?}",
                        task.op, retries, e
                    );
                }
                error!(
                    "failed to perform remote task {}, will retry (attempt {}): {:?}",
                    task.op, retries, e
                );

                // sleep until it's time to retry, or we're cancelled
                tokio::select! {
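A minimal sketch of the retry loop this select! sits in: capped exponential backoff raced against a shutdown signal. The watch-channel shutdown receiver and the backoff constants are illustrative assumptions, not the task_mgr wiring used by the pageserver.

use std::time::Duration;
use tokio::sync::watch;

// Sketch: retry an async operation with capped exponential backoff, giving up
// early if a shutdown signal arrives. `op` and `shutdown` are assumed inputs.
async fn retry_with_backoff<F, Fut>(mut op: F, mut shutdown: watch::Receiver<bool>)
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = anyhow::Result<()>>,
{
    let mut retries = 0u32;
    loop {
        match op().await {
            Ok(()) => return,
            Err(e) => {
                retries += 1;
                // 1s, 2s, 4s, 8s, then capped at 16s
                let delay = Duration::from_millis(500 * 2u64.pow(retries.min(5)));
                tracing::info!("operation failed (attempt {}), retrying in {:?}: {:?}", retries, delay, e);
                tokio::select! {
                    _ = tokio::time::sleep(delay) => {}
                    _ = shutdown.changed() => return,
                }
            }
        }
    }
}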
@@ -1102,11 +1083,15 @@ mod tests {
|
||||
TimelineMetadata::from_bytes(&metadata.to_bytes().unwrap()).unwrap()
|
||||
}
|
||||
|
||||
fn assert_file_list(a: &HashSet<String>, b: &[&str]) {
|
||||
let mut avec: Vec<&str> = a.iter().map(|a| a.as_str()).collect();
|
||||
fn assert_file_list(a: &HashSet<RelativePath>, b: &[&str]) {
|
||||
let xx = PathBuf::from("");
|
||||
let mut avec: Vec<String> = a
|
||||
.iter()
|
||||
.map(|x| x.to_local_path(&xx).to_string_lossy().into())
|
||||
.collect();
|
||||
avec.sort();
|
||||
|
||||
let mut bvec = b.to_vec();
|
||||
let mut bvec = b.to_owned();
|
||||
bvec.sort_unstable();
|
||||
|
||||
assert_eq!(avec, bvec);
|
||||
@@ -1174,7 +1159,8 @@ mod tests {
|
||||
|
||||
println!("workdir: {}", harness.conf.workdir.display());
|
||||
|
||||
let storage_impl = GenericRemoteStorage::from_config(&storage_config)?;
|
||||
let storage_impl =
|
||||
GenericRemoteStorage::from_config(harness.conf.workdir.clone(), &storage_config)?;
|
||||
let client = Arc::new(RemoteTimelineClient {
|
||||
conf: harness.conf,
|
||||
runtime,
|
||||
|
||||
@@ -5,24 +5,34 @@ use tracing::debug;
|
||||
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
|
||||
pub(super) async fn delete_layer<'a>(
|
||||
conf: &'static PageServerConf,
|
||||
storage: &'a GenericRemoteStorage,
|
||||
local_layer_path: &'a Path,
|
||||
pub(super) async fn delete_layer(
|
||||
storage: &GenericRemoteStorage,
|
||||
local_layer_path: &Path,
|
||||
) -> anyhow::Result<()> {
|
||||
fail::fail_point!("before-delete-layer", |_| {
|
||||
anyhow::bail!("failpoint before-delete-layer")
|
||||
});
|
||||
debug!("Deleting layer from remote storage: {local_layer_path:?}",);
|
||||
debug!(
|
||||
"Deleting layer from remote storage: {:?}",
|
||||
local_layer_path.display()
|
||||
);
|
||||
|
||||
let path_to_delete = conf.remote_path(local_layer_path)?;
|
||||
let storage_path = storage
|
||||
.remote_object_id(local_layer_path)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to get the layer storage path for local path '{}'",
|
||||
local_layer_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
// XXX: If the deletion fails because the object already didn't exist,
|
||||
// it would be good to just issue a warning but consider it success.
|
||||
// https://github.com/neondatabase/neon/issues/2934
|
||||
storage.delete(&path_to_delete).await.with_context(|| {
|
||||
format!("Failed to delete remote layer from storage at {path_to_delete:?}")
|
||||
storage.delete(&storage_path).await.with_context(|| {
|
||||
format!(
|
||||
"Failed to delete remote layer from storage at '{:?}'",
|
||||
storage_path
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -10,11 +10,12 @@ use tracing::debug;
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
use crate::storage_sync::index::LayerFileMetadata;
|
||||
use remote_storage::{DownloadError, GenericRemoteStorage, RemotePath};
|
||||
use remote_storage::{DownloadError, GenericRemoteStorage};
|
||||
use utils::crashsafe::path_with_suffix_extension;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
|
||||
use super::index::IndexPart;
|
||||
use super::RelativePath;
|
||||
|
||||
async fn fsync_path(path: impl AsRef<std::path::Path>) -> Result<(), std::io::Error> {
|
||||
fs::File::open(path).await?.sync_all().await
|
||||
@@ -28,10 +29,21 @@ async fn fsync_path(path: impl AsRef<std::path::Path>) -> Result<(), std::io::Er
|
||||
pub async fn download_layer_file<'a>(
|
||||
conf: &'static PageServerConf,
|
||||
storage: &'a GenericRemoteStorage,
|
||||
remote_path: &'a RemotePath,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
path: &'a RelativePath,
|
||||
layer_metadata: &'a LayerFileMetadata,
|
||||
) -> anyhow::Result<u64> {
|
||||
let local_path = conf.local_path(remote_path);
|
||||
let timeline_path = conf.timeline_path(&timeline_id, &tenant_id);
|
||||
|
||||
let local_path = path.to_local_path(&timeline_path);
|
||||
|
||||
let layer_storage_path = storage.remote_object_id(&local_path).with_context(|| {
|
||||
format!(
|
||||
"Failed to get the layer storage path for local path '{}'",
|
||||
local_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
    // Perform a rename inspired by durable_rename from file_utils.c.
    // The sequence:
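For reference, a condensed sketch of that durable-rename sequence with std::fs: write to a temporary name, fsync the file, rename it into place, then fsync the parent directory. The paths and temp suffix here are illustrative.

use std::fs::{self, File};
use std::io::Write;
use std::path::Path;

// Sketch of the durable_rename-style sequence referenced above:
// write temp file -> fsync it -> rename over the target -> fsync the directory.
fn durable_write(target: &Path, bytes: &[u8]) -> std::io::Result<()> {
    let tmp = target.with_extension("___temp");
    let mut f = File::create(&tmp)?;
    f.write_all(bytes)?;
    f.sync_all()?; // data and metadata reach disk before the rename
    fs::rename(&tmp, target)?;
    // fsync the parent directory so the rename itself survives a crash (Unix)
    if let Some(dir) = target.parent() {
        File::open(dir)?.sync_all()?;
    }
    Ok(())
}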
@@ -52,13 +64,18 @@ pub async fn download_layer_file<'a>(
|
||||
temp_file_path.display()
|
||||
)
|
||||
})?;
|
||||
let mut download = storage.download(remote_path).await.with_context(|| {
|
||||
format!(
|
||||
"Failed to open a download stream for layer with remote storage path '{remote_path:?}'"
|
||||
)
|
||||
})?;
|
||||
let mut download = storage
|
||||
.download(&layer_storage_path)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to open a download stream for layer with remote storage path '{layer_storage_path:?}'"
|
||||
)
|
||||
})?;
|
||||
let bytes_amount = tokio::io::copy(&mut download.download_stream, &mut destination_file).await.with_context(|| {
|
||||
format!("Failed to download layer with remote storage path '{remote_path:?}' into file {temp_file_path:?}")
|
||||
format!(
|
||||
"Failed to download layer with remote storage path '{layer_storage_path:?}' into file '{}'", temp_file_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
// Tokio doc here: https://docs.rs/tokio/1.17.0/tokio/fs/struct.File.html states that:
|
||||
@@ -134,7 +151,12 @@ pub async fn list_remote_timelines<'a>(
|
||||
tenant_id: TenantId,
|
||||
) -> anyhow::Result<Vec<(TimelineId, IndexPart)>> {
|
||||
let tenant_path = conf.timelines_path(&tenant_id);
|
||||
let tenant_storage_path = conf.remote_path(&tenant_path)?;
|
||||
let tenant_storage_path = storage.remote_object_id(&tenant_path).with_context(|| {
|
||||
format!(
|
||||
"Failed to get tenant storage path for local path '{}'",
|
||||
tenant_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
let timelines = storage
|
||||
.list_prefixes(Some(&tenant_storage_path))
|
||||
@@ -196,8 +218,14 @@ pub async fn download_index_part(
|
||||
let index_part_path = conf
|
||||
.metadata_path(timeline_id, tenant_id)
|
||||
.with_file_name(IndexPart::FILE_NAME);
|
||||
let part_storage_path = conf
|
||||
.remote_path(&index_part_path)
|
||||
let part_storage_path = storage
|
||||
.remote_object_id(&index_part_path)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to get the index part storage path for local path '{}'",
|
||||
index_part_path.display()
|
||||
)
|
||||
})
|
||||
.map_err(DownloadError::BadInput)?;
|
||||
|
||||
let mut index_part_download = storage.download(&part_storage_path).await?;
|
||||
@@ -208,12 +236,20 @@ pub async fn download_index_part(
|
||||
&mut index_part_bytes,
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("Failed to download an index part into file {index_part_path:?}"))
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to download an index part into file '{}'",
|
||||
index_part_path.display()
|
||||
)
|
||||
})
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
let index_part: IndexPart = serde_json::from_slice(&index_part_bytes)
|
||||
.with_context(|| {
|
||||
format!("Failed to deserialize index part file into file {index_part_path:?}")
|
||||
format!(
|
||||
"Failed to deserialize index part file into file '{}'",
|
||||
index_part_path.display()
|
||||
)
|
||||
})
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
|
||||
@@ -2,9 +2,12 @@
|
||||
//! Able to restore itself from the storage index parts, which are located in every timeline's remote directory and contain all data about
//! remote timeline layers and their metadata.
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use remote_storage::RemotePath;
|
||||
use anyhow::{Context, Ok};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
|
||||
@@ -12,6 +15,33 @@ use crate::tenant::metadata::TimelineMetadata;
|
||||
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
/// A part of the filesystem path that needs a root to become a path again.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(transparent)]
pub struct RelativePath(String);

impl RelativePath {
    /// Attempts to strip off the base from path, producing a relative path or an error.
    pub fn from_local_path(timeline_path: &Path, path: &Path) -> anyhow::Result<RelativePath> {
        let relative = path.strip_prefix(timeline_path).with_context(|| {
            format!(
                "path '{}' is not relative to base '{}'",
                path.display(),
                timeline_path.display()
            )
        })?;
        Ok(Self::from_filename(relative))
    }

    pub fn from_filename(path: &Path) -> RelativePath {
        RelativePath(path.to_string_lossy().to_string())
    }

    pub fn to_local_path(&self, timeline_path: &Path) -> PathBuf {
        timeline_path.join(&self.0)
    }
}
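A quick round-trip example of how these helpers compose; the directory and layer names are made up for illustration.

// Round-trip sketch for the RelativePath type above: strip the timeline dir off
// a layer path, then join it back on.
fn relative_path_roundtrip() -> anyhow::Result<()> {
    use std::path::Path;

    let timeline_dir = Path::new("/data/tenants/1234/timelines/abcd"); // illustrative
    let layer = timeline_dir.join("000000016B59D8-000000016B5A51");

    let rel = RelativePath::from_local_path(timeline_dir, &layer)?;
    assert_eq!(rel.to_local_path(timeline_dir), layer);
    Ok(())
}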
/// Metadata gathered for each of the layer files.
|
||||
///
|
||||
/// Fields have to be `Option`s because remote [`IndexPart`]'s can be from different version, which
|
||||
@@ -67,22 +97,21 @@ pub struct IndexPart {
|
||||
#[serde(default)]
|
||||
version: usize,
|
||||
|
||||
/// Layer names, which are stored on the remote storage.
|
||||
/// Each of the layers present on remote storage.
|
||||
///
|
||||
/// Additional metadata might exist in `layer_metadata`.
|
||||
pub timeline_layers: HashSet<String>,
|
||||
pub timeline_layers: HashSet<RelativePath>,
|
||||
|
||||
/// FIXME: unused field. This should be removed, but that changes the on-disk format,
|
||||
/// so we need to make sure we're backwards- (and maybe forwards-) compatible
|
||||
/// First pass is to move it to Optional and the next would be its removal
|
||||
missing_layers: Option<HashSet<String>>,
|
||||
/// so we need to make sure we're backwards- (and maybe forwards-) compatible
|
||||
missing_layers: HashSet<RelativePath>,
|
||||
|
||||
/// Per layer file name metadata, which can be present for a present or missing layer file.
|
||||
/// Per layer file metadata, which can be present for a present or missing layer file.
|
||||
///
|
||||
/// Older versions of `IndexPart` will not have this property or have only a part of metadata
|
||||
/// that latest version stores.
|
||||
#[serde(default)]
|
||||
pub layer_metadata: HashMap<String, IndexLayerMetadata>,
|
||||
pub layer_metadata: HashMap<RelativePath, IndexLayerMetadata>,
|
||||
|
||||
// 'disk_consistent_lsn' is a copy of the 'disk_consistent_lsn' in the metadata.
|
||||
// It's duplicated here for convenience.
|
||||
@@ -100,29 +129,23 @@ impl IndexPart {
|
||||
pub const FILE_NAME: &'static str = "index_part.json";
|
||||
|
||||
pub fn new(
|
||||
layers_and_metadata: HashMap<RemotePath, LayerFileMetadata>,
|
||||
layers_and_metadata: HashMap<RelativePath, LayerFileMetadata>,
|
||||
disk_consistent_lsn: Lsn,
|
||||
metadata_bytes: Vec<u8>,
|
||||
) -> Self {
|
||||
let mut timeline_layers = HashSet::with_capacity(layers_and_metadata.len());
|
||||
let mut layer_metadata = HashMap::with_capacity(layers_and_metadata.len());
|
||||
let mut timeline_layers = HashSet::new();
|
||||
let mut layer_metadata = HashMap::new();
|
||||
|
||||
for (remote_path, metadata) in &layers_and_metadata {
|
||||
let metadata = IndexLayerMetadata::from(metadata);
|
||||
match remote_path.object_name() {
|
||||
Some(layer_name) => {
|
||||
timeline_layers.insert(layer_name.to_owned());
|
||||
layer_metadata.insert(layer_name.to_owned(), metadata);
|
||||
}
|
||||
// TODO move this to the type level: we know that every layer entry does have a name
|
||||
None => panic!("Layer {remote_path:?} has no file name, skipping"),
|
||||
}
|
||||
}
|
||||
separate_paths_and_metadata(
|
||||
&layers_and_metadata,
|
||||
&mut timeline_layers,
|
||||
&mut layer_metadata,
|
||||
);
|
||||
|
||||
Self {
|
||||
version: Self::LATEST_VERSION,
|
||||
timeline_layers,
|
||||
missing_layers: Some(HashSet::new()),
|
||||
missing_layers: HashSet::new(),
|
||||
layer_metadata,
|
||||
disk_consistent_lsn,
|
||||
metadata_bytes,
|
||||
@@ -148,6 +171,18 @@ impl From<&'_ LayerFileMetadata> for IndexLayerMetadata {
|
||||
}
|
||||
}
|
||||
|
||||
fn separate_paths_and_metadata(
|
||||
input: &HashMap<RelativePath, LayerFileMetadata>,
|
||||
output: &mut HashSet<RelativePath>,
|
||||
layer_metadata: &mut HashMap<RelativePath, IndexLayerMetadata>,
|
||||
) {
|
||||
for (path, metadata) in input {
|
||||
let metadata = IndexLayerMetadata::from(metadata);
|
||||
layer_metadata.insert(path.clone(), metadata);
|
||||
output.insert(path.clone());
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -163,8 +198,8 @@ mod tests {
|
||||
|
||||
let expected = IndexPart {
|
||||
version: 0,
|
||||
timeline_layers: HashSet::from([String::from("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9")]),
|
||||
missing_layers: Some(HashSet::from([String::from("not_a_real_layer_but_adding_coverage")])),
|
||||
timeline_layers: [RelativePath("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".to_owned())].into_iter().collect(),
|
||||
missing_layers: [RelativePath("not_a_real_layer_but_adding_coverage".to_owned())].into_iter().collect(),
|
||||
layer_metadata: HashMap::default(),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
metadata_bytes: [113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0].to_vec(),
|
||||
@@ -191,13 +226,13 @@ mod tests {
|
||||
let expected = IndexPart {
|
||||
// note this is not verified, could be anything, but exists for humans debugging.. could be the git version instead?
|
||||
version: 1,
|
||||
timeline_layers: HashSet::from([String::from("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9")]),
|
||||
missing_layers: Some(HashSet::from([String::from("not_a_real_layer_but_adding_coverage")])),
|
||||
timeline_layers: [RelativePath("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".to_owned())].into_iter().collect(),
|
||||
missing_layers: [RelativePath("not_a_real_layer_but_adding_coverage".to_owned())].into_iter().collect(),
|
||||
layer_metadata: HashMap::from([
|
||||
(String::from("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9"), IndexLayerMetadata {
|
||||
(RelativePath("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".to_owned()), IndexLayerMetadata {
|
||||
file_size: Some(25600000),
|
||||
}),
|
||||
(String::from("not_a_real_layer_but_adding_coverage"), IndexLayerMetadata {
|
||||
(RelativePath("not_a_real_layer_but_adding_coverage".to_owned()), IndexLayerMetadata {
|
||||
// serde_json should always parse this but this might be a double with jq for
|
||||
// example.
|
||||
file_size: Some(9007199254741001),
|
||||
@@ -210,46 +245,4 @@ mod tests {
|
||||
let part = serde_json::from_str::<IndexPart>(example).unwrap();
|
||||
assert_eq!(part, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn v1_indexpart_is_parsed_with_optional_missing_layers() {
|
||||
let example = r#"{
|
||||
"version":1,
|
||||
"timeline_layers":["000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9"],
|
||||
"layer_metadata":{
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000 },
|
||||
"not_a_real_layer_but_adding_coverage": { "file_size": 9007199254741001 }
|
||||
},
|
||||
"disk_consistent_lsn":"0/16960E8",
|
||||
"metadata_bytes":[112,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
|
||||
}"#;
|
||||
|
||||
let expected = IndexPart {
|
||||
// note this is not verified, could be anything, but exists for humans debugging.. could be the git version instead?
|
||||
version: 1,
|
||||
timeline_layers: HashSet::from(["000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".to_string()]),
|
||||
layer_metadata: HashMap::from([
|
||||
(
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".to_string(),
|
||||
IndexLayerMetadata {
|
||||
file_size: Some(25600000),
|
||||
}
|
||||
),
|
||||
(
|
||||
"not_a_real_layer_but_adding_coverage".to_string(),
|
||||
IndexLayerMetadata {
|
||||
// serde_json should always parse this but this might be a double with jq for
|
||||
// example.
|
||||
file_size: Some(9007199254741001),
|
||||
}
|
||||
)
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
metadata_bytes: [112,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0].to_vec(),
|
||||
missing_layers: None,
|
||||
};
|
||||
|
||||
let part = serde_json::from_str::<IndexPart>(example).unwrap();
|
||||
assert_eq!(part, expected);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,9 +30,12 @@ pub(super) async fn upload_index_part<'a>(
|
||||
let index_part_path = conf
|
||||
.metadata_path(timeline_id, tenant_id)
|
||||
.with_file_name(IndexPart::FILE_NAME);
|
||||
let storage_path = conf.remote_path(&index_part_path)?;
|
||||
storage
|
||||
.upload_storage_object(Box::new(index_part_bytes), index_part_size, &storage_path)
|
||||
.upload_storage_object(
|
||||
Box::new(index_part_bytes),
|
||||
index_part_size,
|
||||
&index_part_path,
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("Failed to upload index part for '{tenant_id} / {timeline_id}'"))
|
||||
}
|
||||
@@ -41,26 +44,36 @@ pub(super) async fn upload_index_part<'a>(
|
||||
/// No extra checks for overlapping files are made and any files that are already present remotely will be overwritten, if submitted during the upload.
|
||||
///
|
||||
/// On an error, bumps the retries count and reschedules the entire task.
|
||||
pub(super) async fn upload_timeline_layer<'a>(
|
||||
conf: &'static PageServerConf,
|
||||
storage: &'a GenericRemoteStorage,
|
||||
source_path: &'a Path,
|
||||
known_metadata: &'a LayerFileMetadata,
|
||||
pub(super) async fn upload_timeline_layer(
|
||||
storage: &GenericRemoteStorage,
|
||||
source_path: &Path,
|
||||
known_metadata: &LayerFileMetadata,
|
||||
) -> anyhow::Result<()> {
|
||||
fail_point!("before-upload-layer", |_| {
|
||||
bail!("failpoint before-upload-layer")
|
||||
});
|
||||
let storage_path = conf.remote_path(source_path)?;
|
||||
let storage_path = storage.remote_object_id(source_path).with_context(|| {
|
||||
format!(
|
||||
"Failed to get the layer storage path for local path '{}'",
|
||||
source_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
let source_file = fs::File::open(&source_path)
|
||||
.await
|
||||
.with_context(|| format!("Failed to open a source file for layer {source_path:?}"))?;
|
||||
let source_file = fs::File::open(&source_path).await.with_context(|| {
|
||||
format!(
|
||||
"Failed to open a source file for layer '{}'",
|
||||
source_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
let fs_size = source_file
|
||||
.metadata()
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("Failed to get the source file metadata for layer {source_path:?}")
|
||||
format!(
|
||||
"Failed to get the source file metadata for layer '{}'",
|
||||
source_path.display()
|
||||
)
|
||||
})?
|
||||
.len();
|
||||
|
||||
|
||||
@@ -139,7 +139,7 @@ pub struct PageserverTaskId(u64);

/// Each task that we track is associated with a "task ID". It's just an
/// increasing number that we assign. Note that it is different from tokio::task::Id.
static NEXT_TASK_ID: AtomicU64 = AtomicU64::new(1);
static NEXT_TASK_ID: Lazy<AtomicU64> = Lazy::new(|| AtomicU64::new(1));

/// Global registry of tasks
static TASKS: Lazy<Mutex<HashMap<u64, Arc<PageServerTask>>>> =
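The ID handout itself is just an atomic counter. A standalone sketch of the allocation pattern, mirroring the Lazy<AtomicU64> form used in this diff:

use once_cell::sync::Lazy;
use std::sync::atomic::{AtomicU64, Ordering};

// Sketch: hand out monotonically increasing task IDs, starting at 1.
static NEXT_ID_SKETCH: Lazy<AtomicU64> = Lazy::new(|| AtomicU64::new(1));

fn next_task_id() -> u64 {
    NEXT_ID_SKETCH.fetch_add(1, Ordering::Relaxed)
}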
@@ -571,7 +571,7 @@ impl Tenant {
|
||||
pub fn spawn_attach(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
remote_storage: &GenericRemoteStorage,
|
||||
) -> Arc<Tenant> {
|
||||
// XXX: Attach should provide the config, especially during tenant migration.
|
||||
// See https://github.com/neondatabase/neon/issues/1555
|
||||
@@ -584,7 +584,7 @@ impl Tenant {
|
||||
tenant_conf,
|
||||
wal_redo_manager,
|
||||
tenant_id,
|
||||
Some(remote_storage),
|
||||
Some(remote_storage.clone()),
|
||||
));
|
||||
|
||||
// Do all the hard work in the background
|
||||
@@ -782,7 +782,7 @@ impl Tenant {
|
||||
let tenant_conf = match Self::load_tenant_config(conf, tenant_id) {
|
||||
Ok(conf) => conf,
|
||||
Err(e) => {
|
||||
error!("load tenant config failed: {:?}", e);
|
||||
error!("load tenant config failed: {}", e);
|
||||
return Tenant::create_broken_tenant(conf, tenant_id);
|
||||
}
|
||||
};
|
||||
@@ -2669,7 +2669,7 @@ pub mod harness {
|
||||
&self,
|
||||
key: Key,
|
||||
lsn: Lsn,
|
||||
base_img: Option<(Lsn, Bytes)>,
|
||||
base_img: Option<Bytes>,
|
||||
records: Vec<(Lsn, NeonWalRecord)>,
|
||||
_pg_version: u32,
|
||||
) -> Result<Bytes, WalRedoError> {
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
use crate::page_cache;
|
||||
use crate::page_cache::{ReadBufResult, PAGE_SZ};
|
||||
use bytes::Bytes;
|
||||
use once_cell::sync::Lazy;
|
||||
use std::ops::{Deref, DerefMut};
|
||||
use std::os::unix::fs::FileExt;
|
||||
use std::sync::atomic::AtomicU64;
|
||||
@@ -116,7 +117,7 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
static NEXT_ID: AtomicU64 = AtomicU64::new(1);
|
||||
static NEXT_ID: Lazy<AtomicU64> = Lazy::new(|| AtomicU64::new(1));
|
||||
|
||||
/// An adapter for reading a (virtual) file using the page cache.
|
||||
///
|
||||
|
||||
@@ -19,7 +19,7 @@ use std::sync::atomic::{AtomicBool, AtomicI64, Ordering as AtomicOrdering};
|
||||
use std::sync::{Arc, Mutex, MutexGuard, RwLock};
|
||||
use std::time::{Duration, Instant, SystemTime};
|
||||
|
||||
use crate::storage_sync::index::IndexPart;
|
||||
use crate::storage_sync::index::{IndexPart, RelativePath};
|
||||
use crate::storage_sync::RemoteTimelineClient;
|
||||
use crate::tenant::{
|
||||
delta_layer::{DeltaLayer, DeltaLayerWriter},
|
||||
@@ -999,9 +999,55 @@ impl Timeline {
|
||||
&self,
|
||||
index_part: &IndexPart,
|
||||
remote_client: &RemoteTimelineClient,
|
||||
local_layers: HashSet<PathBuf>,
|
||||
mut local_filenames: HashSet<PathBuf>,
|
||||
up_to_date_disk_consistent_lsn: Lsn,
|
||||
) -> anyhow::Result<HashSet<PathBuf>> {
|
||||
let mut remote_filenames: HashSet<PathBuf> = HashSet::new();
|
||||
for fname in index_part.timeline_layers.iter() {
|
||||
remote_filenames.insert(fname.to_local_path(&PathBuf::from("")));
|
||||
}
|
||||
|
||||
// Are there any local files that exist, with a size that doesn't match
|
||||
// with the size stored in the remote index file?
|
||||
// If so, rename_to_backup those files so that we re-download them later.
|
||||
local_filenames.retain(|path| {
|
||||
let layer_metadata = index_part
|
||||
.layer_metadata
|
||||
.get(&RelativePath::from_filename(path))
|
||||
.map(LayerFileMetadata::from)
|
||||
.unwrap_or(LayerFileMetadata::MISSING);
|
||||
|
||||
if let Some(remote_size) = layer_metadata.file_size() {
|
||||
let local_path = self.conf.timeline_path(&self.timeline_id, &self.tenant_id).join(&path);
|
||||
match local_path.metadata() {
|
||||
Ok(metadata) => {
|
||||
let local_size = metadata.len();
|
||||
|
||||
if local_size != remote_size {
|
||||
warn!("removing local file \"{}\" because it has unexpected length {}; length in remote index is {}",
|
||||
path.display(),
|
||||
local_size,
|
||||
remote_size);
|
||||
if let Err(err) = rename_to_backup(&local_path) {
|
||||
error!("could not rename file \"{}\": {:?}",
|
||||
local_path.display(), err);
|
||||
}
|
||||
self.metrics.current_physical_size_gauge.sub(local_size);
|
||||
false
|
||||
} else {
|
||||
true
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
error!("could not get size of local file \"{}\": {:?}", path.display(), err);
|
||||
true
|
||||
}
|
||||
}
|
||||
} else {
|
||||
true
|
||||
}
|
||||
});
|
||||
|
||||
// Are we missing some files that are present in remote storage?
|
||||
// Download them now.
|
||||
// TODO Downloading many files this way is not efficient.
|
||||
@@ -1010,63 +1056,17 @@ impl Timeline {
|
||||
// b) typical case now is that there is nothing to sync, this downloads a lot
|
||||
// 1) if there was another pageserver that came and generated new files
|
||||
// 2) during attach of a timeline with big history which we currently do not do
|
||||
let mut local_only_layers = local_layers;
|
||||
let timeline_dir = self.conf.timeline_path(&self.timeline_id, &self.tenant_id);
|
||||
for remote_layer_name in &index_part.timeline_layers {
|
||||
let local_layer_path = timeline_dir.join(remote_layer_name);
|
||||
local_only_layers.remove(&local_layer_path);
|
||||
for path in remote_filenames.difference(&local_filenames) {
|
||||
let fname = path.to_str().unwrap();
|
||||
info!("remote layer file {fname} does not exist locally");
|
||||
|
||||
let remote_layer_metadata = index_part
|
||||
let layer_metadata = index_part
|
||||
.layer_metadata
|
||||
.get(remote_layer_name)
|
||||
.get(&RelativePath::from_filename(path))
|
||||
.map(LayerFileMetadata::from)
|
||||
.unwrap_or(LayerFileMetadata::MISSING);
|
||||
|
||||
let remote_layer_path = self
|
||||
.conf
|
||||
.remote_path(&local_layer_path)
|
||||
.expect("local_layer_path received from the same conf that provided a workdir");
|
||||
|
||||
if local_layer_path.exists() {
|
||||
let mut already_downloaded = true;
|
||||
// Are there any local files that exist, with a size that doesn't match
|
||||
// with the size stored in the remote index file?
|
||||
// If so, rename_to_backup those files so that we re-download them later.
|
||||
if let Some(remote_size) = remote_layer_metadata.file_size() {
|
||||
match local_layer_path.metadata() {
|
||||
Ok(metadata) => {
|
||||
let local_size = metadata.len();
|
||||
|
||||
if local_size != remote_size {
|
||||
warn!("removing local file {local_layer_path:?} because it has unexpected length {local_size}; length in remote index is {remote_size}");
|
||||
if let Err(err) = rename_to_backup(&local_layer_path) {
|
||||
error!("could not rename file {local_layer_path:?}: {err:?}");
|
||||
} else {
|
||||
self.metrics.current_physical_size_gauge.sub(local_size);
|
||||
already_downloaded = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
error!("could not get size of local file {local_layer_path:?}: {err:?}")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if already_downloaded {
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
info!("remote layer {remote_layer_path:?} does not exist locally");
|
||||
}
|
||||
|
||||
let layer_name = local_layer_path
|
||||
.file_name()
|
||||
.and_then(|os_str| os_str.to_str())
|
||||
.with_context(|| {
|
||||
format!("Layer file {local_layer_path:?} has no name in unicode")
|
||||
})?;
|
||||
if let Some(imgfilename) = ImageFileName::parse_str(layer_name) {
|
||||
if let Some(imgfilename) = ImageFileName::parse_str(fname) {
|
||||
if imgfilename.lsn > up_to_date_disk_consistent_lsn {
|
||||
warn!(
|
||||
"found future image layer {} on timeline {} remote_consistent_lsn is {}",
|
||||
@@ -1075,13 +1075,11 @@ impl Timeline {
|
||||
continue;
|
||||
}
|
||||
|
||||
trace!("downloading image file: {remote_layer_path:?}");
|
||||
let downloaded_size = remote_client
|
||||
.download_layer_file(&remote_layer_path, &remote_layer_metadata)
|
||||
trace!("downloading image file: {}", file = path.display());
|
||||
let sz = remote_client
|
||||
.download_layer_file(&RelativePath::from_filename(path), &layer_metadata)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("failed to download image layer from path {remote_layer_path:?}")
|
||||
})?;
|
||||
.context("download image layer")?;
|
||||
trace!("done");
|
||||
|
||||
let image_layer =
|
||||
@@ -1091,10 +1089,8 @@ impl Timeline {
|
||||
.write()
|
||||
.unwrap()
|
||||
.insert_historic(Arc::new(image_layer));
|
||||
self.metrics
|
||||
.current_physical_size_gauge
|
||||
.add(downloaded_size);
|
||||
} else if let Some(deltafilename) = DeltaFileName::parse_str(layer_name) {
|
||||
self.metrics.current_physical_size_gauge.add(sz);
|
||||
} else if let Some(deltafilename) = DeltaFileName::parse_str(fname) {
|
||||
// Create a DeltaLayer struct for each delta file.
|
||||
// The end-LSN is exclusive, while disk_consistent_lsn is
|
||||
// inclusive. For example, if disk_consistent_lsn is 100, it is
|
||||
@@ -1109,13 +1105,11 @@ impl Timeline {
|
||||
continue;
|
||||
}
|
||||
|
||||
trace!("downloading delta file: {remote_layer_path:?}");
|
||||
trace!("downloading image file: {}", file = path.display());
|
||||
let sz = remote_client
|
||||
.download_layer_file(&remote_layer_path, &remote_layer_metadata)
|
||||
.download_layer_file(&RelativePath::from_filename(path), &layer_metadata)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("failed to download delta layer from path {remote_layer_path:?}")
|
||||
})?;
|
||||
.context("download delta layer")?;
|
||||
trace!("done");
|
||||
|
||||
let delta_layer =
|
||||
@@ -1126,18 +1120,17 @@ impl Timeline {
|
||||
.unwrap()
|
||||
.insert_historic(Arc::new(delta_layer));
|
||||
self.metrics.current_physical_size_gauge.add(sz);
|
||||
} else if layer_name.ends_with(".old") {
|
||||
// For details see https://github.com/neondatabase/neon/issues/3024
|
||||
warn!(
|
||||
"got backup file on the remote storage, ignoring it {file}",
|
||||
file = layer_name
|
||||
)
|
||||
} else {
|
||||
bail!("unexpected layer filename {layer_name} in remote storage path: {remote_layer_path:?}");
|
||||
bail!("unexpected layer filename in remote storage: {}", fname);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(local_only_layers)
|
||||
// now these are local only filenames
|
||||
let local_only_filenames = local_filenames
|
||||
.difference(&remote_filenames)
|
||||
.cloned()
|
||||
.collect();
|
||||
Ok(local_only_filenames)
|
||||
}
|
||||
|
||||
///
|
||||
@@ -1171,46 +1164,47 @@ impl Timeline {
|
||||
let disk_consistent_lsn = up_to_date_metadata.disk_consistent_lsn();
|
||||
|
||||
// Build a map of local layers for quick lookups
|
||||
let local_layers = self
|
||||
.layers
|
||||
.read()
|
||||
.unwrap()
|
||||
.iter_historic_layers()
|
||||
.map(|historic_layer| {
|
||||
historic_layer
|
||||
.local_path()
|
||||
.expect("Historic layers should have a path")
|
||||
})
|
||||
.collect::<HashSet<_>>();
|
||||
let mut local_filenames: HashSet<PathBuf> = HashSet::new();
|
||||
for layer in self.layers.read().unwrap().iter_historic_layers() {
|
||||
local_filenames.insert(layer.filename());
|
||||
}
|
||||
|
||||
let local_only_layers = match index_part {
|
||||
let local_only_filenames = match index_part {
|
||||
Some(index_part) => {
|
||||
info!(
|
||||
"initializing upload queue from remote index with {} layer files",
|
||||
index_part.timeline_layers.len()
|
||||
);
|
||||
remote_client.init_upload_queue(index_part)?;
|
||||
self.download_missing(index_part, remote_client, local_layers, disk_consistent_lsn)
|
||||
.await?
|
||||
let local_only_filenames = self
|
||||
.download_missing(
|
||||
index_part,
|
||||
remote_client,
|
||||
local_filenames,
|
||||
disk_consistent_lsn,
|
||||
)
|
||||
.await?;
|
||||
local_only_filenames
|
||||
}
|
||||
None => {
|
||||
info!("initializing upload queue as empty");
|
||||
remote_client.init_upload_queue_for_empty_remote(up_to_date_metadata)?;
|
||||
local_layers
|
||||
local_filenames
|
||||
}
|
||||
};
|
||||
|
||||
// Are there local files that don't exist remotely? Schedule uploads for them
|
||||
for layer_path in &local_only_layers {
|
||||
let layer_size = layer_path
|
||||
let timeline_path = self.conf.timeline_path(&self.timeline_id, &self.tenant_id);
|
||||
for fname in &local_only_filenames {
|
||||
let absolute = timeline_path.join(fname);
|
||||
let sz = absolute
|
||||
.metadata()
|
||||
.with_context(|| format!("failed to get file {layer_path:?} metadata"))?
|
||||
.with_context(|| format!("failed to get file {} metadata", fname.display()))?
|
||||
.len();
|
||||
info!("scheduling {layer_path:?} for upload");
|
||||
remote_client
|
||||
.schedule_layer_file_upload(layer_path, &LayerFileMetadata::new(layer_size))?;
|
||||
info!("scheduling {} for upload", fname.display());
|
||||
remote_client.schedule_layer_file_upload(&absolute, &LayerFileMetadata::new(sz))?;
|
||||
}
|
||||
if !local_only_layers.is_empty() {
|
||||
if !local_only_filenames.is_empty() {
|
||||
remote_client.schedule_index_upload(up_to_date_metadata)?;
|
||||
}
|
||||
|
||||
@@ -2648,22 +2642,24 @@ impl Timeline {
|
||||
data.records.len()
|
||||
);
|
||||
} else {
|
||||
if data.img.is_some() {
|
||||
let base_img = if let Some((_lsn, img)) = data.img {
|
||||
trace!(
|
||||
"found {} WAL records and a base image for {} at {}, performing WAL redo",
|
||||
data.records.len(),
|
||||
key,
|
||||
request_lsn
|
||||
);
|
||||
Some(img)
|
||||
} else {
|
||||
trace!("found {} WAL records that will init the page for {} at {}, performing WAL redo", data.records.len(), key, request_lsn);
|
||||
None
|
||||
};
|
||||
|
||||
let last_rec_lsn = data.records.last().unwrap().0;
|
||||
|
||||
let img = self
|
||||
.walredo_mgr
|
||||
.request_redo(key, request_lsn, data.img, data.records, self.pg_version)
|
||||
.request_redo(key, request_lsn, base_img, data.records, self.pg_version)
|
||||
.context("Failed to reconstruct a page image:")?;
|
||||
|
||||
if img.len() == page_cache::PAGE_SZ {
|
||||
|
||||
@@ -185,9 +185,6 @@ impl TenantConfOpt {
|
||||
if let Some(max_lsn_wal_lag) = other.max_lsn_wal_lag {
|
||||
self.max_lsn_wal_lag = Some(max_lsn_wal_lag);
|
||||
}
|
||||
if let Some(trace_read_requests) = other.trace_read_requests {
|
||||
self.trace_read_requests = Some(trace_read_requests);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,59 +1,75 @@
|
||||
//! This module acts as a switchboard to access different repositories managed by this
|
||||
//! page server.
|
||||
|
||||
use std::collections::{hash_map, HashMap};
|
||||
use std::collections::hash_map;
|
||||
use std::ffi::OsStr;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use tokio::fs;
|
||||
|
||||
use anyhow::Context;
|
||||
use once_cell::sync::Lazy;
|
||||
use tokio::sync::RwLock;
|
||||
use tracing::*;
|
||||
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use utils::crashsafe;
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
use crate::task_mgr::{self, TaskKind};
|
||||
use crate::tenant::{Tenant, TenantState};
|
||||
use crate::tenant_config::TenantConfOpt;
|
||||
use crate::IGNORED_TENANT_FILE_NAME;
|
||||
|
||||
use utils::fs_ext::PathExt;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
|
||||
static TENANTS: Lazy<RwLock<HashMap<TenantId, Arc<Tenant>>>> =
|
||||
Lazy::new(|| RwLock::new(HashMap::new()));
|
||||
mod tenants_state {
|
||||
use once_cell::sync::Lazy;
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard},
|
||||
};
|
||||
use utils::id::TenantId;
|
||||
|
||||
use crate::tenant::Tenant;
|
||||
|
||||
static TENANTS: Lazy<RwLock<HashMap<TenantId, Arc<Tenant>>>> =
|
||||
Lazy::new(|| RwLock::new(HashMap::new()));
|
||||
|
||||
pub(super) fn read_tenants() -> RwLockReadGuard<'static, HashMap<TenantId, Arc<Tenant>>> {
|
||||
TENANTS
|
||||
.read()
|
||||
.expect("Failed to read() tenants lock, it got poisoned")
|
||||
}
|
||||
|
||||
pub(super) fn write_tenants() -> RwLockWriteGuard<'static, HashMap<TenantId, Arc<Tenant>>> {
|
||||
TENANTS
|
||||
.write()
|
||||
.expect("Failed to write() tenants lock, it got poisoned")
|
||||
}
|
||||
}
|
||||
|
||||
/// Initialize repositories with locally available timelines.
|
||||
/// Timelines that are only partially available locally (remote storage has more data than this pageserver)
|
||||
/// are scheduled for download and added to the tenant once download is completed.
|
||||
#[instrument(skip(conf, remote_storage))]
|
||||
pub async fn init_tenant_mgr(
|
||||
pub fn init_tenant_mgr(
|
||||
conf: &'static PageServerConf,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
) -> anyhow::Result<()> {
|
||||
let _entered = info_span!("init_tenant_mgr").entered();
|
||||
|
||||
// Scan local filesystem for attached tenants
|
||||
let mut number_of_tenants = 0;
|
||||
let tenants_dir = conf.tenants_path();
|
||||
|
||||
let mut dir_entries = fs::read_dir(&tenants_dir)
|
||||
.await
|
||||
.with_context(|| format!("Failed to list tenants dir {tenants_dir:?}"))?;
|
||||
|
||||
loop {
|
||||
match dir_entries.next_entry().await {
|
||||
Ok(None) => break,
|
||||
Ok(Some(dir_entry)) => {
|
||||
for dir_entry in std::fs::read_dir(&tenants_dir)
|
||||
.with_context(|| format!("Failed to list tenants dir {}", tenants_dir.display()))?
|
||||
{
|
||||
match &dir_entry {
|
||||
Ok(dir_entry) => {
|
||||
let tenant_dir_path = dir_entry.path();
|
||||
if crate::is_temporary(&tenant_dir_path) {
|
||||
info!(
|
||||
"Found temporary tenant directory, removing: {}",
|
||||
tenant_dir_path.display()
|
||||
);
|
||||
if let Err(e) = fs::remove_dir_all(&tenant_dir_path).await {
|
||||
if let Err(e) = std::fs::remove_dir_all(&tenant_dir_path) {
|
||||
error!(
|
||||
"Failed to remove temporary directory '{}': {:?}",
|
||||
tenant_dir_path.display(),
|
||||
@@ -61,38 +77,27 @@ pub async fn init_tenant_mgr(
|
||||
);
|
||||
}
|
||||
} else {
|
||||
// This case happens if we crash during attach before creating the attach marker file
|
||||
let is_empty = tenant_dir_path.is_empty_dir().with_context(|| {
|
||||
format!("Failed to check whether {tenant_dir_path:?} is an empty dir")
|
||||
})?;
|
||||
if is_empty {
|
||||
info!("removing empty tenant directory {tenant_dir_path:?}");
|
||||
if let Err(e) = fs::remove_dir(&tenant_dir_path).await {
|
||||
error!(
|
||||
"Failed to remove empty tenant directory '{}': {e:#}",
|
||||
tenant_dir_path.display()
|
||||
)
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
let tenant_ignore_mark_file = tenant_dir_path.join(IGNORED_TENANT_FILE_NAME);
|
||||
if tenant_ignore_mark_file.exists() {
|
||||
info!("Found an ignore mark file {tenant_ignore_mark_file:?}, skipping the tenant");
|
||||
continue;
|
||||
}
|
||||
|
||||
match schedule_local_tenant_processing(
|
||||
conf,
|
||||
&tenant_dir_path,
|
||||
remote_storage.clone(),
|
||||
) {
|
||||
Ok(tenant) => {
|
||||
TENANTS.write().await.insert(tenant.tenant_id(), tenant);
|
||||
match load_local_tenant(conf, &tenant_dir_path, remote_storage.clone()) {
|
||||
Ok(Some(tenant)) => {
|
||||
tenants_state::write_tenants().insert(tenant.tenant_id(), tenant);
|
||||
number_of_tenants += 1;
|
||||
}
|
||||
Ok(None) => {
|
||||
// This case happens if we crash during attach before creating the attach marker file
|
||||
if let Err(e) = std::fs::remove_dir(&tenant_dir_path) {
|
||||
error!(
|
||||
"Failed to remove empty tenant directory '{}': {e:#}",
|
||||
tenant_dir_path.display()
|
||||
)
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to collect tenant files from dir {tenants_dir:?} for entry {dir_entry:?}, reason: {e:#}");
|
||||
error!(
|
||||
"Failed to collect tenant files from dir '{}' for entry {:?}, reason: {:#}",
|
||||
tenants_dir.display(),
|
||||
dir_entry,
|
||||
e
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -102,7 +107,10 @@ pub async fn init_tenant_mgr(
|
||||
// here, the pageserver startup fails altogether, causing outage for *all*
|
||||
// tenants. That seems worse.
|
||||
error!(
|
||||
"Failed to list tenants dir entry in directory {tenants_dir:?}, reason: {e:?}"
|
||||
"Failed to list tenants dir entry {:?} in directory {}, reason: {:?}",
|
||||
dir_entry,
|
||||
tenants_dir.display(),
|
||||
e,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -112,45 +120,34 @@ pub async fn init_tenant_mgr(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn schedule_local_tenant_processing(
|
||||
fn load_local_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_path: &Path,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
) -> anyhow::Result<Arc<Tenant>> {
|
||||
anyhow::ensure!(
|
||||
tenant_path.is_dir(),
|
||||
"Cannot load tenant from path {tenant_path:?}, it either does not exist or not a directory"
|
||||
);
|
||||
anyhow::ensure!(
|
||||
!crate::is_temporary(tenant_path),
|
||||
"Cannot load tenant from temporary path {tenant_path:?}"
|
||||
);
|
||||
anyhow::ensure!(
|
||||
!tenant_path.is_empty_dir().with_context(|| {
|
||||
format!("Failed to check whether {tenant_path:?} is an empty dir")
|
||||
})?,
|
||||
"Cannot load tenant from empty directory {tenant_path:?}"
|
||||
);
|
||||
) -> anyhow::Result<Option<Arc<Tenant>>> {
|
||||
if !tenant_path.is_dir() {
|
||||
anyhow::bail!("tenant_path is not a directory: {tenant_path:?}")
|
||||
}
|
||||
|
||||
let is_empty = tenant_path
|
||||
.is_empty_dir()
|
||||
.context("check whether tenant_path is an empty dir")?;
|
||||
if is_empty {
|
||||
info!("skipping empty tenant directory {tenant_path:?}");
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let tenant_id = tenant_path
|
||||
.file_name()
|
||||
.and_then(OsStr::to_str)
|
||||
.unwrap_or_default()
|
||||
.parse::<TenantId>()
|
||||
.with_context(|| {
|
||||
format!("Could not parse tenant id out of the tenant dir name in path {tenant_path:?}")
|
||||
})?;
|
||||
|
||||
let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(tenant_id);
|
||||
anyhow::ensure!(
|
||||
!conf.tenant_ignore_mark_file_path(tenant_id).exists(),
|
||||
"Cannot load tenant, ignore mark found at {tenant_ignore_mark:?}"
|
||||
);
|
||||
.context("Could not parse tenant id out of the tenant dir name")?;
|
||||
|
||||
let tenant = if conf.tenant_attaching_mark_file_path(&tenant_id).exists() {
|
||||
info!("tenant {tenant_id} has attaching mark file, resuming its attach operation");
|
||||
if let Some(remote_storage) = remote_storage {
|
||||
Tenant::spawn_attach(conf, tenant_id, remote_storage)
|
||||
Tenant::spawn_attach(conf, tenant_id, &remote_storage)
|
||||
} else {
|
||||
warn!("tenant {tenant_id} has attaching mark file, but pageserver has no remote storage configured");
|
||||
Tenant::create_broken_tenant(conf, tenant_id)
|
||||
@@ -160,7 +157,7 @@ pub fn schedule_local_tenant_processing(
|
||||
// Start loading the tenant into memory. It will initially be in Loading state.
|
||||
Tenant::spawn_load(conf, tenant_id, remote_storage)
|
||||
};
|
||||
Ok(tenant)
|
||||
Ok(Some(tenant))
|
||||
}
|
||||
|
||||
///
|
||||
@@ -168,7 +165,7 @@ pub fn schedule_local_tenant_processing(
|
||||
///
|
||||
pub async fn shutdown_all_tenants() {
|
||||
let tenants_to_shut_down = {
|
||||
let mut m = TENANTS.write().await;
|
||||
let mut m = tenants_state::write_tenants();
|
||||
let mut tenants_to_shut_down = Vec::with_capacity(m.len());
|
||||
for (_, tenant) in m.drain() {
|
||||
if tenant.is_active() {
|
||||
@@ -202,13 +199,13 @@ pub async fn shutdown_all_tenants() {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn create_tenant(
|
||||
pub fn create_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_conf: TenantConfOpt,
|
||||
tenant_id: TenantId,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
) -> anyhow::Result<Option<Arc<Tenant>>> {
|
||||
match TENANTS.write().await.entry(tenant_id) {
|
||||
match tenants_state::write_tenants().entry(tenant_id) {
|
||||
hash_map::Entry::Occupied(_) => {
|
||||
debug!("tenant {tenant_id} already exists");
|
||||
Ok(None)
|
||||
@@ -218,36 +215,44 @@ pub async fn create_tenant(
|
||||
// If this section ever becomes contentious, introduce a new `TenantState::Creating`.
|
||||
let tenant_directory =
|
||||
super::tenant::create_tenant_files(conf, tenant_conf, tenant_id)?;
|
||||
let created_tenant =
|
||||
schedule_local_tenant_processing(conf, &tenant_directory, remote_storage)?;
|
||||
let crated_tenant_id = created_tenant.tenant_id();
|
||||
anyhow::ensure!(
|
||||
tenant_id == crated_tenant_id,
|
||||
"loaded created tenant has unexpected tenant id (expect {tenant_id} != actual {crated_tenant_id})",
|
||||
);
|
||||
v.insert(Arc::clone(&created_tenant));
|
||||
Ok(Some(created_tenant))
|
||||
let created_tenant = load_local_tenant(conf, &tenant_directory, remote_storage)?;
|
||||
match created_tenant {
|
||||
None => {
|
||||
// We get None in case the directory is empty.
|
||||
// This shouldn't happen here, because we just created the directory.
|
||||
// So, skip any cleanup work for now, we don't know how we reached this state.
|
||||
anyhow::bail!("we just created the tenant directory, it can't be empty");
|
||||
}
|
||||
Some(tenant) => {
|
||||
anyhow::ensure!(
|
||||
tenant_id == tenant.tenant_id(),
|
||||
"loaded created tenant has unexpected tenant id (expect {} != actual {})",
|
||||
tenant_id,
|
||||
tenant.tenant_id()
|
||||
);
|
||||
v.insert(Arc::clone(&tenant));
|
||||
Ok(Some(tenant))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn update_tenant_config(
|
||||
pub fn update_tenant_config(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_conf: TenantConfOpt,
|
||||
tenant_id: TenantId,
|
||||
) -> anyhow::Result<()> {
|
||||
info!("configuring tenant {tenant_id}");
|
||||
get_tenant(tenant_id, true)
|
||||
.await?
|
||||
.update_tenant_config(tenant_conf);
|
||||
get_tenant(tenant_id, true)?.update_tenant_config(tenant_conf);
|
||||
Tenant::persist_tenant_config(&conf.tenant_config_path(tenant_id), tenant_conf, false)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Gets the tenant from the in-memory data, erroring if it's absent or is not fitting to the query.
|
||||
/// `active_only = true` allows to query only tenants that are ready for operations, erroring on other kinds of tenants.
|
||||
pub async fn get_tenant(tenant_id: TenantId, active_only: bool) -> anyhow::Result<Arc<Tenant>> {
|
||||
let m = TENANTS.read().await;
|
||||
pub fn get_tenant(tenant_id: TenantId, active_only: bool) -> anyhow::Result<Arc<Tenant>> {
|
||||
let m = tenants_state::read_tenants();
|
||||
let tenant = m
|
||||
.get(&tenant_id)
|
||||
.with_context(|| format!("Tenant {tenant_id} not found in the local state"))?;
|
||||
@@ -283,7 +288,7 @@ pub async fn delete_timeline(tenant_id: TenantId, timeline_id: TimelineId) -> an
|
||||
info!("waiting for timeline tasks to shutdown");
|
||||
task_mgr::shutdown_tasks(None, Some(tenant_id), Some(timeline_id)).await;
|
||||
info!("timeline task shutdown completed");
|
||||
match get_tenant(tenant_id, true).await {
|
||||
match get_tenant(tenant_id, true) {
|
||||
Ok(tenant) => {
|
||||
tenant.delete_timeline(timeline_id).await?;
|
||||
}
|
||||
@@ -297,67 +302,40 @@ pub async fn detach_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
) -> anyhow::Result<()> {
|
||||
remove_tenant_from_memory(tenant_id, async {
|
||||
let local_tenant_directory = conf.tenant_path(&tenant_id);
|
||||
fs::remove_dir_all(&local_tenant_directory)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("Failed to remove local tenant directory {local_tenant_directory:?}")
|
||||
})?;
|
||||
Ok(())
|
||||
})
|
||||
.await
|
||||
}
|
||||
let tenant = match {
|
||||
let mut tenants_accessor = tenants_state::write_tenants();
|
||||
tenants_accessor.remove(&tenant_id)
|
||||
} {
|
||||
Some(tenant) => tenant,
|
||||
None => anyhow::bail!("Tenant not found for id {tenant_id}"),
|
||||
};
|
||||
|
||||
pub async fn load_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
) -> anyhow::Result<()> {
|
||||
run_if_no_tenant_in_memory(tenant_id, |vacant_entry| {
|
||||
let tenant_path = conf.tenant_path(&tenant_id);
|
||||
let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(tenant_id);
|
||||
if tenant_ignore_mark.exists() {
|
||||
std::fs::remove_file(&tenant_ignore_mark)
|
||||
.with_context(|| format!("Failed to remove tenant ignore mark {tenant_ignore_mark:?} during tenant loading"))?;
|
||||
}
|
||||
tenant.set_stopping();
|
||||
// shutdown all tenant and timeline tasks: gc, compaction, page service)
|
||||
task_mgr::shutdown_tasks(None, Some(tenant_id), None).await;
|
||||
|
||||
let new_tenant = schedule_local_tenant_processing(conf, &tenant_path, remote_storage)
|
||||
.with_context(|| {
|
||||
format!("Failed to schedule tenant processing in path {tenant_path:?}")
|
||||
})?;
|
||||
// If removal fails there will be no way to successfully retry detach,
|
||||
// because the tenant no longer exists in the in-memory map. And it needs to be removed from it
|
||||
// before we remove files, because it contains references to tenant
|
||||
// which references ephemeral files which are deleted on drop. So if we keep these references,
|
||||
// we will attempt to remove files which no longer exist. This can be fixed by having shutdown
|
||||
// mechanism for tenant that will clean temporary data to avoid any references to ephemeral files
|
||||
let local_tenant_directory = conf.tenant_path(&tenant_id);
|
||||
fs::remove_dir_all(&local_tenant_directory).with_context(|| {
|
||||
format!(
|
||||
"Failed to remove local tenant directory '{}'",
|
||||
local_tenant_directory.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
vacant_entry.insert(new_tenant);
|
||||
Ok(())
|
||||
}).await
|
||||
}
|
||||
|
||||
pub async fn ignore_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
) -> anyhow::Result<()> {
|
||||
remove_tenant_from_memory(tenant_id, async {
|
||||
let ignore_mark_file = conf.tenant_ignore_mark_file_path(tenant_id);
|
||||
fs::File::create(&ignore_mark_file)
|
||||
.await
|
||||
.context("Failed to create ignore mark file")
|
||||
.and_then(|_| {
|
||||
crashsafe::fsync_file_and_parent(&ignore_mark_file)
|
||||
.context("Failed to fsync ignore mark file")
|
||||
})
|
||||
.with_context(|| format!("Failed to crate ignore mark for tenant {tenant_id}"))?;
|
||||
Ok(())
|
||||
})
|
||||
.await
|
||||
Ok(())
|
||||
}
|
||||
|
||||
///
|
||||
/// Get list of tenants, for the mgmt API
|
||||
///
|
||||
pub async fn list_tenants() -> Vec<(TenantId, TenantState)> {
|
||||
TENANTS
|
||||
.read()
|
||||
.await
|
||||
pub fn list_tenants() -> Vec<(TenantId, TenantState)> {
|
||||
tenants_state::read_tenants()
|
||||
.iter()
|
||||
.map(|(id, tenant)| (*id, tenant.current_state()))
|
||||
.collect()
|
||||
@@ -370,92 +348,25 @@ pub async fn list_tenants() -> Vec<(TenantId, TenantState)> {
|
||||
pub async fn attach_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
remote_storage: &GenericRemoteStorage,
|
||||
) -> anyhow::Result<()> {
|
||||
run_if_no_tenant_in_memory(tenant_id, |vacant_entry| {
|
||||
let tenant_path = conf.tenant_path(&tenant_id);
|
||||
anyhow::ensure!(
|
||||
!tenant_path.exists(),
|
||||
"Cannot attach tenant {tenant_id}, local tenant directory already exists"
|
||||
);
|
||||
|
||||
let tenant = Tenant::spawn_attach(conf, tenant_id, remote_storage);
|
||||
vacant_entry.insert(tenant);
|
||||
|
||||
Ok(())
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
async fn run_if_no_tenant_in_memory<F, V>(tenant_id: TenantId, run: F) -> anyhow::Result<V>
|
||||
where
|
||||
F: FnOnce(hash_map::VacantEntry<TenantId, Arc<Tenant>>) -> anyhow::Result<V>,
|
||||
{
|
||||
match TENANTS.write().await.entry(tenant_id) {
|
||||
match tenants_state::write_tenants().entry(tenant_id) {
|
||||
hash_map::Entry::Occupied(e) => {
|
||||
anyhow::bail!(
|
||||
"tenant {tenant_id} already exists, state: {:?}",
|
||||
e.get().current_state()
|
||||
)
|
||||
}
|
||||
hash_map::Entry::Vacant(v) => run(v),
|
||||
}
|
||||
}
|
||||
|
||||
/// Stops and removes the tenant from memory, if it's not [`TenantState::Stopping`] already, bails otherwise.
|
||||
/// Allows to remove other tenant resources manually, via `tenant_cleanup`.
|
||||
/// If the cleanup fails, tenant will stay in memory in [`TenantState::Broken`] state, and another removal
|
||||
/// operation would be needed to remove it.
|
||||
async fn remove_tenant_from_memory<V, F>(
|
||||
tenant_id: TenantId,
|
||||
tenant_cleanup: F,
|
||||
) -> anyhow::Result<V>
|
||||
where
|
||||
F: std::future::Future<Output = anyhow::Result<V>>,
|
||||
{
|
||||
// It's important to keep the tenant in memory after the final cleanup, to avoid cleanup races.
|
||||
// The exclusive lock here ensures we don't miss the tenant state updates before trying another removal.
|
||||
// tenant-wde cleanup operations may take some time (removing the entire tenant directory), we want to
|
||||
// avoid holding the lock for the entire process.
|
||||
{
|
||||
let tenants_accessor = TENANTS.write().await;
|
||||
match tenants_accessor.get(&tenant_id) {
|
||||
Some(tenant) => match tenant.current_state() {
|
||||
TenantState::Attaching
|
||||
| TenantState::Loading
|
||||
| TenantState::Broken
|
||||
| TenantState::Active => tenant.set_stopping(),
|
||||
TenantState::Stopping => {
|
||||
anyhow::bail!("Tenant {tenant_id} is stopping already")
|
||||
// Cannot attach a tenant that already exists. The error message depends on
|
||||
// the state it's in.
|
||||
match e.get().current_state() {
|
||||
TenantState::Attaching => {
|
||||
anyhow::bail!("tenant {tenant_id} attach is already in progress")
|
||||
}
|
||||
current_state => {
|
||||
anyhow::bail!("tenant already exists, current state: {current_state:?}")
|
||||
}
|
||||
},
|
||||
None => anyhow::bail!("Tenant not found for id {tenant_id}"),
|
||||
}
|
||||
}
|
||||
|
||||
// shutdown all tenant and timeline tasks: gc, compaction, page service)
|
||||
// No new tasks will be started for this tenant because it's in `Stopping` state.
|
||||
// Hence, once we're done here, the `tenant_cleanup` callback can mutate tenant on-disk state freely.
|
||||
task_mgr::shutdown_tasks(None, Some(tenant_id), None).await;
|
||||
|
||||
match tenant_cleanup
|
||||
.await
|
||||
.with_context(|| format!("Failed to run cleanup for tenant {tenant_id}"))
|
||||
{
|
||||
Ok(hook_value) => {
|
||||
let mut tenants_accessor = TENANTS.write().await;
|
||||
if tenants_accessor.remove(&tenant_id).is_none() {
|
||||
warn!("Tenant {tenant_id} got removed from memory before operation finished");
|
||||
}
|
||||
Ok(hook_value)
|
||||
}
|
||||
Err(e) => {
|
||||
let tenants_accessor = TENANTS.read().await;
|
||||
match tenants_accessor.get(&tenant_id) {
|
||||
Some(tenant) => tenant.set_broken(),
|
||||
None => warn!("Tenant {tenant_id} got removed from memory"),
|
||||
}
|
||||
Err(e)
|
||||
hash_map::Entry::Vacant(v) => {
|
||||
let tenant = Tenant::spawn_attach(conf, tenant_id, remote_storage);
|
||||
v.insert(tenant);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -467,12 +378,12 @@ use {
|
||||
};
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
pub async fn immediate_gc(
|
||||
pub fn immediate_gc(
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
gc_req: TimelineGcRequest,
|
||||
) -> Result<tokio::sync::oneshot::Receiver<Result<GcResult, anyhow::Error>>, ApiError> {
|
||||
let guard = TENANTS.read().await;
|
||||
let guard = tenants_state::read_tenants();
|
||||
|
||||
let tenant = guard
|
||||
.get(&tenant_id)
|
||||
|
||||
@@ -155,7 +155,7 @@ async fn wait_for_active_tenant(
|
||||
wait: Duration,
|
||||
) -> ControlFlow<(), Arc<Tenant>> {
|
||||
let tenant = loop {
|
||||
match tenant_mgr::get_tenant(tenant_id, false).await {
|
||||
match tenant_mgr::get_tenant(tenant_id, false) {
|
||||
Ok(tenant) => break tenant,
|
||||
Err(e) => {
|
||||
error!("Failed to get a tenant {tenant_id}: {e:#}");
|
||||
|
||||
@@ -84,7 +84,7 @@ pub trait WalRedoManager: Send + Sync {
|
||||
&self,
|
||||
key: Key,
|
||||
lsn: Lsn,
|
||||
base_img: Option<(Lsn, Bytes)>,
|
||||
base_img: Option<Bytes>,
|
||||
records: Vec<(Lsn, NeonWalRecord)>,
|
||||
pg_version: u32,
|
||||
) -> Result<Bytes, WalRedoError>;
|
||||
@@ -147,7 +147,7 @@ impl WalRedoManager for PostgresRedoManager {
|
||||
&self,
|
||||
key: Key,
|
||||
lsn: Lsn,
|
||||
base_img: Option<(Lsn, Bytes)>,
|
||||
base_img: Option<Bytes>,
|
||||
records: Vec<(Lsn, NeonWalRecord)>,
|
||||
pg_version: u32,
|
||||
) -> Result<Bytes, WalRedoError> {
|
||||
@@ -156,8 +156,7 @@ impl WalRedoManager for PostgresRedoManager {
|
||||
return Err(WalRedoError::InvalidRequest);
|
||||
}
|
||||
|
||||
let base_img_lsn = base_img.as_ref().map(|p| p.0).unwrap_or(Lsn::INVALID);
|
||||
let mut img = base_img.map(|p| p.1);
|
||||
let mut img: Option<Bytes> = base_img;
|
||||
let mut batch_neon = can_apply_in_neon(&records[0].1);
|
||||
let mut batch_start = 0;
|
||||
for i in 1..records.len() {
|
||||
@@ -171,7 +170,6 @@ impl WalRedoManager for PostgresRedoManager {
|
||||
key,
|
||||
lsn,
|
||||
img,
|
||||
base_img_lsn,
|
||||
&records[batch_start..i],
|
||||
self.conf.wal_redo_timeout,
|
||||
pg_version,
|
||||
@@ -191,7 +189,6 @@ impl WalRedoManager for PostgresRedoManager {
|
||||
key,
|
||||
lsn,
|
||||
img,
|
||||
base_img_lsn,
|
||||
&records[batch_start..],
|
||||
self.conf.wal_redo_timeout,
|
||||
pg_version,
|
||||
@@ -226,13 +223,11 @@ impl PostgresRedoManager {
|
||||
///
|
||||
/// Process one request for WAL redo using wal-redo postgres
|
||||
///
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn apply_batch_postgres(
|
||||
&self,
|
||||
key: Key,
|
||||
lsn: Lsn,
|
||||
base_img: Option<Bytes>,
|
||||
base_img_lsn: Lsn,
|
||||
records: &[(Lsn, NeonWalRecord)],
|
||||
wal_redo_timeout: Duration,
|
||||
pg_version: u32,
|
||||
@@ -287,12 +282,9 @@ impl PostgresRedoManager {
|
||||
// next request will launch a new one.
|
||||
if result.is_err() {
|
||||
error!(
|
||||
"error applying {} WAL records {}..{} ({} bytes) to base image with LSN {} to reconstruct page image at LSN {}",
|
||||
"error applying {} WAL records ({} bytes) to reconstruct page image at LSN {}",
|
||||
records.len(),
|
||||
records.first().map(|p| p.0).unwrap_or(Lsn(0)),
|
||||
records.last().map(|p| p.0).unwrap_or(Lsn(0)),
|
||||
nbytes,
|
||||
base_img_lsn,
|
||||
lsn
|
||||
);
|
||||
let process = process_guard.take().unwrap();
|
||||
@@ -930,7 +922,8 @@ impl NoLeakChild {
|
||||
|
||||
match child.wait() {
|
||||
Ok(exit_status) => {
|
||||
info!(exit_status = %exit_status, "wait successful");
|
||||
// log at error level since .kill() is something we only do on errors ATM
|
||||
error!(exit_status = %exit_status, "wait successful");
|
||||
}
|
||||
Err(e) => {
|
||||
error!(error = %e, "wait error; might leak the child process; it will show as zombie (defunct)");
|
||||
|
||||
@@ -464,12 +464,12 @@ pg_init_libpagestore(void)
|
||||
NULL, NULL, NULL);
|
||||
DefineCustomIntVariable("neon.readahead_buffer_size",
|
||||
"number of prefetches to buffer",
|
||||
"This buffer is used to hold and manage prefetched "
|
||||
"data; so it is important that this buffer is at "
|
||||
"least as large as the configured value of all "
|
||||
"tablespaces' effective_io_concurrency and "
|
||||
"maintenance_io_concurrency, and your sessions' "
|
||||
"values for these settings.",
|
||||
"This buffer is used to store prefetched data; so "
|
||||
"it is important that this buffer is at least as "
|
||||
"large as the configured value of all tablespaces' "
|
||||
"effective_io_concurrency and maintenance_io_concurrency, "
|
||||
"your sessions' values of these, and the value for "
|
||||
"seqscan_prefetch_buffers.",
|
||||
&readahead_buffer_size,
|
||||
128, 16, 1024,
|
||||
PGC_USERSET,
|
||||
|
||||
@@ -242,14 +242,6 @@ PrefetchState *MyPState;
|
||||
) \
|
||||
)
|
||||
|
||||
#define ReceiveBufferNeedsCompaction() (\
|
||||
(MyPState->n_responses_buffered / 8) < ( \
|
||||
MyPState->ring_receive - \
|
||||
MyPState->ring_last - \
|
||||
MyPState->n_responses_buffered \
|
||||
) \
|
||||
)
|
||||
|
||||
int n_prefetch_hits = 0;
|
||||
int n_prefetch_misses = 0;
|
||||
int n_prefetch_missed_caches = 0;
|
||||
@@ -257,99 +249,17 @@ int n_prefetch_dupes = 0;
|
||||
|
||||
XLogRecPtr prefetch_lsn = 0;
|
||||
|
||||
static bool compact_prefetch_buffers(void);
|
||||
static void consume_prefetch_responses(void);
|
||||
static uint64 prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_lsn);
|
||||
static bool prefetch_read(PrefetchRequest *slot);
|
||||
static void prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force_lsn);
|
||||
static bool prefetch_wait_for(uint64 ring_index);
|
||||
static void prefetch_cleanup_trailing_unused(void);
|
||||
static void prefetch_cleanup(void);
|
||||
static inline void prefetch_set_unused(uint64 ring_index);
|
||||
|
||||
static XLogRecPtr neon_get_request_lsn(bool *latest, RelFileNode rnode,
|
||||
ForkNumber forknum, BlockNumber blkno);
|
||||
|
||||
static bool
|
||||
compact_prefetch_buffers(void)
|
||||
{
|
||||
uint64 empty_ring_index = MyPState->ring_last;
|
||||
uint64 search_ring_index = MyPState->ring_receive;
|
||||
int n_moved = 0;
|
||||
|
||||
if (MyPState->ring_receive == MyPState->ring_last)
|
||||
return false;
|
||||
|
||||
while (search_ring_index > MyPState->ring_last)
|
||||
{
|
||||
search_ring_index--;
|
||||
if (GetPrfSlot(search_ring_index)->status == PRFS_UNUSED)
|
||||
{
|
||||
empty_ring_index = search_ring_index;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Here we have established:
|
||||
* slots < search_ring_index may be unused (not scanned)
|
||||
* slots >= search_ring_index and <= empty_ring_index are unused
|
||||
* slots > empty_ring_index are in use, or outside our buffer's range.
|
||||
*
|
||||
* Therefore, there is a gap of at least one unused items between
|
||||
* search_ring_index and empty_ring_index, which grows as we hit
|
||||
* more unused items while moving backwards through the array.
|
||||
*/
|
||||
|
||||
while (search_ring_index > MyPState->ring_last)
|
||||
{
|
||||
PrefetchRequest *source_slot;
|
||||
PrefetchRequest *target_slot;
|
||||
bool found;
|
||||
|
||||
search_ring_index--;
|
||||
|
||||
source_slot = GetPrfSlot(search_ring_index);
|
||||
|
||||
if (source_slot->status == PRFS_UNUSED)
|
||||
continue;
|
||||
|
||||
target_slot = GetPrfSlot(empty_ring_index);
|
||||
|
||||
Assert(source_slot->status == PRFS_RECEIVED);
|
||||
Assert(target_slot->status == PRFS_UNUSED);
|
||||
|
||||
target_slot->buftag = source_slot->buftag;
|
||||
target_slot->status = source_slot->status;
|
||||
target_slot->response = source_slot->response;
|
||||
target_slot->effective_request_lsn = source_slot->effective_request_lsn;
|
||||
target_slot->my_ring_index = empty_ring_index;
|
||||
|
||||
prfh_delete(MyPState->prf_hash, source_slot);
|
||||
prfh_insert(MyPState->prf_hash, target_slot, &found);
|
||||
|
||||
Assert(!found);
|
||||
|
||||
/* Adjust the location of our known-empty slot */
|
||||
empty_ring_index--;
|
||||
|
||||
source_slot->status = PRFS_UNUSED;
|
||||
source_slot->buftag = (BufferTag) {0};
|
||||
source_slot->response = NULL;
|
||||
source_slot->my_ring_index = 0;
|
||||
source_slot->effective_request_lsn = 0;
|
||||
|
||||
n_moved++;
|
||||
}
|
||||
|
||||
if (MyPState->ring_last != empty_ring_index)
|
||||
{
|
||||
prefetch_cleanup_trailing_unused();
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void
|
||||
readahead_buffer_resize(int newsize, void *extra)
|
||||
{
|
||||
@@ -413,7 +323,7 @@ readahead_buffer_resize(int newsize, void *extra)
|
||||
prfh_insert(newPState->prf_hash, newslot, &found);
|
||||
|
||||
Assert(!found);
|
||||
|
||||
|
||||
switch (newslot->status)
|
||||
{
|
||||
case PRFS_UNUSED:
|
||||
@@ -460,7 +370,7 @@ consume_prefetch_responses(void)
|
||||
}
|
||||
|
||||
static void
|
||||
prefetch_cleanup_trailing_unused(void)
|
||||
prefetch_cleanup(void)
|
||||
{
|
||||
uint64 ring_index;
|
||||
PrefetchRequest *slot;
|
||||
@@ -621,10 +531,7 @@ prefetch_set_unused(uint64 ring_index)
|
||||
|
||||
/* run cleanup if we're holding back ring_last */
|
||||
if (MyPState->ring_last == ring_index)
|
||||
prefetch_cleanup_trailing_unused();
|
||||
/* ... and try to store the buffered responses more compactly if > 12.5% of the buffer is gaps */
|
||||
else if (ReceiveBufferNeedsCompaction())
|
||||
compact_prefetch_buffers();
|
||||
prefetch_cleanup();
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -675,33 +582,6 @@ prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force
|
||||
request.req.lsn = lsn;
|
||||
prefetch_lsn = Max(prefetch_lsn, lsn);
|
||||
slot->effective_request_lsn = prefetch_lsn;
|
||||
|
||||
/*
|
||||
* Remember request LSN in the last-written LSN cache to avoid false
|
||||
* prefetch invalidations.
|
||||
*
|
||||
* Imagine what would happen without this, when you perform a large
|
||||
* sequential scan with UPDATE. The sequential scan issues a prefetch
|
||||
* request for each page in order, and every page is also dirtied. On
|
||||
* each page, the oldest page in the last-written LSN cache is evicted,
|
||||
* which advances the global last-written LSN. The pages being scanned are
|
||||
* not in the last-written cache, so each prefetch request will use the
|
||||
* global last-written LSN in the request and memorize that in the
|
||||
* slot. However, when we receive the response to the prefetch request,
|
||||
* the global last-written LSN has already moved forwards, and the
|
||||
* cross-check we make that the last-written LSN matches will fail, and we
|
||||
* discard the prefetched response unnecessary.
|
||||
*
|
||||
* Inserting the LSN we use in the prefetch request to the last-written LSN
|
||||
* cache avoids that problem. With that, we will use the cached value in
|
||||
* the cross-check, instead of the more recent global last-written LSN value.
|
||||
*/
|
||||
SetLastWrittenLSNForBlock(
|
||||
request.req.lsn,
|
||||
slot->buftag.rnode,
|
||||
slot->buftag.forkNum,
|
||||
slot->buftag.blockNum
|
||||
);
|
||||
}
|
||||
|
||||
Assert(slot->response == NULL);
|
||||
@@ -822,31 +702,20 @@ prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_ls
|
||||
|
||||
Assert(slot->status != PRFS_UNUSED);
|
||||
|
||||
/*
|
||||
* If there is good reason to run compaction on the prefetch buffers,
|
||||
* try to do that.
|
||||
*/
|
||||
if (ReceiveBufferNeedsCompaction() && compact_prefetch_buffers())
|
||||
/* We have the slot for ring_last, so that must still be in progress */
|
||||
switch (slot->status)
|
||||
{
|
||||
Assert(slot->status == PRFS_UNUSED);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* We have the slot for ring_last, so that must still be in progress */
|
||||
switch (slot->status)
|
||||
{
|
||||
case PRFS_REQUESTED:
|
||||
Assert(MyPState->ring_receive == cleanup_index);
|
||||
prefetch_wait_for(cleanup_index);
|
||||
prefetch_set_unused(cleanup_index);
|
||||
break;
|
||||
case PRFS_RECEIVED:
|
||||
case PRFS_TAG_REMAINS:
|
||||
prefetch_set_unused(cleanup_index);
|
||||
break;
|
||||
default:
|
||||
pg_unreachable();
|
||||
}
|
||||
case PRFS_REQUESTED:
|
||||
Assert(MyPState->ring_receive == cleanup_index);
|
||||
prefetch_wait_for(cleanup_index);
|
||||
prefetch_set_unused(cleanup_index);
|
||||
break;
|
||||
case PRFS_RECEIVED:
|
||||
case PRFS_TAG_REMAINS:
|
||||
prefetch_set_unused(cleanup_index);
|
||||
break;
|
||||
default:
|
||||
pg_unreachable();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1233,7 +1102,7 @@ PageIsEmptyHeapPage(char *buffer)
|
||||
}
|
||||
|
||||
static void
|
||||
neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool force)
|
||||
neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer)
|
||||
{
|
||||
XLogRecPtr lsn = PageGetLSN(buffer);
|
||||
|
||||
@@ -1247,7 +1116,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, ch
|
||||
* correctness, the non-logged updates are not critical. But we want to
|
||||
* have a reasonably up-to-date VM and FSM in the page server.
|
||||
*/
|
||||
if ((force || forknum == FSM_FORKNUM || forknum == VISIBILITYMAP_FORKNUM) && !RecoveryInProgress())
|
||||
if (forknum == FSM_FORKNUM && !RecoveryInProgress())
|
||||
{
|
||||
/* FSM is never WAL-logged and we don't care. */
|
||||
XLogRecPtr recptr;
|
||||
@@ -1256,7 +1125,30 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, ch
|
||||
XLogFlush(recptr);
|
||||
lsn = recptr;
|
||||
ereport(SmgrTrace,
|
||||
(errmsg("Page %u of relation %u/%u/%u.%u was force logged. Evicted at lsn=%X/%X",
|
||||
(errmsg("FSM page %u of relation %u/%u/%u.%u was force logged. Evicted at lsn=%X/%X",
|
||||
blocknum,
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode,
|
||||
forknum, LSN_FORMAT_ARGS(lsn))));
|
||||
}
|
||||
else if (forknum == VISIBILITYMAP_FORKNUM && !RecoveryInProgress())
|
||||
{
|
||||
/*
|
||||
* Always WAL-log vm. We should never miss clearing visibility map
|
||||
* bits.
|
||||
*
|
||||
* TODO Is it too bad for performance? Hopefully we do not evict
|
||||
* actively used vm too often.
|
||||
*/
|
||||
XLogRecPtr recptr;
|
||||
|
||||
recptr = log_newpage_copy(&reln->smgr_rnode.node, forknum, blocknum, buffer, false);
|
||||
XLogFlush(recptr);
|
||||
lsn = recptr;
|
||||
|
||||
ereport(SmgrTrace,
|
||||
(errmsg("Visibilitymap page %u of relation %u/%u/%u.%u was force logged at lsn=%X/%X",
|
||||
blocknum,
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
@@ -1651,7 +1543,6 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
|
||||
char *buffer, bool skipFsync)
|
||||
{
|
||||
XLogRecPtr lsn;
|
||||
BlockNumber n_blocks = 0;
|
||||
|
||||
switch (reln->smgr_relpersistence)
|
||||
{
|
||||
@@ -1691,16 +1582,7 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
|
||||
errhint("This limit is defined by neon.max_cluster_size GUC")));
|
||||
}
|
||||
|
||||
/*
|
||||
* Usually Postgres doesn't extend relation on more than one page
|
||||
* (leaving holes). But this rule is violated in PG-15 where CreateAndCopyRelationData
|
||||
* call smgrextend for destination relation n using size of source relation
|
||||
*/
|
||||
get_cached_relsize(reln->smgr_rnode.node, forkNum, &n_blocks);
|
||||
while (n_blocks < blkno)
|
||||
neon_wallog_page(reln, forkNum, n_blocks++, buffer, true);
|
||||
|
||||
neon_wallog_page(reln, forkNum, blkno, buffer, false);
|
||||
neon_wallog_page(reln, forkNum, blkno, buffer);
|
||||
set_cached_relsize(reln->smgr_rnode.node, forkNum, blkno + 1);
|
||||
|
||||
lsn = PageGetLSN(buffer);
|
||||
@@ -1898,17 +1780,6 @@ neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
&request_lsn);
|
||||
slot = GetPrfSlot(ring_index);
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Empty our reference to the prefetch buffer's hash entry.
|
||||
* When we wait for prefetches, the entry reference is invalidated by
|
||||
* potential updates to the hash, and when we reconnect to the
|
||||
* pageserver the prefetch we're waiting for may be dropped,
|
||||
* in which case we need to retry and take the branch above.
|
||||
*/
|
||||
entry = NULL;
|
||||
}
|
||||
|
||||
Assert(slot->my_ring_index == ring_index);
|
||||
Assert(MyPState->ring_last <= ring_index &&
|
||||
@@ -1947,7 +1818,7 @@ neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
|
||||
/* buffer was used, clean up for later reuse */
|
||||
prefetch_set_unused(ring_index);
|
||||
prefetch_cleanup_trailing_unused();
|
||||
prefetch_cleanup();
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2128,7 +1999,7 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
|
||||
}
|
||||
|
||||
neon_wallog_page(reln, forknum, blocknum, buffer, false);
|
||||
neon_wallog_page(reln, forknum, blocknum, buffer);
|
||||
|
||||
lsn = PageGetLSN(buffer);
|
||||
elog(SmgrTrace, "smgrwrite called for %u/%u/%u.%u blk %u, page LSN: %X/%08X",
|
||||
|
||||
29
poetry.lock
generated
29
poetry.lock
generated
@@ -525,7 +525,7 @@ typing-extensions = ">=4.1.0"
|
||||
|
||||
[[package]]
|
||||
name = "certifi"
|
||||
version = "2022.12.7"
|
||||
version = "2022.9.24"
|
||||
description = "Python package for providing Mozilla's CA Bundle."
|
||||
category = "main"
|
||||
optional = false
|
||||
@@ -1248,8 +1248,8 @@ python-versions = ">=3.6"
|
||||
|
||||
[package.dependencies]
|
||||
pytest = [
|
||||
{version = ">=5.0", markers = "python_version < \"3.10\""},
|
||||
{version = ">=6.2.4", markers = "python_version >= \"3.10\""},
|
||||
{version = ">=5.0", markers = "python_version < \"3.10\""},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1702,8 +1702,8 @@ botocore-stubs = [
|
||||
{file = "botocore_stubs-1.27.38-py3-none-any.whl", hash = "sha256:7add7641e9a479a9c8366893bb522fd9ca3d58714201e43662a200a148a1bc38"},
|
||||
]
|
||||
certifi = [
|
||||
{file = "certifi-2022.12.7-py3-none-any.whl", hash = "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"},
|
||||
{file = "certifi-2022.12.7.tar.gz", hash = "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3"},
|
||||
{file = "certifi-2022.9.24-py3-none-any.whl", hash = "sha256:90c1a32f1d68f940488354e36370f6cca89f0f106db09518524c88d6ed83f382"},
|
||||
{file = "certifi-2022.9.24.tar.gz", hash = "sha256:0d9c601124e5a6ba9712dbc60d9c53c21e34f5f641fe83002317394311bdce14"},
|
||||
]
|
||||
cffi = [
|
||||
{file = "cffi-1.15.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2"},
|
||||
@@ -2036,7 +2036,6 @@ psutil = [
|
||||
psycopg2-binary = [
|
||||
{file = "psycopg2-binary-2.9.3.tar.gz", hash = "sha256:761df5313dc15da1502b21453642d7599d26be88bff659382f8f9747c7ebea4e"},
|
||||
{file = "psycopg2_binary-2.9.3-cp310-cp310-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:539b28661b71da7c0e428692438efbcd048ca21ea81af618d845e06ebfd29478"},
|
||||
{file = "psycopg2_binary-2.9.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2f2534ab7dc7e776a263b463a16e189eb30e85ec9bbe1bff9e78dae802608932"},
|
||||
{file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e82d38390a03da28c7985b394ec3f56873174e2c88130e6966cb1c946508e65"},
|
||||
{file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:57804fc02ca3ce0dbfbef35c4b3a4a774da66d66ea20f4bda601294ad2ea6092"},
|
||||
{file = "psycopg2_binary-2.9.3-cp310-cp310-manylinux_2_24_aarch64.whl", hash = "sha256:083a55275f09a62b8ca4902dd11f4b33075b743cf0d360419e2051a8a5d5ff76"},
|
||||
@@ -2070,7 +2069,6 @@ psycopg2-binary = [
|
||||
{file = "psycopg2_binary-2.9.3-cp37-cp37m-win32.whl", hash = "sha256:adf20d9a67e0b6393eac162eb81fb10bc9130a80540f4df7e7355c2dd4af9fba"},
|
||||
{file = "psycopg2_binary-2.9.3-cp37-cp37m-win_amd64.whl", hash = "sha256:2f9ffd643bc7349eeb664eba8864d9e01f057880f510e4681ba40a6532f93c71"},
|
||||
{file = "psycopg2_binary-2.9.3-cp38-cp38-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:def68d7c21984b0f8218e8a15d514f714d96904265164f75f8d3a70f9c295667"},
|
||||
{file = "psycopg2_binary-2.9.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e6aa71ae45f952a2205377773e76f4e3f27951df38e69a4c95440c779e013560"},
|
||||
{file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dffc08ca91c9ac09008870c9eb77b00a46b3378719584059c034b8945e26b272"},
|
||||
{file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:280b0bb5cbfe8039205c7981cceb006156a675362a00fe29b16fbc264e242834"},
|
||||
{file = "psycopg2_binary-2.9.3-cp38-cp38-manylinux_2_24_aarch64.whl", hash = "sha256:af9813db73395fb1fc211bac696faea4ca9ef53f32dc0cfa27e4e7cf766dcf24"},
|
||||
@@ -2082,7 +2080,6 @@ psycopg2-binary = [
|
||||
{file = "psycopg2_binary-2.9.3-cp38-cp38-win32.whl", hash = "sha256:6472a178e291b59e7f16ab49ec8b4f3bdada0a879c68d3817ff0963e722a82ce"},
|
||||
{file = "psycopg2_binary-2.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:35168209c9d51b145e459e05c31a9eaeffa9a6b0fd61689b48e07464ffd1a83e"},
|
||||
{file = "psycopg2_binary-2.9.3-cp39-cp39-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl", hash = "sha256:47133f3f872faf28c1e87d4357220e809dfd3fa7c64295a4a148bcd1e6e34ec9"},
|
||||
{file = "psycopg2_binary-2.9.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b3a24a1982ae56461cc24f6680604fffa2c1b818e9dc55680da038792e004d18"},
|
||||
{file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:91920527dea30175cc02a1099f331aa8c1ba39bf8b7762b7b56cbf54bc5cce42"},
|
||||
{file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:887dd9aac71765ac0d0bac1d0d4b4f2c99d5f5c1382d8b770404f0f3d0ce8a39"},
|
||||
{file = "psycopg2_binary-2.9.3-cp39-cp39-manylinux_2_24_aarch64.whl", hash = "sha256:1f14c8b0942714eb3c74e1e71700cbbcb415acbc311c730370e70c578a44a25c"},
|
||||
@@ -2099,7 +2096,18 @@ py = [
|
||||
{file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"},
|
||||
]
|
||||
pyasn1 = [
|
||||
{file = "pyasn1-0.4.8-py2.4.egg", hash = "sha256:fec3e9d8e36808a28efb59b489e4528c10ad0f480e57dcc32b4de5c9d8c9fdf3"},
|
||||
{file = "pyasn1-0.4.8-py2.5.egg", hash = "sha256:0458773cfe65b153891ac249bcf1b5f8f320b7c2ce462151f8fa74de8934becf"},
|
||||
{file = "pyasn1-0.4.8-py2.6.egg", hash = "sha256:5c9414dcfede6e441f7e8f81b43b34e834731003427e5b09e4e00e3172a10f00"},
|
||||
{file = "pyasn1-0.4.8-py2.7.egg", hash = "sha256:6e7545f1a61025a4e58bb336952c5061697da694db1cae97b116e9c46abcf7c8"},
|
||||
{file = "pyasn1-0.4.8-py2.py3-none-any.whl", hash = "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d"},
|
||||
{file = "pyasn1-0.4.8-py3.1.egg", hash = "sha256:78fa6da68ed2727915c4767bb386ab32cdba863caa7dbe473eaae45f9959da86"},
|
||||
{file = "pyasn1-0.4.8-py3.2.egg", hash = "sha256:08c3c53b75eaa48d71cf8c710312316392ed40899cb34710d092e96745a358b7"},
|
||||
{file = "pyasn1-0.4.8-py3.3.egg", hash = "sha256:03840c999ba71680a131cfaee6fab142e1ed9bbd9c693e285cc6aca0d555e576"},
|
||||
{file = "pyasn1-0.4.8-py3.4.egg", hash = "sha256:7ab8a544af125fb704feadb008c99a88805126fb525280b2270bb25cc1d78a12"},
|
||||
{file = "pyasn1-0.4.8-py3.5.egg", hash = "sha256:e89bf84b5437b532b0803ba5c9a5e054d21fec423a89952a74f87fa2c9b7bce2"},
|
||||
{file = "pyasn1-0.4.8-py3.6.egg", hash = "sha256:014c0e9976956a08139dc0712ae195324a75e142284d5f87f1a87ee1b068a359"},
|
||||
{file = "pyasn1-0.4.8-py3.7.egg", hash = "sha256:99fcc3c8d804d1bc6d9a099921e39d827026409a58f2a720dcdb89374ea0c776"},
|
||||
{file = "pyasn1-0.4.8.tar.gz", hash = "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba"},
|
||||
]
|
||||
pycodestyle = [
|
||||
@@ -2205,13 +2213,6 @@ pyyaml = [
|
||||
{file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"},
|
||||
{file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"},
|
||||
{file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"},
|
||||
{file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"},
|
||||
{file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"},
|
||||
{file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"},
|
||||
{file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"},
|
||||
{file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"},
|
||||
{file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"},
|
||||
{file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"},
|
||||
{file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"},
|
||||
{file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"},
|
||||
{file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"},
|
||||
|
||||
@@ -28,7 +28,6 @@ use std::{borrow::Cow, future::Future, net::SocketAddr};
|
||||
use tokio::{net::TcpListener, task::JoinError};
|
||||
use tracing::info;
|
||||
use utils::project_git_version;
|
||||
use utils::sentry_init::{init_sentry, release_name};
|
||||
|
||||
project_git_version!(GIT_VERSION);
|
||||
|
||||
@@ -46,9 +45,6 @@ async fn main() -> anyhow::Result<()> {
|
||||
.with_target(false)
|
||||
.init();
|
||||
|
||||
// initialize sentry if SENTRY_DSN is provided
|
||||
let _sentry_guard = init_sentry(release_name!(), &[]);
|
||||
|
||||
let arg_matches = cli().get_matches();
|
||||
|
||||
let tls_config = match (
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
use anyhow::{bail, Context, Result};
|
||||
use clap::{value_parser, Arg, ArgAction, Command};
|
||||
use const_format::formatcp;
|
||||
use nix::unistd::Pid;
|
||||
use remote_storage::RemoteStorageConfig;
|
||||
use std::fs::{self, File};
|
||||
use std::io::{ErrorKind, Write};
|
||||
@@ -14,7 +15,7 @@ use tokio::sync::mpsc;
|
||||
use toml_edit::Document;
|
||||
use tracing::*;
|
||||
use url::{ParseError, Url};
|
||||
use utils::pid_file;
|
||||
use utils::lock_file;
|
||||
|
||||
use metrics::set_build_info_metric;
|
||||
use safekeeper::broker;
|
||||
@@ -34,14 +35,11 @@ use utils::{
|
||||
http::endpoint,
|
||||
id::NodeId,
|
||||
logging::{self, LogFormat},
|
||||
project_git_version,
|
||||
sentry_init::{init_sentry, release_name},
|
||||
signals, tcp_listener,
|
||||
project_git_version, signals, tcp_listener,
|
||||
};
|
||||
|
||||
const PID_FILE_NAME: &str = "safekeeper.pid";
|
||||
const ID_FILE_NAME: &str = "safekeeper.id";
|
||||
|
||||
project_git_version!(GIT_VERSION);
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
@@ -135,8 +133,6 @@ fn main() -> anyhow::Result<()> {
|
||||
conf.log_format = LogFormat::from_config(log_format)?;
|
||||
}
|
||||
|
||||
// initialize sentry if SENTRY_DSN is provided
|
||||
let _sentry_guard = init_sentry(release_name!(), &[("node_id", &conf.my_id.to_string())]);
|
||||
start_safekeeper(conf, given_id, arg_matches.get_flag("init"))
|
||||
}
|
||||
|
||||
@@ -146,13 +142,30 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
|
||||
|
||||
// Prevent running multiple safekeepers on the same directory
|
||||
let lock_file_path = conf.workdir.join(PID_FILE_NAME);
|
||||
let lock_file =
|
||||
pid_file::claim_for_current_process(&lock_file_path).context("claim pid file")?;
|
||||
info!("Claimed pid file at {lock_file_path:?}");
|
||||
|
||||
let lock_file = match lock_file::create_lock_file(&lock_file_path, Pid::this().to_string()) {
|
||||
lock_file::LockCreationResult::Created {
|
||||
new_lock_contents,
|
||||
file,
|
||||
} => {
|
||||
info!("Created lock file at {lock_file_path:?} with contenst {new_lock_contents}");
|
||||
file
|
||||
}
|
||||
lock_file::LockCreationResult::AlreadyLocked {
|
||||
existing_lock_contents,
|
||||
} => anyhow::bail!(
|
||||
"Could not lock pid file; safekeeper is already running in {:?} with PID {}",
|
||||
conf.workdir,
|
||||
existing_lock_contents
|
||||
),
|
||||
lock_file::LockCreationResult::CreationFailed(e) => {
|
||||
return Err(e.context(format!("Failed to create lock file at {lock_file_path:?}")))
|
||||
}
|
||||
};
|
||||
// ensure that the lock file is held even if the main thread of the process panics
|
||||
// we need to release the lock file only when the current process is gone
|
||||
std::mem::forget(lock_file);
|
||||
let _ = Box::leak(Box::new(lock_file));
|
||||
|
||||
info!("Created PID file with PID {}", Pid::this().to_string());
|
||||
|
||||
// Set or read our ID.
|
||||
set_id(&mut conf, given_id)?;
|
||||
@@ -160,12 +173,19 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
info!(
|
||||
"Starting safekeeper http handler on {}",
|
||||
conf.listen_http_addr
|
||||
);
|
||||
let http_listener = tcp_listener::bind(conf.listen_http_addr.clone()).map_err(|e| {
|
||||
error!("failed to bind to address {}: {}", conf.listen_http_addr, e);
|
||||
e
|
||||
})?;
|
||||
|
||||
info!("Starting safekeeper on {}", conf.listen_pg_addr);
|
||||
info!(
|
||||
"Starting safekeeper pg protocol handler on {}",
|
||||
conf.listen_pg_addr
|
||||
);
|
||||
let pg_listener = tcp_listener::bind(conf.listen_pg_addr.clone()).map_err(|e| {
|
||||
error!("failed to bind to address {}: {}", conf.listen_pg_addr, e);
|
||||
e
|
||||
@@ -264,10 +284,10 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
|
||||
signals.handle(|signal| {
|
||||
// TODO: implement graceful shutdown with joining threads etc
|
||||
info!(
|
||||
"received {}, terminating in immediate shutdown mode",
|
||||
"Got {}. Terminating in immediate shutdown mode",
|
||||
signal.name()
|
||||
);
|
||||
std::process::exit(0);
|
||||
std::process::exit(111);
|
||||
})
|
||||
}
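A note on the lock-file change above: pid_file::claim_for_current_process replaces the hand-rolled lock_file handling, and the std::mem::forget / Box::leak lines exist to keep the pid file held for the entire process lifetime, as the surrounding comments explain. The following is a minimal sketch of that pattern using only the standard library; the names here are illustrative and are not the safekeeper's actual pid_file API:

use std::fs::OpenOptions;
use std::io::Write;
use std::path::Path;

fn claim_pid_file(path: &Path) -> std::io::Result<()> {
    // Open (or create) the pid file and record the current pid in it.
    let mut file = OpenOptions::new().create(true).write(true).truncate(true).open(path)?;
    writeln!(file, "{}", std::process::id())?;
    // Deliberately leak the handle: the descriptor (and any advisory lock taken
    // on it) must stay open until the process exits, so dropping it here would
    // release the lock too early. This mirrors the Box::leak call in the diff.
    let _ = Box::leak(Box::new(file));
    Ok(())
}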
|
||||
|
||||
|
||||
@@ -226,7 +226,6 @@ impl ReplicationConn {
|
||||
let mut end_pos = stop_pos.unwrap_or(inmem_state.commit_lsn);
|
||||
|
||||
let mut wal_reader = WalReader::new(
|
||||
spg.conf.workdir.clone(),
|
||||
spg.conf.timeline_dir(&tli.ttid),
|
||||
&persisted_state,
|
||||
start_pos,
|
||||
|
||||
@@ -13,7 +13,7 @@ use std::time::Duration;
|
||||
use postgres_ffi::v14::xlog_utils::XLogSegNoOffsetToRecPtr;
|
||||
use postgres_ffi::XLogFileName;
|
||||
use postgres_ffi::{XLogSegNo, PG_TLI};
|
||||
use remote_storage::{GenericRemoteStorage, RemotePath};
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use tokio::fs::File;
|
||||
use tokio::runtime::Builder;
|
||||
|
||||
@@ -151,7 +151,7 @@ async fn update_task(
|
||||
let timeline_dir = conf.timeline_dir(&ttid);
|
||||
|
||||
let handle = tokio::spawn(
|
||||
backup_task_main(ttid, timeline_dir, conf.workdir.clone(), shutdown_rx)
|
||||
backup_task_main(ttid, timeline_dir, shutdown_rx)
|
||||
.instrument(info_span!("WAL backup task", ttid = %ttid)),
|
||||
);
|
||||
|
||||
@@ -182,10 +182,10 @@ async fn wal_backup_launcher_main_loop(
|
||||
|
||||
let conf_ = conf.clone();
|
||||
REMOTE_STORAGE.get_or_init(|| {
|
||||
conf_
|
||||
.remote_storage
|
||||
.as_ref()
|
||||
.map(|c| GenericRemoteStorage::from_config(c).expect("failed to create remote storage"))
|
||||
conf_.remote_storage.as_ref().map(|c| {
|
||||
GenericRemoteStorage::from_config(conf_.workdir, c)
|
||||
.expect("failed to create remote storage")
|
||||
})
|
||||
});
|
||||
|
||||
// Presence in this map means launcher is aware s3 offloading is needed for
|
||||
@@ -234,7 +234,6 @@ async fn wal_backup_launcher_main_loop(
|
||||
struct WalBackupTask {
|
||||
timeline: Arc<Timeline>,
|
||||
timeline_dir: PathBuf,
|
||||
workspace_dir: PathBuf,
|
||||
wal_seg_size: usize,
|
||||
commit_lsn_watch_rx: watch::Receiver<Lsn>,
|
||||
}
|
||||
@@ -243,7 +242,6 @@ struct WalBackupTask {
|
||||
async fn backup_task_main(
|
||||
ttid: TenantTimelineId,
|
||||
timeline_dir: PathBuf,
|
||||
workspace_dir: PathBuf,
|
||||
mut shutdown_rx: Receiver<()>,
|
||||
) {
|
||||
info!("started");
|
||||
@@ -259,7 +257,6 @@ async fn backup_task_main(
|
||||
commit_lsn_watch_rx: tli.get_commit_lsn_watch_rx(),
|
||||
timeline: tli,
|
||||
timeline_dir,
|
||||
workspace_dir,
|
||||
};
|
||||
|
||||
// task is spun up only when wal_seg_size is already initialized
|
||||
@@ -324,7 +321,6 @@ impl WalBackupTask {
|
||||
commit_lsn,
|
||||
self.wal_seg_size,
|
||||
&self.timeline_dir,
|
||||
&self.workspace_dir,
|
||||
)
|
||||
.await
|
||||
{
|
||||
@@ -357,12 +353,11 @@ pub async fn backup_lsn_range(
|
||||
end_lsn: Lsn,
|
||||
wal_seg_size: usize,
|
||||
timeline_dir: &Path,
|
||||
workspace_dir: &Path,
|
||||
) -> Result<Lsn> {
|
||||
let mut res = start_lsn;
|
||||
let segments = get_segments(start_lsn, end_lsn, wal_seg_size);
|
||||
for s in &segments {
|
||||
backup_single_segment(s, timeline_dir, workspace_dir)
|
||||
backup_single_segment(s, timeline_dir)
|
||||
.await
|
||||
.with_context(|| format!("offloading segno {}", s.seg_no))?;
|
||||
|
||||
@@ -377,24 +372,11 @@ pub async fn backup_lsn_range(
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
async fn backup_single_segment(
|
||||
seg: &Segment,
|
||||
timeline_dir: &Path,
|
||||
workspace_dir: &Path,
|
||||
) -> Result<()> {
|
||||
let segment_file_path = seg.file_path(timeline_dir)?;
|
||||
let remote_segment_path = segment_file_path
|
||||
.strip_prefix(&workspace_dir)
|
||||
.context("Failed to strip workspace dir prefix")
|
||||
.and_then(RemotePath::new)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to resolve remote part of path {segment_file_path:?} for base {workspace_dir:?}",
|
||||
)
|
||||
})?;
|
||||
async fn backup_single_segment(seg: &Segment, timeline_dir: &Path) -> Result<()> {
|
||||
let segment_file_name = seg.file_path(timeline_dir)?;
|
||||
|
||||
backup_object(&segment_file_path, &remote_segment_path, seg.size()).await?;
|
||||
debug!("Backup of {} done", segment_file_path.display());
|
||||
backup_object(&segment_file_name, seg.size()).await?;
|
||||
debug!("Backup of {} done", segment_file_name.display());
|
||||
|
||||
Ok(())
|
||||
}
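The hunk above drops the explicit workspace-prefix handling from backup_single_segment: the old code converted the absolute segment path into a RemotePath by stripping the workspace directory, while the new code hands the local path straight to backup_object, since GenericRemoteStorage::from_config now receives conf.workdir and presumably performs that translation internally. A minimal sketch of the prefix-stripping step, using plain std::path rather than the remote_storage crate's RemotePath type:

use std::path::{Path, PathBuf, StripPrefixError};

// Turn an absolute segment path into a path relative to the workspace root,
// e.g. "<workdir>/timelines/<ttid>/000000010000000000000001"
// becomes "timelines/<ttid>/000000010000000000000001".
fn relative_to_workspace(segment: &Path, workspace: &Path) -> Result<PathBuf, StripPrefixError> {
    segment.strip_prefix(workspace).map(Path::to_path_buf)
}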
|
||||
@@ -444,7 +426,7 @@ fn get_segments(start: Lsn, end: Lsn, seg_size: usize) -> Vec<Segment> {
|
||||
|
||||
static REMOTE_STORAGE: OnceCell<Option<GenericRemoteStorage>> = OnceCell::new();
|
||||
|
||||
async fn backup_object(source_file: &Path, target_file: &RemotePath, size: usize) -> Result<()> {
|
||||
async fn backup_object(source_file: &Path, size: usize) -> Result<()> {
|
||||
let storage = REMOTE_STORAGE
|
||||
.get()
|
||||
.expect("failed to get remote storage")
|
||||
@@ -459,12 +441,12 @@ async fn backup_object(source_file: &Path, target_file: &RemotePath, size: usize
|
||||
})?);
|
||||
|
||||
storage
|
||||
.upload_storage_object(Box::new(file), size, target_file)
|
||||
.upload_storage_object(Box::new(file), size, source_file)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn read_object(
|
||||
file_path: &RemotePath,
|
||||
file_path: PathBuf,
|
||||
offset: u64,
|
||||
) -> anyhow::Result<Pin<Box<dyn tokio::io::AsyncRead>>> {
|
||||
let storage = REMOTE_STORAGE
|
||||
@@ -473,13 +455,19 @@ pub async fn read_object(
|
||||
.as_ref()
|
||||
.context("No remote storage configured")?;
|
||||
|
||||
info!("segment download about to start from remote path {file_path:?} at offset {offset}");
|
||||
|
||||
info!(
|
||||
"segment download about to start for local path {} at offset {}",
|
||||
file_path.display(),
|
||||
offset
|
||||
);
|
||||
let download = storage
|
||||
.download_storage_object(Some((offset, None)), file_path)
|
||||
.download_storage_object(Some((offset, None)), &file_path)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("Failed to open WAL segment download stream for remote path {file_path:?}")
|
||||
format!(
|
||||
"Failed to open WAL segment download stream for local path {}",
|
||||
file_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
Ok(download.download_stream)
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
//! Note that last file has `.partial` suffix, that's different from postgres.
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use remote_storage::RemotePath;
|
||||
|
||||
use std::io::{self, Seek, SeekFrom};
|
||||
use std::pin::Pin;
|
||||
@@ -446,7 +445,6 @@ fn remove_segments_from_disk(
|
||||
}
|
||||
|
||||
pub struct WalReader {
|
||||
workdir: PathBuf,
|
||||
timeline_dir: PathBuf,
|
||||
wal_seg_size: usize,
|
||||
pos: Lsn,
|
||||
@@ -461,7 +459,6 @@ pub struct WalReader {
|
||||
|
||||
impl WalReader {
|
||||
pub fn new(
|
||||
workdir: PathBuf,
|
||||
timeline_dir: PathBuf,
|
||||
state: &SafeKeeperState,
|
||||
start_pos: Lsn,
|
||||
@@ -481,7 +478,6 @@ impl WalReader {
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
workdir,
|
||||
timeline_dir,
|
||||
wal_seg_size: state.server.wal_seg_size as usize,
|
||||
pos: start_pos,
|
||||
@@ -549,17 +545,7 @@ impl WalReader {
|
||||
|
||||
// Try to open remote file, if remote reads are enabled
|
||||
if self.enable_remote_read {
|
||||
let remote_wal_file_path = wal_file_path
|
||||
.strip_prefix(&self.workdir)
|
||||
.context("Failed to strip workdir prefix")
|
||||
.and_then(RemotePath::new)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to resolve remote part of path {:?} for base {:?}",
|
||||
wal_file_path, self.workdir,
|
||||
)
|
||||
})?;
|
||||
return read_object(&remote_wal_file_path, xlogoff as u64).await;
|
||||
return read_object(wal_file_path, xlogoff as u64).await;
|
||||
}
|
||||
|
||||
bail!("WAL segment is not found")
|
||||
|
||||
@@ -1119,14 +1119,6 @@ class PageserverHttpClient(requests.Session):
|
||||
res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/detach")
|
||||
self.verbose_error(res)
|
||||
|
||||
def tenant_load(self, tenant_id: TenantId):
|
||||
res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/load")
|
||||
self.verbose_error(res)
|
||||
|
||||
def tenant_ignore(self, tenant_id: TenantId):
|
||||
res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/ignore")
|
||||
self.verbose_error(res)
|
||||
|
||||
def tenant_status(self, tenant_id: TenantId) -> Dict[Any, Any]:
|
||||
res = self.get(f"http://localhost:{self.port}/v1/tenant/{tenant_id}")
|
||||
self.verbose_error(res)
|
||||
@@ -1588,7 +1580,17 @@ class NeonCli(AbstractNeonCli):
|
||||
s3_env_vars = self.env.remote_storage.access_env_vars()
|
||||
extra_env_vars = (extra_env_vars or {}) | s3_env_vars
|
||||
|
||||
return self.raw_cli(start_args, extra_env_vars=extra_env_vars)
|
||||
try:
|
||||
return self.raw_cli(start_args, extra_env_vars=extra_env_vars)
|
||||
except Exception:
|
||||
# A common reason for startup failure is that the port is already in use. We
|
||||
# coordinate port assignment with PortDistributor, but it's a common mistake
|
||||
# when writing a new test to use a hardcoded port, or assign the port without
|
||||
# using the distributor, causing races where two tests running concurrently
|
||||
# sometimes choose the same port. To help debug such cases, get a listing
|
||||
# of all in-use ports and the processes holding them.
|
||||
list_inuse_ports()
|
||||
raise
|
||||
|
||||
def pageserver_stop(self, immediate=False) -> "subprocess.CompletedProcess[str]":
|
||||
cmd = ["pageserver", "stop"]
|
||||
@@ -1603,7 +1605,11 @@ class NeonCli(AbstractNeonCli):
|
||||
if self.env.remote_storage is not None and isinstance(self.env.remote_storage, S3Storage):
|
||||
s3_env_vars = self.env.remote_storage.access_env_vars()
|
||||
|
||||
return self.raw_cli(["safekeeper", "start", str(id)], extra_env_vars=s3_env_vars)
|
||||
try:
|
||||
return self.raw_cli(["safekeeper", "start", str(id)], extra_env_vars=s3_env_vars)
|
||||
except Exception:
|
||||
list_inuse_ports() # see comment in pageserver_start
|
||||
raise
|
||||
|
||||
def safekeeper_stop(
|
||||
self, id: Optional[int] = None, immediate=False
|
||||
@@ -1751,7 +1757,6 @@ class NeonPageserver(PgProtocol):
|
||||
".*Connection aborted: error communicating with the server: Connection reset by peer.*",
|
||||
".*kill_and_wait_impl.*: wait successful.*",
|
||||
".*end streaming to Some.*",
|
||||
".*query handler for 'pagestream.*failed: Broken pipe.*", # pageserver notices compute shut down
|
||||
# safekeeper connection can fail with this, in the window between timeline creation
|
||||
# and streaming start
|
||||
".*Failed to process query for timeline .*: state uninitialized, no data to read.*",
|
||||
@@ -1777,7 +1782,6 @@ class NeonPageserver(PgProtocol):
|
||||
".*gc_loop.*Gc failed, retrying in.*timeline is Stopping", # When gc checks timeline state after acquiring layer_removal_cs
|
||||
".*compaction_loop.*Compaction failed, retrying in.*timeline is Stopping", # When compaction checks timeline state after acquiring layer_removal_cs
|
||||
".*query handler for 'pagestream.*failed: Timeline .* was not found", # postgres reconnects while timeline_delete doesn't hold the tenant's timelines.lock()
|
||||
".*query handler for 'pagestream.*failed: Timeline .* is not active", # timeline delete in progress
|
||||
]
|
||||
|
||||
def start(
|
||||
@@ -2896,7 +2900,6 @@ def assert_no_in_progress_downloads_for_tenant(
|
||||
):
|
||||
tenant_status = pageserver_http_client.tenant_status(tenant)
|
||||
assert tenant_status["has_in_progress_downloads"] is False, tenant_status
|
||||
assert tenant_status["state"] == "Active"
|
||||
|
||||
|
||||
def remote_consistent_lsn(
|
||||
@@ -2938,27 +2941,6 @@ def wait_for_upload(
|
||||
)
|
||||
|
||||
|
||||
# Does not use `wait_until` for debugging purposes
|
||||
def wait_until_tenant_state(
|
||||
pageserver_http: PageserverHttpClient,
|
||||
tenant_id: TenantId,
|
||||
expected_state: str,
|
||||
iterations: int,
|
||||
) -> bool:
|
||||
for _ in range(iterations):
|
||||
try:
|
||||
tenant = pageserver_http.tenant_status(tenant_id=tenant_id)
|
||||
log.debug(f"Tenant {tenant_id} data: {tenant}")
|
||||
if tenant["state"] == expected_state:
|
||||
return True
|
||||
except Exception as e:
|
||||
log.debug(f"Tenant {tenant_id} state retrieval failure: {e}")
|
||||
|
||||
time.sleep(1)
|
||||
|
||||
raise Exception(f"Tenant {tenant_id} did not become {expected_state} in {iterations} seconds")
|
||||
|
||||
|
||||
def last_record_lsn(
|
||||
pageserver_http_client: PageserverHttpClient, tenant: TenantId, timeline: TimelineId
|
||||
) -> Lsn:
|
||||
@@ -3013,3 +2995,24 @@ def fork_at_current_lsn(
|
||||
"""
|
||||
current_lsn = pg.safe_psql("SELECT pg_current_wal_lsn()")[0][0]
|
||||
return env.neon_cli.create_branch(new_branch_name, ancestor_branch_name, tenant_id, current_lsn)
|
||||
|
||||
|
||||
def list_inuse_ports():
|
||||
"""
|
||||
Print "netstat -tnlap" output to the test log. This is useful for debugging
|
||||
port collisions in tests.
|
||||
"""
|
||||
|
||||
# This won't work on all platforms, because not all platforms have 'netstat',
|
||||
# and the CLI arguments vary across platforms, too. macOS's netstat doesn't have
|
||||
# the -p option, for example. So this is just best-effort.
|
||||
res = subprocess.run(
|
||||
["netstat", "-tnlap"],
|
||||
check=False,
|
||||
universal_newlines=True,
|
||||
capture_output=True,
|
||||
)
|
||||
if res.returncode:
|
||||
log.info(f"netstat -tnlap failed with return code {res.returncode}")
|
||||
log.info(f"netstat -tnlap stdout: \n{res.stdout}\n")
|
||||
log.info(f"netstat -tnlap stderr: \n{res.stderr}\n")
|
||||
|
||||
@@ -42,8 +42,7 @@ def test_bulk_update(neon_env_builder: NeonEnvBuilder, zenbenchmark, fillfactor)
|
||||
|
||||
cur.execute("drop table t")
|
||||
cur.execute("set enable_seqscan_prefetch=on")
|
||||
cur.execute("set effective_io_concurrency=32")
|
||||
cur.execute("set maintenance_io_concurrency=32")
|
||||
cur.execute("set seqscan_prefetch_buffers=100")
|
||||
|
||||
cur.execute(f"create table t2(x integer) WITH (fillfactor={fillfactor})")
|
||||
|
||||
|
||||
@@ -2,10 +2,8 @@ from dataclasses import dataclass
|
||||
from typing import Dict, Tuple
|
||||
|
||||
import pytest
|
||||
from _pytest.mark import ParameterSet
|
||||
from fixtures.compare_fixtures import RemoteCompare
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.utils import get_self_dir
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -111,36 +109,3 @@ def test_clickbench(query: LabelledQuery, remote_compare: RemoteCompare):
|
||||
"""
|
||||
|
||||
run_psql(remote_compare, query, times=3)
|
||||
|
||||
|
||||
def tpch_queuies() -> Tuple[ParameterSet, ...]:
|
||||
"""
|
||||
A list of queries to run for the TPC-H benchmark.
|
||||
- queries in the returned tuple are ordered by the query number
|
||||
- pytest parameter ids are adjusted to match the query id (the numbering starts from 1)
|
||||
"""
|
||||
queries_dir = get_self_dir().parent / "performance" / "tpc-h" / "queries"
|
||||
assert queries_dir.exists(), f"TPC-H queries dir not found: {queries_dir}"
|
||||
|
||||
return tuple(
|
||||
pytest.param(LabelledQuery(f"Q{f.stem}", f.read_text()), id=f"query{f.stem}")
|
||||
for f in sorted(queries_dir.glob("*.sql"), key=lambda f: int(f.stem))
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("query", tpch_queuies())
|
||||
@pytest.mark.remote_cluster
|
||||
def test_tpch(query: LabelledQuery, remote_compare: RemoteCompare):
|
||||
"""
|
||||
TPC-H Benchmark
|
||||
|
||||
The DB prepared manually in advance:
|
||||
- schema: test_runner/performance/tpc-h/create-schema.sql
|
||||
- indexes: test_runner/performance/tpc-h/create-indexes.sql
|
||||
- data generated by `dbgen` program of the official TPC-H benchmark
|
||||
- `VACUUM (FREEZE, PARALLEL 0);`
|
||||
|
||||
For query generation, `1669822882` is used as the seed for the RNG
|
||||
"""
|
||||
|
||||
run_psql(remote_compare, query, times=1)
|
||||
|
||||
@@ -1,14 +1,10 @@
|
||||
from contextlib import closing
|
||||
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder, wait_for_last_record_lsn
|
||||
from fixtures.types import Lsn, TenantId, TimelineId
|
||||
from fixtures.utils import query_scalar
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
|
||||
|
||||
# This test demonstrates how to collect a read trace. It's useful until
|
||||
# it gets replaced by a test that actually does stuff with the trace.
|
||||
#
|
||||
# Additionally, tests that pageserver is able to create tenants with custom configs.
|
||||
def test_read_request_tracing(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.num_safekeepers = 1
|
||||
env = neon_env_builder.init_start()
|
||||
@@ -27,12 +23,6 @@ def test_read_request_tracing(neon_env_builder: NeonEnvBuilder):
|
||||
cur.execute("create table t (i integer);")
|
||||
cur.execute(f"insert into t values (generate_series(1,{10000}));")
|
||||
cur.execute("select count(*) from t;")
|
||||
tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
|
||||
timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
|
||||
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
|
||||
# wait until pageserver receives that data
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id, current_lsn)
|
||||
|
||||
# Stop pg so we drop the connection and flush the traces
|
||||
pg.stop()
|
||||
@@ -1,43 +0,0 @@
|
||||
-- Section 1.4.2.2
|
||||
|
||||
ALTER TABLE part ADD PRIMARY KEY (p_partkey);
|
||||
ALTER TABLE supplier ADD PRIMARY KEY (s_suppkey);
|
||||
ALTER TABLE partsupp ADD PRIMARY KEY (ps_partkey, ps_suppkey);
|
||||
ALTER TABLE customer ADD PRIMARY KEY (c_custkey);
|
||||
ALTER TABLE orders ADD PRIMARY KEY (o_orderkey);
|
||||
ALTER TABLE lineitem ADD PRIMARY KEY (l_orderkey, l_linenumber);
|
||||
ALTER TABLE nation ADD PRIMARY KEY (n_nationkey);
|
||||
ALTER TABLE region ADD PRIMARY KEY (r_regionkey);
|
||||
|
||||
-- Section 1.4.2.3
|
||||
|
||||
CREATE INDEX ON supplier USING btree (s_nationkey);
|
||||
ALTER TABLE supplier ADD FOREIGN KEY (s_nationkey) REFERENCES nation (n_nationkey);
|
||||
|
||||
/* IGNORE: implied by primary key */
|
||||
-- CREATE INDEX ON partsupp USING btree (ps_partkey);
|
||||
CREATE INDEX ON partsupp USING btree (ps_suppkey);
|
||||
ALTER TABLE partsupp ADD FOREIGN KEY (ps_partkey) REFERENCES part (p_partkey);
|
||||
ALTER TABLE partsupp ADD FOREIGN KEY (ps_suppkey) REFERENCES supplier (s_suppkey);
|
||||
|
||||
CREATE INDEX ON customer USING btree (c_nationkey);
|
||||
ALTER TABLE customer ADD FOREIGN KEY (c_nationkey) REFERENCES nation (n_nationkey);
|
||||
|
||||
CREATE INDEX ON orders USING btree (o_custkey);
|
||||
ALTER TABLE orders ADD FOREIGN KEY (o_custkey) REFERENCES customer (c_custkey);
|
||||
|
||||
/* IGNORE: implied by primary key */
|
||||
-- CREATE INDEX ON lineitem USING btree (l_orderkey);
|
||||
CREATE INDEX ON lineitem USING btree (l_partkey, l_suppkey);
|
||||
CREATE INDEX ON lineitem USING btree (l_suppkey);
|
||||
ALTER TABLE lineitem ADD FOREIGN KEY (l_orderkey) REFERENCES orders (o_orderkey);
|
||||
ALTER TABLE lineitem ADD FOREIGN KEY (l_partkey) REFERENCES part (p_partkey);
|
||||
ALTER TABLE lineitem ADD FOREIGN KEY (l_suppkey) REFERENCES supplier (s_suppkey);
|
||||
ALTER TABLE lineitem ADD FOREIGN KEY (l_partkey, l_suppkey) REFERENCES partsupp (ps_partkey, ps_suppkey);
|
||||
|
||||
CREATE INDEX ON nation USING btree (n_regionkey);
|
||||
ALTER TABLE nation ADD FOREIGN KEY (n_regionkey) REFERENCES region (r_regionkey);
|
||||
|
||||
-- Section 1.4.2.4
|
||||
|
||||
ALTER TABLE lineitem ADD CHECK (l_shipdate <= l_receiptdate);
|
||||
@@ -1,69 +0,0 @@
|
||||
-- Sccsid: @(#)dss.ddl 2.1.8.1
|
||||
CREATE TABLE NATION ( N_NATIONKEY INTEGER NOT NULL,
|
||||
N_NAME CHAR(25) NOT NULL,
|
||||
N_REGIONKEY INTEGER NOT NULL,
|
||||
N_COMMENT VARCHAR(152));
|
||||
|
||||
CREATE TABLE REGION ( R_REGIONKEY INTEGER NOT NULL,
|
||||
R_NAME CHAR(25) NOT NULL,
|
||||
R_COMMENT VARCHAR(152));
|
||||
|
||||
CREATE TABLE PART ( P_PARTKEY INTEGER NOT NULL,
|
||||
P_NAME VARCHAR(55) NOT NULL,
|
||||
P_MFGR CHAR(25) NOT NULL,
|
||||
P_BRAND CHAR(10) NOT NULL,
|
||||
P_TYPE VARCHAR(25) NOT NULL,
|
||||
P_SIZE INTEGER NOT NULL,
|
||||
P_CONTAINER CHAR(10) NOT NULL,
|
||||
P_RETAILPRICE DECIMAL(15,2) NOT NULL,
|
||||
P_COMMENT VARCHAR(23) NOT NULL );
|
||||
|
||||
CREATE TABLE SUPPLIER ( S_SUPPKEY INTEGER NOT NULL,
|
||||
S_NAME CHAR(25) NOT NULL,
|
||||
S_ADDRESS VARCHAR(40) NOT NULL,
|
||||
S_NATIONKEY INTEGER NOT NULL,
|
||||
S_PHONE CHAR(15) NOT NULL,
|
||||
S_ACCTBAL DECIMAL(15,2) NOT NULL,
|
||||
S_COMMENT VARCHAR(101) NOT NULL);
|
||||
|
||||
CREATE TABLE PARTSUPP ( PS_PARTKEY INTEGER NOT NULL,
|
||||
PS_SUPPKEY INTEGER NOT NULL,
|
||||
PS_AVAILQTY INTEGER NOT NULL,
|
||||
PS_SUPPLYCOST DECIMAL(15,2) NOT NULL,
|
||||
PS_COMMENT VARCHAR(199) NOT NULL );
|
||||
|
||||
CREATE TABLE CUSTOMER ( C_CUSTKEY INTEGER NOT NULL,
|
||||
C_NAME VARCHAR(25) NOT NULL,
|
||||
C_ADDRESS VARCHAR(40) NOT NULL,
|
||||
C_NATIONKEY INTEGER NOT NULL,
|
||||
C_PHONE CHAR(15) NOT NULL,
|
||||
C_ACCTBAL DECIMAL(15,2) NOT NULL,
|
||||
C_MKTSEGMENT CHAR(10) NOT NULL,
|
||||
C_COMMENT VARCHAR(117) NOT NULL);
|
||||
|
||||
CREATE TABLE ORDERS ( O_ORDERKEY INTEGER NOT NULL,
|
||||
O_CUSTKEY INTEGER NOT NULL,
|
||||
O_ORDERSTATUS CHAR(1) NOT NULL,
|
||||
O_TOTALPRICE DECIMAL(15,2) NOT NULL,
|
||||
O_ORDERDATE DATE NOT NULL,
|
||||
O_ORDERPRIORITY CHAR(15) NOT NULL,
|
||||
O_CLERK CHAR(15) NOT NULL,
|
||||
O_SHIPPRIORITY INTEGER NOT NULL,
|
||||
O_COMMENT VARCHAR(79) NOT NULL);
|
||||
|
||||
CREATE TABLE LINEITEM ( L_ORDERKEY INTEGER NOT NULL,
|
||||
L_PARTKEY INTEGER NOT NULL,
|
||||
L_SUPPKEY INTEGER NOT NULL,
|
||||
L_LINENUMBER INTEGER NOT NULL,
|
||||
L_QUANTITY DECIMAL(15,2) NOT NULL,
|
||||
L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL,
|
||||
L_DISCOUNT DECIMAL(15,2) NOT NULL,
|
||||
L_TAX DECIMAL(15,2) NOT NULL,
|
||||
L_RETURNFLAG CHAR(1) NOT NULL,
|
||||
L_LINESTATUS CHAR(1) NOT NULL,
|
||||
L_SHIPDATE DATE NOT NULL,
|
||||
L_COMMITDATE DATE NOT NULL,
|
||||
L_RECEIPTDATE DATE NOT NULL,
|
||||
L_SHIPINSTRUCT CHAR(25) NOT NULL,
|
||||
L_SHIPMODE CHAR(10) NOT NULL,
|
||||
L_COMMENT VARCHAR(44) NOT NULL);
|
||||
@@ -1,27 +0,0 @@
|
||||
-- $ID$
|
||||
-- TPC-H/TPC-R Pricing Summary Report Query (Q1)
|
||||
-- Functional Query Definition
|
||||
-- Approved February 1998
|
||||
|
||||
|
||||
select
|
||||
l_returnflag,
|
||||
l_linestatus,
|
||||
sum(l_quantity) as sum_qty,
|
||||
sum(l_extendedprice) as sum_base_price,
|
||||
sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
|
||||
sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
|
||||
avg(l_quantity) as avg_qty,
|
||||
avg(l_extendedprice) as avg_price,
|
||||
avg(l_discount) as avg_disc,
|
||||
count(*) as count_order
|
||||
from
|
||||
lineitem
|
||||
where
|
||||
l_shipdate <= date '1998-12-01' - interval '89' day
|
||||
group by
|
||||
l_returnflag,
|
||||
l_linestatus
|
||||
order by
|
||||
l_returnflag,
|
||||
l_linestatus;
|
||||
@@ -1,38 +0,0 @@
|
||||
-- $ID$
|
||||
-- TPC-H/TPC-R Returned Item Reporting Query (Q10)
|
||||
-- Functional Query Definition
|
||||
-- Approved February 1998
|
||||
|
||||
|
||||
select
|
||||
c_custkey,
|
||||
c_name,
|
||||
sum(l_extendedprice * (1 - l_discount)) as revenue,
|
||||
c_acctbal,
|
||||
n_name,
|
||||
c_address,
|
||||
c_phone,
|
||||
c_comment
|
||||
from
|
||||
customer,
|
||||
orders,
|
||||
lineitem,
|
||||
nation
|
||||
where
|
||||
c_custkey = o_custkey
|
||||
and l_orderkey = o_orderkey
|
||||
and o_orderdate >= date '1993-08-01'
|
||||
and o_orderdate < date '1993-08-01' + interval '3' month
|
||||
and l_returnflag = 'R'
|
||||
and c_nationkey = n_nationkey
|
||||
group by
|
||||
c_custkey,
|
||||
c_name,
|
||||
c_acctbal,
|
||||
c_phone,
|
||||
n_name,
|
||||
c_address,
|
||||
c_comment
|
||||
order by
|
||||
revenue desc
|
||||
limit 20;
|
||||
@@ -1,34 +0,0 @@
|
||||
-- $ID$
|
||||
-- TPC-H/TPC-R Important Stock Identification Query (Q11)
|
||||
-- Functional Query Definition
|
||||
-- Approved February 1998
|
||||
|
||||
|
||||
select
|
||||
ps_partkey,
|
||||
sum(ps_supplycost * ps_availqty) as value
|
||||
from
|
||||
partsupp,
|
||||
supplier,
|
||||
nation
|
||||
where
|
||||
ps_suppkey = s_suppkey
|
||||
and s_nationkey = n_nationkey
|
||||
and n_name = 'INDONESIA'
|
||||
group by
|
||||
ps_partkey having
|
||||
sum(ps_supplycost * ps_availqty) > (
|
||||
select
|
||||
sum(ps_supplycost * ps_availqty) * 0.0001000000
|
||||
from
|
||||
partsupp,
|
||||
supplier,
|
||||
nation
|
||||
where
|
||||
ps_suppkey = s_suppkey
|
||||
and s_nationkey = n_nationkey
|
||||
and n_name = 'INDONESIA'
|
||||
)
|
||||
order by
|
||||
value desc
|
||||
;
|
||||
@@ -1,35 +0,0 @@
|
||||
-- $ID$
|
||||
-- TPC-H/TPC-R Shipping Modes and Order Priority Query (Q12)
|
||||
-- Functional Query Definition
|
||||
-- Approved February 1998
|
||||
|
||||
|
||||
select
|
||||
l_shipmode,
|
||||
sum(case
|
||||
when o_orderpriority = '1-URGENT'
|
||||
or o_orderpriority = '2-HIGH'
|
||||
then 1
|
||||
else 0
|
||||
end) as high_line_count,
|
||||
sum(case
|
||||
when o_orderpriority <> '1-URGENT'
|
||||
and o_orderpriority <> '2-HIGH'
|
||||
then 1
|
||||
else 0
|
||||
end) as low_line_count
|
||||
from
|
||||
orders,
|
||||
lineitem
|
||||
where
|
||||
o_orderkey = l_orderkey
|
||||
and l_shipmode in ('REG AIR', 'AIR')
|
||||
and l_commitdate < l_receiptdate
|
||||
and l_shipdate < l_commitdate
|
||||
and l_receiptdate >= date '1995-01-01'
|
||||
and l_receiptdate < date '1995-01-01' + interval '1' year
|
||||
group by
|
||||
l_shipmode
|
||||
order by
|
||||
l_shipmode
|
||||
;
|
||||
@@ -1,27 +0,0 @@
|
||||
-- $ID$
|
||||
-- TPC-H/TPC-R Customer Distribution Query (Q13)
|
||||
-- Functional Query Definition
|
||||
-- Approved February 1998
|
||||
|
||||
|
||||
select
|
||||
c_count,
|
||||
count(*) as custdist
|
||||
from
|
||||
(
|
||||
select
|
||||
c_custkey,
|
||||
count(o_orderkey)
|
||||
from
|
||||
customer left outer join orders on
|
||||
c_custkey = o_custkey
|
||||
and o_comment not like '%special%accounts%'
|
||||
group by
|
||||
c_custkey
|
||||
) as c_orders (c_custkey, c_count)
|
||||
group by
|
||||
c_count
|
||||
order by
|
||||
custdist desc,
|
||||
c_count desc
|
||||
;
|
||||
@@ -1,20 +0,0 @@
|
||||
-- $ID$
|
||||
-- TPC-H/TPC-R Promotion Effect Query (Q14)
|
||||
-- Functional Query Definition
|
||||
-- Approved February 1998
|
||||
|
||||
|
||||
select
|
||||
100.00 * sum(case
|
||||
when p_type like 'PROMO%'
|
||||
then l_extendedprice * (1 - l_discount)
|
||||
else 0
|
||||
end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
|
||||
from
|
||||
lineitem,
|
||||
part
|
||||
where
|
||||
l_partkey = p_partkey
|
||||
and l_shipdate >= date '1995-07-01'
|
||||
and l_shipdate < date '1995-07-01' + interval '1' month
|
||||
;
|
||||
@@ -1,40 +0,0 @@
|
||||
-- $ID$
|
||||
-- TPC-H/TPC-R Top Supplier Query (Q15)
|
||||
-- Functional Query Definition
|
||||
-- Approved February 1998
|
||||
|
||||
create view revenue0 (supplier_no, total_revenue) as
|
||||
select
|
||||
l_suppkey,
|
||||
sum(l_extendedprice * (1 - l_discount))
|
||||
from
|
||||
lineitem
|
||||
where
|
||||
l_shipdate >= date '1995-01-01'
|
||||
and l_shipdate < date '1995-01-01' + interval '3' month
|
||||
group by
|
||||
l_suppkey;
|
||||
|
||||
|
||||
select
|
||||
s_suppkey,
|
||||
s_name,
|
||||
s_address,
|
||||
s_phone,
|
||||
total_revenue
|
||||
from
|
||||
supplier,
|
||||
revenue0
|
||||
where
|
||||
s_suppkey = supplier_no
|
||||
and total_revenue = (
|
||||
select
|
||||
max(total_revenue)
|
||||
from
|
||||
revenue0
|
||||
)
|
||||
order by
|
||||
s_suppkey;
|
||||
|
||||
drop view revenue0
|
||||
;
|
||||
@@ -1,37 +0,0 @@
|
||||
-- $ID$
|
||||
-- TPC-H/TPC-R Parts/Supplier Relationship Query (Q16)
|
||||
-- Functional Query Definition
|
||||
-- Approved February 1998
|
||||
|
||||
|
||||
select
|
||||
p_brand,
|
||||
p_type,
|
||||
p_size,
|
||||
count(distinct ps_suppkey) as supplier_cnt
|
||||
from
|
||||
partsupp,
|
||||
part
|
||||
where
|
||||
p_partkey = ps_partkey
|
||||
and p_brand <> 'Brand#43'
|
||||
and p_type not like 'PROMO POLISHED%'
|
||||
and p_size in (35, 5, 42, 13, 11, 40, 50, 47)
|
||||
and ps_suppkey not in (
|
||||
select
|
||||
s_suppkey
|
||||
from
|
||||
supplier
|
||||
where
|
||||
s_comment like '%Customer%Complaints%'
|
||||
)
|
||||
group by
|
||||
p_brand,
|
||||
p_type,
|
||||
p_size
|
||||
order by
|
||||
supplier_cnt desc,
|
||||
p_brand,
|
||||
p_type,
|
||||
p_size
|
||||
;
|
||||
@@ -1,25 +0,0 @@
|
||||
|
||||
-- $ID$
|
||||
-- TPC-H/TPC-R Small-Quantity-Order Revenue Query (Q17)
|
||||
-- Functional Query Definition
|
||||
-- Approved February 1998
|
||||
|
||||
|
||||
select
|
||||
sum(l_extendedprice) / 7.0 as avg_yearly
|
||||
from
|
||||
lineitem,
|
||||
part
|
||||
where
|
||||
p_partkey = l_partkey
|
||||
and p_brand = 'Brand#35'
|
||||
and p_container = 'JUMBO JAR'
|
||||
and l_quantity < (
|
||||
select
|
||||
0.2 * avg(l_quantity)
|
||||
from
|
||||
lineitem
|
||||
where
|
||||
l_partkey = p_partkey
|
||||
)
|
||||
;
|
||||
@@ -1,39 +0,0 @@
|
||||
-- $ID$
|
||||
-- TPC-H/TPC-R Large Volume Customer Query (Q18)
|
||||
-- Function Query Definition
|
||||
-- Approved February 1998
|
||||
|
||||
|
||||
select
|
||||
c_name,
|
||||
c_custkey,
|
||||
o_orderkey,
|
||||
o_orderdate,
|
||||
o_totalprice,
|
||||
sum(l_quantity)
|
||||
from
|
||||
customer,
|
||||
orders,
|
||||
lineitem
|
||||
where
|
||||
o_orderkey in (
|
||||
select
|
||||
l_orderkey
|
||||
from
|
||||
lineitem
|
||||
group by
|
||||
l_orderkey having
|
||||
sum(l_quantity) > 315
|
||||
)
|
||||
and c_custkey = o_custkey
|
||||
and o_orderkey = l_orderkey
|
||||
group by
|
||||
c_name,
|
||||
c_custkey,
|
||||
o_orderkey,
|
||||
o_orderdate,
|
||||
o_totalprice
|
||||
order by
|
||||
o_totalprice desc,
|
||||
o_orderdate
|
||||
limit 100;
|
||||
@@ -1,42 +0,0 @@
|
||||
-- $ID$
|
||||
-- TPC-H/TPC-R Discounted Revenue Query (Q19)
|
||||
-- Functional Query Definition
|
||||
-- Approved February 1998
|
||||
|
||||
|
||||
select
|
||||
sum(l_extendedprice* (1 - l_discount)) as revenue
|
||||
from
|
||||
lineitem,
|
||||
part
|
||||
where
|
||||
(
|
||||
p_partkey = l_partkey
|
||||
and p_brand = 'Brand#41'
|
||||
and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
|
||||
and l_quantity >= 10 and l_quantity <= 10 + 10
|
||||
and p_size between 1 and 5
|
||||
and l_shipmode in ('AIR', 'AIR REG')
|
||||
and l_shipinstruct = 'DELIVER IN PERSON'
|
||||
)
|
||||
or
|
||||
(
|
||||
p_partkey = l_partkey
|
||||
and p_brand = 'Brand#52'
|
||||
and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
|
||||
and l_quantity >= 20 and l_quantity <= 20 + 10
|
||||
and p_size between 1 and 10
|
||||
and l_shipmode in ('AIR', 'AIR REG')
|
||||
and l_shipinstruct = 'DELIVER IN PERSON'
|
||||
)
|
||||
or
|
||||
(
|
||||
p_partkey = l_partkey
|
||||
and p_brand = 'Brand#14'
|
||||
and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
|
||||
and l_quantity >= 22 and l_quantity <= 22 + 10
|
||||
and p_size between 1 and 15
|
||||
and l_shipmode in ('AIR', 'AIR REG')
|
||||
and l_shipinstruct = 'DELIVER IN PERSON'
|
||||
)
|
||||
;
|
||||
@@ -1,50 +0,0 @@
|
||||
-- $ID$
|
||||
-- TPC-H/TPC-R Minimum Cost Supplier Query (Q2)
|
||||
-- Functional Query Definition
|
||||
-- Approved February 1998
|
||||
|
||||
|
||||
select
|
||||
s_acctbal,
|
||||
s_name,
|
||||
n_name,
|
||||
p_partkey,
|
||||
p_mfgr,
|
||||
s_address,
|
||||
s_phone,
|
||||
s_comment
|
||||
from
|
||||
part,
|
||||
supplier,
|
||||
partsupp,
|
||||
nation,
|
||||
region
|
||||
where
|
||||
p_partkey = ps_partkey
|
||||
and s_suppkey = ps_suppkey
|
||||
and p_size = 39
|
||||
and p_type like '%BRASS'
|
||||
and s_nationkey = n_nationkey
|
||||
and n_regionkey = r_regionkey
|
||||
and r_name = 'MIDDLE EAST'
|
||||
and ps_supplycost = (
|
||||
select
|
||||
min(ps_supplycost)
|
||||
from
|
||||
partsupp,
|
||||
supplier,
|
||||
nation,
|
||||
region
|
||||
where
|
||||
p_partkey = ps_partkey
|
||||
and s_suppkey = ps_suppkey
|
||||
and s_nationkey = n_nationkey
|
||||
and n_regionkey = r_regionkey
|
||||
and r_name = 'MIDDLE EAST'
|
||||
)
|
||||
order by
|
||||
s_acctbal desc,
|
||||
n_name,
|
||||
s_name,
|
||||
p_partkey
|
||||
limit 100;
|
||||
@@ -1,44 +0,0 @@
|
||||
-- $ID$
|
||||
-- TPC-H/TPC-R Potential Part Promotion Query (Q20)
|
||||
-- Function Query Definition
|
||||
-- Approved February 1998
|
||||
|
||||
|
||||
select
|
||||
s_name,
|
||||
s_address
|
||||
from
|
||||
supplier,
|
||||
nation
|
||||
where
|
||||
s_suppkey in (
|
||||
select
|
||||
ps_suppkey
|
||||
from
|
||||
partsupp
|
||||
where
|
||||
ps_partkey in (
|
||||
select
|
||||
p_partkey
|
||||
from
|
||||
part
|
||||
where
|
||||
p_name like 'bisque%'
|
||||
)
|
||||
and ps_availqty > (
|
||||
select
|
||||
0.5 * sum(l_quantity)
|
||||
from
|
||||
lineitem
|
||||
where
|
||||
l_partkey = ps_partkey
|
||||
and l_suppkey = ps_suppkey
|
||||
and l_shipdate >= date '1997-01-01'
|
||||
and l_shipdate < date '1997-01-01' + interval '1' year
|
||||
)
|
||||
)
|
||||
and s_nationkey = n_nationkey
|
||||
and n_name = 'ETHIOPIA'
|
||||
order by
|
||||
s_name
|
||||
;
|
||||
@@ -1,46 +0,0 @@
|
||||
-- $ID$
|
||||
-- TPC-H/TPC-R Suppliers Who Kept Orders Waiting Query (Q21)
|
||||
-- Functional Query Definition
|
||||
-- Approved February 1998
|
||||
|
||||
|
||||
select
|
||||
s_name,
|
||||
count(*) as numwait
|
||||
from
|
||||
supplier,
|
||||
lineitem l1,
|
||||
orders,
|
||||
nation
|
||||
where
|
||||
s_suppkey = l1.l_suppkey
|
||||
and o_orderkey = l1.l_orderkey
|
||||
and o_orderstatus = 'F'
|
||||
and l1.l_receiptdate > l1.l_commitdate
|
||||
and exists (
|
||||
select
|
||||
*
|
||||
from
|
||||
lineitem l2
|
||||
where
|
||||
l2.l_orderkey = l1.l_orderkey
|
||||
and l2.l_suppkey <> l1.l_suppkey
|
||||
)
|
||||
and not exists (
|
||||
select
|
||||
*
|
||||
from
|
||||
lineitem l3
|
||||
where
|
||||
l3.l_orderkey = l1.l_orderkey
|
||||
and l3.l_suppkey <> l1.l_suppkey
|
||||
and l3.l_receiptdate > l3.l_commitdate
|
||||
)
|
||||
and s_nationkey = n_nationkey
|
||||
and n_name = 'SAUDI ARABIA'
|
||||
group by
|
||||
s_name
|
||||
order by
|
||||
numwait desc,
|
||||
s_name
|
||||
limit 100;
|
||||
@@ -1,44 +0,0 @@
|
||||
-- $ID$
|
||||
-- TPC-H/TPC-R Global Sales Opportunity Query (Q22)
|
||||
-- Functional Query Definition
|
||||
-- Approved February 1998
|
||||
|
||||
|
||||
select
|
||||
cntrycode,
|
||||
count(*) as numcust,
|
||||
sum(c_acctbal) as totacctbal
|
||||
from
|
||||
(
|
||||
select
|
||||
substring(c_phone from 1 for 2) as cntrycode,
|
||||
c_acctbal
|
||||
from
|
||||
customer
|
||||
where
|
||||
substring(c_phone from 1 for 2) in
|
||||
('15', '14', '29', '34', '33', '19', '13')
|
||||
and c_acctbal > (
|
||||
select
|
||||
avg(c_acctbal)
|
||||
from
|
||||
customer
|
||||
where
|
||||
c_acctbal > 0.00
|
||||
and substring(c_phone from 1 for 2) in
|
||||
('15', '14', '29', '34', '33', '19', '13')
|
||||
)
|
||||
and not exists (
|
||||
select
|
||||
*
|
||||
from
|
||||
orders
|
||||
where
|
||||
o_custkey = c_custkey
|
||||
)
|
||||
) as custsale
|
||||
group by
|
||||
cntrycode
|
||||
order by
|
||||
cntrycode
|
||||
;
|
||||
@@ -1,29 +0,0 @@
|
||||
-- $ID$
|
||||
-- TPC-H/TPC-R Shipping Priority Query (Q3)
|
||||
-- Functional Query Definition
|
||||
-- Approved February 1998
|
||||
|
||||
|
||||
select
|
||||
l_orderkey,
|
||||
sum(l_extendedprice * (1 - l_discount)) as revenue,
|
||||
o_orderdate,
|
||||
o_shippriority
|
||||
from
|
||||
customer,
|
||||
orders,
|
||||
lineitem
|
||||
where
|
||||
c_mktsegment = 'AUTOMOBILE'
|
||||
and c_custkey = o_custkey
|
||||
and l_orderkey = o_orderkey
|
||||
and o_orderdate < date '1995-03-26'
|
||||
and l_shipdate > date '1995-03-26'
|
||||
group by
|
||||
l_orderkey,
|
||||
o_orderdate,
|
||||
o_shippriority
|
||||
order by
|
||||
revenue desc,
|
||||
o_orderdate
|
||||
limit 10;
|
||||
@@ -1,28 +0,0 @@
|
||||
-- $ID$
|
||||
-- TPC-H/TPC-R Order Priority Checking Query (Q4)
|
||||
-- Functional Query Definition
|
||||
-- Approved February 1998
|
||||
|
||||
|
||||
select
|
||||
o_orderpriority,
|
||||
count(*) as order_count
|
||||
from
|
||||
orders
|
||||
where
|
||||
o_orderdate >= date '1996-12-01'
|
||||
and o_orderdate < date '1996-12-01' + interval '3' month
|
||||
and exists (
|
||||
select
|
||||
*
|
||||
from
|
||||
lineitem
|
||||
where
|
||||
l_orderkey = o_orderkey
|
||||
and l_commitdate < l_receiptdate
|
||||
)
|
||||
group by
|
||||
o_orderpriority
|
||||
order by
|
||||
o_orderpriority
|
||||
;
|
||||
@@ -1,31 +0,0 @@
|
||||
-- $ID$
|
||||
-- TPC-H/TPC-R Local Supplier Volume Query (Q5)
|
||||
-- Functional Query Definition
|
||||
-- Approved February 1998
|
||||
|
||||
|
||||
select
|
||||
n_name,
|
||||
sum(l_extendedprice * (1 - l_discount)) as revenue
|
||||
from
|
||||
customer,
|
||||
orders,
|
||||
lineitem,
|
||||
supplier,
|
||||
nation,
|
||||
region
|
||||
where
|
||||
c_custkey = o_custkey
|
||||
and l_orderkey = o_orderkey
|
||||
and l_suppkey = s_suppkey
|
||||
and c_nationkey = s_nationkey
|
||||
and s_nationkey = n_nationkey
|
||||
and n_regionkey = r_regionkey
|
||||
and r_name = 'ASIA'
|
||||
and o_orderdate >= date '1996-01-01'
|
||||
and o_orderdate < date '1996-01-01' + interval '1' year
|
||||
group by
|
||||
n_name
|
||||
order by
|
||||
revenue desc
|
||||
;
|
||||
@@ -1,16 +0,0 @@
|
||||
-- $ID$
|
||||
-- TPC-H/TPC-R Forecasting Revenue Change Query (Q6)
|
||||
-- Functional Query Definition
|
||||
-- Approved February 1998
|
||||
|
||||
|
||||
select
|
||||
sum(l_extendedprice * l_discount) as revenue
|
||||
from
|
||||
lineitem
|
||||
where
|
||||
l_shipdate >= date '1996-01-01'
|
||||
and l_shipdate < date '1996-01-01' + interval '1' year
|
||||
and l_discount between 0.02 - 0.01 and 0.02 + 0.01
|
||||
and l_quantity < 24
|
||||
;
|
||||
@@ -1,46 +0,0 @@
|
||||
-- $ID$
|
||||
-- TPC-H/TPC-R Volume Shipping Query (Q7)
|
||||
-- Functional Query Definition
|
||||
-- Approved February 1998
|
||||
|
||||
|
||||
select
|
||||
supp_nation,
|
||||
cust_nation,
|
||||
l_year,
|
||||
sum(volume) as revenue
|
||||
from
|
||||
(
|
||||
select
|
||||
n1.n_name as supp_nation,
|
||||
n2.n_name as cust_nation,
|
||||
extract(year from l_shipdate) as l_year,
|
||||
l_extendedprice * (1 - l_discount) as volume
|
||||
from
|
||||
supplier,
|
||||
lineitem,
|
||||
orders,
|
||||
customer,
|
||||
nation n1,
|
||||
nation n2
|
||||
where
|
||||
s_suppkey = l_suppkey
|
||||
and o_orderkey = l_orderkey
|
||||
and c_custkey = o_custkey
|
||||
and s_nationkey = n1.n_nationkey
|
||||
and c_nationkey = n2.n_nationkey
|
||||
and (
|
||||
(n1.n_name = 'ALGERIA' and n2.n_name = 'CANADA')
|
||||
or (n1.n_name = 'CANADA' and n2.n_name = 'ALGERIA')
|
||||
)
|
||||
and l_shipdate between date '1995-01-01' and date '1996-12-31'
|
||||
) as shipping
|
||||
group by
|
||||
supp_nation,
|
||||
cust_nation,
|
||||
l_year
|
||||
order by
|
||||
supp_nation,
|
||||
cust_nation,
|
||||
l_year
|
||||
;
|
||||
@@ -1,44 +0,0 @@
|
||||
-- $ID$
|
||||
-- TPC-H/TPC-R National Market Share Query (Q8)
|
||||
-- Functional Query Definition
|
||||
-- Approved February 1998
|
||||
|
||||
|
||||
select
|
||||
o_year,
|
||||
sum(case
|
||||
when nation = 'CANADA' then volume
|
||||
else 0
|
||||
end) / sum(volume) as mkt_share
|
||||
from
|
||||
(
|
||||
select
|
||||
extract(year from o_orderdate) as o_year,
|
||||
l_extendedprice * (1 - l_discount) as volume,
|
||||
n2.n_name as nation
|
||||
from
|
||||
part,
|
||||
supplier,
|
||||
lineitem,
|
||||
orders,
|
||||
customer,
|
||||
nation n1,
|
||||
nation n2,
|
||||
region
|
||||
where
|
||||
p_partkey = l_partkey
|
||||
and s_suppkey = l_suppkey
|
||||
and l_orderkey = o_orderkey
|
||||
and o_custkey = c_custkey
|
||||
and c_nationkey = n1.n_nationkey
|
||||
and n1.n_regionkey = r_regionkey
|
||||
and r_name = 'AMERICA'
|
||||
and s_nationkey = n2.n_nationkey
|
||||
and o_orderdate between date '1995-01-01' and date '1996-12-31'
|
||||
and p_type = 'SMALL POLISHED BRASS'
|
||||
) as all_nations
|
||||
group by
|
||||
o_year
|
||||
order by
|
||||
o_year
|
||||
;
|
||||
@@ -1,39 +0,0 @@
|
||||
-- $ID$
|
||||
-- TPC-H/TPC-R Product Type Profit Measure Query (Q9)
|
||||
-- Functional Query Definition
|
||||
-- Approved February 1998
|
||||
|
||||
|
||||
select
|
||||
nation,
|
||||
o_year,
|
||||
sum(amount) as sum_profit
|
||||
from
|
||||
(
|
||||
select
|
||||
n_name as nation,
|
||||
extract(year from o_orderdate) as o_year,
|
||||
l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
|
||||
from
|
||||
part,
|
||||
supplier,
|
||||
lineitem,
|
||||
partsupp,
|
||||
orders,
|
||||
nation
|
||||
where
|
||||
s_suppkey = l_suppkey
|
||||
and ps_suppkey = l_suppkey
|
||||
and ps_partkey = l_partkey
|
||||
and p_partkey = l_partkey
|
||||
and o_orderkey = l_orderkey
|
||||
and s_nationkey = n_nationkey
|
||||
and p_name like '%firebrick%'
|
||||
) as profit
|
||||
group by
|
||||
nation,
|
||||
o_year
|
||||
order by
|
||||
nation,
|
||||
o_year desc
|
||||
;
|
||||