Mirror of https://github.com/neondatabase/neon.git (synced 2026-01-21 04:12:55 +00:00)

Compare commits: always-fai... to layer_map_... (23 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 87d75f6070 | |
| | f7201cd3cf | |
| | 2dcbdd9e47 | |
| | 72db121a8a | |
| | 5444e6ff32 | |
| | 809b04eccb | |
| | e7b2b5ae12 | |
| | f476e56315 | |
| | 72835371cc | |
| | 924d91c47d | |
| | 4e61edef7c | |
| | b20df9b90a | |
| | 8261455019 | |
| | aad88d6c39 | |
| | 6188315b51 | |
| | 3a4b932d8a | |
| | cc2b3c986c | |
| | c250c2664b | |
| | e5550a01b0 | |
| | 45617ceaef | |
| | 29b39301fe | |
| | b01a93be60 | |
| | 4c68d019e3 | |
@@ -11,3 +11,6 @@ opt-level = 3
 [profile.dev]
 # Turn on a small amount of optimization in Development mode.
 opt-level = 1
+
+[alias]
+build_testing = ["build", "--features", "testing"]
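The added `[alias]` table gives the testing build its own cargo subcommand. A minimal usage sketch, assuming this hunk lands in the repository's cargo config (the compare view does not show the file name, so that location is an assumption):

```sh
# The alias expands to the explicit feature flag; the two commands are equivalent.
cargo build_testing
cargo build --features testing
```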
.github/ansible/scripts/init_pageserver.sh (vendored, 6 changes)
@@ -1,8 +1,7 @@
 #!/bin/sh
 
-# fetch params from meta-data service
+# get instance id from meta-data service
 INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
-AZ_ID=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone)
 
 # store fqdn hostname in var
 HOST=$(hostname -f)
@@ -17,8 +16,7 @@ cat <<EOF | tee /tmp/payload
 "instance_id": "${INSTANCE_ID}",
 "http_host": "${HOST}",
 "http_port": 9898,
-"active": false,
-"availability_zone_id": "${AZ_ID}"
+"active": false
 }
 EOF
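For reference, here is the post-change script reassembled in one piece from the two hunks above; the opening `{` of the JSON payload is assumed, since the second hunk starts just below it:

```sh
#!/bin/sh
# Reassembled sketch of init_pageserver.sh after this change: fetch the
# instance id from the EC2 metadata service and emit the registration payload.
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)

# store fqdn hostname in var
HOST=$(hostname -f)

cat <<EOF | tee /tmp/payload
{
  "instance_id": "${INSTANCE_ID}",
  "http_host": "${HOST}",
  "http_port": 9898,
  "active": false
}
EOF
```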
.github/ansible/systemd/pageserver.service (vendored, 2 changes)
@@ -5,7 +5,7 @@ After=network.target auditd.service
 [Service]
 Type=simple
 User=pageserver
-Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/v14/lib SENTRY_DSN={{ SENTRY_URL_PAGESERVER }}
+Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/v14/lib
 ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -c "broker_endpoints=['{{ etcd_endpoints }}']" -D /storage/pageserver/data
 ExecReload=/bin/kill -HUP $MAINPID
 KillMode=mixed
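Once Ansible templates a unit change like this onto a host, the running service has to pick up the new `Environment=` line. A hypothetical operator sequence, not part of the playbook shown here; the unit name `pageserver` is taken from the file above, and whether a restart (rather than the `ExecReload` HUP) is appropriate depends on the rollout policy:

```sh
# Re-read unit files, then restart the service so the changed Environment= applies.
sudo systemctl daemon-reload
sudo systemctl restart pageserver.service
sudo systemctl status pageserver.service --no-pager
```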
.github/ansible/systemd/safekeeper.service (vendored, 2 changes)
@@ -5,7 +5,7 @@ After=network.target auditd.service
 [Service]
 Type=simple
 User=safekeeper
-Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/v14/lib SENTRY_DSN={{ SENTRY_URL_SAFEKEEPER }}
+Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/v14/lib
 ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}{{ hostname_suffix }}:6500 --listen-http {{ inventory_hostname }}{{ hostname_suffix }}:7676 -D /storage/safekeeper/data --broker-endpoints={{ etcd_endpoints }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ safekeeper_s3_prefix }}"}'
 ExecReload=/bin/kill -HUP $MAINPID
 KillMode=mixed
@@ -1,53 +0,0 @@
|
||||
# Helm chart values for neon-storage-broker
|
||||
podLabels:
|
||||
neon_env: staging
|
||||
neon_service: storage-broker
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: nginx-internal
|
||||
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
|
||||
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
|
||||
|
||||
hosts:
|
||||
- host: storage-broker-zeta.eu-west-1.aws.neon.build
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- hosts:
|
||||
- storage-broker-zeta.eu-west-1.aws.neon.build
|
||||
secretName: storage-broker-tls
|
||||
|
||||
|
||||
metrics:
|
||||
enabled: false
|
||||
|
||||
extraManifests:
|
||||
- apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMServiceScrape
|
||||
metadata:
|
||||
name: "{{ include \"neon-storage-broker.fullname\" . }}"
|
||||
labels:
|
||||
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
|
||||
app.kubernetes.io/name: neon-storage-broker
|
||||
app.kubernetes.io/instance: neon-storage-broker
|
||||
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
namespace: "{{ .Release.Namespace }}"
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: "neon-storage-broker"
|
||||
endpoints:
|
||||
- port: broker
|
||||
path: /metrics
|
||||
interval: 10s
|
||||
scrapeTimeout: 10s
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- "{{ .Release.Namespace }}"
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
# Helm chart values for neon-storage-broker
|
||||
podLabels:
|
||||
neon_env: staging
|
||||
neon_service: storage-broker
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: nginx-internal
|
||||
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
|
||||
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
|
||||
|
||||
hosts:
|
||||
- host: storage-broker-beta.us-east-2.aws.neon.build
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- hosts:
|
||||
- storage-broker-beta.us-east-2.aws.neon.build
|
||||
secretName: storage-broker-tls
|
||||
|
||||
|
||||
metrics:
|
||||
enabled: false
|
||||
|
||||
extraManifests:
|
||||
- apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMServiceScrape
|
||||
metadata:
|
||||
name: "{{ include \"neon-storage-broker.fullname\" . }}"
|
||||
labels:
|
||||
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
|
||||
app.kubernetes.io/name: neon-storage-broker
|
||||
app.kubernetes.io/instance: neon-storage-broker
|
||||
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
namespace: "{{ .Release.Namespace }}"
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: "neon-storage-broker"
|
||||
endpoints:
|
||||
- port: broker
|
||||
path: /metrics
|
||||
interval: 10s
|
||||
scrapeTimeout: 10s
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- "{{ .Release.Namespace }}"
|
||||
|
||||
@@ -1,54 +0,0 @@
|
||||
# Helm chart values for neon-storage-broker
|
||||
podLabels:
|
||||
neon_env: neon-stress
|
||||
neon_service: storage-broker
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: alb
|
||||
alb.ingress.kubernetes.io/healthcheck-path: /status
|
||||
alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]'
|
||||
alb.ingress.kubernetes.io/scheme: "internal"
|
||||
alb.ingress.kubernetes.io/target-type: "ip"
|
||||
alb.ingress.kubernetes.io/ssl-redirect: "443"
|
||||
alb.ingress.kubernetes.io/backend-protocol-version: "GRPC"
|
||||
|
||||
hosts:
|
||||
- host: storage-broker-stress.stage.neon.tech
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
|
||||
metrics:
|
||||
enabled: true
|
||||
serviceMonitor:
|
||||
enabled: true
|
||||
selector:
|
||||
release: kube-prometheus-stack
|
||||
|
||||
extraManifests:
|
||||
- apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMServiceScrape
|
||||
metadata:
|
||||
name: "{{ include \"neon-storage-broker.fullname\" . }}"
|
||||
labels:
|
||||
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
|
||||
app.kubernetes.io/name: neon-storage-broker
|
||||
app.kubernetes.io/instance: neon-storage-broker
|
||||
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
namespace: "{{ .Release.Namespace }}"
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: "neon-storage-broker"
|
||||
endpoints:
|
||||
- port: broker
|
||||
path: /metrics
|
||||
interval: 10s
|
||||
scrapeTimeout: 10s
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- "{{ .Release.Namespace }}"
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
# Helm chart values for neon-storage-broker
|
||||
podLabels:
|
||||
neon_env: production
|
||||
neon_service: storage-broker
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: nginx-internal
|
||||
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
|
||||
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
|
||||
|
||||
hosts:
|
||||
- host: storage-broker-epsilon.ap-southeast-1.aws.neon.tech
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- hosts:
|
||||
- storage-broker-epsilon.ap-southeast-1.aws.neon.tech
|
||||
secretName: storage-broker-tls
|
||||
|
||||
|
||||
metrics:
|
||||
enabled: false
|
||||
|
||||
extraManifests:
|
||||
- apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMServiceScrape
|
||||
metadata:
|
||||
name: "{{ include \"neon-storage-broker.fullname\" . }}"
|
||||
labels:
|
||||
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
|
||||
app.kubernetes.io/name: neon-storage-broker
|
||||
app.kubernetes.io/instance: neon-storage-broker
|
||||
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
namespace: "{{ .Release.Namespace }}"
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: "neon-storage-broker"
|
||||
endpoints:
|
||||
- port: broker
|
||||
path: /metrics
|
||||
interval: 10s
|
||||
scrapeTimeout: 10s
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- "{{ .Release.Namespace }}"
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
# Helm chart values for neon-storage-broker
|
||||
podLabels:
|
||||
neon_env: production
|
||||
neon_service: storage-broker
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: nginx-internal
|
||||
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
|
||||
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
|
||||
|
||||
hosts:
|
||||
- host: storage-broker-gamma.eu-central-1.aws.neon.tech
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- hosts:
|
||||
- storage-broker-gamma.eu-central-1.aws.neon.tech
|
||||
secretName: storage-broker-tls
|
||||
|
||||
|
||||
metrics:
|
||||
enabled: false
|
||||
|
||||
extraManifests:
|
||||
- apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMServiceScrape
|
||||
metadata:
|
||||
name: "{{ include \"neon-storage-broker.fullname\" . }}"
|
||||
labels:
|
||||
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
|
||||
app.kubernetes.io/name: neon-storage-broker
|
||||
app.kubernetes.io/instance: neon-storage-broker
|
||||
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
namespace: "{{ .Release.Namespace }}"
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: "neon-storage-broker"
|
||||
endpoints:
|
||||
- port: broker
|
||||
path: /metrics
|
||||
interval: 10s
|
||||
scrapeTimeout: 10s
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- "{{ .Release.Namespace }}"
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
# Helm chart values for neon-storage-broker
|
||||
podLabels:
|
||||
neon_env: production
|
||||
neon_service: storage-broker
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: nginx-internal
|
||||
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
|
||||
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
|
||||
|
||||
hosts:
|
||||
- host: storage-broker-delta.us-east-2.aws.neon.tech
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- hosts:
|
||||
- storage-broker-delta.us-east-2.aws.neon.tech
|
||||
secretName: storage-broker-tls
|
||||
|
||||
|
||||
metrics:
|
||||
enabled: false
|
||||
|
||||
extraManifests:
|
||||
- apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMServiceScrape
|
||||
metadata:
|
||||
name: "{{ include \"neon-storage-broker.fullname\" . }}"
|
||||
labels:
|
||||
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
|
||||
app.kubernetes.io/name: neon-storage-broker
|
||||
app.kubernetes.io/instance: neon-storage-broker
|
||||
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
namespace: "{{ .Release.Namespace }}"
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: "neon-storage-broker"
|
||||
endpoints:
|
||||
- port: broker
|
||||
path: /metrics
|
||||
interval: 10s
|
||||
scrapeTimeout: 10s
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- "{{ .Release.Namespace }}"
|
||||
|
||||
@@ -1,31 +0,0 @@
|
||||
# Helm chart values for neon-proxy-scram.
|
||||
# This is a YAML-formatted file.
|
||||
|
||||
image:
|
||||
repository: neondatabase/neon
|
||||
|
||||
settings:
|
||||
authBackend: "console"
|
||||
authEndpoint: "http://console-release.local/management/api/v2"
|
||||
domain: "*.us-west-2.aws.neon.tech"
|
||||
|
||||
# -- Additional labels for neon-proxy pods
|
||||
podLabels:
|
||||
zenith_service: proxy-scram
|
||||
zenith_env: prod
|
||||
zenith_region: us-west-2
|
||||
zenith_region_slug: us-west-2
|
||||
|
||||
exposedService:
|
||||
annotations:
|
||||
service.beta.kubernetes.io/aws-load-balancer-type: external
|
||||
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
|
||||
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
|
||||
external-dns.alpha.kubernetes.io/hostname: us-west-2.aws.neon.tech
|
||||
|
||||
#metrics:
|
||||
# enabled: true
|
||||
# serviceMonitor:
|
||||
# enabled: true
|
||||
# selector:
|
||||
# release: kube-prometheus-stack
|
||||
@@ -1,53 +0,0 @@
|
||||
# Helm chart values for neon-storage-broker
|
||||
podLabels:
|
||||
neon_env: production
|
||||
neon_service: storage-broker
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: nginx-internal
|
||||
nginx.ingress.kubernetes.io/backend-protocol: "GRPC"
|
||||
nginx.ingress.kubernetes.io/ssl-redirect: "true"
|
||||
nginx.ingress.kubernetes.io/force-ssl-redirect: "true"
|
||||
cert-manager.io/cluster-issuer: "cert-manager-clusterissuer"
|
||||
|
||||
hosts:
|
||||
- host: storage-broker-eta.us-west-2.aws.neon.tech
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- hosts:
|
||||
- storage-broker-eta.us-west-2.aws.neon.tech
|
||||
secretName: storage-broker-tls
|
||||
|
||||
|
||||
metrics:
|
||||
enabled: false
|
||||
|
||||
extraManifests:
|
||||
- apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMServiceScrape
|
||||
metadata:
|
||||
name: "{{ include \"neon-storage-broker.fullname\" . }}"
|
||||
labels:
|
||||
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
|
||||
app.kubernetes.io/name: neon-storage-broker
|
||||
app.kubernetes.io/instance: neon-storage-broker
|
||||
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
namespace: "{{ .Release.Namespace }}"
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: "neon-storage-broker"
|
||||
endpoints:
|
||||
- port: broker
|
||||
path: /metrics
|
||||
interval: 10s
|
||||
scrapeTimeout: 10s
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- "{{ .Release.Namespace }}"
|
||||
|
||||
@@ -1,54 +0,0 @@
|
||||
# Helm chart values for neon-storage-broker
|
||||
podLabels:
|
||||
neon_env: production
|
||||
neon_service: storage-broker
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: alb
|
||||
alb.ingress.kubernetes.io/healthcheck-path: /status
|
||||
alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]'
|
||||
alb.ingress.kubernetes.io/scheme: "internal"
|
||||
alb.ingress.kubernetes.io/target-type: "ip"
|
||||
alb.ingress.kubernetes.io/ssl-redirect: "443"
|
||||
alb.ingress.kubernetes.io/backend-protocol-version: "GRPC"
|
||||
|
||||
hosts:
|
||||
- host: storage-broker.neon.tech
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
|
||||
metrics:
|
||||
enabled: true
|
||||
serviceMonitor:
|
||||
enabled: true
|
||||
selector:
|
||||
release: kube-prometheus-stack
|
||||
|
||||
extraManifests:
|
||||
- apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMServiceScrape
|
||||
metadata:
|
||||
name: "{{ include \"neon-storage-broker.fullname\" . }}"
|
||||
labels:
|
||||
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
|
||||
app.kubernetes.io/name: neon-storage-broker
|
||||
app.kubernetes.io/instance: neon-storage-broker
|
||||
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
namespace: "{{ .Release.Namespace }}"
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: "neon-storage-broker"
|
||||
endpoints:
|
||||
- port: broker
|
||||
path: /metrics
|
||||
interval: 10s
|
||||
scrapeTimeout: 10s
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- "{{ .Release.Namespace }}"
|
||||
|
||||
@@ -1,54 +0,0 @@
|
||||
# Helm chart values for neon-storage-broker
|
||||
podLabels:
|
||||
neon_env: staging
|
||||
neon_service: storage-broker
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
annotations:
|
||||
kubernetes.io/ingress.class: alb
|
||||
alb.ingress.kubernetes.io/healthcheck-path: /status
|
||||
alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS":443}]'
|
||||
alb.ingress.kubernetes.io/scheme: "internal"
|
||||
alb.ingress.kubernetes.io/target-type: "ip"
|
||||
alb.ingress.kubernetes.io/ssl-redirect: "443"
|
||||
alb.ingress.kubernetes.io/backend-protocol-version: "GRPC"
|
||||
|
||||
hosts:
|
||||
- host: storage-broker.stage.neon.tech
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
|
||||
metrics:
|
||||
enabled: true
|
||||
serviceMonitor:
|
||||
enabled: true
|
||||
selector:
|
||||
release: kube-prometheus-stack
|
||||
|
||||
extraManifests:
|
||||
- apiVersion: operator.victoriametrics.com/v1beta1
|
||||
kind: VMServiceScrape
|
||||
metadata:
|
||||
name: "{{ include \"neon-storage-broker.fullname\" . }}"
|
||||
labels:
|
||||
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
|
||||
app.kubernetes.io/name: neon-storage-broker
|
||||
app.kubernetes.io/instance: neon-storage-broker
|
||||
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
namespace: "{{ .Release.Namespace }}"
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: "neon-storage-broker"
|
||||
endpoints:
|
||||
- port: broker
|
||||
path: /metrics
|
||||
interval: 10s
|
||||
scrapeTimeout: 10s
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- "{{ .Release.Namespace }}"
|
||||
|
||||
.github/workflows/benchmarking.yml (vendored, 14 changes)
@@ -231,11 +231,8 @@ jobs:
 - name: Set database options
   if: matrix.platform == 'neon-captest-prefetch'
   run: |
-    DB_NAME=$(psql ${BENCHMARK_CONNSTR} --no-align --quiet -t -c "SELECT current_database()")
-
-    psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET enable_seqscan_prefetch=on"
-    psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET effective_io_concurrency=32"
-    psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET maintenance_io_concurrency=32"
+    psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE neondb SET enable_seqscan_prefetch=on"
+    psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE neondb SET seqscan_prefetch_buffers=10"
   env:
     BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
@@ -377,11 +374,8 @@ jobs:
 - name: Set database options
   if: matrix.platform == 'neon-captest-prefetch'
   run: |
-    DB_NAME=$(psql ${BENCHMARK_CONNSTR} --no-align --quiet -t -c "SELECT current_database()")
-
-    psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET enable_seqscan_prefetch=on"
-    psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET effective_io_concurrency=32"
-    psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET maintenance_io_concurrency=32"
+    psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE main SET enable_seqscan_prefetch=on"
+    psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE main SET seqscan_prefetch_buffers=10"
   env:
     BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
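The lines removed in both hunks resolved the target database at run time instead of hard-coding `neondb` or `main`. A standalone sketch of that dynamic pattern, using the same `BENCHMARK_CONNSTR` variable the workflow exports:

```sh
# Resolve the database behind the connection string, then apply the settings to it.
DB_NAME=$(psql "${BENCHMARK_CONNSTR}" --no-align --quiet -t -c "SELECT current_database()")
psql "${BENCHMARK_CONNSTR}" -c "ALTER DATABASE ${DB_NAME} SET enable_seqscan_prefetch=on"
psql "${BENCHMARK_CONNSTR}" -c "ALTER DATABASE ${DB_NAME} SET effective_io_concurrency=32"
psql "${BENCHMARK_CONNSTR}" -c "ALTER DATABASE ${DB_NAME} SET maintenance_io_concurrency=32"
```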
.github/workflows/build_and_test.yml (vendored, 156 changes)
@@ -100,11 +100,11 @@ jobs:
 run: |
   if [[ $BUILD_TYPE == "debug" ]]; then
     cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run"
-    CARGO_FEATURES=""
+    CARGO_FEATURES="--features testing"
     CARGO_FLAGS="--locked --timings $CARGO_FEATURES"
   elif [[ $BUILD_TYPE == "release" ]]; then
     cov_prefix=""
-    CARGO_FEATURES="--features profiling"
+    CARGO_FEATURES="--features testing,profiling"
     CARGO_FLAGS="--locked --timings --release $CARGO_FEATURES"
   fi
   echo "cov_prefix=${cov_prefix}" >> $GITHUB_ENV
@@ -539,9 +539,9 @@ jobs:
 # `neondatabase/neon` contains multiple binaries, all of them use the same input for the version into the same version formatting library.
 # Pick pageserver as currently the only binary with extra "version" features printed in the string to verify.
 # Regular pageserver version string looks like
-# Neon page server git-env:32d14403bd6ab4f4520a94cbfd81a6acef7a526c features: []
+# Neon page server git-env:32d14403bd6ab4f4520a94cbfd81a6acef7a526c failpoints: true, features: []
 # Bad versions might loop like:
-# Neon page server git-env:local features: [""]
+# Neon page server git-env:local failpoints: true, features: ["testing"]
 # Ensure that we don't have bad versions.
 - name: Verify image versions
   shell: bash # ensure no set -e for better error messages
@@ -555,6 +555,11 @@ jobs:
   exit 1
 fi
 
+if ! echo "$pageserver_version" | grep -qv '"testing"' ; then
+  echo "Pageserver version should have no testing feature enabled"
+  exit 1
+fi
+
 - name: Verify docker-compose example
   run: env REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com TAG=${{needs.tag.outputs.build-tag}} ./docker-compose/docker_compose_test.sh
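The added check relies on grep's exit status to reject images whose version string advertises the testing feature. A standalone sketch of the same test; the version string here is a made-up example, while real ones come from the pageserver binary as the workflow comments describe:

```sh
# Hypothetical version string for illustration only.
pageserver_version='Neon page server git-env:0123abcd failpoints: true, features: ["testing"]'

# grep -qv exits 0 when the line does NOT contain "testing", so the negation
# fires exactly when the version string does contain it.
if ! echo "$pageserver_version" | grep -qv '"testing"' ; then
  echo "Pageserver version should have no testing feature enabled"
  exit 1
fi
```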
@@ -663,11 +668,11 @@ jobs:
|
||||
- id: set-matrix
|
||||
run: |
|
||||
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
|
||||
STAGING='{"env_name": "staging", "proxy_job": "neon-proxy", "proxy_config": "staging.proxy", "storage_broker_ns": "neon-storage-broker", "storage_broker_config": "staging.neon-storage-broker", "kubeconfig_secret": "STAGING_KUBECONFIG_DATA", "console_api_key_secret": "NEON_STAGING_API_KEY"}'
|
||||
NEON_STRESS='{"env_name": "neon-stress", "proxy_job": "neon-stress-proxy", "proxy_config": "neon-stress.proxy", "storage_broker_ns": "neon-stress-storage-broker", "storage_broker_config": "neon-stress.neon-storage-broker", "kubeconfig_secret": "NEON_STRESS_KUBECONFIG_DATA", "console_api_key_secret": "NEON_CAPTEST_API_KEY", storage_broker_config: }'
|
||||
STAGING='{"env_name": "staging", "proxy_job": "neon-proxy", "proxy_config": "staging.proxy", "kubeconfig_secret": "STAGING_KUBECONFIG_DATA", "console_api_key_secret": "NEON_STAGING_API_KEY"}'
|
||||
NEON_STRESS='{"env_name": "neon-stress", "proxy_job": "neon-stress-proxy", "proxy_config": "neon-stress.proxy", "kubeconfig_secret": "NEON_STRESS_KUBECONFIG_DATA", "console_api_key_secret": "NEON_CAPTEST_API_KEY"}'
|
||||
echo "include=[$STAGING, $NEON_STRESS]" >> $GITHUB_OUTPUT
|
||||
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
|
||||
PRODUCTION='{"env_name": "production", "proxy_job": "neon-proxy", "proxy_config": "production.proxy", "storage_broker_ns": "neon-storage-broker", "storage_broker_config": "production.neon-storage-broker", "kubeconfig_secret": "PRODUCTION_KUBECONFIG_DATA", "console_api_key_secret": "NEON_PRODUCTION_API_KEY"}'
|
||||
PRODUCTION='{"env_name": "production", "proxy_job": "neon-proxy", "proxy_config": "production.proxy", "kubeconfig_secret": "PRODUCTION_KUBECONFIG_DATA", "console_api_key_secret": "NEON_PRODUCTION_API_KEY"}'
|
||||
echo "include=[$PRODUCTION]" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
|
||||
@@ -727,7 +732,7 @@ jobs:
|
||||
ssh-add ssh-key
|
||||
rm -f ssh-key ssh-key-cert.pub
|
||||
ansible-galaxy collection install sivel.toiletwater
|
||||
ansible-playbook deploy.yaml -i ${{ matrix.env_name }}.hosts.yaml -e CONSOLE_API_TOKEN=${{ secrets[matrix.console_api_key_secret] }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
|
||||
ansible-playbook deploy.yaml -i ${{ matrix.env_name }}.hosts.yaml -e CONSOLE_API_TOKEN=${{ secrets[matrix.console_api_key_secret] }}
|
||||
rm -f neon_install.tar.gz .neon_current_version
|
||||
|
||||
deploy-new:
|
||||
@@ -765,7 +770,7 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
ansible-galaxy collection install sivel.toiletwater
|
||||
ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_STAGING_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
|
||||
ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}}
|
||||
rm -f neon_install.tar.gz .neon_current_version
|
||||
|
||||
deploy-pr-test-new:
|
||||
@@ -798,7 +803,7 @@ jobs:
|
||||
./get_binaries.sh
|
||||
|
||||
ansible-galaxy collection install sivel.toiletwater
|
||||
ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_STAGING_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
|
||||
ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_STAGING_API_KEY}}
|
||||
rm -f neon_install.tar.gz .neon_current_version
|
||||
|
||||
deploy-prod-new:
|
||||
@@ -838,7 +843,7 @@ jobs:
|
||||
fi
|
||||
|
||||
ansible-galaxy collection install sivel.toiletwater
|
||||
ansible-playbook deploy.yaml -i prod.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_PRODUCTION_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
|
||||
ansible-playbook deploy.yaml -i prod.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{secrets.NEON_PRODUCTION_API_KEY}}
|
||||
rm -f neon_install.tar.gz .neon_current_version
|
||||
|
||||
deploy-proxy:
|
||||
@@ -880,49 +885,8 @@ jobs:
|
||||
- name: Re-deploy proxy
|
||||
run: |
|
||||
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
|
||||
helm upgrade ${{ matrix.proxy_job }} neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
|
||||
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
|
||||
|
||||
deploy-storage-broker-staging:
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:latest
|
||||
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
|
||||
needs: [ push-docker-hub, calculate-deploy-targets, tag, regress-tests ]
|
||||
if: |
|
||||
(github.ref_name == 'main' || github.ref_name == 'release') &&
|
||||
github.event_name != 'workflow_dispatch'
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
strategy:
|
||||
matrix:
|
||||
include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
|
||||
env:
|
||||
KUBECONFIG: .kubeconfig
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Add curl
|
||||
run: apt update && apt install curl -y
|
||||
|
||||
- name: Store kubeconfig file
|
||||
run: |
|
||||
echo "${{ secrets[matrix.kubeconfig_secret] }}" | base64 --decode > ${KUBECONFIG}
|
||||
chmod 0600 ${KUBECONFIG}
|
||||
|
||||
- name: Setup helm v3
|
||||
run: |
|
||||
curl -s https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
|
||||
helm repo add neondatabase https://neondatabase.github.io/helm-charts
|
||||
|
||||
- name: Deploy storage-broker
|
||||
run:
|
||||
DOCKER_TAG=${{ needs.tag.outputs.build-tag }}
|
||||
helm upgrade neon-storage-broker neondatabase/neon-storage-broker --namespace ${{ matrix.storage_broker_ns }} --create-namespace --install -f .github/helm-values/${{ matrix.storage_broker_config }}.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
helm upgrade ${{ matrix.proxy_job }} neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
|
||||
deploy-proxy-new:
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
@@ -961,54 +925,19 @@ jobs:
|
||||
- name: Re-deploy scram proxy
|
||||
run: |
|
||||
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
|
||||
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
|
||||
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
|
||||
- name: Re-deploy link proxy
|
||||
if: matrix.deploy_link_proxy
|
||||
run: |
|
||||
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
|
||||
helm upgrade neon-proxy-link neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-link.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
|
||||
helm upgrade neon-proxy-link neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-link.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
|
||||
- name: Re-deploy legacy scram proxy
|
||||
if: matrix.deploy_legacy_scram_proxy
|
||||
run: |
|
||||
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
|
||||
helm upgrade neon-proxy-scram-legacy neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram-legacy.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
|
||||
|
||||
deploy-storage-broker-dev-new:
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
|
||||
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
|
||||
needs: [ push-docker-hub, tag, regress-tests ]
|
||||
if: |
|
||||
(github.ref_name == 'main') &&
|
||||
github.event_name != 'workflow_dispatch'
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- target_region: us-east-2
|
||||
target_cluster: dev-us-east-2-beta
|
||||
- target_region: eu-west-1
|
||||
target_cluster: dev-eu-west-1-zeta
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Configure environment
|
||||
run: |
|
||||
helm repo add neondatabase https://neondatabase.github.io/helm-charts
|
||||
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
|
||||
|
||||
- name: Deploy storage-broker
|
||||
run:
|
||||
DOCKER_TAG=${{ needs.tag.outputs.build-tag }}
|
||||
helm upgrade neon-storage-broker neondatabase/neon-storage-broker --namespace neon-storage-broker --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
helm upgrade neon-proxy-scram-legacy neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram-legacy.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
|
||||
deploy-proxy-prod-new:
|
||||
runs-on: prod
|
||||
@@ -1026,8 +955,6 @@ jobs:
|
||||
include:
|
||||
- target_region: us-east-2
|
||||
target_cluster: prod-us-east-2-delta
|
||||
- target_region: us-west-2
|
||||
target_cluster: prod-us-west-2-eta
|
||||
- target_region: eu-central-1
|
||||
target_cluster: prod-eu-central-1-gamma
|
||||
- target_region: ap-southeast-1
|
||||
@@ -1047,46 +974,7 @@ jobs:
|
||||
- name: Re-deploy proxy
|
||||
run: |
|
||||
DOCKER_TAG=${{needs.tag.outputs.build-tag}}
|
||||
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
|
||||
|
||||
deploy-storage-broker-prod-new:
|
||||
runs-on: prod
|
||||
container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
|
||||
# Compute image isn't strictly required for proxy deploy, but let's still wait for it to run all deploy jobs consistently.
|
||||
needs: [ push-docker-hub, tag, regress-tests ]
|
||||
if: |
|
||||
(github.ref_name == 'release') &&
|
||||
github.event_name != 'workflow_dispatch'
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
- target_region: us-east-2
|
||||
target_cluster: prod-us-east-2-delta
|
||||
- target_region: us-west-2
|
||||
target_cluster: prod-us-west-2-eta
|
||||
- target_region: eu-central-1
|
||||
target_cluster: prod-eu-central-1-gamma
|
||||
- target_region: ap-southeast-1
|
||||
target_cluster: prod-ap-southeast-1-epsilon
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
submodules: true
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Configure environment
|
||||
run: |
|
||||
helm repo add neondatabase https://neondatabase.github.io/helm-charts
|
||||
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
|
||||
|
||||
- name: Deploy storage-broker
|
||||
run:
|
||||
DOCKER_TAG=${{ needs.tag.outputs.build-tag }}
|
||||
helm upgrade neon-storage-broker neondatabase/neon-storage-broker --namespace neon-storage-broker --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --wait --timeout 15m0s
|
||||
|
||||
promote-compatibility-data:
|
||||
runs-on: [ self-hosted, dev, x64 ]
|
||||
|
||||
Cargo.lock (generated, 347 changes)
@@ -66,6 +66,15 @@ dependencies = [
|
||||
"backtrace",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "archery"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0a8da9bc4c4053ee067669762bcaeea6e241841295a2b6c948312dad6ef4cc02"
|
||||
dependencies = [
|
||||
"static_assertions",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "arrayvec"
|
||||
version = "0.7.2"
|
||||
@@ -85,7 +94,7 @@ dependencies = [
|
||||
"num-traits",
|
||||
"rusticata-macros",
|
||||
"thiserror",
|
||||
"time",
|
||||
"time 0.3.15",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -190,7 +199,7 @@ dependencies = [
|
||||
"http",
|
||||
"hyper",
|
||||
"ring",
|
||||
"time",
|
||||
"time 0.3.15",
|
||||
"tokio",
|
||||
"tower",
|
||||
"tracing",
|
||||
@@ -331,7 +340,7 @@ dependencies = [
|
||||
"percent-encoding",
|
||||
"regex",
|
||||
"ring",
|
||||
"time",
|
||||
"time 0.3.15",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
@@ -468,7 +477,7 @@ dependencies = [
|
||||
"itoa",
|
||||
"num-integer",
|
||||
"ryu",
|
||||
"time",
|
||||
"time 0.3.15",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -713,23 +722,20 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "chrono"
|
||||
version = "0.4.23"
|
||||
version = "0.4.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f"
|
||||
checksum = "bfd4d1b31faaa3a89d7934dbded3111da0d2ef28e3ebccdb4f0179f5929d1ef1"
|
||||
dependencies = [
|
||||
"iana-time-zone",
|
||||
"js-sys",
|
||||
"num-integer",
|
||||
"num-traits",
|
||||
"serde",
|
||||
"time 0.1.44",
|
||||
"wasm-bindgen",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "chunked_transfer"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fff857943da45f546682664a79488be82e69e43c1a7a2307679ab9afb3a66d2e"
|
||||
|
||||
[[package]]
|
||||
name = "ciborium"
|
||||
version = "0.2.0"
|
||||
@@ -1231,16 +1237,6 @@ dependencies = [
|
||||
"uuid 0.8.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "debugid"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"uuid 1.2.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "der-parser"
|
||||
version = "8.1.0"
|
||||
@@ -1410,21 +1406,6 @@ version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
|
||||
|
||||
[[package]]
|
||||
name = "foreign-types"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
|
||||
dependencies = [
|
||||
"foreign-types-shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "foreign-types-shared"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
|
||||
|
||||
[[package]]
|
||||
name = "form_urlencoded"
|
||||
version = "1.1.0"
|
||||
@@ -1566,7 +1547,7 @@ checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"wasi",
|
||||
"wasi 0.11.0+wasi-snapshot-preview1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1698,17 +1679,6 @@ dependencies = [
|
||||
"digest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hostname"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3c731c3e10504cc8ed35cfe2f1db4c9274c3d35fa486e3b31df46f068ef3e867"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"match_cfg",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "http"
|
||||
version = "0.2.8"
|
||||
@@ -1816,19 +1786,6 @@ dependencies = [
|
||||
"tokio-io-timeout",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hyper-tls"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"hyper",
|
||||
"native-tls",
|
||||
"tokio",
|
||||
"tokio-native-tls",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iana-time-zone"
|
||||
version = "0.1.51"
|
||||
@@ -2054,12 +2011,6 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "match_cfg"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4"
|
||||
|
||||
[[package]]
|
||||
name = "matchers"
|
||||
version = "0.1.0"
|
||||
@@ -2162,7 +2113,7 @@ checksum = "57ee1c23c7c63b0c9250c339ffdc69255f110b298b901b9f6c82547b7b87caaf"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"log",
|
||||
"wasi",
|
||||
"wasi 0.11.0+wasi-snapshot-preview1",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
@@ -2172,24 +2123,6 @@ version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"
|
||||
|
||||
[[package]]
|
||||
name = "native-tls"
|
||||
version = "0.2.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
"libc",
|
||||
"log",
|
||||
"openssl",
|
||||
"openssl-probe",
|
||||
"openssl-sys",
|
||||
"schannel",
|
||||
"security-framework",
|
||||
"security-framework-sys",
|
||||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nb"
|
||||
version = "0.1.3"
|
||||
@@ -2360,62 +2293,12 @@ version = "11.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
|
||||
|
||||
[[package]]
|
||||
name = "openssl"
|
||||
version = "0.10.43"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "020433887e44c27ff16365eaa2d380547a94544ad509aff6eb5b6e3e0b27b376"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"cfg-if",
|
||||
"foreign-types",
|
||||
"libc",
|
||||
"once_cell",
|
||||
"openssl-macros",
|
||||
"openssl-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "openssl-macros"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b501e44f11665960c7e7fcf062c7d96a14ade4aa98116c004b2e37b5be7d736c"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "openssl-probe"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
|
||||
|
||||
[[package]]
|
||||
name = "openssl-sys"
|
||||
version = "0.9.78"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "07d5c8cb6e57b3a3612064d7b18b117912b4ce70955c2504d4b741c9e244b132"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"cc",
|
||||
"libc",
|
||||
"pkg-config",
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "os_info"
|
||||
version = "3.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c4750134fb6a5d49afc80777394ad5d95b04bc12068c6abb92fae8f43817270f"
|
||||
dependencies = [
|
||||
"log",
|
||||
"serde",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "os_str_bytes"
|
||||
version = "6.3.0"
|
||||
@@ -2460,6 +2343,7 @@ dependencies = [
|
||||
"num-traits",
|
||||
"once_cell",
|
||||
"pageserver_api",
|
||||
"persistent_range_query",
|
||||
"pin-project-lite",
|
||||
"postgres",
|
||||
"postgres-protocol",
|
||||
@@ -2471,6 +2355,7 @@ dependencies = [
|
||||
"rand",
|
||||
"regex",
|
||||
"remote_storage",
|
||||
"rpds",
|
||||
"rstar",
|
||||
"scopeguard",
|
||||
"serde",
|
||||
@@ -2577,6 +2462,14 @@ version = "2.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"
|
||||
|
||||
[[package]]
|
||||
name = "persistent_range_query"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"rand",
|
||||
"workspace_hack",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "petgraph"
|
||||
version = "0.6.2"
|
||||
@@ -2637,12 +2530,6 @@ version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
|
||||
|
||||
[[package]]
|
||||
name = "plotters"
|
||||
version = "0.3.4"
|
||||
@@ -3117,7 +3004,7 @@ checksum = "ffbe84efe2f38dea12e9bfc1f65377fdf03e53a18cb3b995faedf7934c7e785b"
|
||||
dependencies = [
|
||||
"pem",
|
||||
"ring",
|
||||
"time",
|
||||
"time 0.3.15",
|
||||
"yasna",
|
||||
]
|
||||
|
||||
@@ -3205,12 +3092,10 @@ dependencies = [
|
||||
"http-body",
|
||||
"hyper",
|
||||
"hyper-rustls",
|
||||
"hyper-tls",
|
||||
"ipnet",
|
||||
"js-sys",
|
||||
"log",
|
||||
"mime",
|
||||
"native-tls",
|
||||
"once_cell",
|
||||
"percent-encoding",
|
||||
"pin-project-lite",
|
||||
@@ -3220,7 +3105,6 @@ dependencies = [
|
||||
"serde_json",
|
||||
"serde_urlencoded",
|
||||
"tokio",
|
||||
"tokio-native-tls",
|
||||
"tokio-rustls",
|
||||
"tower-service",
|
||||
"url",
|
||||
@@ -3289,6 +3173,15 @@ dependencies = [
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rpds"
|
||||
version = "0.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "66262ea963eff99163e6b741fbc3417a52cc13074728c1047e9911789df9b000"
|
||||
dependencies = [
|
||||
"archery",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rstar"
|
||||
version = "0.9.3"
|
||||
@@ -3557,89 +3450,6 @@ version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
|
||||
|
||||
[[package]]
|
||||
name = "sentry"
|
||||
version = "0.29.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c6425e2a14006415449fb0a3e9a119df5032f59e7a2d9350cf8738eca290dfc5"
|
||||
dependencies = [
|
||||
"httpdate",
|
||||
"native-tls",
|
||||
"reqwest",
|
||||
"sentry-backtrace",
|
||||
"sentry-contexts",
|
||||
"sentry-core",
|
||||
"sentry-panic",
|
||||
"tokio",
|
||||
"ureq",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sentry-backtrace"
|
||||
version = "0.29.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "04d79c194e5c20fe602e81faf39f3cff0f275ec61283f437a892cfd6544da592"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
"once_cell",
|
||||
"regex",
|
||||
"sentry-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sentry-contexts"
|
||||
version = "0.29.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e1c2a57601eeb870521cc241caee27e57a012f297ece3c1b7eee87f2a531edb5"
|
||||
dependencies = [
|
||||
"hostname",
|
||||
"libc",
|
||||
"os_info",
|
||||
"rustc_version 0.4.0",
|
||||
"sentry-core",
|
||||
"uname",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sentry-core"
|
||||
version = "0.29.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8be90ea119c6d0664c8ab534013bc9e90355e7004d782d5d1492ca513393b929"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"rand",
|
||||
"sentry-types",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sentry-panic"
|
||||
version = "0.29.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4ec217c3290e3f0d128154da731c28efa8f62cf8e3c3a006fd4bc3407c959176"
|
||||
dependencies = [
|
||||
"sentry-backtrace",
|
||||
"sentry-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sentry-types"
|
||||
version = "0.29.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "67ad85f0addf16310a1fbcf3facc7acb17ef5dbf6ae059d2f3c38442a471404d"
|
||||
dependencies = [
|
||||
"debugid 0.8.0",
|
||||
"getrandom",
|
||||
"hex",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror",
|
||||
"time",
|
||||
"url",
|
||||
"uuid 1.2.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.145"
|
||||
@@ -3696,7 +3506,7 @@ dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_with_macros",
|
||||
"time",
|
||||
"time 0.3.15",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3787,7 +3597,7 @@ dependencies = [
|
||||
"num-bigint",
|
||||
"num-traits",
|
||||
"thiserror",
|
||||
"time",
|
||||
"time 0.3.15",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3842,6 +3652,12 @@ version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
|
||||
|
||||
[[package]]
|
||||
name = "static_assertions"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
|
||||
|
||||
[[package]]
|
||||
name = "storage_broker"
|
||||
version = "0.1.0"
|
||||
@@ -3927,7 +3743,7 @@ version = "8.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f551f902d5642e58039aee6a9021a61037926af96e071816361644983966f540"
|
||||
dependencies = [
|
||||
"debugid 0.7.3",
|
||||
"debugid",
|
||||
"memmap2",
|
||||
"stable_deref_trait",
|
||||
"uuid 0.8.2",
|
||||
@@ -4049,6 +3865,17 @@ dependencies = [
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.1.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"wasi 0.10.0+wasi-snapshot-preview1",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.3.15"
|
||||
@@ -4134,16 +3961,6 @@ dependencies = [
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-native-tls"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f7d995660bd2b7f8c1568414c1126076c13fbb725c40112dc0120b78eb9b717b"
|
||||
dependencies = [
|
||||
"native-tls",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-postgres"
|
||||
version = "0.7.6"
|
||||
@@ -4476,15 +4293,6 @@ version = "1.15.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987"
|
||||
|
||||
[[package]]
|
||||
name = "uname"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b72f89f0ca32e4db1c04e2a72f5345d59796d4866a1ee0609084569f73683dc8"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-bidi"
|
||||
version = "0.3.8"
|
||||
@@ -4524,20 +4332,6 @@ version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"
|
||||
|
||||
[[package]]
|
||||
name = "ureq"
|
||||
version = "2.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b97acb4c28a254fd7a4aeec976c46a7fa404eac4d7c134b30c75144846d7cb8f"
|
||||
dependencies = [
|
||||
"base64",
|
||||
"chunked_transfer",
|
||||
"log",
|
||||
"native-tls",
|
||||
"once_cell",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "url"
|
||||
version = "2.3.1"
|
||||
@@ -4547,7 +4341,6 @@ dependencies = [
|
||||
"form_urlencoded",
|
||||
"idna",
|
||||
"percent-encoding",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4580,7 +4373,6 @@ dependencies = [
|
||||
"rustls",
|
||||
"rustls-pemfile",
|
||||
"rustls-split",
|
||||
"sentry",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_with",
|
||||
@@ -4624,12 +4416,6 @@ version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "77439c1b53d2303b20d9459b1ade71a83c716e3f9c34f3228c00e6f185d6c002"
|
||||
|
||||
[[package]]
|
||||
name = "vcpkg"
|
||||
version = "0.2.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.4"
|
||||
@@ -4687,6 +4473,12 @@ dependencies = [
|
||||
"try-lock",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.10.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.11.0+wasi-snapshot-preview1"
|
||||
@@ -4892,6 +4684,7 @@ dependencies = [
|
||||
"clap 4.0.15",
|
||||
"crossbeam-utils",
|
||||
"either",
|
||||
"fail",
|
||||
"futures-channel",
|
||||
"futures-task",
|
||||
"futures-util",
|
||||
@@ -4914,12 +4707,12 @@ dependencies = [
|
||||
"serde",
|
||||
"stable_deref_trait",
|
||||
"syn",
|
||||
"time 0.3.15",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tower",
|
||||
"tracing",
|
||||
"tracing-core",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4937,7 +4730,7 @@ dependencies = [
|
||||
"oid-registry",
|
||||
"rusticata-macros",
|
||||
"thiserror",
|
||||
"time",
|
||||
"time 0.3.15",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4961,7 +4754,7 @@ version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "346d34a236c9d3e5f3b9b74563f238f955bbd05fa0b8b4efa53c130c43982f4c"
|
||||
dependencies = [
|
||||
"time",
|
||||
"time 0.3.15",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
README.md (29 changes)
@@ -2,20 +2,29 @@
|
||||
|
||||
Neon is a serverless open-source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes the PostgreSQL storage layer by redistributing data across a cluster of nodes.
|
||||
|
||||
The project used to be called "Zenith". Many of the commands and code comments
|
||||
still refer to "zenith", but we are in the process of renaming things.
|
||||
|
||||
## Quick start
|
||||
Try the [Neon Free Tier](https://neon.tech/docs/introduction/technical-preview-free-tier/) to create a serverless Postgres instance. Then connect to it with your preferred Postgres client (psql, dbeaver, etc) or use the online [SQL Editor](https://neon.tech/docs/get-started-with-neon/query-with-neon-sql-editor/). See [Connect from any application](https://neon.tech/docs/connect/connect-from-any-app/) for connection instructions.
|
||||
[Join the waitlist](https://neon.tech/) for our free tier to receive your serverless postgres instance. Then connect to it with your preferred postgres client (psql, dbeaver, etc) or use the online SQL editor.
|
||||
|
||||
Alternatively, compile and run the project [locally](#running-local-installation).
|
||||
|
||||
## Architecture overview
|
||||
|
||||
A Neon installation consists of compute nodes and the Neon storage engine. Compute nodes are stateless PostgreSQL nodes backed by the Neon storage engine.
|
||||
A Neon installation consists of compute nodes and a Neon storage engine.
|
||||
|
||||
Compute nodes are stateless PostgreSQL nodes backed by the Neon storage engine.
|
||||
|
||||
The Neon storage engine consists of two major components:
|
||||
- Pageserver. Scalable storage backend for the compute nodes.
|
||||
- Safekeepers. The safekeepers form a redundant WAL service that receives WAL from the compute node and stores it durably until it has been processed by the pageserver and uploaded to cloud storage.
|
||||
- WAL service. The service receives WAL from the compute node and ensures that it is stored durably.
|
||||
|
||||
See developer documentation in [/docs/SUMMARY.md](/docs/SUMMARY.md) for more information.
|
||||
Pageserver consists of:
|
||||
- Repository - Neon storage implementation.
|
||||
- WAL receiver - service that receives WAL from WAL service and stores it in the repository.
|
||||
- Page service - service that communicates with compute nodes and responds with pages from the repository.
|
||||
- WAL redo - service that builds pages from base images and WAL records on Page service request
|
||||
|
||||
## Running local installation
|
||||
|
||||
@@ -213,27 +222,19 @@ Ensure your dependencies are installed as described [here](https://github.com/ne
|
||||
```sh
|
||||
git clone --recursive https://github.com/neondatabase/neon.git
|
||||
|
||||
make
|
||||
CARGO_BUILD_FLAGS="--features=testing" make
|
||||
|
||||
./scripts/pytest
|
||||
```
|
||||
|
||||
## Documentation
|
||||
|
||||
[/docs/](/docs/) Contains a top-level overview of all available markdown documentation.
|
||||
We use README files to cover design ideas and the overall architecture of each module, along with `rustdoc`-style documentation comments. See also [/docs/](/docs/) for a top-level overview of all available markdown documentation.
|
||||
|
||||
- [/docs/sourcetree.md](/docs/sourcetree.md) contains an overview of the source tree layout.
|
||||
|
||||
To view your `rustdoc` documentation in a browser, try running `cargo doc --no-deps --open`
|
||||
|
||||
See also README files in some source directories, and `rustdoc` style documentation comments.
|
||||
|
||||
Other resources:
|
||||
|
||||
- [SELECT 'Hello, World'](https://neon.tech/blog/hello-world/): Blog post by Nikita Shamgunov on the high level architecture
|
||||
- [Architecture decisions in Neon](https://neon.tech/blog/architecture-decisions-in-neon/): Blog post by Heikki Linnakangas
|
||||
- [Neon: Serverless PostgreSQL!](https://www.youtube.com/watch?v=rES0yzeERns): Presentation on storage system by Heikki Linnakangas in the CMU Database Group seminar series
|
||||
|
||||
### Postgres-specific terms
|
||||
|
||||
Due to Neon's very close relation with PostgreSQL internals, numerous specific terms are used.
|
||||
|
||||
@@ -5,7 +5,7 @@ edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
chrono = { version = "0.4", default-features = false, features = ["clock"] }
|
||||
chrono = "0.4"
|
||||
clap = "4.0"
|
||||
env_logger = "0.9"
|
||||
futures = "0.3.13"
|
||||
|
||||
@@ -14,19 +14,17 @@
|
||||
|
||||
use std::ffi::OsStr;
|
||||
use std::io::Write;
|
||||
use std::os::unix::prelude::AsRawFd;
|
||||
use std::os::unix::process::CommandExt;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::path::Path;
|
||||
use std::process::{Child, Command};
|
||||
use std::time::Duration;
|
||||
use std::{fs, io, thread};
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use nix::errno::Errno;
|
||||
use nix::fcntl::{FcntlArg, FdFlag};
|
||||
use nix::sys::signal::{kill, Signal};
|
||||
use nix::unistd::Pid;
|
||||
use utils::pid_file::{self, PidFileRead};
|
||||
|
||||
use utils::lock_file;
|
||||
|
||||
// These constants control the loop used to poll for process start / stop.
|
||||
//
|
||||
@@ -88,14 +86,6 @@ where
|
||||
let filled_cmd = fill_aws_secrets_vars(fill_rust_env_vars(background_command));
|
||||
filled_cmd.envs(envs);
|
||||
|
||||
let pid_file_to_check = match initial_pid_file {
|
||||
InitialPidFile::Create(path) => {
|
||||
pre_exec_create_pidfile(filled_cmd, path);
|
||||
path
|
||||
}
|
||||
InitialPidFile::Expect(path) => path,
|
||||
};
|
||||
|
||||
let mut spawned_process = filled_cmd.spawn().with_context(|| {
|
||||
format!("Could not spawn {process_name}, see console output and log files for details.")
|
||||
})?;
|
||||
@@ -105,8 +95,29 @@ where
|
||||
.with_context(|| format!("Subprocess {process_name} has invalid pid {pid}"))?,
|
||||
);
|
||||
|
||||
let pid_file_to_check = match initial_pid_file {
|
||||
InitialPidFile::Create(target_pid_file_path) => {
|
||||
match lock_file::create_lock_file(target_pid_file_path, pid.to_string()) {
|
||||
lock_file::LockCreationResult::Created { .. } => {
|
||||
// We use "lock" file here only to create the pid file. The lock on the pidfile will be dropped as soon
|
||||
// as this CLI invocation exits, so it's a bit useless, but it doesn't do any harm either.
|
||||
}
|
||||
lock_file::LockCreationResult::AlreadyLocked { .. } => {
|
||||
anyhow::bail!("Cannot write pid file for {process_name} at path {target_pid_file_path:?}: file is already locked by another process")
|
||||
}
|
||||
lock_file::LockCreationResult::CreationFailed(e) => {
|
||||
return Err(e.context(format!(
|
||||
"Failed to create pid file for {process_name} at path {target_pid_file_path:?}"
|
||||
)))
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
InitialPidFile::Expect(pid_file_path) => Some(pid_file_path),
|
||||
};
|
||||
|
||||
for retries in 0..RETRIES {
|
||||
match process_started(pid, Some(pid_file_to_check), &process_status_check) {
|
||||
match process_started(pid, pid_file_to_check, &process_status_check) {
|
||||
Ok(true) => {
|
||||
println!("\n{process_name} started, pid: {pid}");
|
||||
return Ok(spawned_process);
|
||||
@@ -154,27 +165,12 @@ pub fn send_stop_child_process(child: &std::process::Child) -> anyhow::Result<()
|
||||
|
||||
/// Stops the process, using the pid file given. Returns Ok also if the process is already not running.
|
||||
pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> anyhow::Result<()> {
|
||||
let pid = match pid_file::read(pid_file)
|
||||
.with_context(|| format!("read pid_file {pid_file:?}"))?
|
||||
{
|
||||
PidFileRead::NotExist => {
|
||||
println!("{process_name} is already stopped: no pid file present at {pid_file:?}");
|
||||
return Ok(());
|
||||
}
|
||||
PidFileRead::NotHeldByAnyProcess(_) => {
|
||||
// Don't try to kill according to file contents because the pid might have been re-used by another process.
|
||||
// Don't delete the file either, it can race with new pid file creation.
|
||||
// Read `pid_file` module comment for details.
|
||||
println!(
|
||||
"No process is holding the pidfile. The process must have already exited. Leave in place to avoid race conditions: {pid_file:?}"
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
PidFileRead::LockedByOtherProcess(pid) => pid,
|
||||
};
|
||||
// XXX the pid could become invalid (and recycled) at any time before the kill() below.
|
||||
if !pid_file.exists() {
|
||||
println!("{process_name} is already stopped: no pid file {pid_file:?} is present");
|
||||
return Ok(());
|
||||
}
|
||||
let pid = read_pidfile(pid_file)?;
|
||||
|
||||
// send signal
|
||||
let sig = if immediate {
|
||||
print!("Stopping {process_name} with pid {pid} immediately..");
|
||||
Signal::SIGQUIT
|
||||
@@ -186,9 +182,8 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
|
||||
match kill(pid, sig) {
|
||||
Ok(()) => (),
|
||||
Err(Errno::ESRCH) => {
|
||||
// Again, don't delete the pid file. The unlink can race with a new pid file being created.
|
||||
println!(
|
||||
"{process_name} with pid {pid} does not exist, but a pid file {pid_file:?} was found. Likely the pid got recycled. Lucky we didn't harm anyone."
|
||||
"{process_name} with pid {pid} does not exist, but a pid file {pid_file:?} was found"
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
@@ -257,69 +252,6 @@ fn fill_aws_secrets_vars(mut cmd: &mut Command) -> &mut Command {
|
||||
cmd
|
||||
}
|
||||
|
||||
/// Add a `pre_exec` to the cmd that, in between fork() and exec(),
|
||||
/// 1. Claims a pidfile with a fcntl lock on it and
|
||||
/// 2. Sets up the pidfile's file descriptor so that it (and the lock)
|
||||
/// will remain held until the cmd exits.
|
||||
fn pre_exec_create_pidfile<P>(cmd: &mut Command, path: P) -> &mut Command
|
||||
where
|
||||
P: Into<PathBuf>,
|
||||
{
|
||||
let path: PathBuf = path.into();
|
||||
// SAFETY
|
||||
// pre_exec is marked unsafe because it runs between fork and exec.
|
||||
// Why is that dangerous in various ways?
|
||||
// Long answer: https://github.com/rust-lang/rust/issues/39575
|
||||
// Short answer: in a multi-threaded program, other threads may have
|
||||
// been inside of critical sections at the time of fork. In the
|
||||
// original process, that was all right, assuming they protected
|
||||
// the critical sections appropriately, e.g., through locks.
|
||||
// Fork adds another process to the mix that
|
||||
// 1. Has a single thread T
|
||||
// 2. In an exact copy of the address space at the time of fork.
|
||||
// A variety of problems can occur now:
|
||||
// 1. T tries to grab a lock that was locked at the time of fork.
|
||||
// It will wait forever since in its address space, the lock
|
||||
// is in state 'taken' but the thread that would unlock it is
|
||||
// not there.
|
||||
// 2. A rust object that represented some external resource in the
|
||||
// parent now got implicitly copied by the fork, even though
|
||||
// the object's type is not `Copy`. The parent program may use
|
||||
// non-copyability as a way to enforce unique ownership of an
|
||||
// external resource in the typesystem. The fork breaks that
|
||||
// assumption, as now both parent and child process have an
|
||||
// owned instance of the object that represents the same
|
||||
// underlying resource.
|
||||
// While these seem like niche problems, (1) in particular is
|
||||
// highly relevant. For example, `malloc()` may grab a mutex internally,
|
||||
// and so, if we forked while another thread was malloc'ing and our
|
||||
// pre_exec closure allocates as well, it will block on the malloc
|
||||
// mutex forever
|
||||
//
|
||||
// The proper solution is to only use C library functions that are marked
|
||||
// "async-signal-safe": https://man7.org/linux/man-pages/man7/signal-safety.7.html
|
||||
//
|
||||
// With this specific pre_exec() closure, the non-error path doesn't allocate.
|
||||
// The error path uses `anyhow`, and hence does allocate.
|
||||
// We take our chances there, hoping that any potential disaster is constrained
|
||||
// to the child process (e.g., malloc has no state outside of the child process).
|
||||
// Last, `expect` prints to stderr, and stdio is not async-signal-safe.
|
||||
// Again, we take our chances, making the same assumptions as for malloc.
|
||||
unsafe {
|
||||
cmd.pre_exec(move || {
|
||||
let file = pid_file::claim_for_current_process(&path).expect("claim pid file");
|
||||
// Remove the FD_CLOEXEC flag on the pidfile descriptor so that the pidfile
|
||||
// remains locked after exec.
|
||||
nix::fcntl::fcntl(file.as_raw_fd(), FcntlArg::F_SETFD(FdFlag::empty()))
|
||||
.expect("remove FD_CLOEXEC");
|
||||
// Don't run drop(file), it would close the file before we actually exec.
|
||||
std::mem::forget(file);
|
||||
Ok(())
|
||||
});
|
||||
}
|
||||
cmd
|
||||
}
|
||||
|
||||
fn process_started<F>(
|
||||
pid: Pid,
|
||||
pid_file_to_check: Option<&Path>,
|
||||
@@ -330,11 +262,14 @@ where
|
||||
{
|
||||
match status_check() {
|
||||
Ok(true) => match pid_file_to_check {
|
||||
Some(pid_file_path) => match pid_file::read(pid_file_path)? {
|
||||
PidFileRead::NotExist => Ok(false),
|
||||
PidFileRead::LockedByOtherProcess(pid_in_file) => Ok(pid_in_file == pid),
|
||||
PidFileRead::NotHeldByAnyProcess(_) => Ok(false),
|
||||
},
|
||||
Some(pid_file_path) => {
|
||||
if pid_file_path.exists() {
|
||||
let pid_in_file = read_pidfile(pid_file_path)?;
|
||||
Ok(pid_in_file == pid)
|
||||
} else {
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
None => Ok(true),
|
||||
},
|
||||
Ok(false) => Ok(false),
|
||||
@@ -342,6 +277,21 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
/// Read a PID file
|
||||
///
|
||||
/// We expect a file that contains a single integer.
|
||||
fn read_pidfile(pidfile: &Path) -> Result<Pid> {
|
||||
let pid_str = fs::read_to_string(pidfile)
|
||||
.with_context(|| format!("failed to read pidfile {pidfile:?}"))?;
|
||||
let pid: i32 = pid_str
|
||||
.parse()
|
||||
.map_err(|_| anyhow!("failed to parse pidfile {pidfile:?}"))?;
|
||||
if pid < 1 {
|
||||
bail!("pidfile {pidfile:?} contained bad value '{pid}'");
|
||||
}
|
||||
Ok(Pid::from_raw(pid))
|
||||
}
|
||||
|
||||
fn process_has_stopped(pid: Pid) -> anyhow::Result<bool> {
|
||||
match kill(pid, None) {
|
||||
// Process exists, keep waiting
|
||||
|
||||
@@ -324,7 +324,7 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
|
||||
pg_version,
|
||||
)
|
||||
.unwrap_or_else(|e| {
|
||||
eprintln!("pageserver init failed: {e:?}");
|
||||
eprintln!("pageserver init failed: {e}");
|
||||
exit(1);
|
||||
});
|
||||
|
||||
|
||||
@@ -156,8 +156,6 @@ pub struct PageServerConf {
|
||||
|
||||
// jwt auth token used for communication with pageserver
|
||||
pub auth_token: String,
|
||||
|
||||
pub testing_mode: bool,
|
||||
}
|
||||
|
||||
impl Default for PageServerConf {
|
||||
@@ -168,7 +166,6 @@ impl Default for PageServerConf {
|
||||
listen_http_addr: String::new(),
|
||||
auth_type: AuthType::Trust,
|
||||
auth_token: String::new(),
|
||||
testing_mode: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -141,9 +141,6 @@ impl PageServerNode {
|
||||
init_config_overrides.push(&listen_http_addr_param);
|
||||
init_config_overrides.push(&listen_pg_addr_param);
|
||||
init_config_overrides.push(&broker_endpoints_param);
|
||||
if self.env.pageserver.testing_mode {
|
||||
init_config_overrides.push("testing_mode=true");
|
||||
}
|
||||
|
||||
if let Some(broker_etcd_prefix_param) = broker_etcd_prefix_param.as_deref() {
|
||||
init_config_overrides.push(broker_etcd_prefix_param);
|
||||
|
||||
@@ -45,9 +45,9 @@ and create new databases and accounts (control plane API in our case).
|
||||
|
||||
Integration tests, written in Python using the `pytest` framework.
|
||||
|
||||
`/vendor/postgres-v14` and `/vendor/postgres-v15`:
|
||||
`/vendor/postgres-v14`:
|
||||
|
||||
PostgreSQL source tree per version, with the modifications needed for Neon.
|
||||
PostgreSQL source tree, with the modifications needed for Neon.
|
||||
|
||||
`/pgxn/neon`:
|
||||
|
||||
|
||||
@@ -201,6 +201,8 @@ pub struct TimelineInfo {
|
||||
pub last_received_msg_ts: Option<u128>,
|
||||
pub pg_version: u32,
|
||||
|
||||
pub awaits_download: bool,
|
||||
|
||||
pub state: TimelineState,
|
||||
|
||||
// Some of the above fields are duplicated in 'local' and 'remote', for backwards-
|
||||
|
||||
12
libs/persistent_range_query/Cargo.toml
Normal file
@@ -0,0 +1,12 @@
|
||||
[package]
|
||||
name = "persistent_range_query"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
|
||||
|
||||
[dev-dependencies]
|
||||
rand = "0.8.3"
|
||||
78
libs/persistent_range_query/src/lib.rs
Normal file
@@ -0,0 +1,78 @@
|
||||
use std::ops::Range;
|
||||
|
||||
pub mod naive;
|
||||
pub mod ops;
|
||||
pub mod segment_tree;
|
||||
|
||||
/// Should be a monoid:
|
||||
/// * Identity element: for all a: combine(new_for_empty_range(), a) = combine(a, new_for_empty_range()) = a
|
||||
/// * Associativity: for all a, b, c: combine(combine(a, b), c) == combine(a, combine(b, c))
|
||||
pub trait RangeQueryResult<Key>: Sized + Clone {
|
||||
// Clone is equivalent to combine with an empty range.
|
||||
|
||||
fn new_for_empty_range() -> Self;
|
||||
|
||||
// Contract: left_range.end == right_range.start
|
||||
// left_range.start == left_range.end == right_range.start == right_range.end is still possible
|
||||
fn combine(
|
||||
left: &Self,
|
||||
left_range: &Range<Key>,
|
||||
right: &Self,
|
||||
right_range: &Range<Key>,
|
||||
) -> Self;
|
||||
|
||||
fn add(left: &mut Self, left_range: &Range<Key>, right: &Self, right_range: &Range<Key>);
|
||||
}
|
||||
|
||||
pub trait LazyRangeInitializer<Result: RangeQueryResult<Key>, Key> {
|
||||
fn get(&self, range: &Range<Key>) -> Result;
|
||||
}
|
||||
|
||||
/// Should be a monoid:
|
||||
/// * Identity element: for all op: compose(no_op(), op) == compose(op, no_op()) == op
|
||||
/// * Associativity: for all op_1, op_2, op_3: compose(compose(op_1, op_2), op_3) == compose(op_1, compose(op_2, op_3))
|
||||
///
|
||||
/// Should left act on Result:
|
||||
/// * Identity operation: for all r: no_op().apply(r) == r
|
||||
/// * Compatibility: for all op_1, op_2, r: op_1.apply(op_2.apply(r)) == compose(op_1, op_2).apply(r)
|
||||
pub trait RangeModification<Key> {
|
||||
type Result: RangeQueryResult<Key>;
|
||||
|
||||
fn no_op() -> Self;
|
||||
fn is_no_op(&self) -> bool;
|
||||
fn is_reinitialization(&self) -> bool;
|
||||
fn apply(&self, result: &mut Self::Result, range: &Range<Key>);
|
||||
fn compose(later: &Self, earlier: &mut Self);
|
||||
}
|
||||
|
||||
pub trait VecReadableVersion<Modification: RangeModification<Key>, Key> {
|
||||
fn get(&self, keys: &Range<Key>) -> Modification::Result;
|
||||
}
|
||||
|
||||
// TODO: use trait alias when stabilized
|
||||
pub trait VecFrozenVersion<Modification: RangeModification<Key>, Key>:
|
||||
Clone + VecReadableVersion<Modification, Key>
|
||||
{
|
||||
}
|
||||
|
||||
impl<
|
||||
T: Clone + VecReadableVersion<Modification, Key>,
|
||||
Modification: RangeModification<Key>,
|
||||
Key,
|
||||
> VecFrozenVersion<Modification, Key> for T
|
||||
{
|
||||
}
|
||||
|
||||
pub trait PersistentVecStorage<
|
||||
Modification: RangeModification<Key>,
|
||||
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||
Key,
|
||||
>: VecReadableVersion<Modification, Key>
|
||||
{
|
||||
fn new(all_keys: Range<Key>, initializer: Initializer) -> Self;
|
||||
|
||||
type FrozenVersion: VecFrozenVersion<Modification, Key>;
|
||||
|
||||
fn modify(&mut self, keys: &Range<Key>, modification: &Modification);
|
||||
fn freeze(&mut self) -> Self::FrozenVersion;
|
||||
}
|
||||
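To make the trait contracts above concrete, here is a minimal, hypothetical implementation (not part of this change; the names `MaxResult` and `ZeroInitializer` are made up): a query result that tracks the maximum value over a `u32` key range, with `i64::MIN` as the monoid identity, plus a trivial lazy initializer.

```rust
use persistent_range_query::{LazyRangeInitializer, RangeQueryResult};
use std::ops::Range;

// Hypothetical example result type: the maximum value in a key range.
#[derive(Clone, Copy, Debug, PartialEq)]
struct MaxResult(i64);

impl RangeQueryResult<u32> for MaxResult {
    fn new_for_empty_range() -> Self {
        // Identity element: combining with it never changes the other operand.
        MaxResult(i64::MIN)
    }

    fn combine(left: &Self, _: &Range<u32>, right: &Self, _: &Range<u32>) -> Self {
        // max() is associative, so the monoid laws hold.
        MaxResult(left.0.max(right.0))
    }

    fn add(left: &mut Self, left_range: &Range<u32>, right: &Self, right_range: &Range<u32>) {
        *left = Self::combine(left, left_range, right, right_range);
    }
}

// Hypothetical initializer: every key starts out at 0.
struct ZeroInitializer;

impl LazyRangeInitializer<MaxResult, u32> for ZeroInitializer {
    fn get(&self, range: &Range<u32>) -> MaxResult {
        // An empty range must yield the identity element.
        if range.is_empty() {
            MaxResult(i64::MIN)
        } else {
            MaxResult(0)
        }
    }
}
```

Any `PersistentVecStorage` implementation can then answer "maximum over an arbitrary key range" queries against this result type.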
115
libs/persistent_range_query/src/naive.rs
Normal file
@@ -0,0 +1,115 @@
|
||||
use crate::{
|
||||
LazyRangeInitializer, PersistentVecStorage, RangeModification, RangeQueryResult,
|
||||
VecReadableVersion,
|
||||
};
|
||||
use std::marker::PhantomData;
|
||||
use std::ops::Range;
|
||||
use std::rc::Rc;
|
||||
|
||||
pub struct NaiveFrozenVersion<Modification: RangeModification<Key>, Key> {
|
||||
all_keys: Range<Key>,
|
||||
values: Rc<Box<Vec<Modification::Result>>>,
|
||||
}
|
||||
|
||||
pub trait IndexableKey: Clone {
|
||||
fn index(all_keys: &Range<Self>, key: &Self) -> usize;
|
||||
fn element_range(all_keys: &Range<Self>, index: usize) -> Range<Self>;
|
||||
}
|
||||
|
||||
fn get<Modification: RangeModification<Key>, Key: IndexableKey>(
|
||||
all_keys: &Range<Key>,
|
||||
values: &Vec<Modification::Result>,
|
||||
keys: &Range<Key>,
|
||||
) -> Modification::Result {
|
||||
let mut result = Modification::Result::new_for_empty_range();
|
||||
let mut result_range = keys.start.clone()..keys.start.clone();
|
||||
for index in
|
||||
IndexableKey::index(&all_keys, &keys.start)..IndexableKey::index(&all_keys, &keys.end)
|
||||
{
|
||||
let element_range = IndexableKey::element_range(&all_keys, index);
|
||||
Modification::Result::add(&mut result, &result_range, &values[index], &element_range);
|
||||
result_range.end = element_range.end;
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
impl<Modification: RangeModification<Key>, Key: IndexableKey> VecReadableVersion<Modification, Key>
|
||||
for NaiveFrozenVersion<Modification, Key>
|
||||
{
|
||||
fn get(&self, keys: &Range<Key>) -> Modification::Result {
|
||||
get::<Modification, Key>(&self.all_keys, &self.values, keys)
|
||||
}
|
||||
}
|
||||
|
||||
// Manual implementation of `Clone` because `derive` requires `Modification: Clone`
|
||||
impl<Modification: RangeModification<Key>, Key: Clone> Clone
|
||||
for NaiveFrozenVersion<Modification, Key>
|
||||
{
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
all_keys: self.all_keys.clone(),
|
||||
values: self.values.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: is it at all possible to store previous versions in this struct,
|
||||
// without any Rc<>?
|
||||
pub struct NaiveVecStorage<
|
||||
Modification: RangeModification<Key>,
|
||||
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||
Key: IndexableKey,
|
||||
> {
|
||||
all_keys: Range<Key>,
|
||||
last_version: Vec<Modification::Result>,
|
||||
_initializer: PhantomData<Initializer>,
|
||||
}
|
||||
|
||||
impl<
|
||||
Modification: RangeModification<Key>,
|
||||
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||
Key: IndexableKey,
|
||||
> VecReadableVersion<Modification, Key> for NaiveVecStorage<Modification, Initializer, Key>
|
||||
{
|
||||
fn get(&self, keys: &Range<Key>) -> Modification::Result {
|
||||
get::<Modification, Key>(&self.all_keys, &self.last_version, keys)
|
||||
}
|
||||
}
|
||||
|
||||
impl<
|
||||
Modification: RangeModification<Key>,
|
||||
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||
Key: IndexableKey,
|
||||
> PersistentVecStorage<Modification, Initializer, Key>
|
||||
for NaiveVecStorage<Modification, Initializer, Key>
|
||||
{
|
||||
fn new(all_keys: Range<Key>, initializer: Initializer) -> Self {
|
||||
let mut values = Vec::with_capacity(IndexableKey::index(&all_keys, &all_keys.end));
|
||||
for index in 0..values.capacity() {
|
||||
values.push(initializer.get(&IndexableKey::element_range(&all_keys, index)));
|
||||
}
|
||||
NaiveVecStorage {
|
||||
all_keys,
|
||||
last_version: values,
|
||||
_initializer: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
type FrozenVersion = NaiveFrozenVersion<Modification, Key>;
|
||||
|
||||
fn modify(&mut self, keys: &Range<Key>, modification: &Modification) {
|
||||
for index in IndexableKey::index(&self.all_keys, &keys.start)
|
||||
..IndexableKey::index(&self.all_keys, &keys.end)
|
||||
{
|
||||
let element_range = IndexableKey::element_range(&self.all_keys, index);
|
||||
modification.apply(&mut self.last_version[index], &element_range);
|
||||
}
|
||||
}
|
||||
|
||||
fn freeze(&mut self) -> Self::FrozenVersion {
|
||||
NaiveFrozenVersion::<Modification, Key> {
|
||||
all_keys: self.all_keys.clone(),
|
||||
values: Rc::new(Box::new(self.last_version.clone())),
|
||||
}
|
||||
}
|
||||
}
|
||||
14
libs/persistent_range_query/src/ops/mod.rs
Normal file
@@ -0,0 +1,14 @@
|
||||
pub mod rsq;
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub struct SameElementsInitializer<T> {
|
||||
initial_element_value: T,
|
||||
}
|
||||
|
||||
impl<T> SameElementsInitializer<T> {
|
||||
pub fn new(initial_element_value: T) -> Self {
|
||||
SameElementsInitializer {
|
||||
initial_element_value,
|
||||
}
|
||||
}
|
||||
}
|
||||
118
libs/persistent_range_query/src/ops/rsq.rs
Normal file
@@ -0,0 +1,118 @@
|
||||
//! # Range Sum Query
|
||||
|
||||
use crate::ops::SameElementsInitializer;
|
||||
use crate::{LazyRangeInitializer, RangeModification, RangeQueryResult};
|
||||
use std::borrow::Borrow;
|
||||
use std::ops::{Add, AddAssign, Range};
|
||||
|
||||
// TODO: commutative Add
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct SumResult<T> {
|
||||
sum: T,
|
||||
}
|
||||
|
||||
impl<T> SumResult<T> {
|
||||
pub fn sum(&self) -> &T {
|
||||
&self.sum
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Clone + for<'a> AddAssign<&'a T> + From<u8>, Key> RangeQueryResult<Key> for SumResult<T>
|
||||
where
|
||||
for<'a> &'a T: Add<&'a T, Output = T>,
|
||||
{
|
||||
fn new_for_empty_range() -> Self {
|
||||
SumResult { sum: 0.into() }
|
||||
}
|
||||
|
||||
fn combine(
|
||||
left: &Self,
|
||||
_left_range: &Range<Key>,
|
||||
right: &Self,
|
||||
_right_range: &Range<Key>,
|
||||
) -> Self {
|
||||
SumResult {
|
||||
sum: &left.sum + &right.sum,
|
||||
}
|
||||
}
|
||||
|
||||
fn add(left: &mut Self, _left_range: &Range<Key>, right: &Self, _right_range: &Range<Key>) {
|
||||
left.sum += &right.sum
|
||||
}
|
||||
}
|
||||
|
||||
pub trait SumOfSameElements<Key> {
|
||||
fn sum(initial_element_value: &Self, keys: &Range<Key>) -> Self;
|
||||
}
|
||||
|
||||
impl<T: SumOfSameElements<Key>, TB: Borrow<T>, Key> LazyRangeInitializer<SumResult<T>, Key>
|
||||
for SameElementsInitializer<TB>
|
||||
where
|
||||
SumResult<T>: RangeQueryResult<Key>,
|
||||
{
|
||||
fn get(&self, range: &Range<Key>) -> SumResult<T> {
|
||||
SumResult {
|
||||
sum: SumOfSameElements::sum(self.initial_element_value.borrow(), range),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
pub enum AddAssignModification<T> {
|
||||
None,
|
||||
Add(T),
|
||||
Assign(T),
|
||||
}
|
||||
|
||||
impl<T: Clone + for<'a> AddAssign<&'a T>, Key> RangeModification<Key> for AddAssignModification<T>
|
||||
where
|
||||
SumResult<T>: RangeQueryResult<Key>,
|
||||
for<'a> SameElementsInitializer<&'a T>: LazyRangeInitializer<SumResult<T>, Key>,
|
||||
{
|
||||
type Result = SumResult<T>;
|
||||
|
||||
fn no_op() -> Self {
|
||||
AddAssignModification::None
|
||||
}
|
||||
|
||||
fn is_no_op(&self) -> bool {
|
||||
match self {
|
||||
AddAssignModification::None => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_reinitialization(&self) -> bool {
|
||||
match self {
|
||||
AddAssignModification::Assign(_) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn apply(&self, result: &mut SumResult<T>, range: &Range<Key>) {
|
||||
use AddAssignModification::*;
|
||||
match self {
|
||||
None => {}
|
||||
Add(x) | Assign(x) => {
|
||||
let to_add = SameElementsInitializer::new(x).get(range).sum;
|
||||
if let Assign(_) = self {
|
||||
result.sum = to_add;
|
||||
} else {
|
||||
result.sum += &to_add;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn compose(later: &Self, earlier: &mut Self) {
|
||||
use AddAssignModification::*;
|
||||
match (later, earlier) {
|
||||
(_, e @ None) => *e = later.clone(),
|
||||
(None, _) => {}
|
||||
(Assign(_), e) => *e = later.clone(),
|
||||
(Add(x), Add(y)) => *y += x,
|
||||
(Add(x), Assign(value)) => *value += x,
|
||||
}
|
||||
}
|
||||
}
|
||||
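As a sanity check on the composition law stated in lib.rs, the sketch below (hypothetical key type `Pos` and values, not part of the diff) composes an `Add` on top of an `Assign` and ends up with a single `Assign`; this collapsing is what lets a lazy storage keep only one pending modification per node.

```rust
use persistent_range_query::ops::rsq::{AddAssignModification, SumOfSameElements};
use persistent_range_query::RangeModification;
use std::ops::Range;

// Hypothetical key type, just enough to satisfy the trait bounds.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
struct Pos(u32);

impl SumOfSameElements<Pos> for i32 {
    fn sum(initial_element_value: &Self, keys: &Range<Pos>) -> Self {
        initial_element_value * (keys.end.0 - keys.start.0) as i32
    }
}

fn main() {
    // Assign(10) happened earlier; Add(2) is applied on top of it.
    let later = AddAssignModification::Add(2);
    let mut earlier = AddAssignModification::Assign(10i32);
    <AddAssignModification<i32> as RangeModification<Pos>>::compose(&later, &mut earlier);
    // The pending modification collapses to a single Assign(12).
    match earlier {
        AddAssignModification::Assign(v) => assert_eq!(v, 12),
        _ => unreachable!(),
    }
}
```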
255
libs/persistent_range_query/src/segment_tree.rs
Normal file
@@ -0,0 +1,255 @@
|
||||
//! # Segment Tree
|
||||
//! This is a folklore data structure from competitive programming. Do not confuse it with the interval tree.
|
||||
|
||||
use crate::{LazyRangeInitializer, PersistentVecStorage, RangeQueryResult, VecReadableVersion};
|
||||
use std::ops::Range;
|
||||
use std::rc::Rc;
|
||||
|
||||
pub trait MidpointableKey: Clone + Ord + Sized {
|
||||
fn midpoint(range: &Range<Self>) -> Self;
|
||||
}
|
||||
|
||||
pub trait RangeModification<Key>: Clone + crate::RangeModification<Key> {}
|
||||
|
||||
// TODO: use trait alias when stabilized
|
||||
impl<T: Clone + crate::RangeModification<Key>, Key> RangeModification<Key> for T {}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Node<Modification: RangeModification<Key>, Key> {
|
||||
result: Modification::Result,
|
||||
modify_children: Modification,
|
||||
left: Option<Rc<Self>>,
|
||||
right: Option<Rc<Self>>,
|
||||
}
|
||||
|
||||
// Manual implementation because we don't need `Key: Clone` for this, unlike with `derive`.
|
||||
impl<Modification: RangeModification<Key>, Key> Clone for Node<Modification, Key> {
|
||||
fn clone(&self) -> Self {
|
||||
Node {
|
||||
result: self.result.clone(),
|
||||
modify_children: self.modify_children.clone(),
|
||||
left: self.left.clone(),
|
||||
right: self.right.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<Modification: RangeModification<Key>, Key> Node<Modification, Key> {
|
||||
fn new<Initializer: LazyRangeInitializer<Modification::Result, Key>>(
|
||||
range: &Range<Key>,
|
||||
initializer: &Initializer,
|
||||
) -> Self {
|
||||
Node {
|
||||
result: initializer.get(range),
|
||||
modify_children: Modification::no_op(),
|
||||
left: None,
|
||||
right: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn apply(&mut self, modification: &Modification, range: &Range<Key>) {
|
||||
modification.apply(&mut self.result, range);
|
||||
Modification::compose(modification, &mut self.modify_children);
|
||||
if self.modify_children.is_reinitialization() {
|
||||
self.left = None;
|
||||
self.right = None;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn force_children<Initializer: LazyRangeInitializer<Modification::Result, Key>>(
|
||||
&mut self,
|
||||
initializer: &Initializer,
|
||||
range_left: &Range<Key>,
|
||||
range_right: &Range<Key>,
|
||||
) {
|
||||
let left = Rc::make_mut(
|
||||
self.left
|
||||
.get_or_insert_with(|| Rc::new(Node::new(&range_left, initializer))),
|
||||
);
|
||||
let right = Rc::make_mut(
|
||||
self.right
|
||||
.get_or_insert_with(|| Rc::new(Node::new(&range_right, initializer))),
|
||||
);
|
||||
left.apply(&self.modify_children, &range_left);
|
||||
right.apply(&self.modify_children, &range_right);
|
||||
self.modify_children = Modification::no_op();
|
||||
}
|
||||
|
||||
pub fn recalculate_from_children(&mut self, range_left: &Range<Key>, range_right: &Range<Key>) {
|
||||
assert!(self.modify_children.is_no_op());
|
||||
assert!(self.left.is_some());
|
||||
assert!(self.right.is_some());
|
||||
self.result = Modification::Result::combine(
|
||||
&self.left.as_ref().unwrap().result,
|
||||
&range_left,
|
||||
&self.right.as_ref().unwrap().result,
|
||||
&range_right,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn split_range<Key: MidpointableKey>(range: &Range<Key>) -> (Range<Key>, Range<Key>) {
|
||||
let range_left = range.start.clone()..MidpointableKey::midpoint(range);
|
||||
let range_right = range_left.end.clone()..range.end.clone();
|
||||
(range_left, range_right)
|
||||
}
|
||||
|
||||
pub struct PersistentSegmentTreeVersion<
|
||||
Modification: RangeModification<Key>,
|
||||
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||
Key: Clone,
|
||||
> {
|
||||
root: Rc<Node<Modification, Key>>,
|
||||
all_keys: Range<Key>,
|
||||
initializer: Rc<Initializer>,
|
||||
}
|
||||
|
||||
// Manual implementation because we don't need `Key: Clone` for this, unlike with `derive`.
|
||||
impl<
|
||||
Modification: RangeModification<Key>,
|
||||
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||
Key: Clone,
|
||||
> Clone for PersistentSegmentTreeVersion<Modification, Initializer, Key>
|
||||
{
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
root: self.root.clone(),
|
||||
all_keys: self.all_keys.clone(),
|
||||
initializer: self.initializer.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get<
|
||||
Modification: RangeModification<Key>,
|
||||
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||
Key: MidpointableKey,
|
||||
>(
|
||||
node: &mut Rc<Node<Modification, Key>>,
|
||||
node_keys: &Range<Key>,
|
||||
initializer: &Initializer,
|
||||
keys: &Range<Key>,
|
||||
) -> Modification::Result {
|
||||
if node_keys.end <= keys.start || keys.end <= node_keys.start {
|
||||
return Modification::Result::new_for_empty_range();
|
||||
}
|
||||
if keys.start <= node_keys.start && node_keys.end <= keys.end {
|
||||
return node.result.clone();
|
||||
}
|
||||
let node = Rc::make_mut(node);
|
||||
let (left_keys, right_keys) = split_range(node_keys);
|
||||
node.force_children(initializer, &left_keys, &right_keys);
|
||||
let mut result = get(node.left.as_mut().unwrap(), &left_keys, initializer, keys);
|
||||
Modification::Result::add(
|
||||
&mut result,
|
||||
&left_keys,
|
||||
&get(node.right.as_mut().unwrap(), &right_keys, initializer, keys),
|
||||
&right_keys,
|
||||
);
|
||||
result
|
||||
}
|
||||
|
||||
fn modify<
|
||||
Modification: RangeModification<Key>,
|
||||
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||
Key: MidpointableKey,
|
||||
>(
|
||||
node: &mut Rc<Node<Modification, Key>>,
|
||||
node_keys: &Range<Key>,
|
||||
initializer: &Initializer,
|
||||
keys: &Range<Key>,
|
||||
modification: &Modification,
|
||||
) {
|
||||
if modification.is_no_op() || node_keys.end <= keys.start || keys.end <= node_keys.start {
|
||||
return;
|
||||
}
|
||||
let node = Rc::make_mut(node);
|
||||
if keys.start <= node_keys.start && node_keys.end <= keys.end {
|
||||
node.apply(modification, node_keys);
|
||||
return;
|
||||
}
|
||||
let (left_keys, right_keys) = split_range(node_keys);
|
||||
node.force_children(initializer, &left_keys, &right_keys);
|
||||
modify(
|
||||
node.left.as_mut().unwrap(),
|
||||
&left_keys,
|
||||
initializer,
|
||||
keys,
|
||||
&modification,
|
||||
);
|
||||
modify(
|
||||
node.right.as_mut().unwrap(),
|
||||
&right_keys,
|
||||
initializer,
|
||||
keys,
|
||||
&modification,
|
||||
);
|
||||
node.recalculate_from_children(&left_keys, &right_keys);
|
||||
}
|
||||
|
||||
impl<
|
||||
Modification: RangeModification<Key>,
|
||||
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||
Key: MidpointableKey,
|
||||
> VecReadableVersion<Modification, Key>
|
||||
for PersistentSegmentTreeVersion<Modification, Initializer, Key>
|
||||
{
|
||||
fn get(&self, keys: &Range<Key>) -> Modification::Result {
|
||||
get(
|
||||
&mut self.root.clone(), // TODO: do not always force a branch
|
||||
&self.all_keys,
|
||||
self.initializer.as_ref(),
|
||||
keys,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct PersistentSegmentTree<
|
||||
Modification: RangeModification<Key>,
|
||||
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||
Key: MidpointableKey,
|
||||
>(PersistentSegmentTreeVersion<Modification, Initializer, Key>);
|
||||
|
||||
impl<
|
||||
Modification: RangeModification<Key>,
|
||||
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||
Key: MidpointableKey,
|
||||
> VecReadableVersion<Modification, Key>
|
||||
for PersistentSegmentTree<Modification, Initializer, Key>
|
||||
{
|
||||
fn get(&self, keys: &Range<Key>) -> Modification::Result {
|
||||
self.0.get(keys)
|
||||
}
|
||||
}
|
||||
|
||||
impl<
|
||||
Modification: RangeModification<Key>,
|
||||
Initializer: LazyRangeInitializer<Modification::Result, Key>,
|
||||
Key: MidpointableKey,
|
||||
> PersistentVecStorage<Modification, Initializer, Key>
|
||||
for PersistentSegmentTree<Modification, Initializer, Key>
|
||||
{
|
||||
fn new(all_keys: Range<Key>, initializer: Initializer) -> Self {
|
||||
PersistentSegmentTree(PersistentSegmentTreeVersion {
|
||||
root: Rc::new(Node::new(&all_keys, &initializer)),
|
||||
all_keys: all_keys,
|
||||
initializer: Rc::new(initializer),
|
||||
})
|
||||
}
|
||||
|
||||
type FrozenVersion = PersistentSegmentTreeVersion<Modification, Initializer, Key>;
|
||||
|
||||
fn modify(&mut self, keys: &Range<Key>, modification: &Modification) {
|
||||
modify(
|
||||
&mut self.0.root, // TODO: do not always force a branch
|
||||
&self.0.all_keys,
|
||||
self.0.initializer.as_ref(),
|
||||
keys,
|
||||
modification,
|
||||
)
|
||||
}
|
||||
|
||||
fn freeze(&mut self) -> Self::FrozenVersion {
|
||||
self.0.clone()
|
||||
}
|
||||
}
|
||||
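A hypothetical end-to-end sketch of the segment tree (the key type `P` and the numbers are made up; the real coverage lives in the tests below), showing why `freeze()` is cheap, it only clones the `Rc` handle to the root, while older versions stay readable thanks to the copy-on-write nodes:

```rust
use persistent_range_query::ops::rsq::{AddAssignModification, SumOfSameElements};
use persistent_range_query::ops::SameElementsInitializer;
use persistent_range_query::segment_tree::{MidpointableKey, PersistentSegmentTree};
use persistent_range_query::{PersistentVecStorage, VecReadableVersion};
use std::ops::Range;

// Hypothetical key type for this sketch.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
struct P(u32);

impl MidpointableKey for P {
    fn midpoint(range: &Range<Self>) -> Self {
        P(range.start.0 + (range.end.0 - range.start.0) / 2)
    }
}

impl SumOfSameElements<P> for i64 {
    fn sum(initial_element_value: &Self, keys: &Range<P>) -> Self {
        initial_element_value * (keys.end.0 - keys.start.0) as i64
    }
}

fn main() {
    // 16 keys, all initialized to zero.
    let mut tree: PersistentSegmentTree<AddAssignModification<i64>, _, _> =
        PersistentSegmentTree::new(P(0)..P(16), SameElementsInitializer::new(0i64));

    tree.modify(&(P(0)..P(8)), &AddAssignModification::Add(5));
    let snapshot = tree.freeze(); // cheap: clones Rc handles, no node copies yet

    tree.modify(&(P(4)..P(16)), &AddAssignModification::Assign(1));

    // Keys 0..4 are 5, keys 4..16 are 1.
    assert_eq!(*tree.get(&(P(0)..P(16))).sum(), 4 * 5 + 12 * 1);
    // The frozen version still sees the state before the Assign.
    assert_eq!(*snapshot.get(&(P(0)..P(16))).sum(), 8 * 5);
}
```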
295
libs/persistent_range_query/tests/layer_map_test.rs
Normal file
@@ -0,0 +1,295 @@
|
||||
use persistent_range_query::naive::{IndexableKey, NaiveVecStorage};
|
||||
use persistent_range_query::ops::SameElementsInitializer;
|
||||
use persistent_range_query::segment_tree::{MidpointableKey, PersistentSegmentTree};
|
||||
use persistent_range_query::{
|
||||
LazyRangeInitializer, PersistentVecStorage, RangeModification, RangeQueryResult,
|
||||
VecReadableVersion,
|
||||
};
|
||||
use std::cmp::Ordering;
|
||||
use std::ops::Range;
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
|
||||
struct PageIndex(u32);
|
||||
type LayerId = String;
|
||||
|
||||
impl IndexableKey for PageIndex {
|
||||
fn index(all_keys: &Range<Self>, key: &Self) -> usize {
|
||||
(key.0 as usize) - (all_keys.start.0 as usize)
|
||||
}
|
||||
|
||||
fn element_range(all_keys: &Range<Self>, index: usize) -> Range<Self> {
|
||||
PageIndex(all_keys.start.0 + index as u32)..PageIndex(all_keys.start.0 + index as u32 + 1)
|
||||
}
|
||||
}
|
||||
|
||||
impl MidpointableKey for PageIndex {
|
||||
fn midpoint(range: &Range<Self>) -> Self {
|
||||
PageIndex(range.start.0 + (range.end.0 - range.start.0) / 2)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
struct LayerMapInformation {
|
||||
// These fields only make sense for a range of length 1.
|
||||
last_layer: Option<LayerId>,
|
||||
last_image_layer: Option<LayerId>,
|
||||
// Works for all ranges.
|
||||
max_delta_layers: (usize, Range<PageIndex>),
|
||||
}
|
||||
|
||||
impl LayerMapInformation {
|
||||
fn last_layers(&self) -> (&Option<LayerId>, &Option<LayerId>) {
|
||||
(&self.last_layer, &self.last_image_layer)
|
||||
}
|
||||
|
||||
fn max_delta_layers(&self) -> &(usize, Range<PageIndex>) {
|
||||
&self.max_delta_layers
|
||||
}
|
||||
}
|
||||
|
||||
fn merge_ranges(left: &Range<PageIndex>, right: &Range<PageIndex>) -> Range<PageIndex> {
|
||||
if left.is_empty() {
|
||||
right.clone()
|
||||
} else if right.is_empty() {
|
||||
left.clone()
|
||||
} else if left.end == right.start {
|
||||
left.start..right.end
|
||||
} else {
|
||||
left.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl RangeQueryResult<PageIndex> for LayerMapInformation {
|
||||
fn new_for_empty_range() -> Self {
|
||||
LayerMapInformation {
|
||||
last_layer: None,
|
||||
last_image_layer: None,
|
||||
max_delta_layers: (0, PageIndex(0)..PageIndex(0)),
|
||||
}
|
||||
}
|
||||
|
||||
fn combine(
|
||||
left: &Self,
|
||||
_left_range: &Range<PageIndex>,
|
||||
right: &Self,
|
||||
_right_range: &Range<PageIndex>,
|
||||
) -> Self {
|
||||
// Note that either range may be empty.
|
||||
LayerMapInformation {
|
||||
last_layer: left
|
||||
.last_layer
|
||||
.as_ref()
|
||||
.or_else(|| right.last_layer.as_ref())
|
||||
.cloned(),
|
||||
last_image_layer: left
|
||||
.last_image_layer
|
||||
.as_ref()
|
||||
.or_else(|| right.last_image_layer.as_ref())
|
||||
.cloned(),
|
||||
max_delta_layers: match left.max_delta_layers.0.cmp(&right.max_delta_layers.0) {
|
||||
Ordering::Less => right.max_delta_layers.clone(),
|
||||
Ordering::Greater => left.max_delta_layers.clone(),
|
||||
Ordering::Equal => (
|
||||
left.max_delta_layers.0,
|
||||
merge_ranges(&left.max_delta_layers.1, &right.max_delta_layers.1),
|
||||
),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn add(
|
||||
left: &mut Self,
|
||||
left_range: &Range<PageIndex>,
|
||||
right: &Self,
|
||||
right_range: &Range<PageIndex>,
|
||||
) {
|
||||
*left = Self::combine(&left, left_range, right, right_range);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct AddDeltaLayers {
|
||||
last_layer: LayerId,
|
||||
count: usize,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct LayerMapModification {
|
||||
add_image_layer: Option<LayerId>,
|
||||
add_delta_layers: Option<AddDeltaLayers>,
|
||||
}
|
||||
|
||||
impl LayerMapModification {
|
||||
fn add_image_layer(layer: impl Into<LayerId>) -> Self {
|
||||
LayerMapModification {
|
||||
add_image_layer: Some(layer.into()),
|
||||
add_delta_layers: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn add_delta_layer(layer: impl Into<LayerId>) -> Self {
|
||||
LayerMapModification {
|
||||
add_image_layer: None,
|
||||
add_delta_layers: Some(AddDeltaLayers {
|
||||
last_layer: layer.into(),
|
||||
count: 1,
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RangeModification<PageIndex> for LayerMapModification {
|
||||
type Result = LayerMapInformation;
|
||||
|
||||
fn no_op() -> Self {
|
||||
LayerMapModification {
|
||||
add_image_layer: None,
|
||||
add_delta_layers: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_no_op(&self) -> bool {
|
||||
self.add_image_layer.is_none() && self.add_delta_layers.is_none()
|
||||
}
|
||||
|
||||
fn is_reinitialization(&self) -> bool {
|
||||
self.add_image_layer.is_some()
|
||||
}
|
||||
|
||||
fn apply(&self, result: &mut Self::Result, range: &Range<PageIndex>) {
|
||||
if let Some(layer) = &self.add_image_layer {
|
||||
result.last_layer = Some(layer.clone());
|
||||
result.last_image_layer = Some(layer.clone());
|
||||
result.max_delta_layers = (0, range.clone());
|
||||
}
|
||||
if let Some(AddDeltaLayers { last_layer, count }) = &self.add_delta_layers {
|
||||
result.last_layer = Some(last_layer.clone());
|
||||
result.max_delta_layers.0 += count;
|
||||
}
|
||||
}
|
||||
|
||||
fn compose(later: &Self, earlier: &mut Self) {
|
||||
if later.add_image_layer.is_some() {
|
||||
*earlier = later.clone();
|
||||
return;
|
||||
}
|
||||
if let Some(AddDeltaLayers { last_layer, count }) = &later.add_delta_layers {
|
||||
let res = earlier.add_delta_layers.get_or_insert(AddDeltaLayers {
|
||||
last_layer: LayerId::default(),
|
||||
count: 0,
|
||||
});
|
||||
res.last_layer = last_layer.clone();
|
||||
res.count += count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LazyRangeInitializer<LayerMapInformation, PageIndex> for SameElementsInitializer<()> {
|
||||
fn get(&self, range: &Range<PageIndex>) -> LayerMapInformation {
|
||||
LayerMapInformation {
|
||||
last_layer: None,
|
||||
last_image_layer: None,
|
||||
max_delta_layers: (0, range.clone()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn test_layer_map<
|
||||
S: PersistentVecStorage<LayerMapModification, SameElementsInitializer<()>, PageIndex>,
|
||||
>() {
|
||||
let mut s = S::new(
|
||||
PageIndex(0)..PageIndex(100),
|
||||
SameElementsInitializer::new(()),
|
||||
);
|
||||
s.modify(
|
||||
&(PageIndex(0)..PageIndex(70)),
|
||||
&LayerMapModification::add_image_layer("Img0..70"),
|
||||
);
|
||||
s.modify(
|
||||
&(PageIndex(50)..PageIndex(100)),
|
||||
&LayerMapModification::add_image_layer("Img50..100"),
|
||||
);
|
||||
s.modify(
|
||||
&(PageIndex(10)..PageIndex(60)),
|
||||
&LayerMapModification::add_delta_layer("Delta10..60"),
|
||||
);
|
||||
let s_before_last_delta = s.freeze();
|
||||
s.modify(
|
||||
&(PageIndex(20)..PageIndex(80)),
|
||||
&LayerMapModification::add_delta_layer("Delta20..80"),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
s.get(&(PageIndex(5)..PageIndex(6))).last_layers(),
|
||||
(&Some("Img0..70".to_owned()), &Some("Img0..70".to_owned()))
|
||||
);
|
||||
assert_eq!(
|
||||
s.get(&(PageIndex(15)..PageIndex(16))).last_layers(),
|
||||
(
|
||||
&Some("Delta10..60".to_owned()),
|
||||
&Some("Img0..70".to_owned())
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
s.get(&(PageIndex(25)..PageIndex(26))).last_layers(),
|
||||
(
|
||||
&Some("Delta20..80".to_owned()),
|
||||
&Some("Img0..70".to_owned())
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
s.get(&(PageIndex(65)..PageIndex(66))).last_layers(),
|
||||
(
|
||||
&Some("Delta20..80".to_owned()),
|
||||
&Some("Img50..100".to_owned())
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
s.get(&(PageIndex(95)..PageIndex(96))).last_layers(),
|
||||
(
|
||||
&Some("Img50..100".to_owned()),
|
||||
&Some("Img50..100".to_owned())
|
||||
)
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
s.get(&(PageIndex(0)..PageIndex(100))).max_delta_layers(),
|
||||
&(2, PageIndex(20)..PageIndex(60)),
|
||||
);
|
||||
assert_eq!(
|
||||
*s_before_last_delta
|
||||
.get(&(PageIndex(0)..PageIndex(100)))
|
||||
.max_delta_layers(),
|
||||
(1, PageIndex(10)..PageIndex(60)),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
*s.get(&(PageIndex(10)..PageIndex(30))).max_delta_layers(),
|
||||
(2, PageIndex(20)..PageIndex(30))
|
||||
);
|
||||
assert_eq!(
|
||||
*s.get(&(PageIndex(10)..PageIndex(20))).max_delta_layers(),
|
||||
(1, PageIndex(10)..PageIndex(20))
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
*s.get(&(PageIndex(70)..PageIndex(80))).max_delta_layers(),
|
||||
(1, PageIndex(70)..PageIndex(80))
|
||||
);
|
||||
assert_eq!(
|
||||
*s_before_last_delta
|
||||
.get(&(PageIndex(70)..PageIndex(80)))
|
||||
.max_delta_layers(),
|
||||
(0, PageIndex(70)..PageIndex(80))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_naive() {
|
||||
test_layer_map::<NaiveVecStorage<_, _, _>>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_segment_tree() {
|
||||
test_layer_map::<PersistentSegmentTree<_, _, _>>();
|
||||
}
|
||||
116
libs/persistent_range_query/tests/rsq_test.rs
Normal file
@@ -0,0 +1,116 @@
|
||||
use persistent_range_query::naive::*;
|
||||
use persistent_range_query::ops::rsq::AddAssignModification::Add;
|
||||
use persistent_range_query::ops::rsq::*;
|
||||
use persistent_range_query::ops::SameElementsInitializer;
|
||||
use persistent_range_query::segment_tree::{MidpointableKey, PersistentSegmentTree};
|
||||
use persistent_range_query::{PersistentVecStorage, VecReadableVersion};
|
||||
use rand::{Rng, SeedableRng};
|
||||
use std::ops::Range;
|
||||
|
||||
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
|
||||
struct K(u16);
|
||||
|
||||
impl IndexableKey for K {
|
||||
fn index(all_keys: &Range<Self>, key: &Self) -> usize {
|
||||
(key.0 as usize) - (all_keys.start.0 as usize)
|
||||
}
|
||||
|
||||
fn element_range(all_keys: &Range<Self>, index: usize) -> Range<Self> {
|
||||
K(all_keys.start.0 + index as u16)..K(all_keys.start.0 + index as u16 + 1)
|
||||
}
|
||||
}
|
||||
|
||||
impl SumOfSameElements<K> for i32 {
|
||||
fn sum(initial_element_value: &Self, keys: &Range<K>) -> Self {
|
||||
initial_element_value * (keys.end.0 - keys.start.0) as Self
|
||||
}
|
||||
}
|
||||
|
||||
impl MidpointableKey for K {
|
||||
fn midpoint(range: &Range<Self>) -> Self {
|
||||
K(range.start.0 + (range.end.0 - range.start.0) / 2)
|
||||
}
|
||||
}
|
||||
|
||||
fn test_storage<
|
||||
S: PersistentVecStorage<AddAssignModification<i32>, SameElementsInitializer<i32>, K>,
|
||||
>() {
|
||||
let mut s = S::new(K(0)..K(12), SameElementsInitializer::new(0i32));
|
||||
assert_eq!(*s.get(&(K(0)..K(12))).sum(), 0);
|
||||
|
||||
s.modify(&(K(2)..K(5)), &AddAssignModification::Add(3));
|
||||
assert_eq!(*s.get(&(K(0)..K(12))).sum(), 3 + 3 + 3);
|
||||
let s_old = s.freeze();
|
||||
|
||||
s.modify(&(K(3)..K(6)), &AddAssignModification::Assign(10));
|
||||
assert_eq!(*s.get(&(K(0)..K(12))).sum(), 3 + 10 + 10 + 10);
|
||||
|
||||
s.modify(&(K(4)..K(7)), &AddAssignModification::Add(2));
|
||||
assert_eq!(*s.get(&(K(0)..K(12))).sum(), 3 + 10 + 12 + 12 + 2);
|
||||
|
||||
assert_eq!(*s.get(&(K(4)..K(6))).sum(), 12 + 12);
|
||||
assert_eq!(*s_old.get(&(K(4)..K(6))).sum(), 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_naive() {
|
||||
test_storage::<NaiveVecStorage<_, _, _>>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_segment_tree() {
|
||||
test_storage::<PersistentSegmentTree<_, _, _>>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_stress() {
|
||||
const LEN: u16 = 17_238;
|
||||
const OPERATIONS: i32 = 20_000;
|
||||
|
||||
let mut rng = rand::rngs::StdRng::seed_from_u64(0);
|
||||
let mut naive: NaiveVecStorage<AddAssignModification<i32>, _, _> =
|
||||
NaiveVecStorage::new(K(0)..K(LEN), SameElementsInitializer::new(2i32));
|
||||
let mut segm_tree: PersistentSegmentTree<AddAssignModification<i32>, _, _> =
|
||||
PersistentSegmentTree::new(K(0)..K(LEN), SameElementsInitializer::new(2i32));
|
||||
|
||||
fn gen_range(rng: &mut impl Rng) -> Range<K> {
|
||||
let l: u16 = rng.gen_range(0..LEN);
|
||||
let r: u16 = rng.gen_range(0..LEN);
|
||||
if l <= r {
|
||||
K(l)..K(r)
|
||||
} else {
|
||||
K(r)..K(l)
|
||||
}
|
||||
}
|
||||
|
||||
for _ in 0..2 {
|
||||
let checksum_range = gen_range(&mut rng);
|
||||
let checksum_before: i32 = *naive.get(&checksum_range).sum();
|
||||
assert_eq!(checksum_before, *segm_tree.get(&checksum_range).sum());
|
||||
|
||||
let naive_before = naive.freeze();
|
||||
let segm_tree_before = segm_tree.freeze();
|
||||
assert_eq!(checksum_before, *naive_before.get(&checksum_range).sum());
|
||||
assert_eq!(checksum_before, *segm_tree.get(&checksum_range).sum());
|
||||
|
||||
for _ in 0..OPERATIONS {
|
||||
{
|
||||
let range = gen_range(&mut rng);
|
||||
assert_eq!(naive.get(&range).sum(), segm_tree.get(&range).sum());
|
||||
}
|
||||
{
|
||||
let range = gen_range(&mut rng);
|
||||
let val = rng.gen_range(-10i32..=10i32);
|
||||
let op = Add(val);
|
||||
naive.modify(&range, &op);
|
||||
segm_tree.modify(&range, &op);
|
||||
}
|
||||
}
|
||||
|
||||
assert_eq!(checksum_before, *naive_before.get(&checksum_range).sum());
|
||||
assert_eq!(
|
||||
checksum_before,
|
||||
*segm_tree_before.get(&checksum_range).sum()
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -10,7 +10,7 @@ mod s3_bucket;
|
||||
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
fmt::Debug,
|
||||
fmt::{Debug, Display},
|
||||
num::{NonZeroU32, NonZeroUsize},
|
||||
ops::Deref,
|
||||
path::{Path, PathBuf},
|
||||
@@ -41,27 +41,44 @@ pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;
|
||||
|
||||
const REMOTE_STORAGE_PREFIX_SEPARATOR: char = '/';
|
||||
|
||||
/// Path on the remote storage, relative to some inner prefix.
|
||||
/// The prefix is an implementation detail that allows representing local paths
|
||||
/// as the remote ones, stripping the local storage prefix away.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct RemotePath(PathBuf);
|
||||
|
||||
impl RemotePath {
|
||||
pub fn new(relative_path: &Path) -> anyhow::Result<Self> {
|
||||
anyhow::ensure!(
|
||||
relative_path.is_relative(),
|
||||
"Path {relative_path:?} is not relative"
|
||||
);
|
||||
Ok(Self(relative_path.to_path_buf()))
|
||||
}
|
||||
|
||||
pub fn with_base(&self, base_path: &Path) -> PathBuf {
|
||||
base_path.join(&self.0)
|
||||
}
|
||||
#[derive(Clone, PartialEq, Eq)]
|
||||
pub struct RemoteObjectId(String);
|
||||
|
||||
///
|
||||
/// A key that refers to an object in remote storage. It works much like a Path,
|
||||
/// but it's a separate datatype so that you don't accidentally mix local paths
|
||||
/// and remote keys.
|
||||
///
|
||||
impl RemoteObjectId {
|
||||
// Needed to retrieve the last component of a RemoteObjectId.
|
||||
// In other words, a file name.
|
||||
/// Turn a/b/c or a/b/c/ into c
|
||||
pub fn object_name(&self) -> Option<&str> {
|
||||
self.0.file_name().and_then(|os_str| os_str.to_str())
|
||||
// corner case: char::to_string is not const, that's why this is more verbose than it needs to be
|
||||
// see https://github.com/rust-lang/rust/issues/88674
|
||||
if self.0.len() == 1 && self.0.chars().next().unwrap() == REMOTE_STORAGE_PREFIX_SEPARATOR {
|
||||
return None;
|
||||
}
|
||||
|
||||
if self.0.ends_with(REMOTE_STORAGE_PREFIX_SEPARATOR) {
|
||||
self.0.rsplit(REMOTE_STORAGE_PREFIX_SEPARATOR).nth(1)
|
||||
} else {
|
||||
self.0
|
||||
.rsplit_once(REMOTE_STORAGE_PREFIX_SEPARATOR)
|
||||
.map(|(_, last)| last)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for RemoteObjectId {
|
||||
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
|
||||
Debug::fmt(&self.0, fmt)
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for RemoteObjectId {
|
||||
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
Display::fmt(&self.0, fmt)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -70,40 +87,49 @@ impl RemotePath {
|
||||
/// providing basic CRUD operations for storage files.
|
||||
#[async_trait::async_trait]
|
||||
pub trait RemoteStorage: Send + Sync + 'static {
|
||||
/// Attempts to derive the storage path out of the local path, if the latter is correct.
|
||||
fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<RemoteObjectId>;
|
||||
|
||||
/// Gets the download path of the given storage file.
|
||||
fn local_path(&self, remote_object_id: &RemoteObjectId) -> anyhow::Result<PathBuf>;
|
||||
|
||||
/// Lists all items the storage has right now.
|
||||
async fn list(&self) -> anyhow::Result<Vec<RemotePath>>;
|
||||
async fn list(&self) -> anyhow::Result<Vec<RemoteObjectId>>;
|
||||
|
||||
/// Lists all top level subdirectories for a given prefix
|
||||
/// Note: here we assume that if the prefix is passed it was obtained via remote_object_id
|
||||
/// which already takes into account any kind of global prefix (prefix_in_bucket for S3 or storage_root for LocalFS)
|
||||
/// so this method doesn't need to.
|
||||
async fn list_prefixes(&self, prefix: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>>;
|
||||
async fn list_prefixes(
|
||||
&self,
|
||||
prefix: Option<&RemoteObjectId>,
|
||||
) -> anyhow::Result<Vec<RemoteObjectId>>;
|
||||
|
||||
/// Streams the local file contents into the remote storage entry.
|
||||
async fn upload(
|
||||
&self,
|
||||
data: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
|
||||
from: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
|
||||
// S3 PUT request requires the content length to be specified,
|
||||
// otherwise it starts to fail with the concurrent connection count increasing.
|
||||
data_size_bytes: usize,
|
||||
to: &RemotePath,
|
||||
from_size_bytes: usize,
|
||||
to: &RemoteObjectId,
|
||||
metadata: Option<StorageMetadata>,
|
||||
) -> anyhow::Result<()>;
|
||||
|
||||
/// Streams the remote storage entry contents into the buffered writer given, returns the filled writer.
|
||||
/// Returns the metadata, if any was stored with the file previously.
|
||||
async fn download(&self, from: &RemotePath) -> Result<Download, DownloadError>;
|
||||
async fn download(&self, from: &RemoteObjectId) -> Result<Download, DownloadError>;
|
||||
|
||||
/// Streams a given byte range of the remote storage entry contents into the buffered writer given, returns the filled writer.
|
||||
/// Returns the metadata, if any was stored with the file previously.
|
||||
async fn download_byte_range(
|
||||
&self,
|
||||
from: &RemotePath,
|
||||
from: &RemoteObjectId,
|
||||
start_inclusive: u64,
|
||||
end_exclusive: Option<u64>,
|
||||
) -> Result<Download, DownloadError>;
|
||||
|
||||
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()>;
|
||||
async fn delete(&self, path: &RemoteObjectId) -> anyhow::Result<()>;
|
||||
|
||||
/// Downcast to LocalFs implementation. For tests.
|
||||
fn as_local(&self) -> Option<&LocalFs> {
|
||||
@@ -152,35 +178,34 @@ impl std::error::Error for DownloadError {}
|
||||
/// Every storage, currently supported.
|
||||
/// Serves as a simple way to pass around the [`RemoteStorage`] without dealing with generics.
|
||||
#[derive(Clone)]
|
||||
pub enum GenericRemoteStorage {
|
||||
LocalFs(LocalFs),
|
||||
AwsS3(Arc<S3Bucket>),
|
||||
}
|
||||
pub struct GenericRemoteStorage(Arc<dyn RemoteStorage>);
|
||||
|
||||
impl Deref for GenericRemoteStorage {
|
||||
type Target = dyn RemoteStorage;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
match self {
|
||||
GenericRemoteStorage::LocalFs(local_fs) => local_fs,
|
||||
GenericRemoteStorage::AwsS3(s3_bucket) => s3_bucket.as_ref(),
|
||||
}
|
||||
self.0.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
impl GenericRemoteStorage {
|
||||
pub fn new(storage: impl RemoteStorage) -> Self {
|
||||
Self(Arc::new(storage))
|
||||
}
|
||||
|
||||
pub fn from_config(
|
||||
working_directory: PathBuf,
|
||||
storage_config: &RemoteStorageConfig,
|
||||
) -> anyhow::Result<GenericRemoteStorage> {
|
||||
Ok(match &storage_config.storage {
|
||||
RemoteStorageKind::LocalFs(root) => {
|
||||
info!("Using fs root '{}' as a remote storage", root.display());
|
||||
GenericRemoteStorage::LocalFs(LocalFs::new(root.clone())?)
|
||||
GenericRemoteStorage::new(LocalFs::new(root.clone(), working_directory)?)
|
||||
}
|
||||
RemoteStorageKind::AwsS3(s3_config) => {
|
||||
info!("Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}'",
|
||||
s3_config.bucket_name, s3_config.bucket_region, s3_config.prefix_in_bucket, s3_config.endpoint);
|
||||
GenericRemoteStorage::AwsS3(Arc::new(S3Bucket::new(s3_config)?))
|
||||
GenericRemoteStorage::new(S3Bucket::new(s3_config, working_directory)?)
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -194,12 +219,23 @@ impl GenericRemoteStorage {
|
||||
&self,
|
||||
from: Box<dyn tokio::io::AsyncRead + Unpin + Send + Sync + 'static>,
|
||||
from_size_bytes: usize,
|
||||
to: &RemotePath,
|
||||
from_path: &Path,
|
||||
) -> anyhow::Result<()> {
|
||||
self.upload(from, from_size_bytes, to, None)
|
||||
let target_storage_path = self.remote_object_id(from_path).with_context(|| {
|
||||
format!(
|
||||
"Failed to get the storage path for source local path '{}'",
|
||||
from_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
self.upload(from, from_size_bytes, &target_storage_path, None)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("Failed to upload data of length {from_size_bytes} to storage path {to:?}")
|
||||
format!(
|
||||
"Failed to upload from '{}' to storage path '{:?}'",
|
||||
from_path.display(),
|
||||
target_storage_path
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -208,11 +244,24 @@ impl GenericRemoteStorage {
|
||||
pub async fn download_storage_object(
|
||||
&self,
|
||||
byte_range: Option<(u64, Option<u64>)>,
|
||||
from: &RemotePath,
|
||||
to_path: &Path,
|
||||
) -> Result<Download, DownloadError> {
|
||||
let remote_object_path = self
|
||||
.remote_object_id(to_path)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to get the storage path for target local path '{}'",
|
||||
to_path.display()
|
||||
)
|
||||
})
|
||||
.map_err(DownloadError::BadInput)?;
|
||||
|
||||
match byte_range {
|
||||
Some((start, end)) => self.download_byte_range(from, start, end).await,
|
||||
None => self.download(from).await,
|
||||
Some((start, end)) => {
|
||||
self.download_byte_range(&remote_object_path, start, end)
|
||||
.await
|
||||
}
|
||||
None => self.download(&remote_object_path).await,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -222,6 +271,23 @@ impl GenericRemoteStorage {
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StorageMetadata(HashMap<String, String>);

fn strip_path_prefix<'a>(prefix: &'a Path, path: &'a Path) -> anyhow::Result<&'a Path> {
if prefix == path {
anyhow::bail!(
"Prefix and the path are equal, cannot strip: '{}'",
prefix.display()
)
} else {
path.strip_prefix(prefix).with_context(|| {
format!(
"Path '{}' is not prefixed with '{}'",
path.display(),
prefix.display(),
)
})
}
}
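Since `strip_path_prefix` is what ties the local working directory to remote object ids on both backends, a worked example may help. The paths below are made up for illustration and are not part of the diff.

#[test]
fn strip_path_prefix_example() {
    let workdir = Path::new("/data/pageserver");
    let local = Path::new("/data/pageserver/timelines/tl1/layer_1");

    // The relative remainder is what gets re-rooted under the storage root / bucket prefix.
    assert_eq!(
        strip_path_prefix(workdir, local).unwrap(),
        Path::new("timelines/tl1/layer_1")
    );

    // Equal and unrelated paths are rejected instead of being silently mapped.
    assert!(strip_path_prefix(workdir, workdir).is_err());
    assert!(strip_path_prefix(workdir, Path::new("/elsewhere/file")).is_err());
}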
|
||||
/// External backup storage configuration, enough for creating a client for that storage.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct RemoteStorageConfig {
|
||||
@@ -365,24 +431,21 @@ mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_object_name() {
|
||||
let k = RemotePath::new(Path::new("a/b/c")).unwrap();
|
||||
fn object_name() {
|
||||
let k = RemoteObjectId("a/b/c".to_owned());
|
||||
assert_eq!(k.object_name(), Some("c"));
|
||||
|
||||
let k = RemotePath::new(Path::new("a/b/c/")).unwrap();
|
||||
let k = RemoteObjectId("a/b/c/".to_owned());
|
||||
assert_eq!(k.object_name(), Some("c"));
|
||||
|
||||
let k = RemotePath::new(Path::new("a/")).unwrap();
|
||||
let k = RemoteObjectId("a/".to_owned());
|
||||
assert_eq!(k.object_name(), Some("a"));
|
||||
|
||||
// XXX is it impossible to have an empty key?
|
||||
let k = RemotePath::new(Path::new("")).unwrap();
|
||||
let k = RemoteObjectId("".to_owned());
|
||||
assert_eq!(k.object_name(), None);
|
||||
|
||||
let k = RemoteObjectId("/".to_owned());
|
||||
assert_eq!(k.object_name(), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn remote_path_cannot_be_created_from_absolute_ones() {
let err = RemotePath::new(Path::new("/")).expect_err("Should fail on absolute paths");
|
||||
assert_eq!(err.to_string(), "Path \"/\" is not relative");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
//! volume is mounted to the local FS.
|
||||
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
future::Future,
|
||||
path::{Path, PathBuf},
|
||||
pin::Pin,
|
||||
@@ -19,33 +18,60 @@ use tokio::{
|
||||
use tracing::*;
|
||||
use utils::crashsafe::path_with_suffix_extension;
|
||||
|
||||
use crate::{Download, DownloadError, RemotePath};
|
||||
use crate::{Download, DownloadError, RemoteObjectId};
|
||||
|
||||
use super::{RemoteStorage, StorageMetadata};
|
||||
use super::{strip_path_prefix, RemoteStorage, StorageMetadata};
|
||||
|
||||
const LOCAL_FS_TEMP_FILE_SUFFIX: &str = "___temp";
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
/// Convert a Path in the remote storage into a RemoteObjectId
|
||||
fn remote_object_id_from_path(path: &Path) -> anyhow::Result<RemoteObjectId> {
|
||||
Ok(RemoteObjectId(
|
||||
path.to_str()
|
||||
.ok_or_else(|| anyhow::anyhow!("unexpected characters found in path"))?
|
||||
.to_string(),
|
||||
))
|
||||
}
|
||||
|
||||
pub struct LocalFs {
|
||||
working_directory: PathBuf,
|
||||
storage_root: PathBuf,
|
||||
}
|
||||
|
||||
impl LocalFs {
|
||||
/// Attempts to create local FS storage, along with its root directory.
|
||||
/// Storage root will be created (if does not exist) and transformed into an absolute path (if passed as relative).
|
||||
pub fn new(mut storage_root: PathBuf) -> anyhow::Result<Self> {
|
||||
if !storage_root.exists() {
|
||||
std::fs::create_dir_all(&storage_root).with_context(|| {
|
||||
format!("Failed to create all directories in the given root path {storage_root:?}")
|
||||
})?;
|
||||
}
|
||||
if !storage_root.is_absolute() {
|
||||
storage_root = storage_root.canonicalize().with_context(|| {
|
||||
format!("Failed to represent path {storage_root:?} as an absolute path")
|
||||
pub fn new(root: PathBuf, working_directory: PathBuf) -> anyhow::Result<Self> {
|
||||
if !root.exists() {
|
||||
std::fs::create_dir_all(&root).with_context(|| {
|
||||
format!(
|
||||
"Failed to create all directories in the given root path '{}'",
|
||||
root.display(),
|
||||
)
|
||||
})?;
|
||||
}
|
||||
Ok(Self {
|
||||
working_directory,
|
||||
storage_root: root,
|
||||
})
|
||||
}
|
||||
|
||||
Ok(Self { storage_root })
|
||||
///
|
||||
/// Get the absolute path in the local filesystem to given remote object.
|
||||
///
|
||||
/// This is public so that it can be used in tests. Should not be used elsewhere.
|
||||
///
|
||||
pub fn resolve_in_storage(&self, remote_object_id: &RemoteObjectId) -> anyhow::Result<PathBuf> {
|
||||
let path = PathBuf::from(&remote_object_id.0);
|
||||
if path.is_relative() {
|
||||
Ok(self.storage_root.join(path))
|
||||
} else if path.starts_with(&self.storage_root) {
|
||||
Ok(path)
|
||||
} else {
|
||||
bail!(
|
||||
"Path '{}' does not belong to the current storage",
|
||||
path.display()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
async fn read_storage_metadata(
|
||||
@@ -77,48 +103,45 @@ impl LocalFs {
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl RemoteStorage for LocalFs {
|
||||
async fn list(&self) -> anyhow::Result<Vec<RemotePath>> {
|
||||
Ok(get_all_files(&self.storage_root, true)
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|path| {
|
||||
path.strip_prefix(&self.storage_root)
|
||||
.context("Failed to strip storage root prefix")
|
||||
.and_then(RemotePath::new)
|
||||
.expect(
|
||||
"We list files for storage root, hence should be able to remote the prefix",
|
||||
)
|
||||
})
|
||||
.collect())
|
||||
/// Convert a "local" path into a "remote path"
|
||||
fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<RemoteObjectId> {
|
||||
let path = self.storage_root.join(
|
||||
strip_path_prefix(&self.working_directory, local_path)
|
||||
.context("local path does not belong to this storage")?,
|
||||
);
|
||||
remote_object_id_from_path(&path)
|
||||
}
|
||||
|
||||
async fn list_prefixes(&self, prefix: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>> {
|
||||
fn local_path(&self, remote_object_id: &RemoteObjectId) -> anyhow::Result<PathBuf> {
|
||||
let storage_path = PathBuf::from(&remote_object_id.0);
|
||||
let relative_path = strip_path_prefix(&self.storage_root, &storage_path)
|
||||
.context("local path does not belong to this storage")?;
|
||||
Ok(self.working_directory.join(relative_path))
|
||||
}
|
||||
|
||||
async fn list(&self) -> anyhow::Result<Vec<RemoteObjectId>> {
|
||||
get_all_files(&self.storage_root, true).await
|
||||
}
|
||||
|
||||
async fn list_prefixes(
|
||||
&self,
|
||||
prefix: Option<&RemoteObjectId>,
|
||||
) -> anyhow::Result<Vec<RemoteObjectId>> {
|
||||
let path = match prefix {
|
||||
Some(prefix) => Cow::Owned(prefix.with_base(&self.storage_root)),
|
||||
None => Cow::Borrowed(&self.storage_root),
|
||||
Some(prefix) => Path::new(&prefix.0),
|
||||
None => &self.storage_root,
|
||||
};
|
||||
Ok(get_all_files(path.as_ref(), false)
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|path| {
|
||||
path.strip_prefix(&self.storage_root)
|
||||
.context("Failed to strip preifix")
|
||||
.and_then(RemotePath::new)
|
||||
.expect(
|
||||
"We list files for storage root, hence should be able to remote the prefix",
|
||||
)
|
||||
})
|
||||
.collect())
|
||||
get_all_files(path, false).await
|
||||
}
|
||||
|
||||
async fn upload(
&self,
data: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
data_size_bytes: usize,
to: &RemotePath,
from: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
from_size_bytes: usize,
to: &RemoteObjectId,
metadata: Option<StorageMetadata>,
) -> anyhow::Result<()> {
let target_file_path = to.with_base(&self.storage_root);
let target_file_path = self.resolve_in_storage(to)?;
create_target_directory(&target_file_path).await?;
// We need this dance with sort of durable rename (without fsyncs)
// to prevent partial uploads. This was really hit when pageserver shutdown
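The comment above (cut off by the hunk boundary) refers to the write-to-temp-then-rename pattern. Below is a stripped-down sketch of that pattern using the helpers this file already imports; the function name, the byte-slice input, and the simplified error handling are assumptions, not the actual implementation.

// Sketch only: partial writes can only ever hit the temp file, never the final path.
async fn write_then_rename(target: &Path, bytes: &[u8]) -> anyhow::Result<()> {
    let temp_path = path_with_suffix_extension(target, LOCAL_FS_TEMP_FILE_SUFFIX);
    tokio::fs::write(&temp_path, bytes).await?;   // interrupted here -> only the "___temp" file is left behind
    tokio::fs::rename(&temp_path, target).await?; // readers see either the old file or the complete new one
    Ok(())
}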
@@ -139,8 +162,8 @@ impl RemoteStorage for LocalFs {
|
||||
})?,
|
||||
);
|
||||
|
||||
let from_size_bytes = data_size_bytes as u64;
|
||||
let mut buffer_to_read = data.take(from_size_bytes);
|
||||
let from_size_bytes = from_size_bytes as u64;
|
||||
let mut buffer_to_read = from.take(from_size_bytes);
|
||||
|
||||
let bytes_read = io::copy(&mut buffer_to_read, &mut destination)
|
||||
.await
|
||||
@@ -197,22 +220,27 @@ impl RemoteStorage for LocalFs {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn download(&self, from: &RemotePath) -> Result<Download, DownloadError> {
|
||||
let target_path = from.with_base(&self.storage_root);
|
||||
if file_exists(&target_path).map_err(DownloadError::BadInput)? {
|
||||
async fn download(&self, from: &RemoteObjectId) -> Result<Download, DownloadError> {
|
||||
let file_path = self
|
||||
.resolve_in_storage(from)
|
||||
.map_err(DownloadError::BadInput)?;
|
||||
if file_exists(&file_path).map_err(DownloadError::BadInput)? {
|
||||
let source = io::BufReader::new(
|
||||
fs::OpenOptions::new()
|
||||
.read(true)
|
||||
.open(&target_path)
|
||||
.open(&file_path)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("Failed to open source file {target_path:?} to use in the download")
|
||||
format!(
|
||||
"Failed to open source file '{}' to use in the download",
|
||||
file_path.display()
|
||||
)
|
||||
})
|
||||
.map_err(DownloadError::Other)?,
|
||||
);
|
||||
|
||||
let metadata = self
|
||||
.read_storage_metadata(&target_path)
|
||||
.read_storage_metadata(&file_path)
|
||||
.await
|
||||
.map_err(DownloadError::Other)?;
|
||||
Ok(Download {
|
||||
@@ -226,7 +254,7 @@ impl RemoteStorage for LocalFs {
|
||||
|
||||
async fn download_byte_range(
|
||||
&self,
|
||||
from: &RemotePath,
|
||||
from: &RemoteObjectId,
|
||||
start_inclusive: u64,
|
||||
end_exclusive: Option<u64>,
|
||||
) -> Result<Download, DownloadError> {
|
||||
@@ -238,15 +266,20 @@ impl RemoteStorage for LocalFs {
|
||||
return Err(DownloadError::Other(anyhow::anyhow!("Invalid range, start ({start_inclusive}) and end_exclusive ({end_exclusive:?}) difference is zero bytes")));
|
||||
}
|
||||
}
|
||||
let target_path = from.with_base(&self.storage_root);
|
||||
if file_exists(&target_path).map_err(DownloadError::BadInput)? {
|
||||
let file_path = self
|
||||
.resolve_in_storage(from)
|
||||
.map_err(DownloadError::BadInput)?;
|
||||
if file_exists(&file_path).map_err(DownloadError::BadInput)? {
|
||||
let mut source = io::BufReader::new(
|
||||
fs::OpenOptions::new()
|
||||
.read(true)
|
||||
.open(&target_path)
|
||||
.open(&file_path)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("Failed to open source file {target_path:?} to use in the download")
|
||||
format!(
|
||||
"Failed to open source file '{}' to use in the download",
|
||||
file_path.display()
|
||||
)
|
||||
})
|
||||
.map_err(DownloadError::Other)?,
|
||||
);
|
||||
@@ -256,7 +289,7 @@ impl RemoteStorage for LocalFs {
|
||||
.context("Failed to seek to the range start in a local storage file")
|
||||
.map_err(DownloadError::Other)?;
|
||||
let metadata = self
|
||||
.read_storage_metadata(&target_path)
|
||||
.read_storage_metadata(&file_path)
|
||||
.await
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
@@ -275,12 +308,15 @@ impl RemoteStorage for LocalFs {
|
||||
}
|
||||
}
|
||||
|
||||
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()> {
|
||||
let file_path = path.with_base(&self.storage_root);
|
||||
async fn delete(&self, path: &RemoteObjectId) -> anyhow::Result<()> {
|
||||
let file_path = self.resolve_in_storage(path)?;
|
||||
if file_path.exists() && file_path.is_file() {
|
||||
Ok(fs::remove_file(file_path).await?)
|
||||
} else {
|
||||
bail!("File {file_path:?} either does not exist or is not a file")
|
||||
bail!(
|
||||
"File '{}' either does not exist or is not a file",
|
||||
file_path.display()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -296,7 +332,7 @@ fn storage_metadata_path(original_path: &Path) -> PathBuf {
|
||||
fn get_all_files<'a, P>(
|
||||
directory_path: P,
|
||||
recursive: bool,
|
||||
) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<PathBuf>>> + Send + Sync + 'a>>
|
||||
) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<RemoteObjectId>>> + Send + Sync + 'a>>
|
||||
where
|
||||
P: AsRef<Path> + Send + Sync + 'a,
|
||||
{
|
||||
@@ -310,20 +346,20 @@ where
|
||||
let file_type = dir_entry.file_type().await?;
|
||||
let entry_path = dir_entry.path();
|
||||
if file_type.is_symlink() {
|
||||
debug!("{entry_path:?} us a symlink, skipping")
|
||||
debug!("{:?} us a symlink, skipping", entry_path)
|
||||
} else if file_type.is_dir() {
|
||||
if recursive {
|
||||
paths.extend(get_all_files(&entry_path, true).await?.into_iter())
|
||||
} else {
|
||||
paths.push(entry_path)
|
||||
paths.push(remote_object_id_from_path(&dir_entry.path())?)
|
||||
}
|
||||
} else {
|
||||
paths.push(entry_path);
|
||||
paths.push(remote_object_id_from_path(&dir_entry.path())?);
|
||||
}
|
||||
}
|
||||
Ok(paths)
|
||||
} else {
|
||||
bail!("Path {directory_path:?} is not a directory")
|
||||
bail!("Path '{}' is not a directory", directory_path.display())
|
||||
}
|
||||
} else {
|
||||
Ok(Vec::new())
|
||||
@@ -358,6 +394,173 @@ fn file_exists(file_path: &Path) -> anyhow::Result<bool> {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod pure_tests {
|
||||
use tempfile::tempdir;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn storage_path_positive() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
|
||||
let storage_root = PathBuf::from("somewhere").join("else");
|
||||
let storage = LocalFs {
|
||||
working_directory: workdir.clone(),
|
||||
storage_root: storage_root.clone(),
|
||||
};
|
||||
|
||||
let local_path = workdir
|
||||
.join("timelines")
|
||||
.join("some_timeline")
|
||||
.join("file_name");
|
||||
let expected_path = storage_root.join(local_path.strip_prefix(&workdir)?);
|
||||
|
||||
let actual_path = PathBuf::from(
|
||||
storage
|
||||
.remote_object_id(&local_path)
|
||||
.expect("Matching path should map to storage path normally")
|
||||
.0,
|
||||
);
|
||||
assert_eq!(
|
||||
expected_path,
|
||||
actual_path,
|
||||
"File paths from workdir should be stored in local fs storage with the same path they have relative to the workdir"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn storage_path_negatives() -> anyhow::Result<()> {
|
||||
#[track_caller]
|
||||
fn storage_path_error(storage: &LocalFs, mismatching_path: &Path) -> String {
|
||||
match storage.remote_object_id(mismatching_path) {
|
||||
Ok(wrong_path) => panic!(
|
||||
"Expected path '{}' to error, but got storage path: {:?}",
|
||||
mismatching_path.display(),
|
||||
wrong_path,
|
||||
),
|
||||
Err(e) => format!("{:?}", e),
|
||||
}
|
||||
}
|
||||
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
let storage_root = PathBuf::from("somewhere").join("else");
|
||||
let storage = LocalFs {
|
||||
working_directory: workdir.clone(),
|
||||
storage_root,
|
||||
};
|
||||
|
||||
let error_string = storage_path_error(&storage, &workdir);
|
||||
assert!(error_string.contains("does not belong to this storage"));
|
||||
assert!(error_string.contains(workdir.to_str().unwrap()));
|
||||
|
||||
let mismatching_path_str = "/something/else";
|
||||
let error_message = storage_path_error(&storage, Path::new(mismatching_path_str));
|
||||
assert!(
|
||||
error_message.contains(mismatching_path_str),
|
||||
"Error should mention wrong path"
|
||||
);
|
||||
assert!(
|
||||
error_message.contains(workdir.to_str().unwrap()),
|
||||
"Error should mention server workdir"
|
||||
);
|
||||
assert!(error_message.contains("does not belong to this storage"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn local_path_positive() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
let storage_root = PathBuf::from("somewhere").join("else");
|
||||
let storage = LocalFs {
|
||||
working_directory: workdir.clone(),
|
||||
storage_root: storage_root.clone(),
|
||||
};
|
||||
|
||||
let name = "not a metadata";
|
||||
let local_path = workdir.join("timelines").join("some_timeline").join(name);
|
||||
assert_eq!(
|
||||
local_path,
|
||||
storage
|
||||
.local_path(&remote_object_id_from_path(
|
||||
&storage_root.join(local_path.strip_prefix(&workdir)?)
|
||||
)?)
|
||||
.expect("For a valid input, valid local path should be parsed"),
|
||||
"Should be able to parse metadata out of the correctly named remote delta file"
|
||||
);
|
||||
|
||||
let local_metadata_path = workdir
|
||||
.join("timelines")
|
||||
.join("some_timeline")
|
||||
.join("metadata");
|
||||
let remote_metadata_path = storage.remote_object_id(&local_metadata_path)?;
|
||||
assert_eq!(
|
||||
local_metadata_path,
|
||||
storage
|
||||
.local_path(&remote_metadata_path)
|
||||
.expect("For a valid input, valid local path should be parsed"),
|
||||
"Should be able to parse metadata out of the correctly named remote metadata file"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn local_path_negatives() -> anyhow::Result<()> {
|
||||
#[track_caller]
|
||||
fn local_path_error(storage: &LocalFs, storage_path: &RemoteObjectId) -> String {
|
||||
match storage.local_path(storage_path) {
|
||||
Ok(wrong_path) => panic!(
|
||||
"Expected local path input {:?} to cause an error, but got file path: {:?}",
|
||||
storage_path, wrong_path,
|
||||
),
|
||||
Err(e) => format!("{:?}", e),
|
||||
}
|
||||
}
|
||||
|
||||
let storage_root = PathBuf::from("somewhere").join("else");
|
||||
let storage = LocalFs {
|
||||
working_directory: tempdir()?.path().to_owned(),
|
||||
storage_root,
|
||||
};
|
||||
|
||||
let totally_wrong_path = "wrong_wrong_wrong";
|
||||
let error_message =
|
||||
local_path_error(&storage, &RemoteObjectId(totally_wrong_path.to_string()));
|
||||
assert!(error_message.contains(totally_wrong_path));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn download_destination_matches_original_path() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
let original_path = workdir
|
||||
.join("timelines")
|
||||
.join("some_timeline")
|
||||
.join("some name");
|
||||
|
||||
let storage_root = PathBuf::from("somewhere").join("else");
|
||||
let dummy_storage = LocalFs {
|
||||
working_directory: workdir,
|
||||
storage_root,
|
||||
};
|
||||
|
||||
let storage_path = dummy_storage.remote_object_id(&original_path)?;
|
||||
let download_destination = dummy_storage.local_path(&storage_path)?;
|
||||
|
||||
assert_eq!(
|
||||
original_path, download_destination,
|
||||
"'original path -> storage path -> matching fs path' transformation should produce the same path as the input one for the correct path"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod fs_tests {
|
||||
use super::*;
|
||||
@@ -369,7 +572,7 @@ mod fs_tests {
|
||||
storage: &LocalFs,
|
||||
#[allow(clippy::ptr_arg)]
|
||||
// have to use &PathBuf due to `storage.local_path` parameter requirements
|
||||
remote_storage_path: &RemotePath,
|
||||
remote_storage_path: &RemoteObjectId,
|
||||
expected_metadata: Option<&StorageMetadata>,
|
||||
) -> anyhow::Result<String> {
|
||||
let mut download = storage
|
||||
@@ -392,16 +595,41 @@ mod fs_tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn upload_file() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
let storage = create_storage()?;
|
||||
|
||||
let target_path_1 = upload_dummy_file(&storage, "upload_1", None).await?;
|
||||
let (file, size) = create_file_for_upload(
|
||||
&storage.working_directory.join("whatever"),
|
||||
"whatever_contents",
|
||||
)
|
||||
.await?;
|
||||
let target_path = "/somewhere/else";
|
||||
match storage
|
||||
.upload(
|
||||
Box::new(file),
|
||||
size,
|
||||
&RemoteObjectId(target_path.to_string()),
|
||||
None,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(()) => panic!("Should not allow storing files with wrong target path"),
|
||||
Err(e) => {
|
||||
let message = format!("{:?}", e);
|
||||
assert!(message.contains(target_path));
|
||||
assert!(message.contains("does not belong to the current storage"));
|
||||
}
|
||||
}
|
||||
assert!(storage.list().await?.is_empty());
|
||||
|
||||
let target_path_1 = upload_dummy_file(&workdir, &storage, "upload_1", None).await?;
|
||||
assert_eq!(
|
||||
storage.list().await?,
|
||||
vec![target_path_1.clone()],
|
||||
"Should list a single file after first upload"
|
||||
);
|
||||
|
||||
let target_path_2 = upload_dummy_file(&storage, "upload_2", None).await?;
|
||||
let target_path_2 = upload_dummy_file(&workdir, &storage, "upload_2", None).await?;
|
||||
assert_eq!(
|
||||
list_files_sorted(&storage).await?,
|
||||
vec![target_path_1.clone(), target_path_2.clone()],
|
||||
@@ -415,7 +643,7 @@ mod fs_tests {
|
||||
async fn upload_file_negatives() -> anyhow::Result<()> {
|
||||
let storage = create_storage()?;
|
||||
|
||||
let id = RemotePath::new(Path::new("dummy"))?;
|
||||
let id = storage.remote_object_id(&storage.working_directory.join("dummy"))?;
|
||||
let content = std::io::Cursor::new(b"12345");
|
||||
|
||||
// Check that you get an error if the size parameter doesn't match the actual
|
||||
@@ -440,14 +668,16 @@ mod fs_tests {
|
||||
}
|
||||
|
||||
fn create_storage() -> anyhow::Result<LocalFs> {
|
||||
LocalFs::new(tempdir()?.path().to_owned())
|
||||
LocalFs::new(tempdir()?.path().to_owned(), tempdir()?.path().to_owned())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn download_file() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
|
||||
let storage = create_storage()?;
|
||||
let upload_name = "upload_1";
|
||||
let upload_target = upload_dummy_file(&storage, upload_name, None).await?;
|
||||
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
|
||||
|
||||
let contents = read_and_assert_remote_file_contents(&storage, &upload_target, None).await?;
|
||||
assert_eq!(
|
||||
@@ -457,7 +687,7 @@ mod fs_tests {
|
||||
);
|
||||
|
||||
let non_existing_path = "somewhere/else";
|
||||
match storage.download(&RemotePath::new(Path::new(non_existing_path))?).await {
|
||||
match storage.download(&RemoteObjectId(non_existing_path.to_string())).await {
|
||||
Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys
|
||||
other => panic!("Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"),
|
||||
}
|
||||
@@ -466,9 +696,11 @@ mod fs_tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn download_file_range_positive() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
|
||||
let storage = create_storage()?;
|
||||
let upload_name = "upload_1";
|
||||
let upload_target = upload_dummy_file(&storage, upload_name, None).await?;
|
||||
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
|
||||
|
||||
let full_range_download_contents =
|
||||
read_and_assert_remote_file_contents(&storage, &upload_target, None).await?;
|
||||
@@ -534,9 +766,11 @@ mod fs_tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn download_file_range_negative() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
|
||||
let storage = create_storage()?;
|
||||
let upload_name = "upload_1";
|
||||
let upload_target = upload_dummy_file(&storage, upload_name, None).await?;
|
||||
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
|
||||
|
||||
let start = 1_000_000_000;
|
||||
let end = start + 1;
|
||||
@@ -578,9 +812,11 @@ mod fs_tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn delete_file() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
|
||||
let storage = create_storage()?;
|
||||
let upload_name = "upload_1";
|
||||
let upload_target = upload_dummy_file(&storage, upload_name, None).await?;
|
||||
let upload_target = upload_dummy_file(&workdir, &storage, upload_name, None).await?;
|
||||
|
||||
storage.delete(&upload_target).await?;
|
||||
assert!(storage.list().await?.is_empty());
|
||||
@@ -590,8 +826,7 @@ mod fs_tests {
|
||||
Err(e) => {
|
||||
let error_string = e.to_string();
|
||||
assert!(error_string.contains("does not exist"));
|
||||
let expected_path = upload_target.with_base(&storage.storage_root);
|
||||
assert!(error_string.contains(expected_path.to_str().unwrap()));
|
||||
assert!(error_string.contains(&upload_target.0));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
@@ -599,6 +834,8 @@ mod fs_tests {
|
||||
|
||||
#[tokio::test]
|
||||
async fn file_with_metadata() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
|
||||
let storage = create_storage()?;
|
||||
let upload_name = "upload_1";
|
||||
let metadata = StorageMetadata(HashMap::from([
|
||||
@@ -606,7 +843,7 @@ mod fs_tests {
|
||||
("two".to_string(), "2".to_string()),
|
||||
]));
|
||||
let upload_target =
|
||||
upload_dummy_file(&storage, upload_name, Some(metadata.clone())).await?;
|
||||
upload_dummy_file(&workdir, &storage, upload_name, Some(metadata.clone())).await?;
|
||||
|
||||
let full_range_download_contents =
|
||||
read_and_assert_remote_file_contents(&storage, &upload_target, Some(&metadata)).await?;
|
||||
@@ -646,32 +883,23 @@ mod fs_tests {
|
||||
}
|
||||
|
||||
async fn upload_dummy_file(
|
||||
workdir: &Path,
|
||||
storage: &LocalFs,
|
||||
name: &str,
|
||||
metadata: Option<StorageMetadata>,
|
||||
) -> anyhow::Result<RemotePath> {
|
||||
let from_path = storage
|
||||
.storage_root
|
||||
.join("timelines")
|
||||
.join("some_timeline")
|
||||
.join(name);
|
||||
) -> anyhow::Result<RemoteObjectId> {
|
||||
let timeline_path = workdir.join("timelines").join("some_timeline");
|
||||
let relative_timeline_path = timeline_path.strip_prefix(&workdir)?;
|
||||
let storage_path = storage.storage_root.join(relative_timeline_path).join(name);
|
||||
let remote_object_id = RemoteObjectId(storage_path.to_str().unwrap().to_string());
|
||||
|
||||
let from_path = storage.working_directory.join(name);
|
||||
let (file, size) = create_file_for_upload(&from_path, &dummy_contents(name)).await?;
|
||||
|
||||
let relative_path = from_path
|
||||
.strip_prefix(&storage.storage_root)
|
||||
.context("Failed to strip storage root prefix")
|
||||
.and_then(RemotePath::new)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to resolve remote part of path {:?} for base {:?}",
|
||||
from_path, storage.storage_root
|
||||
)
|
||||
})?;
|
||||
|
||||
storage
|
||||
.upload(Box::new(file), size, &relative_path, metadata)
|
||||
.upload(Box::new(file), size, &remote_object_id, metadata)
|
||||
.await?;
|
||||
Ok(relative_path)
|
||||
remote_object_id_from_path(&storage_path)
|
||||
}
|
||||
|
||||
async fn create_file_for_upload(
|
||||
@@ -696,7 +924,7 @@ mod fs_tests {
|
||||
format!("contents for {name}")
|
||||
}
|
||||
|
||||
async fn list_files_sorted(storage: &LocalFs) -> anyhow::Result<Vec<RemotePath>> {
|
||||
async fn list_files_sorted(storage: &LocalFs) -> anyhow::Result<Vec<RemoteObjectId>> {
|
||||
let mut files = storage.list().await?;
|
||||
files.sort_by(|a, b| a.0.cmp(&b.0));
|
||||
Ok(files)
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
//! their bucket prefixes are both specified and different.
|
||||
|
||||
use std::env::var;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
@@ -28,7 +29,8 @@ use tracing::debug;
|
||||
|
||||
use super::StorageMetadata;
|
||||
use crate::{
|
||||
Download, DownloadError, RemotePath, RemoteStorage, S3Config, REMOTE_STORAGE_PREFIX_SEPARATOR,
|
||||
strip_path_prefix, Download, DownloadError, RemoteObjectId, RemoteStorage, S3Config,
|
||||
REMOTE_STORAGE_PREFIX_SEPARATOR,
|
||||
};
|
||||
|
||||
const DEFAULT_IMDS_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
@@ -98,8 +100,31 @@ pub(super) mod metrics {
|
||||
}
|
||||
}
|
||||
|
||||
fn download_destination(
id: &RemoteObjectId,
workdir: &Path,
prefix_to_strip: Option<&str>,
) -> PathBuf {
let path_without_prefix = match prefix_to_strip {
Some(prefix) => id.0.strip_prefix(prefix).unwrap_or_else(|| {
panic!(
"Could not strip prefix '{}' from S3 object key '{}'",
prefix, id.0
)
}),
None => &id.0,
};

workdir.join(
path_without_prefix
.split(REMOTE_STORAGE_PREFIX_SEPARATOR)
.collect::<PathBuf>(),
)
}
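A worked example of the key-to-path mapping above, with made-up values (not part of the diff): the bucket prefix is stripped and the remaining key segments are re-joined under the workdir.

#[test]
fn download_destination_example() {
    let workdir = Path::new("/data/pageserver");
    let id = RemoteObjectId("pageserver/timelines/tl1/layer_1".to_string());

    assert_eq!(
        download_destination(&id, workdir, Some("pageserver/")),
        PathBuf::from("/data/pageserver/timelines/tl1/layer_1")
    );
}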
|
||||
/// AWS S3 storage.
|
||||
pub struct S3Bucket {
|
||||
workdir: PathBuf,
|
||||
client: Client,
|
||||
bucket_name: String,
|
||||
prefix_in_bucket: Option<String>,
|
||||
@@ -117,7 +142,7 @@ struct GetObjectRequest {
|
||||
}
|
||||
impl S3Bucket {
|
||||
/// Creates the S3 storage, errors if incorrect AWS S3 configuration provided.
|
||||
pub fn new(aws_config: &S3Config) -> anyhow::Result<Self> {
|
||||
pub fn new(aws_config: &S3Config, workdir: PathBuf) -> anyhow::Result<Self> {
|
||||
debug!(
|
||||
"Creating s3 remote storage for S3 bucket {}",
|
||||
aws_config.bucket_name
|
||||
@@ -171,39 +196,13 @@ impl S3Bucket {
|
||||
});
|
||||
Ok(Self {
|
||||
client,
|
||||
workdir,
|
||||
bucket_name: aws_config.bucket_name.clone(),
|
||||
prefix_in_bucket,
|
||||
concurrency_limiter: Semaphore::new(aws_config.concurrency_limit.get()),
|
||||
})
|
||||
}
|
||||
|
||||
fn s3_object_to_relative_path(&self, key: &str) -> RemotePath {
|
||||
let relative_path =
|
||||
match key.strip_prefix(self.prefix_in_bucket.as_deref().unwrap_or_default()) {
|
||||
Some(stripped) => stripped,
|
||||
// we rely on AWS to return properly prefixed paths
|
||||
// for requests with a certain prefix
|
||||
None => panic!(
|
||||
"Key {} does not start with bucket prefix {:?}",
|
||||
key, self.prefix_in_bucket
|
||||
),
|
||||
};
|
||||
RemotePath(
|
||||
relative_path
|
||||
.split(REMOTE_STORAGE_PREFIX_SEPARATOR)
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
|
||||
fn relative_path_to_s3_object(&self, path: &RemotePath) -> String {
|
||||
let mut full_path = self.prefix_in_bucket.clone().unwrap_or_default();
|
||||
for segment in path.0.iter() {
|
||||
full_path.push(REMOTE_STORAGE_PREFIX_SEPARATOR);
|
||||
full_path.push_str(segment.to_str().unwrap_or_default());
|
||||
}
|
||||
full_path
|
||||
}
|
||||
|
||||
async fn download_object(&self, request: GetObjectRequest) -> Result<Download, DownloadError> {
|
||||
let _guard = self
|
||||
.concurrency_limiter
|
||||
@@ -253,7 +252,25 @@ impl S3Bucket {
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl RemoteStorage for S3Bucket {
|
||||
async fn list(&self) -> anyhow::Result<Vec<RemotePath>> {
|
||||
fn remote_object_id(&self, local_path: &Path) -> anyhow::Result<RemoteObjectId> {
|
||||
let relative_path = strip_path_prefix(&self.workdir, local_path)?;
|
||||
let mut key = self.prefix_in_bucket.clone().unwrap_or_default();
|
||||
for segment in relative_path {
|
||||
key.push(REMOTE_STORAGE_PREFIX_SEPARATOR);
|
||||
key.push_str(&segment.to_string_lossy());
|
||||
}
|
||||
Ok(RemoteObjectId(key))
|
||||
}
|
||||
|
||||
fn local_path(&self, storage_path: &RemoteObjectId) -> anyhow::Result<PathBuf> {
|
||||
Ok(download_destination(
|
||||
storage_path,
|
||||
&self.workdir,
|
||||
self.prefix_in_bucket.as_deref(),
|
||||
))
|
||||
}
|
||||
|
||||
async fn list(&self) -> anyhow::Result<Vec<RemoteObjectId>> {
|
||||
let mut document_keys = Vec::new();
|
||||
|
||||
let mut continuation_token = None;
|
||||
@@ -283,7 +300,7 @@ impl RemoteStorage for S3Bucket {
|
||||
.contents
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.filter_map(|o| Some(self.s3_object_to_relative_path(o.key()?))),
|
||||
.filter_map(|o| Some(RemoteObjectId(o.key?))),
|
||||
);
|
||||
|
||||
match fetch_response.continuation_token {
|
||||
@@ -297,10 +314,13 @@ impl RemoteStorage for S3Bucket {
|
||||
|
||||
/// See the doc for `RemoteStorage::list_prefixes`
|
||||
/// Note: it won't include empty "directories"
async fn list_prefixes(&self, prefix: Option<&RemotePath>) -> anyhow::Result<Vec<RemotePath>> {
|
||||
async fn list_prefixes(
|
||||
&self,
|
||||
prefix: Option<&RemoteObjectId>,
|
||||
) -> anyhow::Result<Vec<RemoteObjectId>> {
|
||||
// get the passed prefix or if it is not set use prefix_in_bucket value
|
||||
let list_prefix = prefix
|
||||
.map(|p| self.relative_path_to_s3_object(p))
|
||||
.map(|p| p.0.clone())
|
||||
.or_else(|| self.prefix_in_bucket.clone())
|
||||
.map(|mut p| {
|
||||
// required to end with a separator
|
||||
@@ -342,7 +362,7 @@ impl RemoteStorage for S3Bucket {
|
||||
.common_prefixes
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.filter_map(|o| Some(self.s3_object_to_relative_path(o.prefix()?))),
|
||||
.filter_map(|o| Some(RemoteObjectId(o.prefix?))),
|
||||
);
|
||||
|
||||
match fetch_response.continuation_token {
|
||||
@@ -358,7 +378,7 @@ impl RemoteStorage for S3Bucket {
|
||||
&self,
|
||||
from: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
|
||||
from_size_bytes: usize,
|
||||
to: &RemotePath,
|
||||
to: &RemoteObjectId,
|
||||
metadata: Option<StorageMetadata>,
|
||||
) -> anyhow::Result<()> {
|
||||
let _guard = self
|
||||
@@ -375,7 +395,7 @@ impl RemoteStorage for S3Bucket {
|
||||
self.client
|
||||
.put_object()
|
||||
.bucket(self.bucket_name.clone())
|
||||
.key(self.relative_path_to_s3_object(to))
|
||||
.key(to.0.to_owned())
|
||||
.set_metadata(metadata.map(|m| m.0))
|
||||
.content_length(from_size_bytes.try_into()?)
|
||||
.body(bytes_stream)
|
||||
@@ -388,10 +408,10 @@ impl RemoteStorage for S3Bucket {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn download(&self, from: &RemotePath) -> Result<Download, DownloadError> {
|
||||
async fn download(&self, from: &RemoteObjectId) -> Result<Download, DownloadError> {
|
||||
self.download_object(GetObjectRequest {
|
||||
bucket: self.bucket_name.clone(),
|
||||
key: self.relative_path_to_s3_object(from),
|
||||
key: from.0.to_owned(),
|
||||
..GetObjectRequest::default()
|
||||
})
|
||||
.await
|
||||
@@ -399,7 +419,7 @@ impl RemoteStorage for S3Bucket {
|
||||
|
||||
async fn download_byte_range(
|
||||
&self,
|
||||
from: &RemotePath,
|
||||
from: &RemoteObjectId,
|
||||
start_inclusive: u64,
|
||||
end_exclusive: Option<u64>,
|
||||
) -> Result<Download, DownloadError> {
|
||||
@@ -407,19 +427,19 @@ impl RemoteStorage for S3Bucket {
|
||||
// and needs both ends to be exclusive
|
||||
let end_inclusive = end_exclusive.map(|end| end.saturating_sub(1));
|
||||
let range = Some(match end_inclusive {
|
||||
Some(end_inclusive) => format!("bytes={start_inclusive}-{end_inclusive}"),
|
||||
None => format!("bytes={start_inclusive}-"),
|
||||
Some(end_inclusive) => format!("bytes={}-{}", start_inclusive, end_inclusive),
|
||||
None => format!("bytes={}-", start_inclusive),
|
||||
});
|
||||
|
||||
self.download_object(GetObjectRequest {
|
||||
bucket: self.bucket_name.clone(),
|
||||
key: self.relative_path_to_s3_object(from),
|
||||
key: from.0.to_owned(),
|
||||
range,
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()> {
|
||||
async fn delete(&self, remote_object_id: &RemoteObjectId) -> anyhow::Result<()> {
|
||||
let _guard = self
|
||||
.concurrency_limiter
|
||||
.acquire()
|
||||
@@ -431,7 +451,7 @@ impl RemoteStorage for S3Bucket {
|
||||
self.client
|
||||
.delete_object()
|
||||
.bucket(self.bucket_name.clone())
|
||||
.key(self.relative_path_to_s3_object(path))
|
||||
.key(remote_object_id.0.to_owned())
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
@@ -441,3 +461,181 @@ impl RemoteStorage for S3Bucket {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use tempfile::tempdir;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_download_destination() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
let local_path = workdir.join("one").join("two").join("test_name");
|
||||
let relative_path = local_path.strip_prefix(&workdir)?;
|
||||
|
||||
let key = RemoteObjectId(format!(
|
||||
"{}{}",
|
||||
REMOTE_STORAGE_PREFIX_SEPARATOR,
|
||||
relative_path
|
||||
.iter()
|
||||
.map(|segment| segment.to_str().unwrap())
|
||||
.collect::<Vec<_>>()
|
||||
.join(&REMOTE_STORAGE_PREFIX_SEPARATOR.to_string()),
|
||||
));
|
||||
|
||||
assert_eq!(
|
||||
local_path,
|
||||
download_destination(&key, &workdir, None),
|
||||
"Download destination should consist of s3 path joined with the workdir prefix"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn storage_path_positive() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
|
||||
let segment_1 = "matching";
|
||||
let segment_2 = "file";
|
||||
let local_path = &workdir.join(segment_1).join(segment_2);
|
||||
|
||||
let storage = dummy_storage(workdir);
|
||||
|
||||
let expected_key = RemoteObjectId(format!(
|
||||
"{}{REMOTE_STORAGE_PREFIX_SEPARATOR}{segment_1}{REMOTE_STORAGE_PREFIX_SEPARATOR}{segment_2}",
|
||||
storage.prefix_in_bucket.as_deref().unwrap_or_default(),
|
||||
));
|
||||
|
||||
let actual_key = storage
|
||||
.remote_object_id(local_path)
|
||||
.expect("Matching path should map to S3 path normally");
|
||||
assert_eq!(
|
||||
expected_key,
|
||||
actual_key,
|
||||
"S3 key from the matching path should contain all segments after the workspace prefix, separated with S3 separator"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn storage_path_negatives() -> anyhow::Result<()> {
|
||||
#[track_caller]
|
||||
fn storage_path_error(storage: &S3Bucket, mismatching_path: &Path) -> String {
|
||||
match storage.remote_object_id(mismatching_path) {
|
||||
Ok(wrong_key) => panic!(
|
||||
"Expected path '{}' to error, but got S3 key: {:?}",
|
||||
mismatching_path.display(),
|
||||
wrong_key,
|
||||
),
|
||||
Err(e) => e.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
let storage = dummy_storage(workdir.clone());
|
||||
|
||||
let error_message = storage_path_error(&storage, &workdir);
|
||||
assert!(
|
||||
error_message.contains("Prefix and the path are equal"),
|
||||
"Message '{}' does not contain the required string",
|
||||
error_message
|
||||
);
|
||||
|
||||
let mismatching_path = PathBuf::from("somewhere").join("else");
|
||||
let error_message = storage_path_error(&storage, &mismatching_path);
|
||||
assert!(
|
||||
error_message.contains(mismatching_path.to_str().unwrap()),
|
||||
"Error should mention wrong path"
|
||||
);
|
||||
assert!(
|
||||
error_message.contains(workdir.to_str().unwrap()),
|
||||
"Error should mention server workdir"
|
||||
);
|
||||
assert!(
|
||||
error_message.contains("is not prefixed with"),
|
||||
"Message '{}' does not contain a required string",
|
||||
error_message
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn local_path_positive() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
let storage = dummy_storage(workdir.clone());
|
||||
let timeline_dir = workdir.join("timelines").join("test_timeline");
|
||||
let relative_timeline_path = timeline_dir.strip_prefix(&workdir)?;
|
||||
|
||||
let s3_key = create_s3_key(
|
||||
&relative_timeline_path.join("not a metadata"),
|
||||
storage.prefix_in_bucket.as_deref(),
|
||||
);
|
||||
assert_eq!(
|
||||
download_destination(&s3_key, &workdir, storage.prefix_in_bucket.as_deref()),
|
||||
storage
|
||||
.local_path(&s3_key)
|
||||
.expect("For a valid input, valid S3 info should be parsed"),
|
||||
"Should be able to parse metadata out of the correctly named remote delta file"
|
||||
);
|
||||
|
||||
let s3_key = create_s3_key(
|
||||
&relative_timeline_path.join("metadata"),
|
||||
storage.prefix_in_bucket.as_deref(),
|
||||
);
|
||||
assert_eq!(
|
||||
download_destination(&s3_key, &workdir, storage.prefix_in_bucket.as_deref()),
|
||||
storage
|
||||
.local_path(&s3_key)
|
||||
.expect("For a valid input, valid S3 info should be parsed"),
|
||||
"Should be able to parse metadata out of the correctly named remote metadata file"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn download_destination_matches_original_path() -> anyhow::Result<()> {
|
||||
let workdir = tempdir()?.path().to_owned();
|
||||
let original_path = workdir
|
||||
.join("timelines")
|
||||
.join("some_timeline")
|
||||
.join("some name");
|
||||
|
||||
let dummy_storage = dummy_storage(workdir);
|
||||
|
||||
let key = dummy_storage.remote_object_id(&original_path)?;
|
||||
let download_destination = dummy_storage.local_path(&key)?;
|
||||
|
||||
assert_eq!(
|
||||
original_path, download_destination,
|
||||
"'original path -> storage key -> matching fs path' transformation should produce the same path as the input one for the correct path"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn dummy_storage(workdir: PathBuf) -> S3Bucket {
|
||||
S3Bucket {
|
||||
workdir,
|
||||
client: Client::new(&aws_config::SdkConfig::builder().build()),
|
||||
bucket_name: "dummy-bucket".to_string(),
|
||||
prefix_in_bucket: Some("dummy_prefix/".to_string()),
|
||||
concurrency_limiter: Semaphore::new(1),
|
||||
}
|
||||
}
|
||||
|
||||
fn create_s3_key(relative_file_path: &Path, prefix: Option<&str>) -> RemoteObjectId {
|
||||
RemoteObjectId(relative_file_path.iter().fold(
|
||||
prefix.unwrap_or_default().to_string(),
|
||||
|mut path_string, segment| {
|
||||
path_string.push(REMOTE_STORAGE_PREFIX_SEPARATOR);
|
||||
path_string.push_str(segment.to_str().unwrap());
|
||||
path_string
|
||||
},
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,7 +4,6 @@ version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
sentry = "0.29.0"
|
||||
async-trait = "0.1"
|
||||
anyhow = "1.0"
|
||||
bincode = "1.3"
|
||||
|
||||
@@ -34,7 +34,6 @@ pub mod sock_split;
|
||||
pub mod logging;
|
||||
|
||||
pub mod lock_file;
|
||||
pub mod pid_file;
|
||||
|
||||
// Misc
|
||||
pub mod accum;
|
||||
@@ -47,7 +46,6 @@ pub mod tcp_listener;
|
||||
pub mod nonblock;
|
||||
|
||||
// Default signal handling
|
||||
pub mod sentry_init;
|
||||
pub mod signals;
|
||||
|
||||
pub mod fs_ext;
|
||||
|
||||
@@ -1,133 +1,81 @@
|
||||
//! A module to create and read lock files.
//! A module to create and read lock files. A lock file ensures that only one
//! process is running at a time, in a particular directory.
//!
//! File locking is done using [`fcntl::flock`] exclusive locks.
//! The only consumer of this module is currently [`pid_file`].
//! See the module-level comment there for potential pitfalls
//! with lock files that are used to store PIDs (pidfiles).
//! File locking is done using [`fcntl::flock`], which means that holding the
//! lock on a file only prevents acquiring another lock on it; all other
//! operations are still possible on the file. Other processes can still open, read,
//! write, or remove the file, for example.
//! If the file is removed while a process is holding a lock on it,
//! the process that holds the lock does not get any error or notification.
//! Furthermore, you can create a new file with the same name and lock the new file,
//! while the old process is still running.
//! Deleting the lock file while the locking process is still running is a bad idea!

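A minimal usage sketch for the `create_lock_file` API defined further down in this file; the wrapper function, its name, and the error messages are invented for illustration.

fn claim(path: &std::path::Path) -> anyhow::Result<std::fs::File> {
    match create_lock_file(path, format!("{}\n", std::process::id())) {
        LockCreationResult::Created { file, .. } => {
            // Keep `file` (and with it the flock) alive for the whole process lifetime,
            // e.g. via Box::leak, as the doc comment on create_lock_file suggests.
            Ok(file)
        }
        LockCreationResult::AlreadyLocked { existing_lock_contents } => {
            anyhow::bail!("lock file is already held, contents: {existing_lock_contents}")
        }
        LockCreationResult::CreationFailed(e) => Err(e),
    }
}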
use std::{
|
||||
fs,
|
||||
io::{Read, Write},
|
||||
ops::Deref,
|
||||
os::unix::prelude::AsRawFd,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
use std::{fs, os::unix::prelude::AsRawFd, path::Path};
|
||||
|
||||
use anyhow::Context;
|
||||
use nix::{errno::Errno::EAGAIN, fcntl};
|
||||
use nix::fcntl;
|
||||
|
||||
use crate::crashsafe;
|
||||
|
||||
/// A handle to an open and unlocked, but not-yet-written lock file.
|
||||
/// Returned by [`create_exclusive`].
|
||||
#[must_use]
|
||||
pub struct UnwrittenLockFile {
|
||||
path: PathBuf,
|
||||
file: fs::File,
|
||||
pub enum LockCreationResult {
|
||||
Created {
|
||||
new_lock_contents: String,
|
||||
file: fs::File,
|
||||
},
|
||||
AlreadyLocked {
|
||||
existing_lock_contents: String,
|
||||
},
|
||||
CreationFailed(anyhow::Error),
|
||||
}
|
||||
|
||||
/// Returned by [`UnwrittenLockFile::write_content`].
|
||||
#[must_use]
|
||||
pub struct LockFileGuard(fs::File);
|
||||
|
||||
impl Deref for LockFileGuard {
|
||||
type Target = fs::File;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl UnwrittenLockFile {
|
||||
/// Replace the content of this lock file with the byte representation of `contents`.
|
||||
pub fn write_content(mut self, contents: String) -> anyhow::Result<LockFileGuard> {
|
||||
self.file
|
||||
.set_len(0)
|
||||
.context("Failed to truncate lockfile")?;
|
||||
self.file
|
||||
.write_all(contents.as_bytes())
|
||||
.with_context(|| format!("Failed to write '{contents}' contents into lockfile"))?;
|
||||
crashsafe::fsync_file_and_parent(&self.path).context("fsync lockfile")?;
|
||||
Ok(LockFileGuard(self.file))
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates and opens a lock file in the path, grabs an exclusive flock on it, and returns
|
||||
/// a handle that allows overwriting the locked file's content.
|
||||
///
|
||||
/// The exclusive lock is released when dropping the returned handle.
|
||||
///
|
||||
/// It is not an error if the file already exists.
|
||||
/// It is an error if the file is already locked.
|
||||
pub fn create_exclusive(lock_file_path: &Path) -> anyhow::Result<UnwrittenLockFile> {
|
||||
let lock_file = fs::OpenOptions::new()
|
||||
/// Creates a lock file in the path given and writes the given contents into the file.
|
||||
/// Note: The lock is automatically released when the file is closed. You might want to use Box::leak to make sure it lives until the end of the program.
pub fn create_lock_file(lock_file_path: &Path, contents: String) -> LockCreationResult {
|
||||
let lock_file = match fs::OpenOptions::new()
|
||||
.create(true) // O_CREAT
|
||||
.write(true)
|
||||
.open(lock_file_path)
|
||||
.context("open lock file")?;
|
||||
|
||||
let res = fcntl::flock(
|
||||
lock_file.as_raw_fd(),
|
||||
fcntl::FlockArg::LockExclusiveNonblock,
|
||||
);
|
||||
match res {
|
||||
Ok(()) => Ok(UnwrittenLockFile {
|
||||
path: lock_file_path.to_owned(),
|
||||
file: lock_file,
|
||||
}),
|
||||
Err(EAGAIN) => anyhow::bail!("file is already locked"),
|
||||
Err(e) => Err(e).context("flock error"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returned by [`read_and_hold_lock_file`].
|
||||
/// Check out the [`pid_file`] module for what the variants mean
|
||||
/// and potential caveats if the lock files are used to store PIDs.
pub enum LockFileRead {
|
||||
/// No file exists at the given path.
|
||||
NotExist,
|
||||
/// No other process held the lock file, so we grabbed an flock
|
||||
/// on it and read its contents.
|
||||
/// Release the flock by dropping the [`LockFileGuard`].
|
||||
NotHeldByAnyProcess(LockFileGuard, String),
|
||||
/// The file exists but another process was holding an flock on it.
|
||||
LockedByOtherProcess {
|
||||
not_locked_file: fs::File,
|
||||
content: String,
|
||||
},
|
||||
}
|
||||
|
||||
/// Open & try to lock the lock file at the given `path`, returning a [handle][`LockFileRead`] to
|
||||
/// inspect its content. It is not an `Err(...)` if the file does not exist or is already locked.
|
||||
/// Check the [`LockFileRead`] variants for details.
|
||||
pub fn read_and_hold_lock_file(path: &Path) -> anyhow::Result<LockFileRead> {
|
||||
let res = fs::OpenOptions::new().read(true).open(path);
|
||||
let mut lock_file = match res {
|
||||
Ok(f) => f,
|
||||
Err(e) => match e.kind() {
|
||||
std::io::ErrorKind::NotFound => return Ok(LockFileRead::NotExist),
|
||||
_ => return Err(e).context("open lock file"),
|
||||
},
|
||||
.context("Failed to open lock file")
|
||||
{
|
||||
Ok(file) => file,
|
||||
Err(e) => return LockCreationResult::CreationFailed(e),
|
||||
};
|
||||
let res = fcntl::flock(
|
||||
|
||||
match fcntl::flock(
|
||||
lock_file.as_raw_fd(),
|
||||
fcntl::FlockArg::LockExclusiveNonblock,
|
||||
);
|
||||
// We need the content regardless of lock success / failure.
|
||||
// But, read it after flock so that, if it succeeded, the content is consistent.
|
||||
let mut content = String::new();
|
||||
lock_file
|
||||
.read_to_string(&mut content)
|
||||
.context("read lock file")?;
|
||||
match res {
|
||||
Ok(()) => Ok(LockFileRead::NotHeldByAnyProcess(
|
||||
LockFileGuard(lock_file),
|
||||
content,
|
||||
)),
|
||||
Err(EAGAIN) => Ok(LockFileRead::LockedByOtherProcess {
|
||||
not_locked_file: lock_file,
|
||||
content,
|
||||
}),
|
||||
Err(e) => Err(e).context("flock error"),
|
||||
) {
|
||||
Ok(()) => {
|
||||
match lock_file
|
||||
.set_len(0)
|
||||
.context("Failed to truncate lockfile")
|
||||
.and_then(|()| {
|
||||
fs::write(lock_file_path, &contents).with_context(|| {
|
||||
format!("Failed to write '{contents}' contents into lockfile")
|
||||
})
|
||||
})
|
||||
.and_then(|()| {
|
||||
crashsafe::fsync_file_and_parent(lock_file_path)
|
||||
.context("Failed to fsync lockfile")
|
||||
}) {
|
||||
Ok(()) => LockCreationResult::Created {
|
||||
new_lock_contents: contents,
|
||||
file: lock_file,
|
||||
},
|
||||
Err(e) => LockCreationResult::CreationFailed(e),
|
||||
}
|
||||
}
|
||||
Err(nix::errno::Errno::EAGAIN) => {
|
||||
match fs::read_to_string(lock_file_path).context("Failed to read lockfile contents") {
|
||||
Ok(existing_lock_contents) => LockCreationResult::AlreadyLocked {
|
||||
existing_lock_contents,
|
||||
},
|
||||
Err(e) => LockCreationResult::CreationFailed(e),
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
LockCreationResult::CreationFailed(anyhow::anyhow!("Failed to lock lockfile: {e}"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,165 +0,0 @@
|
||||
//! Abstraction to create & read pidfiles.
|
||||
//!
|
||||
//! A pidfile is a file in the filesystem that stores a process's PID.
|
||||
//! Its purpose is to implement a singleton behavior where only
|
||||
//! one process of some "kind" is supposed to be running at a given time.
|
||||
//! The "kind" is identified by the pidfile.
|
||||
//!
|
||||
//! During process startup, the process that is supposed to be a singleton
|
||||
//! must [claim][`claim_for_current_process`] the pidfile first.
|
||||
//! If that is unsuccessful, the process must not act as the singleton, i.e.,
|
||||
//! it must not access any of the resources that only the singleton may access.
|
||||
//!
|
||||
//! A common need is to signal a running singleton process, e.g., to make
|
||||
//! it shut down and exit.
|
||||
//! For that, we have to [`read`] the pidfile. The result of the `read` operation
|
||||
//! tells us if there is any singleton process, and if so, what PID it has.
|
||||
//! We can then proceed to signal it, although some caveats still apply.
|
||||
//! Read the function-level documentation of [`read`] for that.
|
||||
//!
|
||||
//! ## Never Remove Pidfiles
|
||||
//!
|
||||
//! It would be natural to assume that the process who claimed the pidfile
|
||||
//! should remove it upon exit to avoid leaving a stale pidfile in place.
|
||||
//! However, we already have a reliable way to detect staleness of the pidfile,
|
||||
//! i.e., the `flock` that [claiming][`claim_for_current_process`] puts on it.
|
||||
//!
|
||||
//! And further, removing pidfiles would introduce a **catastrophic race condition**
|
||||
//! where two processes are running that are supposed to be singletons.
|
||||
//! Suppose we were to remove our pidfile during process shutdown.
|
||||
//! Here is how the race plays out:
|
||||
//! - Suppose we have a service called `myservice` with pidfile `myservice.pidfile`.
|
||||
//! - Process `A` starts to shut down.
|
||||
//! - Process `B` is just starting up
|
||||
//! - It `open("myservice.pid", O_WRONLY|O_CREAT)` the file
|
||||
//! - It blocks on `flock`
|
||||
//! - Process `A` removes the pidfile as the last step of its shutdown procedure
|
||||
//! - `unlink("myservice.pid")`
//! - Process `A` exits
|
||||
//! - This releases its `flock` and unblocks `B`
|
||||
//! - Process `B` still has the file descriptor for `myservice.pid` open
|
||||
//! - Process `B` writes its PID into `myservice.pid`.
|
||||
//! - But the `myservice.pid` file has been unlinked, so there is no `myservice.pid`
//! in the directory.
//! - Process `C` starts
|
||||
//! - It `open("myservice.pid", O_WRONLY|O_CREAT)` which creates a new file (new inode)
|
||||
//! - It `flock`s the file, which, since it's a different file, does not block
|
||||
//! - It writes its PID into the file
|
||||
//!
|
||||
//! At this point, `B` and `C` are running, which is hazardous.
|
||||
//! Morale of the story: don't unlink pidfiles, ever.
|
||||
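The module being removed here is easiest to follow with a small usage sketch of its claim/read pair. The daemon and CLI functions below are hypothetical; only the `pid_file` items themselves come from this file.

// Illustrative sketch only, assuming the API of the (removed) pid_file module.
fn daemon_startup(pidfile: &std::path::Path) -> anyhow::Result<()> {
    let guard = claim_for_current_process(pidfile)?;
    // Keep the claim for the rest of the process lifetime; never unlink the pidfile.
    std::mem::forget(guard);
    Ok(())
}

fn signal_running_daemon(pidfile: &std::path::Path) -> anyhow::Result<()> {
    match read(pidfile)? {
        PidFileRead::NotExist => anyhow::bail!("daemon is not running"),
        PidFileRead::NotHeldByAnyProcess(_guard) => anyhow::bail!("stale pidfile, daemon not running"),
        PidFileRead::LockedByOtherProcess(pid) => {
            // Signal the still-running singleton; note the staleness caveats documented on `read`.
            nix::sys::signal::kill(pid, nix::sys::signal::Signal::SIGTERM)?;
            Ok(())
        }
    }
}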
|
||||
use std::{ops::Deref, path::Path};
|
||||
|
||||
use anyhow::Context;
|
||||
use nix::unistd::Pid;
|
||||
|
||||
use crate::lock_file::{self, LockFileRead};
|
||||
|
||||
/// Keeps a claim on a pidfile alive until it is dropped.
|
||||
/// Returned by [`claim_for_current_process`].
|
||||
#[must_use]
|
||||
pub struct PidFileGuard(lock_file::LockFileGuard);
|
||||
|
||||
impl Deref for PidFileGuard {
|
||||
type Target = lock_file::LockFileGuard;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
/// Try to claim `path` as a pidfile for the current process.
|
||||
///
|
||||
/// If another process has already claimed the pidfile, and it is still running,
|
||||
/// this function returns an error.
/// Otherwise, the function `flock`s the file and updates its contents to the
|
||||
/// current process's PID.
|
||||
/// If the update fails, the flock is released and an error returned.
|
||||
/// On success, the function returns a [`PidFileGuard`] to keep the flock alive.
|
||||
///
|
||||
/// ### Maintaining A Claim
|
||||
///
|
||||
/// It is the caller's responsibility to maintain the claim.
|
||||
/// The claim ends as soon as the returned guard object is dropped.
|
||||
/// To maintain the claim for the remaining lifetime of the current process,
|
||||
/// use [`std::mem::forget`] or similar.
|
||||
pub fn claim_for_current_process(path: &Path) -> anyhow::Result<PidFileGuard> {
|
||||
let unwritten_lock_file = lock_file::create_exclusive(path).context("lock file")?;
|
||||
// if any of the next steps fail, we drop the file descriptor and thereby release the lock
|
||||
let guard = unwritten_lock_file
|
||||
.write_content(Pid::this().to_string())
|
||||
.context("write pid to lock file")?;
|
||||
Ok(PidFileGuard(guard))
|
||||
}
|
||||
|
||||
/// Returned by [`read`].
pub enum PidFileRead {
    /// No file exists at the given path.
    NotExist,
    /// The given pidfile is currently not claimed by any process.
    /// To determine this, the [`read`] operation acquired
    /// an exclusive flock on the file. The lock is still held and responsibility
    /// to release it is returned through the guard object.
    /// Until it is released, other [`claim_for_current_process`] or [`read`] calls
    /// will fail.
    ///
    /// ### Caveats
    ///
    /// Do not unlink the pidfile from the filesystem. See the module comment for why.
    NotHeldByAnyProcess(PidFileGuard),
    /// The given pidfile is still claimed by another process whose PID is given
    /// as part of this variant.
    ///
    /// ### Caveats
    ///
    /// 1. The other process might exit at any time, turning the given PID stale.
    /// 2. There is a small window in which `claim_for_current_process` has already
    ///    locked the file but not yet updated its contents. [`read`] will return
    ///    this variant here, but with the old file contents, i.e., a stale PID.
    ///
    /// The kernel is free to recycle a PID once it has been `wait(2)`ed upon by
    /// its creator. Thus, acting upon a stale PID, e.g., by issuing a `kill`
    /// system call on it, bears the risk of killing an unrelated process.
    /// This is an inherent limitation of using pidfiles.
    /// The only race-free solution is to have a supervisor process with a lifetime
    /// that exceeds that of all of its child processes (e.g., `runit`, `supervisord`).
    LockedByOtherProcess(Pid),
}

/// Try to read the file at the given path as a pidfile that was previously created
/// through [`claim_for_current_process`].
///
/// On success, this function returns a [`PidFileRead`].
/// Check its docs for a description of the meaning of its different variants.
pub fn read(pidfile: &Path) -> anyhow::Result<PidFileRead> {
    let res = lock_file::read_and_hold_lock_file(pidfile).context("read and hold pid file")?;
    let ret = match res {
        LockFileRead::NotExist => PidFileRead::NotExist,
        LockFileRead::NotHeldByAnyProcess(guard, _) => {
            PidFileRead::NotHeldByAnyProcess(PidFileGuard(guard))
        }
        LockFileRead::LockedByOtherProcess {
            not_locked_file: _not_locked_file,
            content,
        } => {
            // XXX the read races with the write in claim_pid_file_for_pid().
            // But pids are smaller than a page, so the kernel page cache will lock for us.
            // The only problem is that we might get the old contents here.
            // Can only fix that by implementing some scheme that downgrades the
            // exclusive lock to shared lock in claim_pid_file_for_pid().
            PidFileRead::LockedByOtherProcess(parse_pidfile_content(&content)?)
        }
    };
    Ok(ret)
}

fn parse_pidfile_content(content: &str) -> anyhow::Result<Pid> {
    let pid: i32 = content
        .parse()
        .map_err(|_| anyhow::anyhow!("parse pidfile content to PID"))?;
    if pid < 1 {
        anyhow::bail!("bad value in pidfile '{pid}'");
    }
    Ok(Pid::from_raw(pid))
}
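A hedged sketch of how a caller might branch on the three variants of [`read`], e.g. in a hypothetical `status` subcommand; the printed messages are made up.

fn report_pidfile_status(pidfile: &Path) -> anyhow::Result<()> {
    match read(pidfile)? {
        PidFileRead::NotExist => println!("no pidfile, service is not running"),
        PidFileRead::NotHeldByAnyProcess(_guard) => {
            // We now hold the flock; dropping `_guard` at the end of this arm releases it.
            println!("stale pidfile, no process holds the claim");
        }
        PidFileRead::LockedByOtherProcess(pid) => {
            // The PID may already be stale, see the caveats on this variant.
            println!("service appears to be running with PID {pid}");
        }
    }
    Ok(())
}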
@@ -1,27 +0,0 @@
use sentry::ClientInitGuard;
use std::borrow::Cow;
use std::env;

pub use sentry::release_name;

#[must_use]
pub fn init_sentry(
    release_name: Option<Cow<'static, str>>,
    extra_options: &[(&str, &str)],
) -> Option<ClientInitGuard> {
    let dsn = env::var("SENTRY_DSN").ok()?;

    let guard = sentry::init((
        dsn,
        sentry::ClientOptions {
            release: release_name,
            ..Default::default()
        },
    ));
    sentry::configure_scope(|scope| {
        for &(key, value) in extra_options {
            scope.set_extra(key, value.into());
        }
    });
    Some(guard)
}
@@ -5,6 +5,10 @@ edition = "2021"

[features]
default = []
# Enables test-only APIs, including failpoints. In particular, enables the `fail_point!` macro,
# which adds some runtime cost to run tests on outage conditions
testing = ["fail/failpoints"]

profiling = ["pprof"]

[dependencies]
@@ -14,13 +18,13 @@ async-stream = "0.3"
async-trait = "0.1"
byteorder = "1.4.3"
bytes = "1.0.1"
chrono = { version = "0.4.23", default-features = false, features = ["clock"] }
chrono = "0.4.19"
clap = { version = "4.0", features = ["string"] }
close_fds = "0.3.2"
const_format = "0.2.21"
crc32c = "0.6.0"
crossbeam-utils = "0.8.5"
fail = { version = "0.5", default-features = false, features = ["failpoints"] }
fail = "0.5.0"
futures = "0.3.13"
git-version = "0.3.5"
hex = "0.4.3"
@@ -55,6 +59,7 @@ tracing = "0.1.36"
url = "2"
walkdir = "2.3.2"

persistent_range_query = { path = "../libs/persistent_range_query" }
etcd_broker = { path = "../libs/etcd_broker" }
metrics = { path = "../libs/metrics" }
pageserver_api = { path = "../libs/pageserver_api" }
@@ -65,6 +70,7 @@ remote_storage = { path = "../libs/remote_storage" }
tenant_size_model = { path = "../libs/tenant_size_model" }
utils = { path = "../libs/utils" }
workspace_hack = { version = "0.1", path = "../workspace_hack" }
rpds = "0.12.0"

[dev-dependencies]
criterion = "0.4"
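The `testing = ["fail/failpoints"]` feature above is what makes the `fail` crate's failpoints available at runtime. A generic, hedged illustration of how a failpoint is declared in code and flipped in a test (this is standard `fail` crate usage, not this repo's own `fail_point!` wrapper; the failpoint name and function are made up):

// Sketch of fail-crate usage when built with `--features testing` (i.e. fail/failpoints on).
fn flush_to_disk() -> anyhow::Result<()> {
    // No-op unless a test configures the "before-flush" failpoint at runtime.
    fail::fail_point!("before-flush", |_| {
        anyhow::bail!("failpoint 'before-flush' triggered")
    });
    // ... real flush work would go here ...
    Ok(())
}

#[cfg(test)]
mod tests {
    #[test]
    fn flush_fails_when_failpoint_is_set() {
        // Requires the `failpoints` feature of the `fail` crate to be enabled.
        fail::cfg("before-flush", "return").unwrap();
        assert!(super::flush_to_disk().is_err());
        fail::remove("before-flush");
    }
}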
@@ -1,7 +1,10 @@
|
||||
use anyhow::Result;
|
||||
use num_traits::ToPrimitive;
|
||||
use pageserver::repository::{Key, Value};
|
||||
use pageserver::tenant::bst_layer_map::BSTLM;
|
||||
use pageserver::tenant::filename::{DeltaFileName, ImageFileName};
|
||||
use pageserver::tenant::layer_map::LayerMap;
|
||||
use pageserver::tenant::segment_tree_layer_map::STLM;
|
||||
use pageserver::tenant::storage_layer::Layer;
|
||||
use pageserver::tenant::storage_layer::ValueReconstructResult;
|
||||
use pageserver::tenant::storage_layer::ValueReconstructState;
|
||||
@@ -243,22 +246,69 @@ fn bench_from_captest_env(c: &mut Criterion) {
|
||||
// too long processing layer map queries.
|
||||
fn bench_from_real_project(c: &mut Criterion) {
|
||||
// TODO consider compressing this file
|
||||
|
||||
// Init layer map
|
||||
let now = Instant::now();
|
||||
let layer_map = build_layer_map(PathBuf::from("benches/odd-brook-layernames.txt"));
|
||||
println!("Finished layer map init in {:?}", now.elapsed());
|
||||
|
||||
// Init bst layer map with the same layers
|
||||
let now = Instant::now();
|
||||
let mut bstlm = BSTLM::new();
|
||||
let mut sorted_layers: Vec<_> = layer_map.iter_historic_layers().collect();
|
||||
sorted_layers.sort_by(|a, b| {
|
||||
a.get_lsn_range().start.cmp(&b.get_lsn_range().start)
|
||||
});
|
||||
for layer in sorted_layers {
|
||||
if layer.is_incremental() {
|
||||
// TODO check if they're sorted
|
||||
let kr = layer.get_key_range();
|
||||
let lr = layer.get_lsn_range();
|
||||
|
||||
bstlm.insert(
|
||||
kr.start.to_i128(),
|
||||
kr.end.to_i128(),
|
||||
lr.start.0,
|
||||
format!("Layer {}", lr.start.0),
|
||||
);
|
||||
} else {
|
||||
let kr = layer.get_key_range();
|
||||
let lr = layer.get_lsn_range();
|
||||
|
||||
bstlm.insert(
|
||||
kr.start.to_i128(),
|
||||
kr.end.to_i128(),
|
||||
lr.start.0,
|
||||
format!("Layer {}", lr.start.0),
|
||||
);
|
||||
}
|
||||
}
|
||||
println!("Finished bst init in {:?}", now.elapsed());
|
||||
|
||||
// Choose uniformly distributed queries
|
||||
let queries: Vec<(Key, Lsn)> = uniform_query_pattern(&layer_map);
|
||||
|
||||
// Test with uniform query pattern
|
||||
c.bench_function("real_map_uniform_queries", |b| {
|
||||
// Define and name the benchmark function
|
||||
let mut group = c.benchmark_group("real_map_uniform_queries");
|
||||
group.bench_function("current_code", |b| {
|
||||
b.iter(|| {
|
||||
for q in queries.clone().into_iter() {
|
||||
layer_map.search(q.0, q.1).unwrap();
|
||||
}
|
||||
});
|
||||
});
|
||||
group.bench_function("persistent_bst", |b| {
|
||||
b.iter(|| {
|
||||
for q in queries.clone().into_iter() {
|
||||
bstlm.query(q.0.to_i128(), q.1.0);
|
||||
}
|
||||
});
|
||||
});
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// Benchmark using synthetic data. Arrange image layers on stacked diagonal lines.
|
||||
fn bench_sequential(c: &mut Criterion) {
|
||||
let mut layer_map = LayerMap::default();
|
||||
|
||||
// Init layer map. Create 100_000 layers arranged in 1000 diagonal lines.
|
||||
//
|
||||
@@ -267,42 +317,65 @@ fn bench_sequential(c: &mut Criterion) {
|
||||
// Putting it inside the `bench_function` closure is not a solution
|
||||
// because then it runs multiple times during warmup.
|
||||
let now = Instant::now();
|
||||
let mut layer_map = LayerMap::default();
|
||||
for i in 0..100_000 {
|
||||
// TODO try inserting a super-wide layer in between every 10 to reflect
|
||||
// what often happens with L1 layers that include non-rel changes.
|
||||
// Maybe do that as a separate test.
|
||||
let i32 = (i as u32) % 100;
|
||||
let zero = Key::from_hex("000000000000000000000000000000000000").unwrap();
|
||||
let layer = DummyImage {
|
||||
key_range: zero.add(10 * i32)..zero.add(10 * i32 + 1),
|
||||
lsn: Lsn(10 * i),
|
||||
lsn: Lsn(i),
|
||||
};
|
||||
layer_map.insert_historic(Arc::new(layer));
|
||||
}
|
||||
println!("Finished layer map init in {:?}", now.elapsed());
|
||||
|
||||
// Manually measure runtime without criterion because criterion
|
||||
// has a minimum sample size of 10 and I don't want to run it 10 times.
|
||||
println!("Finished init in {:?}", now.elapsed());
|
||||
// Init bst layer map with the same layers
|
||||
let now = Instant::now();
|
||||
let mut bstlm = BSTLM::new();
|
||||
for layer in layer_map.iter_historic_layers() {
|
||||
if layer.is_incremental() {
|
||||
panic!("AAA");
|
||||
} else {
|
||||
let kr = layer.get_key_range();
|
||||
let lr = layer.get_lsn_range();
|
||||
|
||||
bstlm.insert(
|
||||
kr.start.to_i128(),
|
||||
kr.end.to_i128(),
|
||||
lr.start.0,
|
||||
format!("Layer {}", lr.start.0),
|
||||
);
|
||||
}
|
||||
}
|
||||
println!("Finished bst init in {:?}", now.elapsed());
|
||||
|
||||
// Choose 100 uniformly random queries
|
||||
let rng = &mut StdRng::seed_from_u64(1);
|
||||
let queries: Vec<(Key, Lsn)> = uniform_query_pattern(&layer_map)
|
||||
.choose_multiple(rng, 1)
|
||||
.choose_multiple(rng, 100)
|
||||
.copied()
|
||||
.collect();
|
||||
|
||||
// Define and name the benchmark function
|
||||
c.bench_function("sequential_uniform_queries", |b| {
|
||||
// Run the search queries
|
||||
let mut group = c.benchmark_group("sequential_uniform_queries");
|
||||
group.bench_function("current_code", |b| {
|
||||
b.iter(|| {
|
||||
for q in queries.clone().into_iter() {
|
||||
layer_map.search(q.0, q.1).unwrap();
|
||||
}
|
||||
});
|
||||
});
|
||||
group.bench_function("persistent_bst", |b| {
|
||||
b.iter(|| {
|
||||
for q in queries.clone().into_iter() {
|
||||
bstlm.query(q.0.to_i128(), q.1.0);
|
||||
}
|
||||
});
|
||||
});
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group!(group_1, bench_from_captest_env);
|
||||
criterion_group!(group_2, bench_from_real_project);
|
||||
criterion_group!(group_3, bench_sequential);
|
||||
criterion_main!(group_1, group_2, group_3);
|
||||
// HACK TODO bring back all the bench functions. I remove
|
||||
// them here to avoid initializing.
|
||||
criterion_group!(group, bench_from_real_project);
|
||||
criterion_main!(group);
|
||||
|
||||
@@ -431,7 +431,7 @@ fn pg_record(will_init: bool, bytes: &'static [u8]) -> NeonWalRecord {
|
||||
struct Request {
|
||||
key: Key,
|
||||
lsn: Lsn,
|
||||
base_img: Option<(Lsn, Bytes)>,
|
||||
base_img: Option<Bytes>,
|
||||
records: Vec<(Lsn, NeonWalRecord)>,
|
||||
pg_version: u32,
|
||||
}
|
||||
|
||||
0	pageserver/benches/segment_tree_layer_map.rs (new file)
@@ -12,6 +12,7 @@
|
||||
//!
|
||||
use anyhow::{anyhow, bail, ensure, Context, Result};
|
||||
use bytes::{BufMut, BytesMut};
|
||||
use fail::fail_point;
|
||||
use itertools::Itertools;
|
||||
use std::fmt::Write as FmtWrite;
|
||||
use std::io;
|
||||
@@ -21,7 +22,6 @@ use std::time::SystemTime;
|
||||
use tar::{Builder, EntryType, Header};
|
||||
use tracing::*;
|
||||
|
||||
use crate::fail_point;
|
||||
use crate::tenant::Timeline;
|
||||
use pageserver_api::reltag::{RelTag, SlruKind};
|
||||
|
||||
|
||||
@@ -11,8 +11,8 @@
|
||||
//!
|
||||
//! Example use:
|
||||
//! ```
|
||||
//! $ ls test_output/test_pgbench\[neon-45-684\]/repo/tenants/$TENANT/timelines/$TIMELINE | \
|
||||
//! $ grep "__" | cargo run --release --bin draw_timeline_dir > out.svg
|
||||
//! $ cd test_output/test_pgbench\[neon-45-684\]/repo/tenants/$TENANT/timelines/$TIMELINE
|
||||
//! $ ls | grep "__" | cargo run --release --bin draw_timeline_dir > out.svg
|
||||
//! $ firefox out.svg
|
||||
//! ```
|
||||
//!
|
||||
@@ -25,8 +25,6 @@ use anyhow::Result;
|
||||
use pageserver::repository::Key;
|
||||
use std::cmp::Ordering;
|
||||
use std::io::{self, BufRead};
|
||||
use std::path::PathBuf;
|
||||
use std::str::FromStr;
|
||||
use std::{
|
||||
collections::{BTreeMap, BTreeSet},
|
||||
ops::Range,
|
||||
@@ -67,11 +65,7 @@ fn main() -> Result<()> {
|
||||
let mut ranges: Vec<(Range<Key>, Range<Lsn>)> = vec![];
|
||||
let stdin = io::stdin();
|
||||
for line in stdin.lock().lines() {
|
||||
let line = line.unwrap();
|
||||
let line = PathBuf::from_str(&line).unwrap();
|
||||
let filename = line.file_name().unwrap();
|
||||
let filename = filename.to_str().unwrap();
|
||||
let range = parse_filename(filename);
|
||||
let range = parse_filename(&line.unwrap());
|
||||
ranges.push(range);
|
||||
}
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ use std::{env, ops::ControlFlow, path::Path, str::FromStr};
|
||||
use anyhow::{anyhow, Context};
|
||||
use clap::{Arg, ArgAction, Command};
|
||||
use fail::FailScenario;
|
||||
use nix::unistd::Pid;
|
||||
use tracing::*;
|
||||
|
||||
use metrics::set_build_info_metric;
|
||||
@@ -22,10 +23,9 @@ use pageserver::{
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use utils::{
|
||||
auth::JwtAuth,
|
||||
logging,
|
||||
lock_file, logging,
|
||||
postgres_backend::AuthType,
|
||||
project_git_version,
|
||||
sentry_init::{init_sentry, release_name},
|
||||
signals::{self, Signal},
|
||||
tcp_listener,
|
||||
};
|
||||
@@ -35,6 +35,10 @@ project_git_version!(GIT_VERSION);
|
||||
const PID_FILE_NAME: &str = "pageserver.pid";
|
||||
|
||||
const FEATURES: &[&str] = &[
|
||||
#[cfg(feature = "testing")]
|
||||
"testing",
|
||||
#[cfg(feature = "fail/failpoints")]
|
||||
"fail/failpoints",
|
||||
#[cfg(feature = "profiling")]
|
||||
"profiling",
|
||||
];
|
||||
@@ -81,9 +85,6 @@ fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
};
|
||||
|
||||
// initialize sentry if SENTRY_DSN is provided
|
||||
let _sentry_guard = init_sentry(release_name!(), &[("node_id", &conf.id.to_string())]);
|
||||
|
||||
let tenants_path = conf.tenants_path();
|
||||
if !tenants_path.exists() {
|
||||
utils::crashsafe::create_dir_all(conf.tenants_path()).with_context(|| {
|
||||
@@ -174,10 +175,6 @@ fn initialize_config(
|
||||
let conf = PageServerConf::parse_and_validate(&toml, workdir)
|
||||
.context("Failed to parse pageserver configuration")?;
|
||||
|
||||
if pageserver::TESTING_MODE.set(conf.testing_mode).is_err() {
|
||||
anyhow::bail!("testing_mode was already initialized");
|
||||
}
|
||||
|
||||
if update_config {
|
||||
info!("Writing pageserver config to '{}'", cfg_file_path.display());
|
||||
|
||||
@@ -206,32 +203,41 @@ fn start_pageserver(conf: &'static PageServerConf) -> anyhow::Result<()> {
|
||||
|
||||
// If any failpoints were set from FAILPOINTS environment variable,
|
||||
// print them to the log for debugging purposes
|
||||
if *pageserver::TESTING_MODE.get().unwrap() {
|
||||
let failpoints = fail::list();
|
||||
if !failpoints.is_empty() {
|
||||
info!(
|
||||
"started with testing mode enabled, failpoints: {}",
|
||||
failpoints
|
||||
.iter()
|
||||
.map(|(name, actions)| format!("{name}={actions}"))
|
||||
.collect::<Vec<String>>()
|
||||
.join(";")
|
||||
)
|
||||
} else {
|
||||
info!("started with testing mode enabled");
|
||||
}
|
||||
} else {
|
||||
info!("started with testing mode disabled");
|
||||
let failpoints = fail::list();
|
||||
if !failpoints.is_empty() {
|
||||
info!(
|
||||
"started with failpoints: {}",
|
||||
failpoints
|
||||
.iter()
|
||||
.map(|(name, actions)| format!("{name}={actions}"))
|
||||
.collect::<Vec<String>>()
|
||||
.join(";")
|
||||
)
|
||||
}
|
||||
|
||||
let lock_file_path = conf.workdir.join(PID_FILE_NAME);
|
||||
let lock_file =
|
||||
utils::pid_file::claim_for_current_process(&lock_file_path).context("claim pid file")?;
|
||||
info!("Claimed pid file at {lock_file_path:?}");
|
||||
|
||||
let lock_file = match lock_file::create_lock_file(&lock_file_path, Pid::this().to_string()) {
|
||||
lock_file::LockCreationResult::Created {
|
||||
new_lock_contents,
|
||||
file,
|
||||
} => {
|
||||
info!("Created lock file at {lock_file_path:?} with contenst {new_lock_contents}");
|
||||
file
|
||||
}
|
||||
lock_file::LockCreationResult::AlreadyLocked {
|
||||
existing_lock_contents,
|
||||
} => anyhow::bail!(
|
||||
"Could not lock pid file; pageserver is already running in {:?} with PID {}",
|
||||
conf.workdir,
|
||||
existing_lock_contents
|
||||
),
|
||||
lock_file::LockCreationResult::CreationFailed(e) => {
|
||||
return Err(e.context(format!("Failed to create lock file at {lock_file_path:?}")))
|
||||
}
|
||||
};
|
||||
// ensure that the lock file is held even if the main thread of the process panics
|
||||
// we need to release the lock file only when the current process is gone
|
||||
std::mem::forget(lock_file);
|
||||
let _ = Box::leak(Box::new(lock_file));
|
||||
|
||||
// TODO: Check that it looks like a valid repository before going further
|
||||
|
||||
@@ -286,23 +292,15 @@ fn start_pageserver(conf: &'static PageServerConf) -> anyhow::Result<()> {
|
||||
let remote_storage = conf
|
||||
.remote_storage_config
|
||||
.as_ref()
|
||||
.map(GenericRemoteStorage::from_config)
|
||||
.map(|storage_config| {
|
||||
GenericRemoteStorage::from_config(conf.workdir.clone(), storage_config)
|
||||
})
|
||||
.transpose()
|
||||
.context("Failed to init generic remote storage")?;
|
||||
|
||||
let (init_result_sender, init_result_receiver) =
|
||||
std::sync::mpsc::channel::<anyhow::Result<()>>();
|
||||
let storage_for_spawn = remote_storage.clone();
|
||||
let _handler = BACKGROUND_RUNTIME.spawn(async move {
|
||||
let result = tenant_mgr::init_tenant_mgr(conf, storage_for_spawn).await;
|
||||
init_result_sender.send(result)
|
||||
});
|
||||
match init_result_receiver.recv() {
|
||||
Ok(init_result) => init_result.context("Failed to init tenant_mgr")?,
|
||||
Err(_sender_dropped_err) => {
|
||||
anyhow::bail!("Failed to init tenant_mgr: no init status was returned");
|
||||
}
|
||||
}
|
||||
{
|
||||
let _rt_guard = BACKGROUND_RUNTIME.enter();
|
||||
tenant_mgr::init_tenant_mgr(conf, remote_storage.clone())?
|
||||
};
|
||||
|
||||
// Spawn all HTTP related tasks in the MGMT_REQUEST_RUNTIME.
|
||||
// bind before launching separate thread so the error reported before startup exits
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
//! See also `settings.md` for better description on every parameter.
|
||||
|
||||
use anyhow::{anyhow, bail, ensure, Context, Result};
|
||||
use remote_storage::{RemotePath, RemoteStorageConfig};
|
||||
use remote_storage::RemoteStorageConfig;
|
||||
use std::env;
|
||||
use utils::crashsafe::path_with_suffix_extension;
|
||||
use utils::id::ConnectionId;
|
||||
@@ -27,9 +27,7 @@ use utils::{
|
||||
|
||||
use crate::tenant::{TENANT_ATTACHING_MARKER_FILENAME, TIMELINES_SEGMENT_NAME};
|
||||
use crate::tenant_config::{TenantConf, TenantConfOpt};
|
||||
use crate::{
|
||||
IGNORED_TENANT_FILE_NAME, METADATA_FILE_NAME, TENANT_CONFIG_NAME, TIMELINE_UNINIT_MARK_SUFFIX,
|
||||
};
|
||||
use crate::{METADATA_FILE_NAME, TENANT_CONFIG_NAME, TIMELINE_UNINIT_MARK_SUFFIX};
|
||||
|
||||
pub mod defaults {
|
||||
use crate::tenant_config::defaults::*;
|
||||
@@ -53,8 +51,6 @@ pub mod defaults {
|
||||
pub const DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES: usize =
|
||||
super::ConfigurableSemaphore::DEFAULT_INITIAL.get();
|
||||
|
||||
pub const DEFAULT_TESTING_MODE: bool = false;
|
||||
|
||||
///
|
||||
/// Default built-in configuration file.
|
||||
///
|
||||
@@ -77,8 +73,6 @@ pub mod defaults {
|
||||
|
||||
#concurrent_tenant_size_logical_size_queries = '{DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES}'
|
||||
|
||||
testing_mode = false
|
||||
|
||||
# [tenant_config]
|
||||
#checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
|
||||
#checkpoint_timeout = {DEFAULT_CHECKPOINT_TIMEOUT}
|
||||
@@ -147,9 +141,6 @@ pub struct PageServerConf {
|
||||
|
||||
/// Number of concurrent [`Tenant::gather_size_inputs`] allowed.
|
||||
pub concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore,
|
||||
|
||||
/// Enables failpoint support and extra mgmt APIs useful for testing.
|
||||
pub testing_mode: bool,
|
||||
}
|
||||
|
||||
/// We do not want to store this in a PageServerConf because the latter may be logged
|
||||
@@ -229,8 +220,6 @@ struct PageServerConfigBuilder {
|
||||
log_format: BuilderValue<LogFormat>,
|
||||
|
||||
concurrent_tenant_size_logical_size_queries: BuilderValue<ConfigurableSemaphore>,
|
||||
|
||||
testing_mode: BuilderValue<bool>,
|
||||
}
|
||||
|
||||
impl Default for PageServerConfigBuilder {
|
||||
@@ -261,8 +250,6 @@ impl Default for PageServerConfigBuilder {
|
||||
log_format: Set(LogFormat::from_str(DEFAULT_LOG_FORMAT).unwrap()),
|
||||
|
||||
concurrent_tenant_size_logical_size_queries: Set(ConfigurableSemaphore::default()),
|
||||
|
||||
testing_mode: Set(DEFAULT_TESTING_MODE),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -343,11 +330,11 @@ impl PageServerConfigBuilder {
|
||||
self.concurrent_tenant_size_logical_size_queries = BuilderValue::Set(u);
|
||||
}
|
||||
|
||||
pub fn testing_mode(&mut self, testing_mode: bool) {
|
||||
self.testing_mode = BuilderValue::Set(testing_mode);
|
||||
}
|
||||
|
||||
pub fn build(self) -> anyhow::Result<PageServerConf> {
|
||||
let broker_endpoints = self
|
||||
.broker_endpoints
|
||||
.ok_or(anyhow!("No broker endpoints provided"))?;
|
||||
|
||||
Ok(PageServerConf {
|
||||
listen_pg_addr: self
|
||||
.listen_pg_addr
|
||||
@@ -383,9 +370,7 @@ impl PageServerConfigBuilder {
|
||||
profiling: self.profiling.ok_or(anyhow!("missing profiling"))?,
|
||||
// TenantConf is handled separately
|
||||
default_tenant_conf: TenantConf::default(),
|
||||
broker_endpoints: self
|
||||
.broker_endpoints
|
||||
.ok_or(anyhow!("No broker endpoints provided"))?,
|
||||
broker_endpoints,
|
||||
broker_etcd_prefix: self
|
||||
.broker_etcd_prefix
|
||||
.ok_or(anyhow!("missing broker_etcd_prefix"))?,
|
||||
@@ -395,7 +380,6 @@ impl PageServerConfigBuilder {
|
||||
.ok_or(anyhow!(
|
||||
"missing concurrent_tenant_size_logical_size_queries"
|
||||
))?,
|
||||
testing_mode: self.testing_mode.ok_or(anyhow!("missing testing_mode"))?,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -418,10 +402,6 @@ impl PageServerConf {
|
||||
.join(TENANT_ATTACHING_MARKER_FILENAME)
|
||||
}
|
||||
|
||||
pub fn tenant_ignore_mark_file_path(&self, tenant_id: TenantId) -> PathBuf {
|
||||
self.tenant_path(&tenant_id).join(IGNORED_TENANT_FILE_NAME)
|
||||
}
|
||||
|
||||
/// Points to a place in pageserver's local directory,
|
||||
/// where certain tenant's tenantconf file should be located.
|
||||
pub fn tenant_config_path(&self, tenant_id: TenantId) -> PathBuf {
|
||||
@@ -470,28 +450,6 @@ impl PageServerConf {
|
||||
.join(METADATA_FILE_NAME)
|
||||
}
|
||||
|
||||
/// Files on the remote storage are stored with paths, relative to the workdir.
|
||||
/// That path includes in itself both tenant and timeline ids, allowing to have a unique remote storage path.
|
||||
///
|
||||
/// Errors if the path provided does not start from pageserver's workdir.
|
||||
pub fn remote_path(&self, local_path: &Path) -> anyhow::Result<RemotePath> {
|
||||
local_path
|
||||
.strip_prefix(&self.workdir)
|
||||
.context("Failed to strip workdir prefix")
|
||||
.and_then(RemotePath::new)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to resolve remote part of path {:?} for base {:?}",
|
||||
local_path, self.workdir
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
/// Turns storage remote path of a file into its local path.
|
||||
pub fn local_path(&self, remote_path: &RemotePath) -> PathBuf {
|
||||
remote_path.with_base(&self.workdir)
|
||||
}
|
||||
|
||||
//
|
||||
// Postgres distribution paths
|
||||
//
|
||||
@@ -528,7 +486,7 @@ impl PageServerConf {
|
||||
let mut builder = PageServerConfigBuilder::default();
|
||||
builder.workdir(workdir.to_owned());
|
||||
|
||||
let mut t_conf = TenantConfOpt::default();
|
||||
let mut t_conf: TenantConfOpt = Default::default();
|
||||
|
||||
for (key, item) in toml.iter() {
|
||||
match key {
|
||||
@@ -576,7 +534,6 @@ impl PageServerConf {
|
||||
let permits = NonZeroUsize::new(permits).context("initial semaphore permits out of range: 0, use other configuration to disable a feature")?;
|
||||
ConfigurableSemaphore::new(permits)
|
||||
}),
|
||||
"testing_mode" => builder.testing_mode(parse_toml_bool(key, item)?),
|
||||
_ => bail!("unrecognized pageserver option '{key}'"),
|
||||
}
|
||||
}
|
||||
@@ -660,10 +617,6 @@ impl PageServerConf {
|
||||
if let Some(max_lsn_wal_lag) = item.get("max_lsn_wal_lag") {
|
||||
t_conf.max_lsn_wal_lag = Some(parse_toml_from_str("max_lsn_wal_lag", max_lsn_wal_lag)?);
|
||||
}
|
||||
if let Some(trace_read_requests) = item.get("trace_read_requests") {
|
||||
t_conf.trace_read_requests =
|
||||
Some(parse_toml_bool("trace_read_requests", trace_read_requests)?);
|
||||
}
|
||||
|
||||
Ok(t_conf)
|
||||
}
|
||||
@@ -696,7 +649,6 @@ impl PageServerConf {
|
||||
broker_etcd_prefix: etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(),
|
||||
log_format: LogFormat::from_str(defaults::DEFAULT_LOG_FORMAT).unwrap(),
|
||||
concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore::default(),
|
||||
testing_mode: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -710,11 +662,6 @@ fn parse_toml_string(name: &str, item: &Item) -> Result<String> {
|
||||
Ok(s.to_string())
|
||||
}
|
||||
|
||||
fn parse_toml_bool(name: &str, item: &Item) -> Result<bool> {
|
||||
item.as_bool()
|
||||
.with_context(|| format!("configure option {name} is not a boolean"))
|
||||
}
|
||||
|
||||
fn parse_toml_u64(name: &str, item: &Item) -> Result<u64> {
|
||||
// A toml integer is signed, so it cannot represent the full range of an u64. That's OK
|
||||
// for our use, though.
|
||||
@@ -891,7 +838,6 @@ log_format = 'json'
|
||||
broker_etcd_prefix: etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(),
|
||||
log_format: LogFormat::from_str(defaults::DEFAULT_LOG_FORMAT).unwrap(),
|
||||
concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore::default(),
|
||||
testing_mode: defaults::DEFAULT_TESTING_MODE,
|
||||
},
|
||||
"Correct defaults should be used when no config values are provided"
|
||||
);
|
||||
@@ -938,7 +884,6 @@ log_format = 'json'
|
||||
broker_etcd_prefix: etcd_broker::DEFAULT_NEON_BROKER_ETCD_PREFIX.to_string(),
|
||||
log_format: LogFormat::Json,
|
||||
concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore::default(),
|
||||
testing_mode: defaults::DEFAULT_TESTING_MODE,
|
||||
},
|
||||
"Should be able to parse all basic config values correctly"
|
||||
);
|
||||
@@ -1071,35 +1016,6 @@ broker_endpoints = ['{broker_endpoint}']
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_tenant_config() -> anyhow::Result<()> {
|
||||
let tempdir = tempdir()?;
|
||||
let (workdir, pg_distrib_dir) = prepare_fs(&tempdir)?;
|
||||
|
||||
let broker_endpoint = "http://127.0.0.1:7777";
|
||||
let trace_read_requests = true;
|
||||
|
||||
let config_string = format!(
|
||||
r#"{ALL_BASE_VALUES_TOML}
|
||||
pg_distrib_dir='{}'
|
||||
broker_endpoints = ['{broker_endpoint}']
|
||||
|
||||
[tenant_config]
|
||||
trace_read_requests = {trace_read_requests}"#,
|
||||
pg_distrib_dir.display(),
|
||||
);
|
||||
|
||||
let toml = config_string.parse()?;
|
||||
|
||||
let conf = PageServerConf::parse_and_validate(&toml, &workdir)?;
|
||||
assert_eq!(
|
||||
conf.default_tenant_conf.trace_read_requests, trace_read_requests,
|
||||
"Tenant config from pageserver config file should be parsed and udpated values used as defaults for all tenants",
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn prepare_fs(tempdir: &TempDir) -> anyhow::Result<(PathBuf, PathBuf)> {
|
||||
let tempdir_path = tempdir.path();
|
||||
|
||||
|
||||
@@ -274,7 +274,6 @@ paths:
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
|
||||
post:
|
||||
description: Schedules attach operation to happen in the background for given tenant
|
||||
responses:
|
||||
@@ -326,9 +325,7 @@ paths:
|
||||
type: string
|
||||
format: hex
|
||||
post:
|
||||
description: |
|
||||
Remove tenant data (including all corresponding timelines) from pageserver's memory and file system.
|
||||
Files on the remote storage are not affected.
|
||||
description: Detach local tenant
|
||||
responses:
|
||||
"200":
|
||||
description: Tenant detached
|
||||
@@ -357,92 +354,6 @@ paths:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
|
||||
/v1/tenant/{tenant_id}/ignore:
|
||||
parameters:
|
||||
- name: tenant_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
post:
|
||||
description: |
|
||||
Remove tenant data (including all corresponding timelines) from pageserver's memory.
|
||||
Files on local disk and remote storage are not affected.
|
||||
|
||||
Future pageserver restarts won't load the data back until `load` is called on such tenant.
|
||||
responses:
|
||||
"200":
|
||||
description: Tenant ignored
|
||||
"400":
|
||||
description: Error when no tenant id found in path parameters
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
"401":
|
||||
description: Unauthorized Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/UnauthorizedError"
|
||||
"403":
|
||||
description: Forbidden Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ForbiddenError"
|
||||
"500":
|
||||
description: Generic operation error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
|
||||
/v1/tenant/{tenant_id}/load:
|
||||
parameters:
|
||||
- name: tenant_id
|
||||
in: path
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
format: hex
|
||||
post:
|
||||
description: |
|
||||
Schedules an operation that attempts to load a tenant from the local disk and
|
||||
synchronise it with the remote storage (if enabled), repeating pageserver's restart logic for tenant load.
|
||||
If the tenant was ignored before, removes the ignore mark and continues with load scheduling.
|
||||
|
||||
Errors if the tenant is absent on disk, already present in memory or fails to schedule its load.
|
||||
Scheduling a load does not mean that the tenant would load successfully, check tenant status to ensure load correctness.
|
||||
responses:
|
||||
"202":
|
||||
description: Tenant scheduled to load successfully
|
||||
"400":
|
||||
description: Error when no tenant id found in path parameters
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
"401":
|
||||
description: Unauthorized Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/UnauthorizedError"
|
||||
"403":
|
||||
description: Forbidden Error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/ForbiddenError"
|
||||
"500":
|
||||
description: Generic operation error
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: "#/components/schemas/Error"
|
||||
|
||||
/v1/tenant/{tenant_id}/size:
|
||||
parameters:
|
||||
- name: tenant_id
|
||||
@@ -748,6 +659,7 @@ components:
|
||||
- tenant_id
|
||||
- last_record_lsn
|
||||
- disk_consistent_lsn
|
||||
- awaits_download
|
||||
- state
|
||||
- latest_gc_cutoff_lsn
|
||||
properties:
|
||||
@@ -790,6 +702,8 @@ components:
|
||||
format: hex
|
||||
last_received_msg_ts:
|
||||
type: integer
|
||||
awaits_download:
|
||||
type: boolean
|
||||
state:
|
||||
type: string
|
||||
latest_gc_cutoff_lsn:
|
||||
|
||||
@@ -3,18 +3,18 @@ use std::sync::Arc;
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use hyper::StatusCode;
|
||||
use hyper::{Body, Request, Response, Uri};
|
||||
use pageserver_api::models::TenantState;
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use tokio::task::JoinError;
|
||||
use tracing::*;
|
||||
|
||||
use super::models::{
|
||||
ConfigureFailpointsRequest, LocalTimelineInfo, RemoteTimelineInfo, StatusResponse,
|
||||
TenantConfigRequest, TenantCreateRequest, TenantCreateResponse, TenantInfo,
|
||||
TimelineCreateRequest, TimelineGcRequest, TimelineInfo,
|
||||
LocalTimelineInfo, RemoteTimelineInfo, StatusResponse, TenantConfigRequest,
|
||||
TenantCreateRequest, TenantCreateResponse, TenantInfo, TimelineCreateRequest, TimelineInfo,
|
||||
};
|
||||
use crate::pgdatadir_mapping::LsnForTimestamp;
|
||||
use crate::tenant::Timeline;
|
||||
use crate::tenant_config::TenantConfOpt;
|
||||
use crate::CheckpointConfig;
|
||||
use crate::{config::PageServerConf, tenant_mgr};
|
||||
use utils::{
|
||||
auth::JwtAuth,
|
||||
@@ -29,6 +29,12 @@ use utils::{
|
||||
lsn::Lsn,
|
||||
};
|
||||
|
||||
// Imports only used for testing APIs
|
||||
#[cfg(feature = "testing")]
|
||||
use super::models::{ConfigureFailpointsRequest, TimelineGcRequest};
|
||||
#[cfg(feature = "testing")]
|
||||
use crate::CheckpointConfig;
|
||||
|
||||
struct State {
|
||||
conf: &'static PageServerConf,
|
||||
auth: Option<Arc<JwtAuth>>,
|
||||
@@ -76,11 +82,12 @@ fn check_permission(request: &Request<Body>, tenant_id: Option<TenantId>) -> Res
|
||||
|
||||
// Helper function to construct a TimelineInfo struct for a timeline
|
||||
fn build_timeline_info(
|
||||
tenant_state: TenantState,
|
||||
timeline: &Arc<Timeline>,
|
||||
include_non_incremental_logical_size: bool,
|
||||
include_non_incremental_physical_size: bool,
|
||||
) -> anyhow::Result<TimelineInfo> {
|
||||
let mut info = build_timeline_info_common(timeline)?;
|
||||
let mut info = build_timeline_info_common(tenant_state, timeline)?;
|
||||
if include_non_incremental_logical_size {
|
||||
info.current_logical_size_non_incremental =
|
||||
Some(timeline.get_current_logical_size_non_incremental(info.last_record_lsn)?);
|
||||
@@ -92,7 +99,10 @@ fn build_timeline_info(
|
||||
Ok(info)
|
||||
}
|
||||
|
||||
fn build_timeline_info_common(timeline: &Arc<Timeline>) -> anyhow::Result<TimelineInfo> {
|
||||
fn build_timeline_info_common(
|
||||
tenant_state: TenantState,
|
||||
timeline: &Arc<Timeline>,
|
||||
) -> anyhow::Result<TimelineInfo> {
|
||||
let last_record_lsn = timeline.get_last_record_lsn();
|
||||
let (wal_source_connstr, last_received_msg_lsn, last_received_msg_ts) = {
|
||||
let guard = timeline.last_received_wal.lock().unwrap();
|
||||
@@ -144,6 +154,10 @@ fn build_timeline_info_common(timeline: &Arc<Timeline>) -> anyhow::Result<Timeli
|
||||
|
||||
state,
|
||||
|
||||
// XXX bring back tracking of downloads per timeline, or, introduce
|
||||
// an 'Attaching' state for the timeline and get rid of this field.
|
||||
awaits_download: tenant_state == TenantState::Attaching,
|
||||
|
||||
// Duplicate some fields in 'local' and 'remote' fields, for backwards-compatility
|
||||
// with the control plane.
|
||||
local: LocalTimelineInfo {
|
||||
@@ -175,9 +189,7 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
|
||||
.new_timeline_id
|
||||
.unwrap_or_else(TimelineId::generate);
|
||||
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true)
|
||||
.await
|
||||
.map_err(ApiError::NotFound)?;
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
|
||||
match tenant.create_timeline(
|
||||
new_timeline_id,
|
||||
request_data.ancestor_timeline_id.map(TimelineId::from),
|
||||
@@ -188,7 +200,7 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
|
||||
.await {
|
||||
Ok(Some(new_timeline)) => {
|
||||
// Created. Construct a TimelineInfo for it.
|
||||
let timeline_info = build_timeline_info_common(&new_timeline)
|
||||
let timeline_info = build_timeline_info_common(tenant.current_state(), &new_timeline)
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
json_response(StatusCode::CREATED, timeline_info)
|
||||
}
|
||||
@@ -205,29 +217,26 @@ async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>,
|
||||
query_param_present(&request, "include-non-incremental-physical-size");
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let response_data = async {
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true)
|
||||
.await
|
||||
.map_err(ApiError::NotFound)?;
|
||||
let timelines = tenant.list_timelines();
|
||||
let _entered = info_span!("timeline_list", tenant = %tenant_id).entered();
|
||||
|
||||
let mut response_data = Vec::with_capacity(timelines.len());
|
||||
for timeline in timelines {
|
||||
let timeline_info = build_timeline_info(
|
||||
&timeline,
|
||||
include_non_incremental_logical_size,
|
||||
include_non_incremental_physical_size,
|
||||
)
|
||||
.context("Failed to convert tenant timeline {timeline_id} into the local one: {e:?}")
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
let (tenant_state, timelines) = {
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
|
||||
(tenant.current_state(), tenant.list_timelines())
|
||||
};
|
||||
|
||||
response_data.push(timeline_info);
|
||||
}
|
||||
let mut response_data = Vec::with_capacity(timelines.len());
|
||||
for timeline in timelines {
|
||||
let timeline_info = build_timeline_info(
|
||||
tenant_state,
|
||||
&timeline,
|
||||
include_non_incremental_logical_size,
|
||||
include_non_incremental_physical_size,
|
||||
)
|
||||
.context("Failed to convert tenant timeline {timeline_id} into the local one: {e:?}")
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
|
||||
Ok(response_data)
|
||||
response_data.push(timeline_info);
|
||||
}
|
||||
.instrument(info_span!("timeline_list", tenant = %tenant_id))
|
||||
.await?;
|
||||
|
||||
json_response(StatusCode::OK, response_data)
|
||||
}
|
||||
@@ -272,15 +281,20 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let timeline_info = async {
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true)
|
||||
.await
|
||||
.map_err(ApiError::NotFound)?;
|
||||
let (tenant_state, timeline) = tokio::task::spawn_blocking(move || {
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
|
||||
Ok((
|
||||
tenant.current_state(),
|
||||
tenant.get_timeline(timeline_id, false),
|
||||
))
|
||||
})
|
||||
.await
|
||||
.map_err(|e: JoinError| ApiError::InternalServerError(e.into()))??;
|
||||
|
||||
let timeline = tenant
|
||||
.get_timeline(timeline_id, false)
|
||||
.map_err(ApiError::NotFound)?;
|
||||
let timeline = timeline.map_err(ApiError::NotFound)?;
|
||||
|
||||
let timeline_info = build_timeline_info(
|
||||
tenant_state,
|
||||
&timeline,
|
||||
include_non_incremental_logical_size,
|
||||
include_non_incremental_physical_size,
|
||||
@@ -308,7 +322,6 @@ async fn get_lsn_by_timestamp_handler(request: Request<Body>) -> Result<Response
|
||||
let timestamp_pg = postgres_ffi::to_pg_timestamp(timestamp);
|
||||
|
||||
let timeline = tenant_mgr::get_tenant(tenant_id, true)
|
||||
.await
|
||||
.and_then(|tenant| tenant.get_timeline(timeline_id, true))
|
||||
.map_err(ApiError::NotFound)?;
|
||||
let result = match timeline
|
||||
@@ -334,13 +347,13 @@ async fn tenant_attach_handler(request: Request<Body>) -> Result<Response<Body>,
|
||||
|
||||
if let Some(remote_storage) = &state.remote_storage {
|
||||
// FIXME: distinguish between "Tenant already exists" and other errors
|
||||
tenant_mgr::attach_tenant(state.conf, tenant_id, remote_storage.clone())
|
||||
tenant_mgr::attach_tenant(state.conf, tenant_id, remote_storage)
|
||||
.instrument(info_span!("tenant_attach", tenant = %tenant_id))
|
||||
.await
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
} else {
|
||||
return Err(ApiError::BadRequest(anyhow!(
|
||||
"attach_tenant is not possible because pageserver was configured without remote storage"
|
||||
"attach_tenant is possible because pageserver was configured without remote storage"
|
||||
)));
|
||||
}
|
||||
|
||||
@@ -379,49 +392,23 @@ async fn tenant_detach_handler(request: Request<Body>) -> Result<Response<Body>,
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
|
||||
async fn tenant_load_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let state = get_state(&request);
|
||||
tenant_mgr::load_tenant(state.conf, tenant_id, state.remote_storage.clone())
|
||||
.instrument(info_span!("load", tenant = %tenant_id))
|
||||
.await
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
|
||||
json_response(StatusCode::ACCEPTED, ())
|
||||
}
|
||||
|
||||
async fn tenant_ignore_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let state = get_state(&request);
|
||||
let conf = state.conf;
|
||||
tenant_mgr::ignore_tenant(conf, tenant_id)
|
||||
.instrument(info_span!("ignore_tenant", tenant = %tenant_id))
|
||||
.await
|
||||
// FIXME: Errors from `ignore_tenant` can be caused by both user and internal errors.
|
||||
// Replace this with better handling once the error type permits it.
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
|
||||
async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
check_permission(&request, None)?;
|
||||
|
||||
let response_data = tenant_mgr::list_tenants()
|
||||
.instrument(info_span!("tenant_list"))
|
||||
.await
|
||||
.iter()
|
||||
.map(|(id, state)| TenantInfo {
|
||||
id: *id,
|
||||
state: *state,
|
||||
current_physical_size: None,
|
||||
has_in_progress_downloads: Some(state.has_in_progress_downloads()),
|
||||
})
|
||||
.collect::<Vec<TenantInfo>>();
|
||||
let response_data = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("tenant_list").entered();
|
||||
tenant_mgr::list_tenants()
|
||||
.iter()
|
||||
.map(|(id, state)| TenantInfo {
|
||||
id: *id,
|
||||
state: *state,
|
||||
current_physical_size: None,
|
||||
has_in_progress_downloads: Some(state.has_in_progress_downloads()),
|
||||
})
|
||||
.collect::<Vec<TenantInfo>>()
|
||||
})
|
||||
.await
|
||||
.map_err(|e: JoinError| ApiError::InternalServerError(e.into()))?;
|
||||
|
||||
json_response(StatusCode::OK, response_data)
|
||||
}
|
||||
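Several handlers in this hunk move synchronous `tenant_mgr` calls onto Tokio's blocking pool. A minimal sketch of that pattern, with simplified types and a made-up stand-in for the blocking call; the `JoinError` handling mirrors the `map_err` used in the handlers above:

async fn run_sync_tenant_call() -> Result<Vec<String>, anyhow::Error> {
    // Offload the synchronous call so it does not stall the async executor.
    let tenants = tokio::task::spawn_blocking(|| {
        // stand-in for a synchronous tenant_mgr::list_tenants()-style call
        Ok::<Vec<String>, anyhow::Error>(vec!["tenant-a".to_string()])
    })
    .await
    // A JoinError means the blocking task panicked or was cancelled.
    .map_err(|e: tokio::task::JoinError| anyhow::anyhow!(e))??;
    Ok(tenants)
}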
@@ -430,8 +417,9 @@ async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiErro
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let tenant_info = async {
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, false).await?;
|
||||
let tenant_info = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("tenant_status_handler", tenant = %tenant_id).entered();
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, false)?;
|
||||
|
||||
// Calculate total physical size of all timelines
|
||||
let mut current_physical_size = 0;
|
||||
@@ -440,15 +428,17 @@ async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiErro
|
||||
}
|
||||
|
||||
let state = tenant.current_state();
|
||||
Ok(TenantInfo {
|
||||
let tenant_info = TenantInfo {
|
||||
id: tenant_id,
|
||||
state,
|
||||
current_physical_size: Some(current_physical_size),
|
||||
has_in_progress_downloads: Some(state.has_in_progress_downloads()),
|
||||
})
|
||||
}
|
||||
.instrument(info_span!("tenant_status_handler", tenant = %tenant_id))
|
||||
};
|
||||
|
||||
Ok::<_, anyhow::Error>(tenant_info)
|
||||
})
|
||||
.await
|
||||
.map_err(|e: JoinError| ApiError::InternalServerError(e.into()))?
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
|
||||
json_response(StatusCode::OK, tenant_info)
|
||||
@@ -458,9 +448,7 @@ async fn tenant_size_handler(request: Request<Body>) -> Result<Response<Body>, A
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true)
|
||||
.await
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::InternalServerError)?;
|
||||
|
||||
// this can be long operation, it currently is not backed by any request coalescing or similar
|
||||
let inputs = tenant
|
||||
@@ -577,19 +565,22 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
|
||||
.map(TenantId::from)
|
||||
.unwrap_or_else(TenantId::generate);
|
||||
|
||||
let state = get_state(&request);
|
||||
let new_tenant = tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("tenant_create", tenant = ?target_tenant_id).entered();
|
||||
let state = get_state(&request);
|
||||
|
||||
let new_tenant = tenant_mgr::create_tenant(
|
||||
state.conf,
|
||||
tenant_conf,
|
||||
target_tenant_id,
|
||||
state.remote_storage.clone(),
|
||||
)
|
||||
.instrument(info_span!("tenant_create", tenant = ?target_tenant_id))
|
||||
tenant_mgr::create_tenant(
|
||||
state.conf,
|
||||
tenant_conf,
|
||||
target_tenant_id,
|
||||
state.remote_storage.clone(),
|
||||
)
|
||||
// FIXME: `create_tenant` can fail from both user and internal errors. Replace this
|
||||
// with better error handling once the type permits it
|
||||
.map_err(ApiError::InternalServerError)
|
||||
})
|
||||
.await
|
||||
// FIXME: `create_tenant` can fail from both user and internal errors. Replace this
|
||||
// with better error handling once the type permits it
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
.map_err(|e: JoinError| ApiError::InternalServerError(e.into()))??;
|
||||
|
||||
Ok(match new_tenant {
|
||||
Some(tenant) => {
|
||||
@@ -680,17 +671,22 @@ async fn tenant_config_handler(mut request: Request<Body>) -> Result<Response<Bo
|
||||
);
|
||||
}
|
||||
|
||||
let state = get_state(&request);
|
||||
tenant_mgr::update_tenant_config(state.conf, tenant_conf, tenant_id)
|
||||
.instrument(info_span!("tenant_config", tenant = ?tenant_id))
|
||||
.await
|
||||
// FIXME: `update_tenant_config` can fail because of both user and internal errors.
|
||||
// Replace this `map_err` with better error handling once the type permits it
|
||||
.map_err(ApiError::InternalServerError)?;
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let _enter = info_span!("tenant_config", tenant = ?tenant_id).entered();
|
||||
|
||||
let state = get_state(&request);
|
||||
tenant_mgr::update_tenant_config(state.conf, tenant_conf, tenant_id)
|
||||
// FIXME: `update_tenant_config` can fail because of both user and internal errors.
|
||||
// Replace this `map_err` with better error handling once the type permits it
|
||||
.map_err(ApiError::InternalServerError)
|
||||
})
|
||||
.await
|
||||
.map_err(|e: JoinError| ApiError::InternalServerError(e.into()))??;
|
||||
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
async fn failpoints_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
if !fail::has_failpoints() {
|
||||
return Err(ApiError::BadRequest(anyhow!(
|
||||
@@ -724,6 +720,7 @@ async fn failpoints_handler(mut request: Request<Body>) -> Result<Response<Body>
|
||||
}
|
||||
|
||||
// Run GC immediately on given timeline.
|
||||
#[cfg(feature = "testing")]
|
||||
async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
@@ -731,7 +728,7 @@ async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body
|
||||
|
||||
let gc_req: TimelineGcRequest = json_request(&mut request).await?;
|
||||
|
||||
let wait_task_done = tenant_mgr::immediate_gc(tenant_id, timeline_id, gc_req).await?;
|
||||
let wait_task_done = tenant_mgr::immediate_gc(tenant_id, timeline_id, gc_req)?;
|
||||
let gc_result = wait_task_done
|
||||
.await
|
||||
.context("wait for gc task")
|
||||
@@ -742,14 +739,13 @@ async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body
|
||||
}
|
||||
|
||||
// Run compaction immediately on given timeline.
|
||||
#[cfg(feature = "testing")]
|
||||
async fn timeline_compact_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true)
|
||||
.await
|
||||
.map_err(ApiError::NotFound)?;
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
|
||||
let timeline = tenant
|
||||
.get_timeline(timeline_id, true)
|
||||
.map_err(ApiError::NotFound)?;
|
||||
@@ -762,14 +758,13 @@ async fn timeline_compact_handler(request: Request<Body>) -> Result<Response<Bod
|
||||
}
|
||||
|
||||
// Run checkpoint immediately on given timeline.
|
||||
#[cfg(feature = "testing")]
|
||||
async fn timeline_checkpoint_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_id))?;
|
||||
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true)
|
||||
.await
|
||||
.map_err(ApiError::NotFound)?;
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, true).map_err(ApiError::NotFound)?;
|
||||
let timeline = tenant
|
||||
.get_timeline(timeline_id, true)
|
||||
.map_err(ApiError::NotFound)?;
|
||||
@@ -806,26 +801,22 @@ pub fn make_router(
|
||||
}))
|
||||
}
|
||||
|
||||
// A wrapper around a handler function that returns an error if the server
|
||||
// was not configured with testing_mode enabled. This is used to gate API
|
||||
// functions that should only be used in tests, never in production.
|
||||
macro_rules! testing_api {
|
||||
($handler_desc:literal, $handler:path $(,)?) => {{
|
||||
use futures::FutureExt;
|
||||
|req: Request<Body>| {
|
||||
if conf.testing_mode {
|
||||
$handler(req).left_future()
|
||||
} else {
|
||||
async {
|
||||
Err(ApiError::BadRequest(anyhow!(concat!(
|
||||
"Cannot ",
|
||||
$handler_desc,
|
||||
" because pageserver was configured without testing APIs",
|
||||
))))
|
||||
}
|
||||
.right_future()
|
||||
}
|
||||
#[cfg(not(feature = "testing"))]
|
||||
async fn cfg_disabled(_req: Request<Body>) -> Result<Response<Body>, ApiError> {
|
||||
Err(ApiError::BadRequest(anyhow!(concat!(
|
||||
"Cannot ",
|
||||
$handler_desc,
|
||||
" because pageserver was compiled without testing APIs",
|
||||
))))
|
||||
}
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
let handler = $handler;
|
||||
#[cfg(not(feature = "testing"))]
|
||||
let handler = cfg_disabled;
|
||||
handler
|
||||
}};
|
||||
}
|
||||
|
||||
@@ -847,8 +838,6 @@ pub fn make_router(
|
||||
.post("/v1/tenant/:tenant_id/timeline", timeline_create_handler)
|
||||
.post("/v1/tenant/:tenant_id/attach", tenant_attach_handler)
|
||||
.post("/v1/tenant/:tenant_id/detach", tenant_detach_handler)
|
||||
.post("/v1/tenant/:tenant_id/load", tenant_load_handler)
|
||||
.post("/v1/tenant/:tenant_id/ignore", tenant_ignore_handler)
|
||||
.get(
|
||||
"/v1/tenant/:tenant_id/timeline/:timeline_id",
|
||||
timeline_detail_handler,
|
||||
|
||||
@@ -125,13 +125,6 @@ pub const TEMP_FILE_SUFFIX: &str = "___temp";
|
||||
/// Full path: `tenants/<tenant_id>/timelines/<timeline_id>___uninit`.
|
||||
pub const TIMELINE_UNINIT_MARK_SUFFIX: &str = "___uninit";
|
||||
|
||||
/// A marker file to prevent pageserver from loading a certain tenant on restart.
|
||||
/// Different from [`TIMELINE_UNINIT_MARK_SUFFIX`] due to semantics of the corresponding
|
||||
/// `ignore` management API command, that expects the ignored tenant to be properly loaded
|
||||
/// into pageserver's memory before being ignored.
|
||||
/// Full path: `tenants/<tenant_id>/___ignored_tenant`.
|
||||
pub const IGNORED_TENANT_FILE_NAME: &str = "___ignored_tenant";
|
||||
|
||||
pub fn is_temporary(path: &Path) -> bool {
|
||||
match path.file_name() {
|
||||
Some(name) => name.to_string_lossy().ends_with(TEMP_FILE_SUFFIX),
|
||||
@@ -148,28 +141,6 @@ pub fn is_uninit_mark(path: &Path) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Wrapper around fail::fail_point! macro that returns quickly if testing_mode was
|
||||
/// disabled in the pageserver config. Also enabled in unit tests.
|
||||
///
|
||||
/// fail::fail_point! is fairly quick, but it does acquire an RwLock and perform a HashMap
|
||||
/// lookup. This macro is hopefully cheap enough that we don't need to worry about the
|
||||
/// overhead even in production, and even if the macro is used in hot spots. (This check
|
||||
/// compiles to two cmp instructions; get_unchecked() would shrink it to one.)
|
||||
///
|
||||
#[macro_export]
|
||||
macro_rules! fail_point {
|
||||
($($name:expr),*) => {{
|
||||
if cfg!(test) || *$crate::TESTING_MODE.get().expect("testing_mode not initialized") {
|
||||
fail::fail_point!($($name), *)
|
||||
}
|
||||
}};
|
||||
}
|
||||
|
||||
/// This is set early in the pageserver startup, from the "testing_mode" setting in the
|
||||
/// config file.
|
||||
pub static TESTING_MODE: once_cell::sync::OnceCell<bool> = once_cell::sync::OnceCell::new();
|
||||
|
||||
#[cfg(test)]
|
||||
mod backoff_defaults_tests {
|
||||
use super::*;
|
||||
|
||||
@@ -941,7 +941,7 @@ impl postgres_backend_async::Handler for PageServerHandler {
|
||||
/// ensures that queries don't fail immediately after pageserver startup, because
|
||||
/// all tenants are still loading.
|
||||
async fn get_active_tenant_with_timeout(tenant_id: TenantId) -> Result<Arc<Tenant>> {
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, false).await?;
|
||||
let tenant = tenant_mgr::get_tenant(tenant_id, false)?;
|
||||
match tokio::time::timeout(Duration::from_secs(30), tenant.wait_to_become_active()).await {
|
||||
Ok(wait_result) => wait_result
|
||||
// no .context(), the error message is good enough and some tests depend on it
|
||||
|
||||
@@ -202,9 +202,9 @@ use std::sync::atomic::{AtomicU32, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use anyhow::ensure;
|
||||
use remote_storage::{DownloadError, GenericRemoteStorage, RemotePath};
|
||||
use remote_storage::{DownloadError, GenericRemoteStorage};
|
||||
use tokio::runtime::Runtime;
|
||||
use tracing::{info, warn};
|
||||
use tracing::{error, info, warn};
|
||||
use tracing::{info_span, Instrument};
|
||||
|
||||
use utils::lsn::Lsn;
|
||||
@@ -217,7 +217,7 @@ use crate::metrics::RemoteOpKind;
|
||||
use crate::metrics::REMOTE_UPLOAD_QUEUE_UNFINISHED_TASKS;
|
||||
use crate::{
|
||||
config::PageServerConf,
|
||||
storage_sync::index::LayerFileMetadata,
|
||||
storage_sync::index::{LayerFileMetadata, RelativePath},
|
||||
task_mgr,
|
||||
task_mgr::TaskKind,
|
||||
task_mgr::BACKGROUND_RUNTIME,
|
||||
@@ -287,7 +287,7 @@ struct UploadQueueInitialized {
|
||||
|
||||
/// All layer files stored in the remote storage, taking into account all
|
||||
/// in-progress and queued operations
|
||||
latest_files: HashMap<RemotePath, LayerFileMetadata>,
|
||||
latest_files: HashMap<RelativePath, LayerFileMetadata>,
|
||||
|
||||
/// Metadata stored in the remote storage, taking into account all
|
||||
/// in-progress and queued operations.
|
||||
@@ -337,18 +337,18 @@ impl UploadQueue {
|
||||
|
||||
let state = UploadQueueInitialized {
|
||||
// As described in the doc comment, it's ok for `latest_files` and `latest_metadata` to be ahead.
|
||||
latest_files: HashMap::new(),
|
||||
latest_files: Default::default(),
|
||||
latest_metadata: metadata.clone(),
|
||||
// We haven't uploaded anything yet, so, `last_uploaded_consistent_lsn` must be 0 to prevent
|
||||
// safekeepers from garbage-collecting anything.
|
||||
last_uploaded_consistent_lsn: Lsn(0),
|
||||
// what follows are boring default initializations
|
||||
task_counter: 0,
|
||||
task_counter: Default::default(),
|
||||
num_inprogress_layer_uploads: 0,
|
||||
num_inprogress_metadata_uploads: 0,
|
||||
num_inprogress_deletions: 0,
|
||||
inprogress_tasks: HashMap::new(),
|
||||
queued_operations: VecDeque::new(),
|
||||
inprogress_tasks: Default::default(),
|
||||
queued_operations: Default::default(),
|
||||
};
|
||||
|
||||
*self = UploadQueue::Initialized(state);
|
||||
@@ -357,10 +357,6 @@ impl UploadQueue {
|
||||
|
||||
fn initialize_with_current_remote_index_part(
|
||||
&mut self,
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
|
||||
index_part: &IndexPart,
|
||||
) -> anyhow::Result<&mut UploadQueueInitialized> {
|
||||
match self {
|
||||
@@ -370,19 +366,14 @@ impl UploadQueue {
|
||||
}
|
||||
}
|
||||
|
||||
let mut files = HashMap::with_capacity(index_part.timeline_layers.len());
|
||||
let timeline_path = conf.timeline_path(&timeline_id, &tenant_id);
|
||||
for timeline_name in &index_part.timeline_layers {
|
||||
let local_path = timeline_path.join(timeline_name);
|
||||
let remote_timeline_path = conf.remote_path(&local_path).expect(
|
||||
"Remote timeline path and local timeline path were constructed form the same conf",
|
||||
);
|
||||
let mut files = HashMap::new();
|
||||
for path in &index_part.timeline_layers {
|
||||
let layer_metadata = index_part
|
||||
.layer_metadata
|
||||
.get(timeline_name)
|
||||
.get(path)
|
||||
.map(LayerFileMetadata::from)
|
||||
.unwrap_or(LayerFileMetadata::MISSING);
|
||||
files.insert(remote_timeline_path, layer_metadata);
|
||||
files.insert(path.clone(), layer_metadata);
|
||||
}
|
||||
|
||||
let index_part_metadata = index_part.parse_metadata()?;
|
||||
@@ -400,8 +391,8 @@ impl UploadQueue {
|
||||
num_inprogress_layer_uploads: 0,
|
||||
num_inprogress_metadata_uploads: 0,
|
||||
num_inprogress_deletions: 0,
|
||||
inprogress_tasks: HashMap::new(),
|
||||
queued_operations: VecDeque::new(),
|
||||
inprogress_tasks: Default::default(),
|
||||
queued_operations: Default::default(),
|
||||
};
|
||||
|
||||
*self = UploadQueue::Initialized(state);
|
||||
@@ -465,12 +456,7 @@ impl RemoteTimelineClient {
|
||||
/// The given `index_part` must be the one on the remote.
|
||||
pub fn init_upload_queue(&self, index_part: &IndexPart) -> anyhow::Result<()> {
|
||||
let mut upload_queue = self.upload_queue.lock().unwrap();
|
||||
upload_queue.initialize_with_current_remote_index_part(
|
||||
self.conf,
|
||||
self.tenant_id,
|
||||
self.timeline_id,
|
||||
index_part,
|
||||
)?;
|
||||
upload_queue.initialize_with_current_remote_index_part(index_part)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -524,13 +510,15 @@ impl RemoteTimelineClient {
|
||||
/// On success, returns the size of the downloaded file.
|
||||
pub async fn download_layer_file(
|
||||
&self,
|
||||
remote_path: &RemotePath,
|
||||
path: &RelativePath,
|
||||
layer_metadata: &LayerFileMetadata,
|
||||
) -> anyhow::Result<u64> {
|
||||
let downloaded_size = download::download_layer_file(
|
||||
self.conf,
|
||||
&self.storage_impl,
|
||||
remote_path,
|
||||
self.tenant_id,
|
||||
self.timeline_id,
|
||||
path,
|
||||
layer_metadata,
|
||||
)
|
||||
.measure_remote_op(
|
||||
@@ -548,13 +536,13 @@ impl RemoteTimelineClient {
|
||||
let new_metadata = LayerFileMetadata::new(downloaded_size);
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
if let Some(upgraded) = upload_queue.latest_files.get_mut(remote_path) {
|
||||
if let Some(upgraded) = upload_queue.latest_files.get_mut(path) {
|
||||
upgraded.merge(&new_metadata);
|
||||
} else {
|
||||
// The file should exist, since we just downloaded it.
|
||||
warn!(
|
||||
"downloaded file {:?} not found in local copy of the index file",
|
||||
remote_path
|
||||
path
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -624,9 +612,14 @@ impl RemoteTimelineClient {
|
||||
"file size not initialized in metadata"
|
||||
);
|
||||
|
||||
let relative_path = RelativePath::from_local_path(
|
||||
&self.conf.timeline_path(&self.timeline_id, &self.tenant_id),
|
||||
path,
|
||||
)?;
|
||||
|
||||
upload_queue
|
||||
.latest_files
|
||||
.insert(self.conf.remote_path(path)?, layer_metadata.clone());
|
||||
.insert(relative_path, layer_metadata.clone());
|
||||
|
||||
let op = UploadOp::UploadLayer(PathBuf::from(path), layer_metadata.clone());
|
||||
self.update_upload_queue_unfinished_metric(1, &op);
|
||||
@@ -648,10 +641,13 @@ impl RemoteTimelineClient {
|
||||
let mut guard = self.upload_queue.lock().unwrap();
|
||||
let upload_queue = guard.initialized_mut()?;
|
||||
|
||||
// Convert the paths into RemotePaths, and gather other information we need.
|
||||
let mut remote_paths = Vec::with_capacity(paths.len());
|
||||
// Convert the paths into RelativePaths, and gather other information we need.
|
||||
let mut relative_paths = Vec::with_capacity(paths.len());
|
||||
for path in paths {
|
||||
remote_paths.push(self.conf.remote_path(path)?);
|
||||
relative_paths.push(RelativePath::from_local_path(
|
||||
&self.conf.timeline_path(&self.timeline_id, &self.tenant_id),
|
||||
path,
|
||||
)?);
|
||||
}
|
||||
|
||||
// Deleting layers doesn't affect the values stored in TimelineMetadata,
|
||||
@@ -667,8 +663,8 @@ impl RemoteTimelineClient {
|
||||
// from latest_files, but not yet scheduled for deletion. Use a closure
|
||||
// to syntactically forbid ? or bail! calls here.
|
||||
let no_bail_here = || {
|
||||
for remote_path in remote_paths {
|
||||
upload_queue.latest_files.remove(&remote_path);
|
||||
for relative_path in relative_paths {
|
||||
upload_queue.latest_files.remove(&relative_path);
|
||||
}
|
||||
|
||||
let index_part = IndexPart::new(
|
||||
@@ -842,19 +838,14 @@ impl RemoteTimelineClient {
|
||||
|
||||
let upload_result: anyhow::Result<()> = match &task.op {
|
||||
UploadOp::UploadLayer(ref path, ref layer_metadata) => {
|
||||
upload::upload_timeline_layer(
|
||||
self.conf,
|
||||
&self.storage_impl,
|
||||
path,
|
||||
layer_metadata,
|
||||
)
|
||||
.measure_remote_op(
|
||||
self.tenant_id,
|
||||
self.timeline_id,
|
||||
RemoteOpFileKind::Layer,
|
||||
RemoteOpKind::Upload,
|
||||
)
|
||||
.await
|
||||
upload::upload_timeline_layer(&self.storage_impl, path, layer_metadata)
|
||||
.measure_remote_op(
|
||||
self.tenant_id,
|
||||
self.timeline_id,
|
||||
RemoteOpFileKind::Layer,
|
||||
RemoteOpKind::Upload,
|
||||
)
|
||||
.await
|
||||
}
|
||||
UploadOp::UploadMetadata(ref index_part, _lsn) => {
|
||||
upload::upload_index_part(
|
||||
@@ -873,7 +864,7 @@ impl RemoteTimelineClient {
|
||||
.await
|
||||
}
|
||||
UploadOp::Delete(metric_file_kind, ref path) => {
|
||||
delete::delete_layer(self.conf, &self.storage_impl, path)
|
||||
delete::delete_layer(&self.storage_impl, path)
|
||||
.measure_remote_op(
|
||||
self.tenant_id,
|
||||
self.timeline_id,
|
||||
@@ -897,20 +888,10 @@ impl RemoteTimelineClient {
|
||||
Err(e) => {
|
||||
let retries = task.retries.fetch_add(1, Ordering::SeqCst);
|
||||
|
||||
// uploads may fail due to rate limts (IAM, S3) or spurious network and external errors
|
||||
// such issues are relatively regular, so don't use WARN or ERROR to avoid alerting
|
||||
// people and tests until the retries are definitely causing delays.
|
||||
if retries < 3 {
|
||||
info!(
|
||||
"failed to perform remote task {}, will retry (attempt {}): {:?}",
|
||||
task.op, retries, e
|
||||
);
|
||||
} else {
|
||||
warn!(
|
||||
"failed to perform remote task {}, will retry (attempt {}): {:?}",
|
||||
task.op, retries, e
|
||||
);
|
||||
}
|
||||
error!(
|
||||
"failed to perform remote task {}, will retry (attempt {}): {:?}",
|
||||
task.op, retries, e
|
||||
);
|
||||
|
||||
// sleep until it's time to retry, or we're cancelled
|
||||
tokio::select! {
|
||||
@@ -1102,11 +1083,15 @@ mod tests {
|
||||
TimelineMetadata::from_bytes(&metadata.to_bytes().unwrap()).unwrap()
|
||||
}
|
||||
|
||||
fn assert_file_list(a: &HashSet<String>, b: &[&str]) {
|
||||
let mut avec: Vec<&str> = a.iter().map(|a| a.as_str()).collect();
|
||||
fn assert_file_list(a: &HashSet<RelativePath>, b: &[&str]) {
|
||||
let xx = PathBuf::from("");
|
||||
let mut avec: Vec<String> = a
|
||||
.iter()
|
||||
.map(|x| x.to_local_path(&xx).to_string_lossy().into())
|
||||
.collect();
|
||||
avec.sort();
|
||||
|
||||
let mut bvec = b.to_vec();
|
||||
let mut bvec = b.to_owned();
|
||||
bvec.sort_unstable();
|
||||
|
||||
assert_eq!(avec, bvec);
|
||||
@@ -1174,7 +1159,8 @@ mod tests {
|
||||
|
||||
println!("workdir: {}", harness.conf.workdir.display());
|
||||
|
||||
let storage_impl = GenericRemoteStorage::from_config(&storage_config)?;
|
||||
let storage_impl =
|
||||
GenericRemoteStorage::from_config(harness.conf.workdir.clone(), &storage_config)?;
|
||||
let client = Arc::new(RemoteTimelineClient {
|
||||
conf: harness.conf,
|
||||
runtime,
|
||||
|
||||
@@ -5,24 +5,34 @@ use tracing::debug;
|
||||
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
|
||||
pub(super) async fn delete_layer<'a>(
|
||||
conf: &'static PageServerConf,
|
||||
storage: &'a GenericRemoteStorage,
|
||||
local_layer_path: &'a Path,
|
||||
pub(super) async fn delete_layer(
|
||||
storage: &GenericRemoteStorage,
|
||||
local_layer_path: &Path,
|
||||
) -> anyhow::Result<()> {
|
||||
crate::fail_point!("before-delete-layer", |_| {
|
||||
fail::fail_point!("before-delete-layer", |_| {
|
||||
anyhow::bail!("failpoint before-delete-layer")
|
||||
});
|
||||
debug!("Deleting layer from remote storage: {local_layer_path:?}",);
|
||||
debug!(
|
||||
"Deleting layer from remote storage: {:?}",
|
||||
local_layer_path.display()
|
||||
);
|
||||
|
||||
let path_to_delete = conf.remote_path(local_layer_path)?;
|
||||
let storage_path = storage
|
||||
.remote_object_id(local_layer_path)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to get the layer storage path for local path '{}'",
|
||||
local_layer_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
// XXX: If the deletion fails because the object already didn't exist,
|
||||
// it would be good to just issue a warning but consider it success.
|
||||
// https://github.com/neondatabase/neon/issues/2934
|
||||
storage.delete(&path_to_delete).await.with_context(|| {
|
||||
format!("Failed to delete remote layer from storage at {path_to_delete:?}")
|
||||
storage.delete(&storage_path).await.with_context(|| {
|
||||
format!(
|
||||
"Failed to delete remote layer from storage at '{:?}'",
|
||||
storage_path
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
@@ -10,11 +10,12 @@ use tracing::debug;
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
use crate::storage_sync::index::LayerFileMetadata;
|
||||
use remote_storage::{DownloadError, GenericRemoteStorage, RemotePath};
|
||||
use remote_storage::{DownloadError, GenericRemoteStorage};
|
||||
use utils::crashsafe::path_with_suffix_extension;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
|
||||
use super::index::IndexPart;
|
||||
use super::RelativePath;
|
||||
|
||||
async fn fsync_path(path: impl AsRef<std::path::Path>) -> Result<(), std::io::Error> {
|
||||
fs::File::open(path).await?.sync_all().await
|
||||
@@ -28,10 +29,21 @@ async fn fsync_path(path: impl AsRef<std::path::Path>) -> Result<(), std::io::Er
|
||||
pub async fn download_layer_file<'a>(
|
||||
conf: &'static PageServerConf,
|
||||
storage: &'a GenericRemoteStorage,
|
||||
remote_path: &'a RemotePath,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
path: &'a RelativePath,
|
||||
layer_metadata: &'a LayerFileMetadata,
|
||||
) -> anyhow::Result<u64> {
|
||||
let local_path = conf.local_path(remote_path);
|
||||
let timeline_path = conf.timeline_path(&timeline_id, &tenant_id);
|
||||
|
||||
let local_path = path.to_local_path(&timeline_path);
|
||||
|
||||
let layer_storage_path = storage.remote_object_id(&local_path).with_context(|| {
|
||||
format!(
|
||||
"Failed to get the layer storage path for local path '{}'",
|
||||
local_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
// Perform a rename inspired by durable_rename from file_utils.c.
|
||||
// The sequence:
|
||||
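// Editor's note: the "durable_rename" sequence referred to above, sketched as a
// free-standing helper (names and error handling are illustrative, not the
// pageserver's actual code): fsync the temp file, rename it into place, then
// fsync the parent directory so the rename itself survives a crash. Opening a
// directory just to fsync it like this works on Linux.
use std::path::Path;

use anyhow::Context;

async fn durable_rename_sketch(temp_path: &Path, dest_path: &Path) -> anyhow::Result<()> {
    tokio::fs::File::open(temp_path).await?.sync_all().await?;
    tokio::fs::rename(temp_path, dest_path).await?;
    let parent = dest_path
        .parent()
        .context("destination has no parent directory")?;
    tokio::fs::File::open(parent).await?.sync_all().await?;
    Ok(())
}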
@@ -52,13 +64,18 @@ pub async fn download_layer_file<'a>(
|
||||
temp_file_path.display()
|
||||
)
|
||||
})?;
|
||||
let mut download = storage.download(remote_path).await.with_context(|| {
|
||||
format!(
|
||||
"Failed to open a download stream for layer with remote storage path '{remote_path:?}'"
|
||||
)
|
||||
})?;
|
||||
let mut download = storage
|
||||
.download(&layer_storage_path)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to open a download stream for layer with remote storage path '{layer_storage_path:?}'"
|
||||
)
|
||||
})?;
|
||||
let bytes_amount = tokio::io::copy(&mut download.download_stream, &mut destination_file).await.with_context(|| {
|
||||
format!("Failed to download layer with remote storage path '{remote_path:?}' into file {temp_file_path:?}")
|
||||
format!(
|
||||
"Failed to download layer with remote storage path '{layer_storage_path:?}' into file '{}'", temp_file_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
// Tokio doc here: https://docs.rs/tokio/1.17.0/tokio/fs/struct.File.html states that:
|
||||
@@ -97,7 +114,7 @@ pub async fn download_layer_file<'a>(
|
||||
})?;
|
||||
drop(destination_file);
|
||||
|
||||
crate::fail_point!("remote-storage-download-pre-rename", |_| {
|
||||
fail::fail_point!("remote-storage-download-pre-rename", |_| {
|
||||
bail!("remote-storage-download-pre-rename failpoint triggered")
|
||||
});
|
||||
|
||||
@@ -134,7 +151,12 @@ pub async fn list_remote_timelines<'a>(
|
||||
tenant_id: TenantId,
|
||||
) -> anyhow::Result<Vec<(TimelineId, IndexPart)>> {
|
||||
let tenant_path = conf.timelines_path(&tenant_id);
|
||||
let tenant_storage_path = conf.remote_path(&tenant_path)?;
|
||||
let tenant_storage_path = storage.remote_object_id(&tenant_path).with_context(|| {
|
||||
format!(
|
||||
"Failed to get tenant storage path for local path '{}'",
|
||||
tenant_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
let timelines = storage
|
||||
.list_prefixes(Some(&tenant_storage_path))
|
||||
@@ -196,8 +218,14 @@ pub async fn download_index_part(
|
||||
let index_part_path = conf
|
||||
.metadata_path(timeline_id, tenant_id)
|
||||
.with_file_name(IndexPart::FILE_NAME);
|
||||
let part_storage_path = conf
|
||||
.remote_path(&index_part_path)
|
||||
let part_storage_path = storage
|
||||
.remote_object_id(&index_part_path)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to get the index part storage path for local path '{}'",
|
||||
index_part_path.display()
|
||||
)
|
||||
})
|
||||
.map_err(DownloadError::BadInput)?;
|
||||
|
||||
let mut index_part_download = storage.download(&part_storage_path).await?;
|
||||
@@ -208,12 +236,20 @@ pub async fn download_index_part(
|
||||
&mut index_part_bytes,
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("Failed to download an index part into file {index_part_path:?}"))
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to download an index part into file '{}'",
|
||||
index_part_path.display()
|
||||
)
|
||||
})
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
let index_part: IndexPart = serde_json::from_slice(&index_part_bytes)
|
||||
.with_context(|| {
|
||||
format!("Failed to deserialize index part file into file {index_part_path:?}")
|
||||
format!(
|
||||
"Failed to deserialize index part file into file '{}'",
|
||||
index_part_path.display()
|
||||
)
|
||||
})
|
||||
.map_err(DownloadError::Other)?;
|
||||
|
||||
|
||||
@@ -2,9 +2,12 @@
|
||||
//! Able to restore itself from the storage index parts, that are located in every timeline's remote directory and contain all data about
|
||||
//! remote timeline layers and its metadata.
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use remote_storage::RemotePath;
|
||||
use anyhow::{Context, Ok};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_with::{serde_as, DisplayFromStr};
|
||||
|
||||
@@ -12,6 +15,33 @@ use crate::tenant::metadata::TimelineMetadata;
|
||||
|
||||
use utils::lsn::Lsn;
|
||||
|
||||
/// A part of the filesystem path, that needs a root to become a path again.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(transparent)]
pub struct RelativePath(String);

impl RelativePath {
    /// Attempts to strip off the base from path, producing a relative path or an error.
    pub fn from_local_path(timeline_path: &Path, path: &Path) -> anyhow::Result<RelativePath> {
        let relative = path.strip_prefix(timeline_path).with_context(|| {
            format!(
                "path '{}' is not relative to base '{}'",
                path.display(),
                timeline_path.display()
            )
        })?;
        Ok(Self::from_filename(relative))
    }

    pub fn from_filename(path: &Path) -> RelativePath {
        RelativePath(path.to_string_lossy().to_string())
    }

    pub fn to_local_path(&self, timeline_path: &Path) -> PathBuf {
        timeline_path.join(&self.0)
    }
}
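// Editor's note: a minimal usage sketch of RelativePath (the paths are made up
// for illustration). A layer path is stripped down to its name relative to the
// timeline directory and can later be turned back into a full local path.
#[cfg(test)]
mod relative_path_example {
    use super::*;
    use std::path::Path;

    #[test]
    fn roundtrip() {
        let timeline_dir = Path::new("/storage/pageserver/tenants/t/timelines/tl");
        let layer = timeline_dir.join("some_layer_file_name");
        let rel = RelativePath::from_local_path(timeline_dir, &layer).unwrap();
        assert_eq!(rel.to_local_path(timeline_dir), layer);
    }
}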
|
||||
/// Metadata gathered for each of the layer files.
|
||||
///
|
||||
/// Fields have to be `Option`s because remote [`IndexPart`]'s can be from different version, which
|
||||
@@ -67,22 +97,21 @@ pub struct IndexPart {
|
||||
#[serde(default)]
|
||||
version: usize,
|
||||
|
||||
/// Layer names, which are stored on the remote storage.
|
||||
/// Each of the layers present on remote storage.
|
||||
///
|
||||
/// Additional metadata can might exist in `layer_metadata`.
|
||||
pub timeline_layers: HashSet<String>,
|
||||
pub timeline_layers: HashSet<RelativePath>,
|
||||
|
||||
/// FIXME: unused field. This should be removed, but that changes the on-disk format,
|
||||
/// so we need to make sure we're backwards-` (and maybe forwards-) compatible
|
||||
/// First pass is to move it to Optional and the next would be its removal
|
||||
missing_layers: Option<HashSet<String>>,
|
||||
/// so we need to make sure we're backwards- (and maybe forwards-) compatible
|
||||
missing_layers: HashSet<RelativePath>,
|
||||
|
||||
/// Per layer file name metadata, which can be present for a present or missing layer file.
|
||||
/// Per layer file metadata, which can be present for a present or missing layer file.
|
||||
///
|
||||
/// Older versions of `IndexPart` will not have this property or have only a part of metadata
|
||||
/// that latest version stores.
|
||||
#[serde(default)]
|
||||
pub layer_metadata: HashMap<String, IndexLayerMetadata>,
|
||||
pub layer_metadata: HashMap<RelativePath, IndexLayerMetadata>,
|
||||
|
||||
// 'disk_consistent_lsn' is a copy of the 'disk_consistent_lsn' in the metadata.
|
||||
// It's duplicated here for convenience.
|
||||
@@ -100,29 +129,23 @@ impl IndexPart {
|
||||
pub const FILE_NAME: &'static str = "index_part.json";
|
||||
|
||||
pub fn new(
|
||||
layers_and_metadata: HashMap<RemotePath, LayerFileMetadata>,
|
||||
layers_and_metadata: HashMap<RelativePath, LayerFileMetadata>,
|
||||
disk_consistent_lsn: Lsn,
|
||||
metadata_bytes: Vec<u8>,
|
||||
) -> Self {
|
||||
let mut timeline_layers = HashSet::with_capacity(layers_and_metadata.len());
|
||||
let mut layer_metadata = HashMap::with_capacity(layers_and_metadata.len());
|
||||
let mut timeline_layers = HashSet::new();
|
||||
let mut layer_metadata = HashMap::new();
|
||||
|
||||
for (remote_path, metadata) in &layers_and_metadata {
|
||||
let metadata = IndexLayerMetadata::from(metadata);
|
||||
match remote_path.object_name() {
|
||||
Some(layer_name) => {
|
||||
timeline_layers.insert(layer_name.to_owned());
|
||||
layer_metadata.insert(layer_name.to_owned(), metadata);
|
||||
}
|
||||
// TODO move this on a type level: we know, that every layer entry does have a name
|
||||
None => panic!("Layer {remote_path:?} has no file name, skipping"),
|
||||
}
|
||||
}
|
||||
separate_paths_and_metadata(
|
||||
&layers_and_metadata,
|
||||
&mut timeline_layers,
|
||||
&mut layer_metadata,
|
||||
);
|
||||
|
||||
Self {
|
||||
version: Self::LATEST_VERSION,
|
||||
timeline_layers,
|
||||
missing_layers: Some(HashSet::new()),
|
||||
missing_layers: HashSet::new(),
|
||||
layer_metadata,
|
||||
disk_consistent_lsn,
|
||||
metadata_bytes,
|
||||
@@ -148,6 +171,18 @@ impl From<&'_ LayerFileMetadata> for IndexLayerMetadata {
|
||||
}
|
||||
}
|
||||
|
||||
fn separate_paths_and_metadata(
|
||||
input: &HashMap<RelativePath, LayerFileMetadata>,
|
||||
output: &mut HashSet<RelativePath>,
|
||||
layer_metadata: &mut HashMap<RelativePath, IndexLayerMetadata>,
|
||||
) {
|
||||
for (path, metadata) in input {
|
||||
let metadata = IndexLayerMetadata::from(metadata);
|
||||
layer_metadata.insert(path.clone(), metadata);
|
||||
output.insert(path.clone());
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -163,8 +198,8 @@ mod tests {
|
||||
|
||||
let expected = IndexPart {
|
||||
version: 0,
|
||||
timeline_layers: HashSet::from([String::from("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9")]),
|
||||
missing_layers: Some(HashSet::from([String::from("not_a_real_layer_but_adding_coverage")])),
|
||||
timeline_layers: [RelativePath("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".to_owned())].into_iter().collect(),
|
||||
missing_layers: [RelativePath("not_a_real_layer_but_adding_coverage".to_owned())].into_iter().collect(),
|
||||
layer_metadata: HashMap::default(),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
metadata_bytes: [113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0].to_vec(),
|
||||
@@ -191,13 +226,13 @@ mod tests {
|
||||
let expected = IndexPart {
|
||||
// note this is not verified, could be anything, but exists for humans debugging.. could be the git version instead?
|
||||
version: 1,
|
||||
timeline_layers: HashSet::from([String::from("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9")]),
|
||||
missing_layers: Some(HashSet::from([String::from("not_a_real_layer_but_adding_coverage")])),
|
||||
timeline_layers: [RelativePath("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".to_owned())].into_iter().collect(),
|
||||
missing_layers: [RelativePath("not_a_real_layer_but_adding_coverage".to_owned())].into_iter().collect(),
|
||||
layer_metadata: HashMap::from([
|
||||
(String::from("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9"), IndexLayerMetadata {
|
||||
(RelativePath("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".to_owned()), IndexLayerMetadata {
|
||||
file_size: Some(25600000),
|
||||
}),
|
||||
(String::from("not_a_real_layer_but_adding_coverage"), IndexLayerMetadata {
|
||||
(RelativePath("not_a_real_layer_but_adding_coverage".to_owned()), IndexLayerMetadata {
|
||||
// serde_json should always parse this but this might be a double with jq for
|
||||
// example.
|
||||
file_size: Some(9007199254741001),
|
||||
@@ -210,46 +245,4 @@ mod tests {
|
||||
let part = serde_json::from_str::<IndexPart>(example).unwrap();
|
||||
assert_eq!(part, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn v1_indexpart_is_parsed_with_optional_missing_layers() {
|
||||
let example = r#"{
|
||||
"version":1,
|
||||
"timeline_layers":["000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9"],
|
||||
"layer_metadata":{
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000 },
|
||||
"not_a_real_layer_but_adding_coverage": { "file_size": 9007199254741001 }
|
||||
},
|
||||
"disk_consistent_lsn":"0/16960E8",
|
||||
"metadata_bytes":[112,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
|
||||
}"#;
|
||||
|
||||
let expected = IndexPart {
|
||||
// note this is not verified, could be anything, but exists for humans debugging.. could be the git version instead?
|
||||
version: 1,
|
||||
timeline_layers: HashSet::from(["000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".to_string()]),
|
||||
layer_metadata: HashMap::from([
|
||||
(
|
||||
"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".to_string(),
|
||||
IndexLayerMetadata {
|
||||
file_size: Some(25600000),
|
||||
}
|
||||
),
|
||||
(
|
||||
"not_a_real_layer_but_adding_coverage".to_string(),
|
||||
IndexLayerMetadata {
|
||||
// serde_json should always parse this but this might be a double with jq for
|
||||
// example.
|
||||
file_size: Some(9007199254741001),
|
||||
}
|
||||
)
|
||||
]),
|
||||
disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
|
||||
metadata_bytes: [112,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0].to_vec(),
|
||||
missing_layers: None,
|
||||
};
|
||||
|
||||
let part = serde_json::from_str::<IndexPart>(example).unwrap();
|
||||
assert_eq!(part, expected);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
//! Helper functions to upload files to remote storage with a RemoteStorage
|
||||
|
||||
use anyhow::{bail, Context};
|
||||
use fail::fail_point;
|
||||
use std::path::Path;
|
||||
use tokio::fs;
|
||||
|
||||
use super::index::IndexPart;
|
||||
use crate::config::PageServerConf;
|
||||
use crate::fail_point;
|
||||
use crate::storage_sync::LayerFileMetadata;
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
@@ -30,9 +30,12 @@ pub(super) async fn upload_index_part<'a>(
|
||||
let index_part_path = conf
|
||||
.metadata_path(timeline_id, tenant_id)
|
||||
.with_file_name(IndexPart::FILE_NAME);
|
||||
let storage_path = conf.remote_path(&index_part_path)?;
|
||||
storage
|
||||
.upload_storage_object(Box::new(index_part_bytes), index_part_size, &storage_path)
|
||||
.upload_storage_object(
|
||||
Box::new(index_part_bytes),
|
||||
index_part_size,
|
||||
&index_part_path,
|
||||
)
|
||||
.await
|
||||
.with_context(|| format!("Failed to upload index part for '{tenant_id} / {timeline_id}'"))
|
||||
}
|
||||
@@ -41,26 +44,36 @@ pub(super) async fn upload_index_part<'a>(
|
||||
/// No extra checks for overlapping files is made and any files that are already present remotely will be overwritten, if submitted during the upload.
|
||||
///
|
||||
/// On an error, bumps the retries count and reschedules the entire task.
|
||||
pub(super) async fn upload_timeline_layer<'a>(
|
||||
conf: &'static PageServerConf,
|
||||
storage: &'a GenericRemoteStorage,
|
||||
source_path: &'a Path,
|
||||
known_metadata: &'a LayerFileMetadata,
|
||||
pub(super) async fn upload_timeline_layer(
|
||||
storage: &GenericRemoteStorage,
|
||||
source_path: &Path,
|
||||
known_metadata: &LayerFileMetadata,
|
||||
) -> anyhow::Result<()> {
|
||||
fail_point!("before-upload-layer", |_| {
|
||||
bail!("failpoint before-upload-layer")
|
||||
});
|
||||
let storage_path = conf.remote_path(source_path)?;
|
||||
let storage_path = storage.remote_object_id(source_path).with_context(|| {
|
||||
format!(
|
||||
"Failed to get the layer storage path for local path '{}'",
|
||||
source_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
let source_file = fs::File::open(&source_path)
|
||||
.await
|
||||
.with_context(|| format!("Failed to open a source file for layer {source_path:?}"))?;
|
||||
let source_file = fs::File::open(&source_path).await.with_context(|| {
|
||||
format!(
|
||||
"Failed to open a source file for layer '{}'",
|
||||
source_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
let fs_size = source_file
|
||||
.metadata()
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("Failed to get the source file metadata for layer {source_path:?}")
|
||||
format!(
|
||||
"Failed to get the source file metadata for layer '{}'",
|
||||
source_path.display()
|
||||
)
|
||||
})?
|
||||
.len();
|
||||
|
||||
|
||||
@@ -46,7 +46,6 @@ use std::time::{Duration, Instant};
|
||||
|
||||
use self::metadata::TimelineMetadata;
|
||||
use crate::config::PageServerConf;
|
||||
use crate::fail_point;
|
||||
use crate::import_datadir;
|
||||
use crate::is_uninit_mark;
|
||||
use crate::metrics::{remove_tenant_metrics, STORAGE_TIME};
|
||||
@@ -81,6 +80,8 @@ pub mod filename;
|
||||
mod image_layer;
|
||||
mod inmemory_layer;
|
||||
pub mod layer_map;
|
||||
pub mod bst_layer_map;
|
||||
pub mod segment_tree_layer_map;
|
||||
|
||||
pub mod metadata;
|
||||
mod par_fsync;
|
||||
@@ -256,7 +257,7 @@ impl UninitializedTimeline<'_> {
|
||||
// Thus spawning flush loop manually and skipping flush_loop setup in initialize_with_lock
|
||||
raw_timeline.maybe_spawn_flush_loop();
|
||||
|
||||
fail_point!("before-checkpoint-new-timeline", |_| {
|
||||
fail::fail_point!("before-checkpoint-new-timeline", |_| {
|
||||
bail!("failpoint before-checkpoint-new-timeline");
|
||||
});
|
||||
|
||||
@@ -572,7 +573,7 @@ impl Tenant {
|
||||
pub fn spawn_attach(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
remote_storage: &GenericRemoteStorage,
|
||||
) -> Arc<Tenant> {
|
||||
// XXX: Attach should provide the config, especially during tenant migration.
|
||||
// See https://github.com/neondatabase/neon/issues/1555
|
||||
@@ -585,7 +586,7 @@ impl Tenant {
|
||||
tenant_conf,
|
||||
wal_redo_manager,
|
||||
tenant_id,
|
||||
Some(remote_storage),
|
||||
Some(remote_storage.clone()),
|
||||
));
|
||||
|
||||
// Do all the hard work in the background
|
||||
@@ -783,7 +784,7 @@ impl Tenant {
|
||||
let tenant_conf = match Self::load_tenant_config(conf, tenant_id) {
|
||||
Ok(conf) => conf,
|
||||
Err(e) => {
|
||||
error!("load tenant config failed: {:?}", e);
|
||||
error!("load tenant config failed: {}", e);
|
||||
return Tenant::create_broken_tenant(conf, tenant_id);
|
||||
}
|
||||
};
|
||||
@@ -2099,7 +2100,7 @@ impl Tenant {
|
||||
// Thus spawn flush loop manually and skip flush_loop setup in initialize_with_lock
|
||||
unfinished_timeline.maybe_spawn_flush_loop();
|
||||
|
||||
fail_point!("before-checkpoint-new-timeline", |_| {
|
||||
fail::fail_point!("before-checkpoint-new-timeline", |_| {
|
||||
anyhow::bail!("failpoint before-checkpoint-new-timeline");
|
||||
});
|
||||
|
||||
@@ -2193,7 +2194,7 @@ impl Tenant {
|
||||
.context("Failed to create timeline data structure")?;
|
||||
crashsafe::create_dir_all(timeline_path).context("Failed to create timeline directory")?;
|
||||
|
||||
fail_point!("after-timeline-uninit-mark-creation", |_| {
|
||||
fail::fail_point!("after-timeline-uninit-mark-creation", |_| {
|
||||
anyhow::bail!("failpoint after-timeline-uninit-mark-creation");
|
||||
});
|
||||
|
||||
@@ -2382,7 +2383,7 @@ fn try_create_target_tenant_dir(
|
||||
temporary_tenant_timelines_dir.display()
|
||||
)
|
||||
})?;
|
||||
fail_point!("tenant-creation-before-tmp-rename", |_| {
|
||||
fail::fail_point!("tenant-creation-before-tmp-rename", |_| {
|
||||
anyhow::bail!("failpoint tenant-creation-before-tmp-rename");
|
||||
});
|
||||
|
||||
@@ -2670,7 +2671,7 @@ pub mod harness {
|
||||
&self,
|
||||
key: Key,
|
||||
lsn: Lsn,
|
||||
base_img: Option<(Lsn, Bytes)>,
|
||||
base_img: Option<Bytes>,
|
||||
records: Vec<(Lsn, NeonWalRecord)>,
|
||||
_pg_version: u32,
|
||||
) -> Result<Bytes, WalRedoError> {
|
||||
|
||||
pageserver/src/tenant/bst_layer_map.rs (new file, 111 lines)
@@ -0,0 +1,111 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
// TODO the `im` crate has 20x more downloads and also has persistent
|
||||
// BTree. See if it's better. (What's "fully persistent?")
|
||||
use rpds::RedBlackTreeMap;
|
||||
|
||||
|
||||
/// Layer map implemented using persistent binary search tree.
|
||||
/// This implementation is only good enough to run benchmarks,
|
||||
/// so it's missing unnecessary details. Values are String for now.
|
||||
pub struct BSTLM {
|
||||
/// Mapping key to the latest layer (if any) until the next key
|
||||
head: RedBlackTreeMap<i128, Option<String>>,
|
||||
|
||||
/// All previous states of `self.head`
|
||||
historic: BTreeMap<u64, RedBlackTreeMap<i128, Option<String>>>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for BSTLM {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let head_vec: Vec<_> = self.head.iter().collect();
|
||||
write!(f, "BSTLM: head: {:?}", head_vec)
|
||||
}
|
||||
}
|
||||
|
||||
impl BSTLM {
|
||||
pub fn new() -> Self {
|
||||
BSTLM {
|
||||
head: RedBlackTreeMap::default(),
|
||||
historic: BTreeMap::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn insert(self: &mut Self, key_begin: i128, key_end: i128, lsn: u64, value: String) {
|
||||
// TODO check for off-by-one errors
|
||||
|
||||
// It's a persistent map, not a retroactive one
|
||||
if let Some(last_entry) = self.historic.iter().rev().next() {
|
||||
let last_lsn = last_entry.0;
|
||||
if lsn == *last_lsn {
|
||||
// TODO there are edge cases to take care of
|
||||
}
|
||||
if lsn < *last_lsn {
|
||||
todo!("smaller lsn not implemented yet")
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE The order of the following lines is important!!
|
||||
|
||||
// Preserve information after right endpoint
|
||||
let value_at_end = match self.head.range(0..key_end).last() {
|
||||
Some((_, Some(v))) => Some(v.clone()),
|
||||
Some((_, None)) => None,
|
||||
None => None,
|
||||
};
|
||||
self.head.insert_mut(key_end, value_at_end);
|
||||
|
||||
// Insert the left endpoint
|
||||
self.head.insert_mut(key_begin, Some(value.clone()));
|
||||
|
||||
// Cover the inside of the interval
|
||||
let to_remove: Vec<_> = self.head.range((key_begin + 1)..key_end)
|
||||
.map(|(k, _)| k.clone())
|
||||
.collect();
|
||||
for key in to_remove {
|
||||
self.head.remove_mut(&key);
|
||||
}
|
||||
|
||||
// Remember history. Clone is O(1)
|
||||
self.historic.insert(lsn, self.head.clone());
|
||||
}
|
||||
|
||||
pub fn query(self: &Self, key: i128, lsn: u64) -> Option<&String> {
|
||||
// TODO check for off-by-one errors
|
||||
|
||||
let version = self.historic.range(0..=lsn).rev().next()?.1;
|
||||
version.range(0..=key).rev().next()?.1.as_ref()
|
||||
}
|
||||
|
||||
// TODO Add API for delta layers with lsn range.
|
||||
// The easy solution is to only store images, and then from every
|
||||
// image point to deltas on top of it. There might be something
|
||||
// nicer but we have this solution as backup.
|
||||
}
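// Editor's note: an illustrative (non-persistent) sketch of the same query rule
// using std's BTreeMap, to make the lookup explicit; it is not part of the
// change. `historic` keeps one full snapshot of `head` per LSN; with std maps
// each snapshot would cost an O(n) clone, which is exactly why the
// implementation above reaches for a persistent tree instead.
use std::collections::BTreeMap as StdBTreeMap;

type HeadSketch = StdBTreeMap<i128, Option<String>>;

fn query_sketch(historic: &StdBTreeMap<u64, HeadSketch>, key: i128, lsn: u64) -> Option<&String> {
    // Latest snapshot taken at or before `lsn`...
    let (_, head) = historic.range(..=lsn).next_back()?;
    // ...then the latest layer starting at or before `key`.
    head.range(..=key).next_back()?.1.as_ref()
}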
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_bstlm() {
|
||||
let mut bstlm = BSTLM::new();
|
||||
bstlm.insert(0, 5, 100, "Layer 1".to_string());
|
||||
dbg!(&bstlm);
|
||||
bstlm.insert(3, 9, 110, "Layer 2".to_string());
|
||||
dbg!(&bstlm);
|
||||
bstlm.insert(5, 6, 120, "Layer 3".to_string());
|
||||
dbg!(&bstlm);
|
||||
|
||||
// After Layer 1 insertion
|
||||
assert_eq!(bstlm.query(1, 105), Some(&"Layer 1".to_string()));
|
||||
assert_eq!(bstlm.query(4, 105), Some(&"Layer 1".to_string()));
|
||||
|
||||
// After Layer 2 insertion
|
||||
assert_eq!(bstlm.query(4, 115), Some(&"Layer 2".to_string()));
|
||||
assert_eq!(bstlm.query(8, 115), Some(&"Layer 2".to_string()));
|
||||
assert_eq!(bstlm.query(11, 115), None);
|
||||
|
||||
// After Layer 3 insertion
|
||||
assert_eq!(bstlm.query(4, 125), Some(&"Layer 2".to_string()));
|
||||
assert_eq!(bstlm.query(5, 125), Some(&"Layer 3".to_string()));
|
||||
|
||||
assert_eq!(bstlm.query(7, 125), Some(&"Layer 2".to_string()));
|
||||
}
|
||||
pageserver/src/tenant/segment_tree_layer_map.rs (new file, 346 lines)
@@ -0,0 +1,346 @@
|
||||
use persistent_range_query::naive::{IndexableKey, NaiveVecStorage};
|
||||
use persistent_range_query::ops::SameElementsInitializer;
|
||||
use persistent_range_query::segment_tree::{MidpointableKey, PersistentSegmentTree, PersistentSegmentTreeVersion};
|
||||
use persistent_range_query::{
|
||||
LazyRangeInitializer, PersistentVecStorage, RangeModification, RangeQueryResult,
|
||||
VecReadableVersion,
|
||||
};
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::BTreeMap;
|
||||
use std::ops::Range;
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
|
||||
struct PageIndex(u32);
|
||||
type LayerId = String;
|
||||
|
||||
impl IndexableKey for PageIndex {
|
||||
fn index(all_keys: &Range<Self>, key: &Self) -> usize {
|
||||
(key.0 as usize) - (all_keys.start.0 as usize)
|
||||
}
|
||||
|
||||
fn element_range(all_keys: &Range<Self>, index: usize) -> Range<Self> {
|
||||
PageIndex(all_keys.start.0 + index as u32)..PageIndex(all_keys.start.0 + index as u32 + 1)
|
||||
}
|
||||
}
|
||||
|
||||
impl MidpointableKey for PageIndex {
|
||||
fn midpoint(range: &Range<Self>) -> Self {
|
||||
PageIndex(range.start.0 + (range.end.0 - range.start.0) / 2)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
struct LayerMapInformation {
|
||||
// Only make sense for a range of length 1.
|
||||
last_layer: Option<LayerId>,
|
||||
last_image_layer: Option<LayerId>,
|
||||
// Work for all ranges
|
||||
max_delta_layers: (usize, Range<PageIndex>),
|
||||
}
|
||||
|
||||
impl LayerMapInformation {
|
||||
fn last_layers(&self) -> (&Option<LayerId>, &Option<LayerId>) {
|
||||
(&self.last_layer, &self.last_image_layer)
|
||||
}
|
||||
|
||||
fn max_delta_layers(&self) -> &(usize, Range<PageIndex>) {
|
||||
&self.max_delta_layers
|
||||
}
|
||||
}
|
||||
|
||||
fn merge_ranges(left: &Range<PageIndex>, right: &Range<PageIndex>) -> Range<PageIndex> {
|
||||
if left.is_empty() {
|
||||
right.clone()
|
||||
} else if right.is_empty() {
|
||||
left.clone()
|
||||
} else if left.end == right.start {
|
||||
left.start..right.end
|
||||
} else {
|
||||
left.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl RangeQueryResult<PageIndex> for LayerMapInformation {
|
||||
fn new_for_empty_range() -> Self {
|
||||
LayerMapInformation {
|
||||
last_layer: None,
|
||||
last_image_layer: None,
|
||||
max_delta_layers: (0, PageIndex(0)..PageIndex(0)),
|
||||
}
|
||||
}
|
||||
|
||||
fn combine(
|
||||
left: &Self,
|
||||
_left_range: &Range<PageIndex>,
|
||||
right: &Self,
|
||||
_right_range: &Range<PageIndex>,
|
||||
) -> Self {
|
||||
// Note that either range may be empty.
|
||||
LayerMapInformation {
|
||||
last_layer: left
|
||||
.last_layer
|
||||
.as_ref()
|
||||
.or_else(|| right.last_layer.as_ref())
|
||||
.cloned(),
|
||||
last_image_layer: left
|
||||
.last_image_layer
|
||||
.as_ref()
|
||||
.or_else(|| right.last_image_layer.as_ref())
|
||||
.cloned(),
|
||||
max_delta_layers: match left.max_delta_layers.0.cmp(&right.max_delta_layers.0) {
|
||||
Ordering::Less => right.max_delta_layers.clone(),
|
||||
Ordering::Greater => left.max_delta_layers.clone(),
|
||||
Ordering::Equal => (
|
||||
left.max_delta_layers.0,
|
||||
merge_ranges(&left.max_delta_layers.1, &right.max_delta_layers.1),
|
||||
),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn add(
|
||||
left: &mut Self,
|
||||
left_range: &Range<PageIndex>,
|
||||
right: &Self,
|
||||
right_range: &Range<PageIndex>,
|
||||
) {
|
||||
*left = Self::combine(&left, left_range, right, right_range);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct AddDeltaLayers {
|
||||
last_layer: LayerId,
|
||||
count: usize,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct LayerMapModification {
|
||||
add_image_layer: Option<LayerId>,
|
||||
add_delta_layers: Option<AddDeltaLayers>,
|
||||
}
|
||||
|
||||
impl LayerMapModification {
|
||||
fn add_image_layer(layer: impl Into<LayerId>) -> Self {
|
||||
LayerMapModification {
|
||||
add_image_layer: Some(layer.into()),
|
||||
add_delta_layers: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn add_delta_layer(layer: impl Into<LayerId>) -> Self {
|
||||
LayerMapModification {
|
||||
add_image_layer: None,
|
||||
add_delta_layers: Some(AddDeltaLayers {
|
||||
last_layer: layer.into(),
|
||||
count: 1,
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RangeModification<PageIndex> for LayerMapModification {
|
||||
type Result = LayerMapInformation;
|
||||
|
||||
fn no_op() -> Self {
|
||||
LayerMapModification {
|
||||
add_image_layer: None,
|
||||
add_delta_layers: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_no_op(&self) -> bool {
|
||||
self.add_image_layer.is_none() && self.add_delta_layers.is_none()
|
||||
}
|
||||
|
||||
fn is_reinitialization(&self) -> bool {
|
||||
self.add_image_layer.is_some()
|
||||
}
|
||||
|
||||
fn apply(&self, result: &mut Self::Result, range: &Range<PageIndex>) {
|
||||
if let Some(layer) = &self.add_image_layer {
|
||||
result.last_layer = Some(layer.clone());
|
||||
result.last_image_layer = Some(layer.clone());
|
||||
result.max_delta_layers = (0, range.clone());
|
||||
}
|
||||
if let Some(AddDeltaLayers { last_layer, count }) = &self.add_delta_layers {
|
||||
result.last_layer = Some(last_layer.clone());
|
||||
result.max_delta_layers.0 += count;
|
||||
}
|
||||
}
|
||||
|
||||
fn compose(later: &Self, earlier: &mut Self) {
|
||||
if later.add_image_layer.is_some() {
|
||||
*earlier = later.clone();
|
||||
return;
|
||||
}
|
||||
if let Some(AddDeltaLayers { last_layer, count }) = &later.add_delta_layers {
|
||||
let res = earlier.add_delta_layers.get_or_insert(AddDeltaLayers {
|
||||
last_layer: LayerId::default(),
|
||||
count: 0,
|
||||
});
|
||||
res.last_layer = last_layer.clone();
|
||||
res.count += count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LazyRangeInitializer<LayerMapInformation, PageIndex> for SameElementsInitializer<()> {
|
||||
fn get(&self, range: &Range<PageIndex>) -> LayerMapInformation {
|
||||
LayerMapInformation {
|
||||
last_layer: None,
|
||||
last_image_layer: None,
|
||||
max_delta_layers: (0, range.clone()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type Head = PersistentSegmentTree<LayerMapModification, SameElementsInitializer<()>, PageIndex>;
|
||||
type Version = PersistentSegmentTreeVersion<LayerMapModification, SameElementsInitializer<()>, PageIndex>;
|
||||
|
||||
pub struct STLM {
|
||||
head: Head,
|
||||
historic: BTreeMap<u32, Version>,
|
||||
}
|
||||
|
||||
/// Layer map (good enough for benchmarks) implemented using persistent segment tree
|
||||
impl STLM {
|
||||
pub fn new() -> Self {
|
||||
STLM {
|
||||
head: PersistentSegmentTree::new(
|
||||
PageIndex(0)..PageIndex(100),
|
||||
SameElementsInitializer::new(()),
|
||||
),
|
||||
historic: BTreeMap::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn insert(self: &mut Self, key_begin: u32, key_end: u32, lsn: u32, value: String) {
|
||||
self.head.modify(
|
||||
&(PageIndex(key_begin)..PageIndex(key_end)),
|
||||
&LayerMapModification::add_image_layer(value),
|
||||
);
|
||||
self.historic.insert(lsn, self.head.freeze());
|
||||
}
|
||||
|
||||
pub fn query(self: &Self, key: u32, lsn: u32) -> Option<String> {
|
||||
let version = self.historic.range(0..=lsn).rev().next()?.1;
|
||||
let info = version.get(&(PageIndex(key)..PageIndex(key + 1)));
|
||||
info.last_image_layer.map(|s| s.clone())
|
||||
}
|
||||
}
|
||||
|
||||
fn test_stlm() {
|
||||
let mut stlm = STLM::new();
|
||||
stlm.insert(0, 5, 100, "layer 1".to_string());
|
||||
stlm.insert(3, 9, 110, "layer 2".to_string());
|
||||
|
||||
dbg!(stlm.query(1, 105));
|
||||
dbg!(stlm.query(4, 105));
|
||||
dbg!(stlm.query(4, 115));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_stlm_() {
|
||||
test_stlm()
|
||||
}
|
||||
|
||||
fn test_layer_map<
|
||||
S: PersistentVecStorage<LayerMapModification, SameElementsInitializer<()>, PageIndex>,
|
||||
>() {
|
||||
let mut s = S::new(
|
||||
PageIndex(0)..PageIndex(100),
|
||||
SameElementsInitializer::new(()),
|
||||
);
|
||||
s.modify(
|
||||
&(PageIndex(0)..PageIndex(70)),
|
||||
&LayerMapModification::add_image_layer("Img0..70"),
|
||||
);
|
||||
s.modify(
|
||||
&(PageIndex(50)..PageIndex(100)),
|
||||
&LayerMapModification::add_image_layer("Img50..100"),
|
||||
);
|
||||
s.modify(
|
||||
&(PageIndex(10)..PageIndex(60)),
|
||||
&LayerMapModification::add_delta_layer("Delta10..60"),
|
||||
);
|
||||
let s_before_last_delta = s.freeze();
|
||||
s.modify(
|
||||
&(PageIndex(20)..PageIndex(80)),
|
||||
&LayerMapModification::add_delta_layer("Delta20..80"),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
s.get(&(PageIndex(5)..PageIndex(6))).last_layers(),
|
||||
(&Some("Img0..70".to_owned()), &Some("Img0..70".to_owned()))
|
||||
);
|
||||
assert_eq!(
|
||||
s.get(&(PageIndex(15)..PageIndex(16))).last_layers(),
|
||||
(
|
||||
&Some("Delta10..60".to_owned()),
|
||||
&Some("Img0..70".to_owned())
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
s.get(&(PageIndex(25)..PageIndex(26))).last_layers(),
|
||||
(
|
||||
&Some("Delta20..80".to_owned()),
|
||||
&Some("Img0..70".to_owned())
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
s.get(&(PageIndex(65)..PageIndex(66))).last_layers(),
|
||||
(
|
||||
&Some("Delta20..80".to_owned()),
|
||||
&Some("Img50..100".to_owned())
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
s.get(&(PageIndex(95)..PageIndex(96))).last_layers(),
|
||||
(
|
||||
&Some("Img50..100".to_owned()),
|
||||
&Some("Img50..100".to_owned())
|
||||
)
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
s.get(&(PageIndex(0)..PageIndex(100))).max_delta_layers(),
|
||||
&(2, PageIndex(20)..PageIndex(60)),
|
||||
);
|
||||
assert_eq!(
|
||||
*s_before_last_delta
|
||||
.get(&(PageIndex(0)..PageIndex(100)))
|
||||
.max_delta_layers(),
|
||||
(1, PageIndex(10)..PageIndex(60)),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
*s.get(&(PageIndex(10)..PageIndex(30))).max_delta_layers(),
|
||||
(2, PageIndex(20)..PageIndex(30))
|
||||
);
|
||||
assert_eq!(
|
||||
*s.get(&(PageIndex(10)..PageIndex(20))).max_delta_layers(),
|
||||
(1, PageIndex(10)..PageIndex(20))
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
*s.get(&(PageIndex(70)..PageIndex(80))).max_delta_layers(),
|
||||
(1, PageIndex(70)..PageIndex(80))
|
||||
);
|
||||
assert_eq!(
|
||||
*s_before_last_delta
|
||||
.get(&(PageIndex(70)..PageIndex(80)))
|
||||
.max_delta_layers(),
|
||||
(0, PageIndex(70)..PageIndex(80))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_naive() {
|
||||
test_layer_map::<NaiveVecStorage<_, _, _>>();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_segment_tree() {
|
||||
test_layer_map::<PersistentSegmentTree<_, _, _>>();
|
||||
}
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
use anyhow::{anyhow, bail, ensure, Context};
|
||||
use bytes::Bytes;
|
||||
use fail::fail_point;
|
||||
use itertools::Itertools;
|
||||
use once_cell::sync::OnceCell;
|
||||
use pageserver_api::models::TimelineState;
|
||||
@@ -18,8 +19,7 @@ use std::sync::atomic::{AtomicBool, AtomicI64, Ordering as AtomicOrdering};
|
||||
use std::sync::{Arc, Mutex, MutexGuard, RwLock};
|
||||
use std::time::{Duration, Instant, SystemTime};
|
||||
|
||||
use crate::fail_point;
|
||||
use crate::storage_sync::index::IndexPart;
|
||||
use crate::storage_sync::index::{IndexPart, RelativePath};
|
||||
use crate::storage_sync::RemoteTimelineClient;
|
||||
use crate::tenant::{
|
||||
delta_layer::{DeltaLayer, DeltaLayerWriter},
|
||||
@@ -999,9 +999,55 @@ impl Timeline {
|
||||
&self,
|
||||
index_part: &IndexPart,
|
||||
remote_client: &RemoteTimelineClient,
|
||||
local_layers: HashSet<PathBuf>,
|
||||
mut local_filenames: HashSet<PathBuf>,
|
||||
up_to_date_disk_consistent_lsn: Lsn,
|
||||
) -> anyhow::Result<HashSet<PathBuf>> {
|
||||
let mut remote_filenames: HashSet<PathBuf> = HashSet::new();
|
||||
for fname in index_part.timeline_layers.iter() {
|
||||
remote_filenames.insert(fname.to_local_path(&PathBuf::from("")));
|
||||
}
|
||||
|
||||
// Are there any local files that exist, with a size that doesn't match
|
||||
// with the size stored in the remote index file?
|
||||
// If so, rename_to_backup those files so that we re-download them later.
|
||||
local_filenames.retain(|path| {
|
||||
let layer_metadata = index_part
|
||||
.layer_metadata
|
||||
.get(&RelativePath::from_filename(path))
|
||||
.map(LayerFileMetadata::from)
|
||||
.unwrap_or(LayerFileMetadata::MISSING);
|
||||
|
||||
if let Some(remote_size) = layer_metadata.file_size() {
|
||||
let local_path = self.conf.timeline_path(&self.timeline_id, &self.tenant_id).join(&path);
|
||||
match local_path.metadata() {
|
||||
Ok(metadata) => {
|
||||
let local_size = metadata.len();
|
||||
|
||||
if local_size != remote_size {
|
||||
warn!("removing local file \"{}\" because it has unexpected length {}; length in remote index is {}",
|
||||
path.display(),
|
||||
local_size,
|
||||
remote_size);
|
||||
if let Err(err) = rename_to_backup(&local_path) {
|
||||
error!("could not rename file \"{}\": {:?}",
|
||||
local_path.display(), err);
|
||||
}
|
||||
self.metrics.current_physical_size_gauge.sub(local_size);
|
||||
false
|
||||
} else {
|
||||
true
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
error!("could not get size of local file \"{}\": {:?}", path.display(), err);
|
||||
true
|
||||
}
|
||||
}
|
||||
} else {
|
||||
true
|
||||
}
|
||||
});
|
||||
|
||||
// Are we missing some files that are present in remote storage?
|
||||
// Download them now.
|
||||
// TODO Downloading many files this way is not efficient.
|
||||
@@ -1010,63 +1056,17 @@ impl Timeline {
|
||||
// b) typical case now is that there is nothing to sync, this downloads a lot
|
||||
// 1) if there was another pageserver that came and generated new files
|
||||
// 2) during attach of a timeline with big history which we currently do not do
|
||||
let mut local_only_layers = local_layers;
|
||||
let timeline_dir = self.conf.timeline_path(&self.timeline_id, &self.tenant_id);
|
||||
for remote_layer_name in &index_part.timeline_layers {
|
||||
let local_layer_path = timeline_dir.join(remote_layer_name);
|
||||
local_only_layers.remove(&local_layer_path);
|
||||
for path in remote_filenames.difference(&local_filenames) {
|
||||
let fname = path.to_str().unwrap();
|
||||
info!("remote layer file {fname} does not exist locally");
|
||||
|
||||
let remote_layer_metadata = index_part
|
||||
let layer_metadata = index_part
|
||||
.layer_metadata
|
||||
.get(remote_layer_name)
|
||||
.get(&RelativePath::from_filename(path))
|
||||
.map(LayerFileMetadata::from)
|
||||
.unwrap_or(LayerFileMetadata::MISSING);
|
||||
|
||||
let remote_layer_path = self
|
||||
.conf
|
||||
.remote_path(&local_layer_path)
|
||||
.expect("local_layer_path received from the same conf that provided a workdir");
|
||||
|
||||
if local_layer_path.exists() {
|
||||
let mut already_downloaded = true;
|
||||
// Are there any local files that exist, with a size that doesn't match
|
||||
// with the size stored in the remote index file?
|
||||
// If so, rename_to_backup those files so that we re-download them later.
|
||||
if let Some(remote_size) = remote_layer_metadata.file_size() {
|
||||
match local_layer_path.metadata() {
|
||||
Ok(metadata) => {
|
||||
let local_size = metadata.len();
|
||||
|
||||
if local_size != remote_size {
|
||||
warn!("removing local file {local_layer_path:?} because it has unexpected length {local_size}; length in remote index is {remote_size}");
|
||||
if let Err(err) = rename_to_backup(&local_layer_path) {
|
||||
error!("could not rename file {local_layer_path:?}: {err:?}");
|
||||
} else {
|
||||
self.metrics.current_physical_size_gauge.sub(local_size);
|
||||
already_downloaded = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
error!("could not get size of local file {local_layer_path:?}: {err:?}")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if already_downloaded {
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
info!("remote layer {remote_layer_path:?} does not exist locally");
|
||||
}
|
||||
|
||||
let layer_name = local_layer_path
|
||||
.file_name()
|
||||
.and_then(|os_str| os_str.to_str())
|
||||
.with_context(|| {
|
||||
format!("Layer file {local_layer_path:?} has no name in unicode")
|
||||
})?;
|
||||
if let Some(imgfilename) = ImageFileName::parse_str(layer_name) {
|
||||
if let Some(imgfilename) = ImageFileName::parse_str(fname) {
|
||||
if imgfilename.lsn > up_to_date_disk_consistent_lsn {
|
||||
warn!(
|
||||
"found future image layer {} on timeline {} remote_consistent_lsn is {}",
|
||||
@@ -1075,13 +1075,11 @@ impl Timeline {
|
||||
continue;
|
||||
}
|
||||
|
||||
trace!("downloading image file: {remote_layer_path:?}");
|
||||
let downloaded_size = remote_client
|
||||
.download_layer_file(&remote_layer_path, &remote_layer_metadata)
|
||||
trace!("downloading image file: {}", file = path.display());
|
||||
let sz = remote_client
|
||||
.download_layer_file(&RelativePath::from_filename(path), &layer_metadata)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("failed to download image layer from path {remote_layer_path:?}")
|
||||
})?;
|
||||
.context("download image layer")?;
|
||||
trace!("done");
|
||||
|
||||
let image_layer =
|
||||
@@ -1091,10 +1089,8 @@ impl Timeline {
|
||||
.write()
|
||||
.unwrap()
|
||||
.insert_historic(Arc::new(image_layer));
|
||||
self.metrics
|
||||
.current_physical_size_gauge
|
||||
.add(downloaded_size);
|
||||
} else if let Some(deltafilename) = DeltaFileName::parse_str(layer_name) {
|
||||
self.metrics.current_physical_size_gauge.add(sz);
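// (Together with the `sub()` call in the size-mismatch branch above, this keeps the
// current_physical_size_gauge in step with what is actually present on local disk.)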
|
||||
} else if let Some(deltafilename) = DeltaFileName::parse_str(fname) {
|
||||
// Create a DeltaLayer struct for each delta file.
|
||||
// The end-LSN is exclusive, while disk_consistent_lsn is
|
||||
// inclusive. For example, if disk_consistent_lsn is 100, it is
|
||||
@@ -1109,13 +1105,11 @@ impl Timeline {
|
||||
continue;
|
||||
}
|
||||
|
||||
trace!("downloading delta file: {remote_layer_path:?}");
|
||||
trace!("downloading image file: {}", file = path.display());
|
||||
let sz = remote_client
|
||||
.download_layer_file(&remote_layer_path, &remote_layer_metadata)
|
||||
.download_layer_file(&RelativePath::from_filename(path), &layer_metadata)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("failed to download delta layer from path {remote_layer_path:?}")
|
||||
})?;
|
||||
.context("download delta layer")?;
|
||||
trace!("done");
|
||||
|
||||
let delta_layer =
|
||||
@@ -1127,11 +1121,16 @@ impl Timeline {
|
||||
.insert_historic(Arc::new(delta_layer));
|
||||
self.metrics.current_physical_size_gauge.add(sz);
|
||||
} else {
|
||||
bail!("unexpected layer filename {layer_name} in remote storage path: {remote_layer_path:?}");
|
||||
bail!("unexpected layer filename in remote storage: {}", fname);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(local_only_layers)
|
||||
// now these are local only filenames
|
||||
let local_only_filenames = local_filenames
|
||||
.difference(&remote_filenames)
|
||||
.cloned()
|
||||
.collect();
|
||||
Ok(local_only_filenames)
|
||||
}
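As a reading aid, a minimal sketch of the reconciliation idea implemented in `download_missing` above and its caller below (assumed names, not the verbatim code from either side of this diff): the remote index and the local layer map are treated as two filename sets, layers missing locally are downloaded, and the local-only remainder is handed back so the caller can schedule uploads.

use std::collections::HashSet;
use std::path::PathBuf;

fn split_layer_sets(
    local: &HashSet<PathBuf>,
    remote: &HashSet<PathBuf>,
) -> (Vec<PathBuf>, Vec<PathBuf>) {
    // Present remotely but not locally: candidates for download.
    let to_download = remote.difference(local).cloned().collect();
    // Present locally but not remotely: candidates for upload.
    let local_only = local.difference(remote).cloned().collect();
    (to_download, local_only)
}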
|
||||
|
||||
///
|
||||
@@ -1165,46 +1164,47 @@ impl Timeline {
|
||||
let disk_consistent_lsn = up_to_date_metadata.disk_consistent_lsn();
|
||||
|
||||
// Build a map of local layers for quick lookups
|
||||
let local_layers = self
|
||||
.layers
|
||||
.read()
|
||||
.unwrap()
|
||||
.iter_historic_layers()
|
||||
.map(|historic_layer| {
|
||||
historic_layer
|
||||
.local_path()
|
||||
.expect("Historic layers should have a path")
|
||||
})
|
||||
.collect::<HashSet<_>>();
|
||||
let mut local_filenames: HashSet<PathBuf> = HashSet::new();
|
||||
for layer in self.layers.read().unwrap().iter_historic_layers() {
|
||||
local_filenames.insert(layer.filename());
|
||||
}
|
||||
|
||||
let local_only_layers = match index_part {
|
||||
let local_only_filenames = match index_part {
|
||||
Some(index_part) => {
|
||||
info!(
|
||||
"initializing upload queue from remote index with {} layer files",
|
||||
index_part.timeline_layers.len()
|
||||
);
|
||||
remote_client.init_upload_queue(index_part)?;
|
||||
self.download_missing(index_part, remote_client, local_layers, disk_consistent_lsn)
|
||||
.await?
|
||||
let local_only_filenames = self
|
||||
.download_missing(
|
||||
index_part,
|
||||
remote_client,
|
||||
local_filenames,
|
||||
disk_consistent_lsn,
|
||||
)
|
||||
.await?;
|
||||
local_only_filenames
|
||||
}
|
||||
None => {
|
||||
info!("initializing upload queue as empty");
|
||||
remote_client.init_upload_queue_for_empty_remote(up_to_date_metadata)?;
|
||||
local_layers
|
||||
local_filenames
|
||||
}
|
||||
};
|
||||
|
||||
// Are there local files that don't exist remotely? Schedule uploads for them
|
||||
for layer_path in &local_only_layers {
|
||||
let layer_size = layer_path
|
||||
let timeline_path = self.conf.timeline_path(&self.timeline_id, &self.tenant_id);
|
||||
for fname in &local_only_filenames {
|
||||
let absolute = timeline_path.join(fname);
|
||||
let sz = absolute
|
||||
.metadata()
|
||||
.with_context(|| format!("failed to get file {layer_path:?} metadata"))?
|
||||
.with_context(|| format!("failed to get file {} metadata", fname.display()))?
|
||||
.len();
|
||||
info!("scheduling {layer_path:?} for upload");
|
||||
remote_client
|
||||
.schedule_layer_file_upload(layer_path, &LayerFileMetadata::new(layer_size))?;
|
||||
info!("scheduling {} for upload", fname.display());
|
||||
remote_client.schedule_layer_file_upload(&absolute, &LayerFileMetadata::new(sz))?;
|
||||
}
|
||||
if !local_only_layers.is_empty() {
|
||||
if !local_only_filenames.is_empty() {
|
||||
remote_client.schedule_index_upload(up_to_date_metadata)?;
|
||||
}
|
||||
|
||||
@@ -2642,22 +2642,24 @@ impl Timeline {
|
||||
data.records.len()
|
||||
);
|
||||
} else {
|
||||
if data.img.is_some() {
|
||||
let base_img = if let Some((_lsn, img)) = data.img {
|
||||
trace!(
|
||||
"found {} WAL records and a base image for {} at {}, performing WAL redo",
|
||||
data.records.len(),
|
||||
key,
|
||||
request_lsn
|
||||
);
|
||||
Some(img)
|
||||
} else {
|
||||
trace!("found {} WAL records that will init the page for {} at {}, performing WAL redo", data.records.len(), key, request_lsn);
|
||||
None
|
||||
};
|
||||
|
||||
let last_rec_lsn = data.records.last().unwrap().0;
|
||||
|
||||
let img = self
|
||||
.walredo_mgr
|
||||
.request_redo(key, request_lsn, data.img, data.records, self.pg_version)
|
||||
.request_redo(key, request_lsn, base_img, data.records, self.pg_version)
|
||||
.context("Failed to reconstruct a page image:")?;
|
||||
|
||||
if img.len() == page_cache::PAGE_SZ {
|
||||
|
||||
@@ -185,9 +185,6 @@ impl TenantConfOpt {
|
||||
if let Some(max_lsn_wal_lag) = other.max_lsn_wal_lag {
|
||||
self.max_lsn_wal_lag = Some(max_lsn_wal_lag);
|
||||
}
|
||||
if let Some(trace_read_requests) = other.trace_read_requests {
|
||||
self.trace_read_requests = Some(trace_read_requests);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,62 +1,75 @@
|
||||
//! This module acts as a switchboard to access different repositories managed by this
|
||||
//! page server.
|
||||
|
||||
use std::collections::{hash_map, HashMap};
|
||||
use std::collections::hash_map;
|
||||
use std::ffi::OsStr;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use tokio::fs;
|
||||
|
||||
use anyhow::Context;
|
||||
use once_cell::sync::Lazy;
|
||||
use tokio::sync::RwLock;
|
||||
use tracing::*;
|
||||
|
||||
use pageserver_api::models::TimelineGcRequest;
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use utils::crashsafe;
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
use crate::repository::GcResult;
|
||||
use crate::task_mgr::{self, TaskKind};
|
||||
use crate::tenant::{Tenant, TenantState};
|
||||
use crate::tenant_config::TenantConfOpt;
|
||||
use crate::IGNORED_TENANT_FILE_NAME;
|
||||
|
||||
use utils::fs_ext::PathExt;
|
||||
use utils::http::error::ApiError;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
|
||||
static TENANTS: Lazy<RwLock<HashMap<TenantId, Arc<Tenant>>>> =
|
||||
Lazy::new(|| RwLock::new(HashMap::new()));
|
||||
mod tenants_state {
|
||||
use once_cell::sync::Lazy;
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard},
|
||||
};
|
||||
use utils::id::TenantId;
|
||||
|
||||
use crate::tenant::Tenant;
|
||||
|
||||
static TENANTS: Lazy<RwLock<HashMap<TenantId, Arc<Tenant>>>> =
|
||||
Lazy::new(|| RwLock::new(HashMap::new()));
|
||||
|
||||
pub(super) fn read_tenants() -> RwLockReadGuard<'static, HashMap<TenantId, Arc<Tenant>>> {
|
||||
TENANTS
|
||||
.read()
|
||||
.expect("Failed to read() tenants lock, it got poisoned")
|
||||
}
|
||||
|
||||
pub(super) fn write_tenants() -> RwLockWriteGuard<'static, HashMap<TenantId, Arc<Tenant>>> {
|
||||
TENANTS
|
||||
.write()
|
||||
.expect("Failed to write() tenants lock, it got poisoned")
|
||||
}
|
||||
}
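A short usage sketch for the accessors above (assumed call site, mirroring how they are used further down in this diff): callers never touch the inner map directly, and a poisoned lock turns into an immediate panic rather than silently corrupted state.

fn tenant_count_sketch() -> usize {
    // Read-only access; `read_tenants()` panics if the lock was poisoned.
    tenants_state::read_tenants().len()
}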
|
||||
|
||||
/// Initialize repositories with locally available timelines.
|
||||
/// Timelines that are only partially available locally (remote storage has more data than this pageserver)
|
||||
/// are scheduled for download and added to the tenant once download is completed.
|
||||
#[instrument(skip(conf, remote_storage))]
|
||||
pub async fn init_tenant_mgr(
|
||||
pub fn init_tenant_mgr(
|
||||
conf: &'static PageServerConf,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
) -> anyhow::Result<()> {
|
||||
let _entered = info_span!("init_tenant_mgr").entered();
|
||||
|
||||
// Scan local filesystem for attached tenants
|
||||
let mut number_of_tenants = 0;
|
||||
let tenants_dir = conf.tenants_path();
|
||||
|
||||
let mut dir_entries = fs::read_dir(&tenants_dir)
|
||||
.await
|
||||
.with_context(|| format!("Failed to list tenants dir {tenants_dir:?}"))?;
|
||||
|
||||
loop {
|
||||
match dir_entries.next_entry().await {
|
||||
Ok(None) => break,
|
||||
Ok(Some(dir_entry)) => {
|
||||
for dir_entry in std::fs::read_dir(&tenants_dir)
|
||||
.with_context(|| format!("Failed to list tenants dir {}", tenants_dir.display()))?
|
||||
{
|
||||
match &dir_entry {
|
||||
Ok(dir_entry) => {
|
||||
let tenant_dir_path = dir_entry.path();
|
||||
if crate::is_temporary(&tenant_dir_path) {
|
||||
info!(
|
||||
"Found temporary tenant directory, removing: {}",
|
||||
tenant_dir_path.display()
|
||||
);
|
||||
if let Err(e) = fs::remove_dir_all(&tenant_dir_path).await {
|
||||
if let Err(e) = std::fs::remove_dir_all(&tenant_dir_path) {
|
||||
error!(
|
||||
"Failed to remove temporary directory '{}': {:?}",
|
||||
tenant_dir_path.display(),
|
||||
@@ -64,38 +77,27 @@ pub async fn init_tenant_mgr(
|
||||
);
|
||||
}
|
||||
} else {
|
||||
// This case happens if we crash during attach before creating the attach marker file
|
||||
let is_empty = tenant_dir_path.is_empty_dir().with_context(|| {
|
||||
format!("Failed to check whether {tenant_dir_path:?} is an empty dir")
|
||||
})?;
|
||||
if is_empty {
|
||||
info!("removing empty tenant directory {tenant_dir_path:?}");
|
||||
if let Err(e) = fs::remove_dir(&tenant_dir_path).await {
|
||||
error!(
|
||||
"Failed to remove empty tenant directory '{}': {e:#}",
|
||||
tenant_dir_path.display()
|
||||
)
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
let tenant_ignore_mark_file = tenant_dir_path.join(IGNORED_TENANT_FILE_NAME);
|
||||
if tenant_ignore_mark_file.exists() {
|
||||
info!("Found an ignore mark file {tenant_ignore_mark_file:?}, skipping the tenant");
|
||||
continue;
|
||||
}
|
||||
|
||||
match schedule_local_tenant_processing(
|
||||
conf,
|
||||
&tenant_dir_path,
|
||||
remote_storage.clone(),
|
||||
) {
|
||||
Ok(tenant) => {
|
||||
TENANTS.write().await.insert(tenant.tenant_id(), tenant);
|
||||
match load_local_tenant(conf, &tenant_dir_path, remote_storage.clone()) {
|
||||
Ok(Some(tenant)) => {
|
||||
tenants_state::write_tenants().insert(tenant.tenant_id(), tenant);
|
||||
number_of_tenants += 1;
|
||||
}
|
||||
Ok(None) => {
|
||||
// This case happens if we crash during attach before creating the attach marker file
|
||||
if let Err(e) = std::fs::remove_dir(&tenant_dir_path) {
|
||||
error!(
|
||||
"Failed to remove empty tenant directory '{}': {e:#}",
|
||||
tenant_dir_path.display()
|
||||
)
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to collect tenant files from dir {tenants_dir:?} for entry {dir_entry:?}, reason: {e:#}");
|
||||
error!(
|
||||
"Failed to collect tenant files from dir '{}' for entry {:?}, reason: {:#}",
|
||||
tenants_dir.display(),
|
||||
dir_entry,
|
||||
e
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -105,7 +107,10 @@ pub async fn init_tenant_mgr(
|
||||
// here, the pageserver startup fails altogether, causing outage for *all*
|
||||
// tenants. That seems worse.
|
||||
error!(
|
||||
"Failed to list tenants dir entry in directory {tenants_dir:?}, reason: {e:?}"
|
||||
"Failed to list tenants dir entry {:?} in directory {}, reason: {:?}",
|
||||
dir_entry,
|
||||
tenants_dir.display(),
|
||||
e,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -115,45 +120,34 @@ pub async fn init_tenant_mgr(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn schedule_local_tenant_processing(
|
||||
fn load_local_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_path: &Path,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
) -> anyhow::Result<Arc<Tenant>> {
|
||||
anyhow::ensure!(
|
||||
tenant_path.is_dir(),
|
||||
"Cannot load tenant from path {tenant_path:?}, it either does not exist or not a directory"
|
||||
);
|
||||
anyhow::ensure!(
|
||||
!crate::is_temporary(tenant_path),
|
||||
"Cannot load tenant from temporary path {tenant_path:?}"
|
||||
);
|
||||
anyhow::ensure!(
|
||||
!tenant_path.is_empty_dir().with_context(|| {
|
||||
format!("Failed to check whether {tenant_path:?} is an empty dir")
|
||||
})?,
|
||||
"Cannot load tenant from empty directory {tenant_path:?}"
|
||||
);
|
||||
) -> anyhow::Result<Option<Arc<Tenant>>> {
|
||||
if !tenant_path.is_dir() {
|
||||
anyhow::bail!("tenant_path is not a directory: {tenant_path:?}")
|
||||
}
|
||||
|
||||
let is_empty = tenant_path
|
||||
.is_empty_dir()
|
||||
.context("check whether tenant_path is an empty dir")?;
|
||||
if is_empty {
|
||||
info!("skipping empty tenant directory {tenant_path:?}");
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let tenant_id = tenant_path
|
||||
.file_name()
|
||||
.and_then(OsStr::to_str)
|
||||
.unwrap_or_default()
|
||||
.parse::<TenantId>()
|
||||
.with_context(|| {
|
||||
format!("Could not parse tenant id out of the tenant dir name in path {tenant_path:?}")
|
||||
})?;
|
||||
|
||||
let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(tenant_id);
|
||||
anyhow::ensure!(
|
||||
!conf.tenant_ignore_mark_file_path(tenant_id).exists(),
|
||||
"Cannot load tenant, ignore mark found at {tenant_ignore_mark:?}"
|
||||
);
|
||||
.context("Could not parse tenant id out of the tenant dir name")?;
|
||||
|
||||
let tenant = if conf.tenant_attaching_mark_file_path(&tenant_id).exists() {
|
||||
info!("tenant {tenant_id} has attaching mark file, resuming its attach operation");
|
||||
if let Some(remote_storage) = remote_storage {
|
||||
Tenant::spawn_attach(conf, tenant_id, remote_storage)
|
||||
Tenant::spawn_attach(conf, tenant_id, &remote_storage)
|
||||
} else {
|
||||
warn!("tenant {tenant_id} has attaching mark file, but pageserver has no remote storage configured");
|
||||
Tenant::create_broken_tenant(conf, tenant_id)
|
||||
@@ -163,7 +157,7 @@ pub fn schedule_local_tenant_processing(
|
||||
// Start loading the tenant into memory. It will initially be in Loading state.
|
||||
Tenant::spawn_load(conf, tenant_id, remote_storage)
|
||||
};
|
||||
Ok(tenant)
|
||||
Ok(Some(tenant))
|
||||
}
|
||||
|
||||
///
|
||||
@@ -171,7 +165,7 @@ pub fn schedule_local_tenant_processing(
|
||||
///
|
||||
pub async fn shutdown_all_tenants() {
|
||||
let tenants_to_shut_down = {
|
||||
let mut m = TENANTS.write().await;
|
||||
let mut m = tenants_state::write_tenants();
|
||||
let mut tenants_to_shut_down = Vec::with_capacity(m.len());
|
||||
for (_, tenant) in m.drain() {
|
||||
if tenant.is_active() {
|
||||
@@ -205,13 +199,13 @@ pub async fn shutdown_all_tenants() {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn create_tenant(
|
||||
pub fn create_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_conf: TenantConfOpt,
|
||||
tenant_id: TenantId,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
) -> anyhow::Result<Option<Arc<Tenant>>> {
|
||||
match TENANTS.write().await.entry(tenant_id) {
|
||||
match tenants_state::write_tenants().entry(tenant_id) {
|
||||
hash_map::Entry::Occupied(_) => {
|
||||
debug!("tenant {tenant_id} already exists");
|
||||
Ok(None)
|
||||
@@ -221,36 +215,44 @@ pub async fn create_tenant(
|
||||
// If this section ever becomes contentious, introduce a new `TenantState::Creating`.
|
||||
let tenant_directory =
|
||||
super::tenant::create_tenant_files(conf, tenant_conf, tenant_id)?;
|
||||
let created_tenant =
|
||||
schedule_local_tenant_processing(conf, &tenant_directory, remote_storage)?;
|
||||
let crated_tenant_id = created_tenant.tenant_id();
|
||||
anyhow::ensure!(
|
||||
tenant_id == crated_tenant_id,
|
||||
"loaded created tenant has unexpected tenant id (expect {tenant_id} != actual {crated_tenant_id})",
|
||||
);
|
||||
v.insert(Arc::clone(&created_tenant));
|
||||
Ok(Some(created_tenant))
|
||||
let created_tenant = load_local_tenant(conf, &tenant_directory, remote_storage)?;
|
||||
match created_tenant {
|
||||
None => {
|
||||
// We get None in case the directory is empty.
|
||||
// This shouldn't happen here, because we just created the directory.
|
||||
// So, skip any cleanup work for now, we don't know how we reached this state.
|
||||
anyhow::bail!("we just created the tenant directory, it can't be empty");
|
||||
}
|
||||
Some(tenant) => {
|
||||
anyhow::ensure!(
|
||||
tenant_id == tenant.tenant_id(),
|
||||
"loaded created tenant has unexpected tenant id (expect {} != actual {})",
|
||||
tenant_id,
|
||||
tenant.tenant_id()
|
||||
);
|
||||
v.insert(Arc::clone(&tenant));
|
||||
Ok(Some(tenant))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn update_tenant_config(
|
||||
pub fn update_tenant_config(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_conf: TenantConfOpt,
|
||||
tenant_id: TenantId,
|
||||
) -> anyhow::Result<()> {
|
||||
info!("configuring tenant {tenant_id}");
|
||||
get_tenant(tenant_id, true)
|
||||
.await?
|
||||
.update_tenant_config(tenant_conf);
|
||||
get_tenant(tenant_id, true)?.update_tenant_config(tenant_conf);
|
||||
Tenant::persist_tenant_config(&conf.tenant_config_path(tenant_id), tenant_conf, false)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Gets the tenant from the in-memory data, erroring if it's absent or is not fitting to the query.
|
||||
/// `active_only = true` allows to query only tenants that are ready for operations, erroring on other kinds of tenants.
|
||||
pub async fn get_tenant(tenant_id: TenantId, active_only: bool) -> anyhow::Result<Arc<Tenant>> {
|
||||
let m = TENANTS.read().await;
|
||||
pub fn get_tenant(tenant_id: TenantId, active_only: bool) -> anyhow::Result<Arc<Tenant>> {
|
||||
let m = tenants_state::read_tenants();
|
||||
let tenant = m
|
||||
.get(&tenant_id)
|
||||
.with_context(|| format!("Tenant {tenant_id} not found in the local state"))?;
|
||||
@@ -286,7 +288,7 @@ pub async fn delete_timeline(tenant_id: TenantId, timeline_id: TimelineId) -> an
|
||||
info!("waiting for timeline tasks to shutdown");
|
||||
task_mgr::shutdown_tasks(None, Some(tenant_id), Some(timeline_id)).await;
|
||||
info!("timeline task shutdown completed");
|
||||
match get_tenant(tenant_id, true).await {
|
||||
match get_tenant(tenant_id, true) {
|
||||
Ok(tenant) => {
|
||||
tenant.delete_timeline(timeline_id).await?;
|
||||
}
|
||||
@@ -300,67 +302,40 @@ pub async fn detach_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
) -> anyhow::Result<()> {
|
||||
remove_tenant_from_memory(tenant_id, async {
|
||||
let local_tenant_directory = conf.tenant_path(&tenant_id);
|
||||
fs::remove_dir_all(&local_tenant_directory)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("Failed to remove local tenant directory {local_tenant_directory:?}")
|
||||
})?;
|
||||
Ok(())
|
||||
})
|
||||
.await
|
||||
}
|
||||
let tenant = match {
|
||||
let mut tenants_accessor = tenants_state::write_tenants();
|
||||
tenants_accessor.remove(&tenant_id)
|
||||
} {
|
||||
Some(tenant) => tenant,
|
||||
None => anyhow::bail!("Tenant not found for id {tenant_id}"),
|
||||
};
|
||||
|
||||
pub async fn load_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
remote_storage: Option<GenericRemoteStorage>,
|
||||
) -> anyhow::Result<()> {
|
||||
run_if_no_tenant_in_memory(tenant_id, |vacant_entry| {
|
||||
let tenant_path = conf.tenant_path(&tenant_id);
|
||||
let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(tenant_id);
|
||||
if tenant_ignore_mark.exists() {
|
||||
std::fs::remove_file(&tenant_ignore_mark)
|
||||
.with_context(|| format!("Failed to remove tenant ignore mark {tenant_ignore_mark:?} during tenant loading"))?;
|
||||
}
|
||||
tenant.set_stopping();
|
||||
// shutdown all tenant and timeline tasks (gc, compaction, page service)
|
||||
task_mgr::shutdown_tasks(None, Some(tenant_id), None).await;
|
||||
|
||||
let new_tenant = schedule_local_tenant_processing(conf, &tenant_path, remote_storage)
|
||||
.with_context(|| {
|
||||
format!("Failed to schedule tenant processing in path {tenant_path:?}")
|
||||
})?;
|
||||
// If removal fails there will be no way to successfully retry detach,
// because the tenant no longer exists in the in-memory map. And it needs to be removed from it
// before we remove files, because the map holds a reference to the tenant,
// which references ephemeral files that are deleted on drop. So if we keep these references,
// we will attempt to remove files which no longer exist. This can be fixed by having a shutdown
// mechanism for the tenant that cleans up temporary data to avoid any references to ephemeral files.
|
||||
let local_tenant_directory = conf.tenant_path(&tenant_id);
|
||||
fs::remove_dir_all(&local_tenant_directory).with_context(|| {
|
||||
format!(
|
||||
"Failed to remove local tenant directory '{}'",
|
||||
local_tenant_directory.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
vacant_entry.insert(new_tenant);
|
||||
Ok(())
|
||||
}).await
|
||||
}
|
||||
|
||||
pub async fn ignore_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
) -> anyhow::Result<()> {
|
||||
remove_tenant_from_memory(tenant_id, async {
|
||||
let ignore_mark_file = conf.tenant_ignore_mark_file_path(tenant_id);
|
||||
fs::File::create(&ignore_mark_file)
|
||||
.await
|
||||
.context("Failed to create ignore mark file")
|
||||
.and_then(|_| {
|
||||
crashsafe::fsync_file_and_parent(&ignore_mark_file)
|
||||
.context("Failed to fsync ignore mark file")
|
||||
})
|
||||
.with_context(|| format!("Failed to create ignore mark for tenant {tenant_id}"))?;
|
||||
Ok(())
|
||||
})
|
||||
.await
|
||||
Ok(())
|
||||
}
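The ignore-mark creation above relies on `crashsafe::fsync_file_and_parent`. Fsyncing the parent directory matters because a newly created file only becomes durable once its directory entry is flushed as well. A minimal Unix-style sketch of that pattern with plain std APIs (an illustration of the idea, not the crate's actual implementation):

use std::fs::File;
use std::io;
use std::path::Path;

fn fsync_file_and_parent_sketch(path: &Path) -> io::Result<()> {
    // Flush the file itself (its contents may be empty, the inode still has to reach disk).
    File::open(path)?.sync_all()?;
    // Flush the parent directory so the new directory entry survives a crash.
    let parent = path
        .parent()
        .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "path has no parent"))?;
    File::open(parent)?.sync_all()
}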
|
||||
|
||||
///
|
||||
/// Get list of tenants, for the mgmt API
|
||||
///
|
||||
pub async fn list_tenants() -> Vec<(TenantId, TenantState)> {
|
||||
TENANTS
|
||||
.read()
|
||||
.await
|
||||
pub fn list_tenants() -> Vec<(TenantId, TenantState)> {
|
||||
tenants_state::read_tenants()
|
||||
.iter()
|
||||
.map(|(id, tenant)| (*id, tenant.current_state()))
|
||||
.collect()
|
||||
@@ -373,102 +348,42 @@ pub async fn list_tenants() -> Vec<(TenantId, TenantState)> {
|
||||
pub async fn attach_tenant(
|
||||
conf: &'static PageServerConf,
|
||||
tenant_id: TenantId,
|
||||
remote_storage: GenericRemoteStorage,
|
||||
remote_storage: &GenericRemoteStorage,
|
||||
) -> anyhow::Result<()> {
|
||||
run_if_no_tenant_in_memory(tenant_id, |vacant_entry| {
|
||||
let tenant_path = conf.tenant_path(&tenant_id);
|
||||
anyhow::ensure!(
|
||||
!tenant_path.exists(),
|
||||
"Cannot attach tenant {tenant_id}, local tenant directory already exists"
|
||||
);
|
||||
|
||||
let tenant = Tenant::spawn_attach(conf, tenant_id, remote_storage);
|
||||
vacant_entry.insert(tenant);
|
||||
|
||||
Ok(())
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
async fn run_if_no_tenant_in_memory<F, V>(tenant_id: TenantId, run: F) -> anyhow::Result<V>
|
||||
where
|
||||
F: FnOnce(hash_map::VacantEntry<TenantId, Arc<Tenant>>) -> anyhow::Result<V>,
|
||||
{
|
||||
match TENANTS.write().await.entry(tenant_id) {
|
||||
match tenants_state::write_tenants().entry(tenant_id) {
|
||||
hash_map::Entry::Occupied(e) => {
|
||||
anyhow::bail!(
|
||||
"tenant {tenant_id} already exists, state: {:?}",
|
||||
e.get().current_state()
|
||||
)
|
||||
}
|
||||
hash_map::Entry::Vacant(v) => run(v),
|
||||
}
|
||||
}
|
||||
|
||||
/// Stops and removes the tenant from memory, if it's not [`TenantState::Stopping`] already, bails otherwise.
|
||||
/// Allows removing other tenant resources manually, via `tenant_cleanup`.
|
||||
/// If the cleanup fails, tenant will stay in memory in [`TenantState::Broken`] state, and another removal
|
||||
/// operation would be needed to remove it.
|
||||
async fn remove_tenant_from_memory<V, F>(
|
||||
tenant_id: TenantId,
|
||||
tenant_cleanup: F,
|
||||
) -> anyhow::Result<V>
|
||||
where
|
||||
F: std::future::Future<Output = anyhow::Result<V>>,
|
||||
{
|
||||
// It's important to keep the tenant in memory after the final cleanup, to avoid cleanup races.
// The exclusive lock here ensures we don't miss the tenant state updates before trying another removal.
// Tenant-wide cleanup operations may take some time (removing the entire tenant directory), so we want to
// avoid holding the lock for the entire process.
|
||||
{
|
||||
let tenants_accessor = TENANTS.write().await;
|
||||
match tenants_accessor.get(&tenant_id) {
|
||||
Some(tenant) => match tenant.current_state() {
|
||||
TenantState::Attaching
|
||||
| TenantState::Loading
|
||||
| TenantState::Broken
|
||||
| TenantState::Active => tenant.set_stopping(),
|
||||
TenantState::Stopping => {
|
||||
anyhow::bail!("Tenant {tenant_id} is stopping already")
|
||||
// Cannot attach a tenant that already exists. The error message depends on
|
||||
// the state it's in.
|
||||
match e.get().current_state() {
|
||||
TenantState::Attaching => {
|
||||
anyhow::bail!("tenant {tenant_id} attach is already in progress")
|
||||
}
|
||||
current_state => {
|
||||
anyhow::bail!("tenant already exists, current state: {current_state:?}")
|
||||
}
|
||||
},
|
||||
None => anyhow::bail!("Tenant not found for id {tenant_id}"),
|
||||
}
|
||||
}
|
||||
|
||||
// shutdown all tenant and timeline tasks (gc, compaction, page service)
|
||||
// No new tasks will be started for this tenant because it's in `Stopping` state.
|
||||
// Hence, once we're done here, the `tenant_cleanup` callback can mutate tenant on-disk state freely.
|
||||
task_mgr::shutdown_tasks(None, Some(tenant_id), None).await;
|
||||
|
||||
match tenant_cleanup
|
||||
.await
|
||||
.with_context(|| format!("Failed to run cleanup for tenant {tenant_id}"))
|
||||
{
|
||||
Ok(hook_value) => {
|
||||
let mut tenants_accessor = TENANTS.write().await;
|
||||
if tenants_accessor.remove(&tenant_id).is_none() {
|
||||
warn!("Tenant {tenant_id} got removed from memory before operation finished");
|
||||
}
|
||||
Ok(hook_value)
|
||||
}
|
||||
Err(e) => {
|
||||
let tenants_accessor = TENANTS.read().await;
|
||||
match tenants_accessor.get(&tenant_id) {
|
||||
Some(tenant) => tenant.set_broken(),
|
||||
None => warn!("Tenant {tenant_id} got removed from memory"),
|
||||
}
|
||||
Err(e)
|
||||
hash_map::Entry::Vacant(v) => {
|
||||
let tenant = Tenant::spawn_attach(conf, tenant_id, remote_storage);
|
||||
v.insert(tenant);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
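The hunk above interleaves the `remove_tenant_from_memory` body from one side of this diff with the `attach_tenant` body from the other, so here is a hedged sketch of the removal pattern the comments describe (assumed signatures, using the APIs named elsewhere in this diff, not the verbatim code of either side): flip the tenant to Stopping under a short lock, shut its tasks down outside the lock, run the cleanup, and only then drop it from the map, or mark it Broken if the cleanup failed.

use anyhow::Context;

async fn remove_tenant_sketch<V, F>(tenant_id: TenantId, tenant_cleanup: F) -> anyhow::Result<V>
where
    F: std::future::Future<Output = anyhow::Result<V>>,
{
    // Hold the write lock only long enough to flip the state to Stopping.
    {
        let tenants = tenants_state::write_tenants();
        let tenant = tenants
            .get(&tenant_id)
            .with_context(|| format!("Tenant not found for id {tenant_id}"))?;
        anyhow::ensure!(
            !matches!(tenant.current_state(), TenantState::Stopping),
            "Tenant {tenant_id} is stopping already"
        );
        tenant.set_stopping();
    }
    // Long-running work happens without holding the lock; no new tasks start in Stopping state.
    task_mgr::shutdown_tasks(None, Some(tenant_id), None).await;
    match tenant_cleanup.await {
        Ok(value) => {
            // Only now drop the entry; keeping it in the map until here avoids cleanup races.
            tenants_state::write_tenants().remove(&tenant_id);
            Ok(value)
        }
        Err(e) => {
            // Leave the tenant in memory, marked Broken, so removal can be retried later.
            if let Some(tenant) = tenants_state::read_tenants().get(&tenant_id) {
                tenant.set_broken();
            }
            Err(e)
        }
    }
}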
|
||||
|
||||
pub async fn immediate_gc(
|
||||
#[cfg(feature = "testing")]
|
||||
use {
|
||||
crate::repository::GcResult, pageserver_api::models::TimelineGcRequest,
|
||||
utils::http::error::ApiError,
|
||||
};
|
||||
|
||||
#[cfg(feature = "testing")]
|
||||
pub fn immediate_gc(
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
gc_req: TimelineGcRequest,
|
||||
) -> Result<tokio::sync::oneshot::Receiver<Result<GcResult, anyhow::Error>>, ApiError> {
|
||||
let guard = TENANTS.read().await;
|
||||
let guard = tenants_state::read_tenants();
|
||||
|
||||
let tenant = guard
|
||||
.get(&tenant_id)
|
||||
@@ -490,7 +405,7 @@ pub async fn immediate_gc(
|
||||
&format!("timeline_gc_handler garbage collection run for tenant {tenant_id} timeline {timeline_id}"),
|
||||
false,
|
||||
async move {
|
||||
crate::fail_point!("immediate_gc_task_pre");
|
||||
fail::fail_point!("immediate_gc_task_pre");
|
||||
let result = tenant
|
||||
.gc_iteration(Some(timeline_id), gc_horizon, pitr, true)
|
||||
.instrument(info_span!("manual_gc", tenant = %tenant_id, timeline = %timeline_id))
|
||||
|
||||
@@ -155,7 +155,7 @@ async fn wait_for_active_tenant(
|
||||
wait: Duration,
|
||||
) -> ControlFlow<(), Arc<Tenant>> {
|
||||
let tenant = loop {
|
||||
match tenant_mgr::get_tenant(tenant_id, false).await {
|
||||
match tenant_mgr::get_tenant(tenant_id, false) {
|
||||
Ok(tenant) => break tenant,
|
||||
Err(e) => {
|
||||
error!("Failed to get a tenant {tenant_id}: {e:#}");
|
||||
|
||||
@@ -9,6 +9,7 @@ use std::{
|
||||
use anyhow::{bail, ensure, Context};
|
||||
use bytes::BytesMut;
|
||||
use chrono::{NaiveDateTime, Utc};
|
||||
use fail::fail_point;
|
||||
use futures::StreamExt;
|
||||
use postgres::{SimpleQueryMessage, SimpleQueryRow};
|
||||
use postgres_ffi::v14::xlog_utils::normalize_lsn;
|
||||
@@ -19,7 +20,6 @@ use tokio::{pin, select, sync::watch, time};
|
||||
use tokio_postgres::{replication::ReplicationStream, Client};
|
||||
use tracing::{debug, error, info, trace, warn};
|
||||
|
||||
use crate::fail_point;
|
||||
use crate::{metrics::LIVE_CONNECTIONS_COUNT, walreceiver::TaskStateUpdate};
|
||||
use crate::{
|
||||
task_mgr,
|
||||
|
||||
@@ -84,7 +84,7 @@ pub trait WalRedoManager: Send + Sync {
|
||||
&self,
|
||||
key: Key,
|
||||
lsn: Lsn,
|
||||
base_img: Option<(Lsn, Bytes)>,
|
||||
base_img: Option<Bytes>,
|
||||
records: Vec<(Lsn, NeonWalRecord)>,
|
||||
pg_version: u32,
|
||||
) -> Result<Bytes, WalRedoError>;
|
||||
@@ -147,7 +147,7 @@ impl WalRedoManager for PostgresRedoManager {
|
||||
&self,
|
||||
key: Key,
|
||||
lsn: Lsn,
|
||||
base_img: Option<(Lsn, Bytes)>,
|
||||
base_img: Option<Bytes>,
|
||||
records: Vec<(Lsn, NeonWalRecord)>,
|
||||
pg_version: u32,
|
||||
) -> Result<Bytes, WalRedoError> {
|
||||
@@ -156,8 +156,7 @@ impl WalRedoManager for PostgresRedoManager {
|
||||
return Err(WalRedoError::InvalidRequest);
|
||||
}
|
||||
|
||||
let base_img_lsn = base_img.as_ref().map(|p| p.0).unwrap_or(Lsn::INVALID);
|
||||
let mut img = base_img.map(|p| p.1);
|
||||
let mut img: Option<Bytes> = base_img;
|
||||
let mut batch_neon = can_apply_in_neon(&records[0].1);
|
||||
let mut batch_start = 0;
|
||||
for i in 1..records.len() {
|
||||
@@ -171,7 +170,6 @@ impl WalRedoManager for PostgresRedoManager {
|
||||
key,
|
||||
lsn,
|
||||
img,
|
||||
base_img_lsn,
|
||||
&records[batch_start..i],
|
||||
self.conf.wal_redo_timeout,
|
||||
pg_version,
|
||||
@@ -191,7 +189,6 @@ impl WalRedoManager for PostgresRedoManager {
|
||||
key,
|
||||
lsn,
|
||||
img,
|
||||
base_img_lsn,
|
||||
&records[batch_start..],
|
||||
self.conf.wal_redo_timeout,
|
||||
pg_version,
|
||||
@@ -226,13 +223,11 @@ impl PostgresRedoManager {
|
||||
///
|
||||
/// Process one request for WAL redo using wal-redo postgres
|
||||
///
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn apply_batch_postgres(
|
||||
&self,
|
||||
key: Key,
|
||||
lsn: Lsn,
|
||||
base_img: Option<Bytes>,
|
||||
base_img_lsn: Lsn,
|
||||
records: &[(Lsn, NeonWalRecord)],
|
||||
wal_redo_timeout: Duration,
|
||||
pg_version: u32,
|
||||
@@ -287,12 +282,9 @@ impl PostgresRedoManager {
|
||||
// next request will launch a new one.
|
||||
if result.is_err() {
|
||||
error!(
|
||||
"error applying {} WAL records {}..{} ({} bytes) to base image with LSN {} to reconstruct page image at LSN {}",
|
||||
"error applying {} WAL records ({} bytes) to reconstruct page image at LSN {}",
|
||||
records.len(),
|
||||
records.first().map(|p| p.0).unwrap_or(Lsn(0)),
|
||||
records.last().map(|p| p.0).unwrap_or(Lsn(0)),
|
||||
nbytes,
|
||||
base_img_lsn,
|
||||
lsn
|
||||
);
|
||||
let process = process_guard.take().unwrap();
|
||||
@@ -930,7 +922,8 @@ impl NoLeakChild {
|
||||
|
||||
match child.wait() {
|
||||
Ok(exit_status) => {
|
||||
info!(exit_status = %exit_status, "wait successful");
|
||||
// log at error level since .kill() is something we only do on errors ATM
|
||||
error!(exit_status = %exit_status, "wait successful");
|
||||
}
|
||||
Err(e) => {
|
||||
error!(error = %e, "wait error; might leak the child process; it will show as zombie (defunct)");
|
||||
|
||||
@@ -464,12 +464,12 @@ pg_init_libpagestore(void)
|
||||
NULL, NULL, NULL);
|
||||
DefineCustomIntVariable("neon.readahead_buffer_size",
|
||||
"number of prefetches to buffer",
|
||||
"This buffer is used to hold and manage prefetched "
|
||||
"data; so it is important that this buffer is at "
|
||||
"least as large as the configured value of all "
|
||||
"tablespaces' effective_io_concurrency and "
|
||||
"maintenance_io_concurrency, and your sessions' "
|
||||
"values for these settings.",
|
||||
"This buffer is used to store prefetched data; so "
|
||||
"it is important that this buffer is at least as "
|
||||
"large as the configured value of all tablespaces' "
|
||||
"effective_io_concurrency and maintenance_io_concurrency, "
|
||||
"your sessions' values of these, and the value for "
|
||||
"seqscan_prefetch_buffers.",
|
||||
&readahead_buffer_size,
|
||||
128, 16, 1024,
|
||||
PGC_USERSET,
|
||||
|
||||
@@ -242,14 +242,6 @@ PrefetchState *MyPState;
|
||||
) \
|
||||
)
|
||||
|
||||
#define ReceiveBufferNeedsCompaction() (\
|
||||
(MyPState->n_responses_buffered / 8) < ( \
|
||||
MyPState->ring_receive - \
|
||||
MyPState->ring_last - \
|
||||
MyPState->n_responses_buffered \
|
||||
) \
|
||||
)
|
||||
|
||||
int n_prefetch_hits = 0;
|
||||
int n_prefetch_misses = 0;
|
||||
int n_prefetch_missed_caches = 0;
|
||||
@@ -257,99 +249,17 @@ int n_prefetch_dupes = 0;
|
||||
|
||||
XLogRecPtr prefetch_lsn = 0;
|
||||
|
||||
static bool compact_prefetch_buffers(void);
|
||||
static void consume_prefetch_responses(void);
|
||||
static uint64 prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_lsn);
|
||||
static bool prefetch_read(PrefetchRequest *slot);
|
||||
static void prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force_lsn);
|
||||
static bool prefetch_wait_for(uint64 ring_index);
|
||||
static void prefetch_cleanup_trailing_unused(void);
|
||||
static void prefetch_cleanup(void);
|
||||
static inline void prefetch_set_unused(uint64 ring_index);
|
||||
|
||||
static XLogRecPtr neon_get_request_lsn(bool *latest, RelFileNode rnode,
|
||||
ForkNumber forknum, BlockNumber blkno);
|
||||
|
||||
static bool
|
||||
compact_prefetch_buffers(void)
|
||||
{
|
||||
uint64 empty_ring_index = MyPState->ring_last;
|
||||
uint64 search_ring_index = MyPState->ring_receive;
|
||||
int n_moved = 0;
|
||||
|
||||
if (MyPState->ring_receive == MyPState->ring_last)
|
||||
return false;
|
||||
|
||||
while (search_ring_index > MyPState->ring_last)
|
||||
{
|
||||
search_ring_index--;
|
||||
if (GetPrfSlot(search_ring_index)->status == PRFS_UNUSED)
|
||||
{
|
||||
empty_ring_index = search_ring_index;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Here we have established:
|
||||
* slots < search_ring_index may be unused (not scanned)
|
||||
* slots >= search_ring_index and <= empty_ring_index are unused
|
||||
* slots > empty_ring_index are in use, or outside our buffer's range.
|
||||
*
|
||||
* Therefore, there is a gap of at least one unused item between
|
||||
* search_ring_index and empty_ring_index, which grows as we hit
|
||||
* more unused items while moving backwards through the array.
|
||||
*/
|
||||
|
||||
while (search_ring_index > MyPState->ring_last)
|
||||
{
|
||||
PrefetchRequest *source_slot;
|
||||
PrefetchRequest *target_slot;
|
||||
bool found;
|
||||
|
||||
search_ring_index--;
|
||||
|
||||
source_slot = GetPrfSlot(search_ring_index);
|
||||
|
||||
if (source_slot->status == PRFS_UNUSED)
|
||||
continue;
|
||||
|
||||
target_slot = GetPrfSlot(empty_ring_index);
|
||||
|
||||
Assert(source_slot->status == PRFS_RECEIVED);
|
||||
Assert(target_slot->status == PRFS_UNUSED);
|
||||
|
||||
target_slot->buftag = source_slot->buftag;
|
||||
target_slot->status = source_slot->status;
|
||||
target_slot->response = source_slot->response;
|
||||
target_slot->effective_request_lsn = source_slot->effective_request_lsn;
|
||||
target_slot->my_ring_index = empty_ring_index;
|
||||
|
||||
prfh_delete(MyPState->prf_hash, source_slot);
|
||||
prfh_insert(MyPState->prf_hash, target_slot, &found);
|
||||
|
||||
Assert(!found);
|
||||
|
||||
/* Adjust the location of our known-empty slot */
|
||||
empty_ring_index--;
|
||||
|
||||
source_slot->status = PRFS_UNUSED;
|
||||
source_slot->buftag = (BufferTag) {0};
|
||||
source_slot->response = NULL;
|
||||
source_slot->my_ring_index = 0;
|
||||
source_slot->effective_request_lsn = 0;
|
||||
|
||||
n_moved++;
|
||||
}
|
||||
|
||||
if (MyPState->ring_last != empty_ring_index)
|
||||
{
|
||||
MyPState->ring_last = empty_ring_index;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void
|
||||
readahead_buffer_resize(int newsize, void *extra)
|
||||
{
|
||||
@@ -413,7 +323,7 @@ readahead_buffer_resize(int newsize, void *extra)
|
||||
prfh_insert(newPState->prf_hash, newslot, &found);
|
||||
|
||||
Assert(!found);
|
||||
|
||||
|
||||
switch (newslot->status)
|
||||
{
|
||||
case PRFS_UNUSED:
|
||||
@@ -460,7 +370,7 @@ consume_prefetch_responses(void)
|
||||
}
|
||||
|
||||
static void
|
||||
prefetch_cleanup_trailing_unused(void)
|
||||
prefetch_cleanup(void)
|
||||
{
|
||||
uint64 ring_index;
|
||||
PrefetchRequest *slot;
|
||||
@@ -621,10 +531,7 @@ prefetch_set_unused(uint64 ring_index)
|
||||
|
||||
/* run cleanup if we're holding back ring_last */
|
||||
if (MyPState->ring_last == ring_index)
|
||||
prefetch_cleanup_trailing_unused();
|
||||
/* ... and try to store the buffered responses more compactly if > 12.5% of the buffer is gaps */
|
||||
else if (ReceiveBufferNeedsCompaction())
|
||||
compact_prefetch_buffers();
|
||||
prefetch_cleanup();
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -795,31 +702,20 @@ prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_ls
|
||||
|
||||
Assert(slot->status != PRFS_UNUSED);
|
||||
|
||||
/*
|
||||
* If there is good reason to run compaction on the prefetch buffers,
|
||||
* try to do that.
|
||||
*/
|
||||
if (ReceiveBufferNeedsCompaction() && compact_prefetch_buffers())
|
||||
/* We have the slot for ring_last, so that must still be in progress */
|
||||
switch (slot->status)
|
||||
{
|
||||
Assert(slot->status == PRFS_UNUSED);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* We have the slot for ring_last, so that must still be in progress */
|
||||
switch (slot->status)
|
||||
{
|
||||
case PRFS_REQUESTED:
|
||||
Assert(MyPState->ring_receive == cleanup_index);
|
||||
prefetch_wait_for(cleanup_index);
|
||||
prefetch_set_unused(cleanup_index);
|
||||
break;
|
||||
case PRFS_RECEIVED:
|
||||
case PRFS_TAG_REMAINS:
|
||||
prefetch_set_unused(cleanup_index);
|
||||
break;
|
||||
default:
|
||||
pg_unreachable();
|
||||
}
|
||||
case PRFS_REQUESTED:
|
||||
Assert(MyPState->ring_receive == cleanup_index);
|
||||
prefetch_wait_for(cleanup_index);
|
||||
prefetch_set_unused(cleanup_index);
|
||||
break;
|
||||
case PRFS_RECEIVED:
|
||||
case PRFS_TAG_REMAINS:
|
||||
prefetch_set_unused(cleanup_index);
|
||||
break;
|
||||
default:
|
||||
pg_unreachable();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1206,7 +1102,7 @@ PageIsEmptyHeapPage(char *buffer)
|
||||
}
|
||||
|
||||
static void
|
||||
neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool force)
|
||||
neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer)
|
||||
{
|
||||
XLogRecPtr lsn = PageGetLSN(buffer);
|
||||
|
||||
@@ -1220,7 +1116,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, ch
|
||||
* correctness, the non-logged updates are not critical. But we want to
|
||||
* have a reasonably up-to-date VM and FSM in the page server.
|
||||
*/
|
||||
if ((force || forknum == FSM_FORKNUM || forknum == VISIBILITYMAP_FORKNUM) && !RecoveryInProgress())
|
||||
if (forknum == FSM_FORKNUM && !RecoveryInProgress())
|
||||
{
|
||||
/* FSM is never WAL-logged and we don't care. */
|
||||
XLogRecPtr recptr;
|
||||
@@ -1229,7 +1125,30 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, ch
|
||||
XLogFlush(recptr);
|
||||
lsn = recptr;
|
||||
ereport(SmgrTrace,
|
||||
(errmsg("Page %u of relation %u/%u/%u.%u was force logged. Evicted at lsn=%X/%X",
|
||||
(errmsg("FSM page %u of relation %u/%u/%u.%u was force logged. Evicted at lsn=%X/%X",
|
||||
blocknum,
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
reln->smgr_rnode.node.relNode,
|
||||
forknum, LSN_FORMAT_ARGS(lsn))));
|
||||
}
|
||||
else if (forknum == VISIBILITYMAP_FORKNUM && !RecoveryInProgress())
|
||||
{
|
||||
/*
|
||||
* Always WAL-log vm. We should never miss clearing visibility map
|
||||
* bits.
|
||||
*
|
||||
* TODO Is it too bad for performance? Hopefully we do not evict
|
||||
* actively used vm too often.
|
||||
*/
|
||||
XLogRecPtr recptr;
|
||||
|
||||
recptr = log_newpage_copy(&reln->smgr_rnode.node, forknum, blocknum, buffer, false);
|
||||
XLogFlush(recptr);
|
||||
lsn = recptr;
|
||||
|
||||
ereport(SmgrTrace,
|
||||
(errmsg("Visibilitymap page %u of relation %u/%u/%u.%u was force logged at lsn=%X/%X",
|
||||
blocknum,
|
||||
reln->smgr_rnode.node.spcNode,
|
||||
reln->smgr_rnode.node.dbNode,
|
||||
@@ -1624,7 +1543,6 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
|
||||
char *buffer, bool skipFsync)
|
||||
{
|
||||
XLogRecPtr lsn;
|
||||
BlockNumber n_blocks = 0;
|
||||
|
||||
switch (reln->smgr_relpersistence)
|
||||
{
|
||||
@@ -1664,16 +1582,7 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
|
||||
errhint("This limit is defined by neon.max_cluster_size GUC")));
|
||||
}
|
||||
|
||||
/*
* Usually Postgres doesn't extend a relation by more than one page at a time
* (which would leave holes). But this rule is violated in PG-15, where
* CreateAndCopyRelationData calls smgrextend for the destination relation
* using the size of the source relation.
*/
|
||||
get_cached_relsize(reln->smgr_rnode.node, forkNum, &n_blocks);
|
||||
while (n_blocks < blkno)
|
||||
neon_wallog_page(reln, forkNum, n_blocks++, buffer, true);
|
||||
|
||||
neon_wallog_page(reln, forkNum, blkno, buffer, false);
|
||||
neon_wallog_page(reln, forkNum, blkno, buffer);
|
||||
set_cached_relsize(reln->smgr_rnode.node, forkNum, blkno + 1);
|
||||
|
||||
lsn = PageGetLSN(buffer);
|
||||
@@ -1920,7 +1829,7 @@ neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
|
||||
/* buffer was used, clean up for later reuse */
|
||||
prefetch_set_unused(ring_index);
|
||||
prefetch_cleanup_trailing_unused();
|
||||
prefetch_cleanup();
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2101,7 +2010,7 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
|
||||
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
|
||||
}
|
||||
|
||||
neon_wallog_page(reln, forknum, blocknum, buffer, false);
|
||||
neon_wallog_page(reln, forknum, blocknum, buffer);
|
||||
|
||||
lsn = PageGetLSN(buffer);
|
||||
elog(SmgrTrace, "smgrwrite called for %u/%u/%u.%u blk %u, page LSN: %X/%08X",
|
||||
|
||||
@@ -28,7 +28,6 @@ use std::{borrow::Cow, future::Future, net::SocketAddr};
|
||||
use tokio::{net::TcpListener, task::JoinError};
|
||||
use tracing::info;
|
||||
use utils::project_git_version;
|
||||
use utils::sentry_init::{init_sentry, release_name};
|
||||
|
||||
project_git_version!(GIT_VERSION);
|
||||
|
||||
@@ -46,9 +45,6 @@ async fn main() -> anyhow::Result<()> {
|
||||
.with_target(false)
|
||||
.init();
|
||||
|
||||
// initialize sentry if SENTRY_DSN is provided
|
||||
let _sentry_guard = init_sentry(release_name!(), &[]);
|
||||
|
||||
let arg_matches = cli().get_matches();
|
||||
|
||||
let tls_config = match (
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
# avoid running regular linting script that checks every feature.
|
||||
if [[ "$OSTYPE" == "darwin"* ]]; then
|
||||
# no extra features to test currently, add more here when needed
|
||||
cargo clippy --locked --all --all-targets -- -A unknown_lints -D warnings
|
||||
cargo clippy --locked --all --all-targets --features testing -- -A unknown_lints -D warnings
|
||||
else
|
||||
# * `-A unknown_lints` – do not warn about unknown lint suppressions
|
||||
# that people with newer toolchains might use
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
use anyhow::{bail, Context, Result};
|
||||
use clap::{value_parser, Arg, ArgAction, Command};
|
||||
use const_format::formatcp;
|
||||
use nix::unistd::Pid;
|
||||
use remote_storage::RemoteStorageConfig;
|
||||
use std::fs::{self, File};
|
||||
use std::io::{ErrorKind, Write};
|
||||
@@ -14,7 +15,7 @@ use tokio::sync::mpsc;
|
||||
use toml_edit::Document;
|
||||
use tracing::*;
|
||||
use url::{ParseError, Url};
|
||||
use utils::pid_file;
|
||||
use utils::lock_file;
|
||||
|
||||
use metrics::set_build_info_metric;
|
||||
use safekeeper::broker;
|
||||
@@ -34,14 +35,11 @@ use utils::{
|
||||
http::endpoint,
|
||||
id::NodeId,
|
||||
logging::{self, LogFormat},
|
||||
project_git_version,
|
||||
sentry_init::{init_sentry, release_name},
|
||||
signals, tcp_listener,
|
||||
project_git_version, signals, tcp_listener,
|
||||
};
|
||||
|
||||
const PID_FILE_NAME: &str = "safekeeper.pid";
|
||||
const ID_FILE_NAME: &str = "safekeeper.id";
|
||||
|
||||
project_git_version!(GIT_VERSION);
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
@@ -135,8 +133,6 @@ fn main() -> anyhow::Result<()> {
|
||||
conf.log_format = LogFormat::from_config(log_format)?;
|
||||
}
|
||||
|
||||
// initialize sentry if SENTRY_DSN is provided
|
||||
let _sentry_guard = init_sentry(release_name!(), &[("node_id", &conf.my_id.to_string())]);
|
||||
start_safekeeper(conf, given_id, arg_matches.get_flag("init"))
|
||||
}
|
||||
|
||||
@@ -146,13 +142,28 @@ fn start_safekeeper(mut conf: SafeKeeperConf, given_id: Option<NodeId>, init: bo
|
||||
|
||||
// Prevent running multiple safekeepers on the same directory
|
||||
let lock_file_path = conf.workdir.join(PID_FILE_NAME);
|
||||
let lock_file =
|
||||
pid_file::claim_for_current_process(&lock_file_path).context("claim pid file")?;
|
||||
info!("Claimed pid file at {lock_file_path:?}");
|
||||
|
||||
let lock_file = match lock_file::create_lock_file(&lock_file_path, Pid::this().to_string()) {
|
||||
lock_file::LockCreationResult::Created {
|
||||
new_lock_contents,
|
||||
file,
|
||||
} => {
|
||||
info!("Created lock file at {lock_file_path:?} with contenst {new_lock_contents}");
|
||||
file
|
||||
}
|
||||
lock_file::LockCreationResult::AlreadyLocked {
|
||||
existing_lock_contents,
|
||||
} => anyhow::bail!(
|
||||
"Could not lock pid file; safekeeper is already running in {:?} with PID {}",
|
||||
conf.workdir,
|
||||
existing_lock_contents
|
||||
),
|
||||
lock_file::LockCreationResult::CreationFailed(e) => {
|
||||
return Err(e.context(format!("Failed to create lock file at {lock_file_path:?}")))
|
||||
}
|
||||
};
|
||||
// ensure that the lock file is held even if the main thread of the process panics
|
||||
// we need to release the lock file only when the current process is gone
|
||||
std::mem::forget(lock_file);
|
||||
let _ = Box::leak(Box::new(lock_file));
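// (Either way the point is the same: the lock file handle is never dropped, so its
// file descriptor, and with it the lock, stays open until the process exits.)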
|
||||
|
||||
// Set or read our ID.
|
||||
set_id(&mut conf, given_id)?;
|
||||
|
||||
@@ -226,7 +226,6 @@ impl ReplicationConn {
|
||||
let mut end_pos = stop_pos.unwrap_or(inmem_state.commit_lsn);
|
||||
|
||||
let mut wal_reader = WalReader::new(
|
||||
spg.conf.workdir.clone(),
|
||||
spg.conf.timeline_dir(&tli.ttid),
|
||||
&persisted_state,
|
||||
start_pos,
|
||||
|
||||
@@ -13,7 +13,7 @@ use std::time::Duration;
|
||||
use postgres_ffi::v14::xlog_utils::XLogSegNoOffsetToRecPtr;
|
||||
use postgres_ffi::XLogFileName;
|
||||
use postgres_ffi::{XLogSegNo, PG_TLI};
|
||||
use remote_storage::{GenericRemoteStorage, RemotePath};
|
||||
use remote_storage::GenericRemoteStorage;
|
||||
use tokio::fs::File;
|
||||
use tokio::runtime::Builder;
|
||||
|
||||
@@ -151,7 +151,7 @@ async fn update_task(
|
||||
let timeline_dir = conf.timeline_dir(&ttid);
|
||||
|
||||
let handle = tokio::spawn(
|
||||
backup_task_main(ttid, timeline_dir, conf.workdir.clone(), shutdown_rx)
|
||||
backup_task_main(ttid, timeline_dir, shutdown_rx)
|
||||
.instrument(info_span!("WAL backup task", ttid = %ttid)),
|
||||
);
|
||||
|
||||
@@ -182,10 +182,10 @@ async fn wal_backup_launcher_main_loop(
|
||||
|
||||
let conf_ = conf.clone();
|
||||
REMOTE_STORAGE.get_or_init(|| {
|
||||
conf_
|
||||
.remote_storage
|
||||
.as_ref()
|
||||
.map(|c| GenericRemoteStorage::from_config(c).expect("failed to create remote storage"))
|
||||
conf_.remote_storage.as_ref().map(|c| {
|
||||
GenericRemoteStorage::from_config(conf_.workdir, c)
|
||||
.expect("failed to create remote storage")
|
||||
})
|
||||
});
|
||||
|
||||
// Presence in this map means launcher is aware s3 offloading is needed for
|
||||
@@ -234,7 +234,6 @@ async fn wal_backup_launcher_main_loop(
|
||||
struct WalBackupTask {
|
||||
timeline: Arc<Timeline>,
|
||||
timeline_dir: PathBuf,
|
||||
workspace_dir: PathBuf,
|
||||
wal_seg_size: usize,
|
||||
commit_lsn_watch_rx: watch::Receiver<Lsn>,
|
||||
}
|
||||
@@ -243,7 +242,6 @@ struct WalBackupTask {
|
||||
async fn backup_task_main(
|
||||
ttid: TenantTimelineId,
|
||||
timeline_dir: PathBuf,
|
||||
workspace_dir: PathBuf,
|
||||
mut shutdown_rx: Receiver<()>,
|
||||
) {
|
||||
info!("started");
|
||||
@@ -259,7 +257,6 @@ async fn backup_task_main(
|
||||
commit_lsn_watch_rx: tli.get_commit_lsn_watch_rx(),
|
||||
timeline: tli,
|
||||
timeline_dir,
|
||||
workspace_dir,
|
||||
};
|
||||
|
||||
// task is spun up only when wal_seg_size is already initialized
|
||||
@@ -324,7 +321,6 @@ impl WalBackupTask {
|
||||
commit_lsn,
|
||||
self.wal_seg_size,
|
||||
&self.timeline_dir,
|
||||
&self.workspace_dir,
|
||||
)
|
||||
.await
|
||||
{
|
||||
@@ -357,12 +353,11 @@ pub async fn backup_lsn_range(
|
||||
end_lsn: Lsn,
|
||||
wal_seg_size: usize,
|
||||
timeline_dir: &Path,
|
||||
workspace_dir: &Path,
|
||||
) -> Result<Lsn> {
|
||||
let mut res = start_lsn;
|
||||
let segments = get_segments(start_lsn, end_lsn, wal_seg_size);
|
||||
for s in &segments {
|
||||
backup_single_segment(s, timeline_dir, workspace_dir)
|
||||
backup_single_segment(s, timeline_dir)
|
||||
.await
|
||||
.with_context(|| format!("offloading segno {}", s.seg_no))?;
|
||||
|
||||
@@ -377,24 +372,11 @@ pub async fn backup_lsn_range(
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
async fn backup_single_segment(
|
||||
seg: &Segment,
|
||||
timeline_dir: &Path,
|
||||
workspace_dir: &Path,
|
||||
) -> Result<()> {
|
||||
let segment_file_path = seg.file_path(timeline_dir)?;
|
||||
let remote_segment_path = segment_file_path
|
||||
.strip_prefix(&workspace_dir)
|
||||
.context("Failed to strip workspace dir prefix")
|
||||
.and_then(RemotePath::new)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to resolve remote part of path {segment_file_path:?} for base {workspace_dir:?}",
|
||||
)
|
||||
})?;
|
||||
async fn backup_single_segment(seg: &Segment, timeline_dir: &Path) -> Result<()> {
|
||||
let segment_file_name = seg.file_path(timeline_dir)?;
|
||||
|
||||
backup_object(&segment_file_path, &remote_segment_path, seg.size()).await?;
|
||||
debug!("Backup of {} done", segment_file_path.display());
|
||||
backup_object(&segment_file_name, seg.size()).await?;
|
||||
debug!("Backup of {} done", segment_file_name.display());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -444,7 +426,7 @@ fn get_segments(start: Lsn, end: Lsn, seg_size: usize) -> Vec<Segment> {
|
||||
|
||||
static REMOTE_STORAGE: OnceCell<Option<GenericRemoteStorage>> = OnceCell::new();
|
||||
|
||||
async fn backup_object(source_file: &Path, target_file: &RemotePath, size: usize) -> Result<()> {
|
||||
async fn backup_object(source_file: &Path, size: usize) -> Result<()> {
|
||||
let storage = REMOTE_STORAGE
|
||||
.get()
|
||||
.expect("failed to get remote storage")
|
||||
@@ -459,12 +441,12 @@ async fn backup_object(source_file: &Path, target_file: &RemotePath, size: usize
|
||||
})?);
|
||||
|
||||
storage
|
||||
.upload_storage_object(Box::new(file), size, target_file)
|
||||
.upload_storage_object(Box::new(file), size, source_file)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn read_object(
|
||||
file_path: &RemotePath,
|
||||
file_path: PathBuf,
|
||||
offset: u64,
|
||||
) -> anyhow::Result<Pin<Box<dyn tokio::io::AsyncRead>>> {
|
||||
let storage = REMOTE_STORAGE
|
||||
@@ -473,13 +455,19 @@ pub async fn read_object(
|
||||
.as_ref()
|
||||
.context("No remote storage configured")?;
|
||||
|
||||
info!("segment download about to start from remote path {file_path:?} at offset {offset}");
|
||||
|
||||
info!(
|
||||
"segment download about to start for local path {} at offset {}",
|
||||
file_path.display(),
|
||||
offset
|
||||
);
|
||||
let download = storage
|
||||
.download_storage_object(Some((offset, None)), file_path)
|
||||
.download_storage_object(Some((offset, None)), &file_path)
|
||||
.await
|
||||
.with_context(|| {
|
||||
format!("Failed to open WAL segment download stream for remote path {file_path:?}")
|
||||
format!(
|
||||
"Failed to open WAL segment download stream for local path {}",
|
||||
file_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
Ok(download.download_stream)
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
//! Note that last file has `.partial` suffix, that's different from postgres.
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
use remote_storage::RemotePath;
|
||||
|
||||
use std::io::{self, Seek, SeekFrom};
|
||||
use std::pin::Pin;
|
||||
@@ -446,7 +445,6 @@ fn remove_segments_from_disk(
|
||||
}
|
||||
|
||||
pub struct WalReader {
|
||||
workdir: PathBuf,
|
||||
timeline_dir: PathBuf,
|
||||
wal_seg_size: usize,
|
||||
pos: Lsn,
|
||||
@@ -461,7 +459,6 @@ pub struct WalReader {
|
||||
|
||||
impl WalReader {
|
||||
pub fn new(
|
||||
workdir: PathBuf,
|
||||
timeline_dir: PathBuf,
|
||||
state: &SafeKeeperState,
|
||||
start_pos: Lsn,
|
||||
@@ -481,7 +478,6 @@ impl WalReader {
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
workdir,
|
||||
timeline_dir,
|
||||
wal_seg_size: state.server.wal_seg_size as usize,
|
||||
pos: start_pos,
|
||||
@@ -549,17 +545,7 @@ impl WalReader {
|
||||
|
||||
// Try to open remote file, if remote reads are enabled
|
||||
if self.enable_remote_read {
|
||||
let remote_wal_file_path = wal_file_path
|
||||
.strip_prefix(&self.workdir)
|
||||
.context("Failed to strip workdir prefix")
|
||||
.and_then(RemotePath::new)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to resolve remote part of path {:?} for base {:?}",
|
||||
wal_file_path, self.workdir,
|
||||
)
|
||||
})?;
|
||||
return read_object(&remote_wal_file_path, xlogoff as u64).await;
|
||||
return read_object(wal_file_path, xlogoff as u64).await;
|
||||
}
|
||||
|
||||
bail!("WAL segment is not found")
|
||||
|
||||
@@ -6,6 +6,9 @@ Prerequisites:
- Correctly configured Python, see [`/docs/sourcetree.md`](/docs/sourcetree.md#using-python)
- Neon and Postgres binaries
- See the root [README.md](/README.md) for build directions
If you want to run tests that use test-only APIs, you need to add `--features testing` to the Rust build commands.
For convenience, the repository's cargo config contains a `build_testing` alias that serves as a subcommand and adds the required feature flags.
Usage example: `cargo build_testing --release` is equivalent to `cargo build --features testing --release`
- Tests can be run from the git tree; or see the environment variables
below to run from other directories.
- The neon git repo, including the postgres submodule
|
||||
|
||||
@@ -33,7 +33,7 @@ from _pytest.config import Config
from _pytest.fixtures import FixtureRequest
from fixtures.log_helper import log
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import allure_attach_from_dir, etcd_path, get_self_dir, subprocess_capture
from fixtures.utils import Fn, allure_attach_from_dir, etcd_path, get_self_dir, subprocess_capture

# Type-related stuff
from psycopg2.extensions import connection as PgConnection
@@ -587,7 +587,6 @@ class NeonEnvBuilder:
        auth_enabled: bool = False,
        rust_log_override: Optional[str] = None,
        default_branch_name: str = DEFAULT_BRANCH_NAME,
        testing_mode: bool = True,
    ):
        self.repo_dir = repo_dir
        self.rust_log_override = rust_log_override
@@ -609,7 +608,6 @@ class NeonEnvBuilder:
        self.neon_binpath = neon_binpath
        self.pg_distrib_dir = pg_distrib_dir
        self.pg_version = pg_version
        self.testing_mode = testing_mode

    def init(self) -> NeonEnv:
        # Cannot create more than one environment from one builder
@@ -860,7 +858,6 @@ class NeonEnv:
            http=self.port_distributor.get_port(),
        )
        pageserver_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
        pageserver_testing_mode = "true" if config.testing_mode else "false"

        toml += textwrap.dedent(
            f"""
@@ -869,7 +866,6 @@ class NeonEnv:
            listen_pg_addr = 'localhost:{pageserver_port.pg}'
            listen_http_addr = 'localhost:{pageserver_port.http}'
            auth_type = '{pageserver_auth_type}'
            testing_mode = {pageserver_testing_mode}
            """
        )

@@ -982,10 +978,6 @@ def _shared_simple_env(
        pg_distrib_dir=pg_distrib_dir,
        pg_version=pg_version,
        run_id=run_id,
        # Disable failpoint support. Failpoints could have unexpected consequences
        # when the pageserver is shared by concurrent tests. Also, it might affect
        # performance, and we use the shared simple env in performance tests.
        testing_mode=False,
    ) as builder:
        env = builder.init_start()

@@ -1056,10 +1048,11 @@ class PageserverApiException(Exception):


class PageserverHttpClient(requests.Session):
    def __init__(self, port: int, auth_token: Optional[str] = None):
    def __init__(self, port: int, is_testing_enabled_or_skip: Fn, auth_token: Optional[str] = None):
        super().__init__()
        self.port = port
        self.auth_token = auth_token
        self.is_testing_enabled_or_skip = is_testing_enabled_or_skip

        if auth_token is not None:
            self.headers["Authorization"] = f"Bearer {auth_token}"
@@ -1078,6 +1071,8 @@ class PageserverHttpClient(requests.Session):
        self.get(f"http://localhost:{self.port}/v1/status").raise_for_status()

    def configure_failpoints(self, config_strings: Tuple[str, str] | List[Tuple[str, str]]):
        self.is_testing_enabled_or_skip()

        if isinstance(config_strings, tuple):
            pairs = [config_strings]
        else:
@@ -1124,14 +1119,6 @@ class PageserverHttpClient(requests.Session):
        res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/detach")
        self.verbose_error(res)

    def tenant_load(self, tenant_id: TenantId):
        res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/load")
        self.verbose_error(res)

    def tenant_ignore(self, tenant_id: TenantId):
        res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/ignore")
        self.verbose_error(res)

    def tenant_status(self, tenant_id: TenantId) -> Dict[Any, Any]:
        res = self.get(f"http://localhost:{self.port}/v1/tenant/{tenant_id}")
        self.verbose_error(res)
@@ -1217,6 +1204,8 @@ class PageserverHttpClient(requests.Session):
    def timeline_gc(
        self, tenant_id: TenantId, timeline_id: TimelineId, gc_horizon: Optional[int]
    ) -> dict[str, Any]:
        self.is_testing_enabled_or_skip()

        log.info(
            f"Requesting GC: tenant {tenant_id}, timeline {timeline_id}, gc_horizon {repr(gc_horizon)}"
        )
@@ -1232,6 +1221,8 @@ class PageserverHttpClient(requests.Session):
        return res_json

    def timeline_compact(self, tenant_id: TenantId, timeline_id: TimelineId):
        self.is_testing_enabled_or_skip()

        log.info(f"Requesting compact: tenant {tenant_id}, timeline {timeline_id}")
        res = self.put(
            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/compact"
@@ -1255,6 +1246,8 @@ class PageserverHttpClient(requests.Session):
        return res_json

    def timeline_checkpoint(self, tenant_id: TenantId, timeline_id: TimelineId):
        self.is_testing_enabled_or_skip()

        log.info(f"Requesting checkpoint: tenant {tenant_id}, timeline {timeline_id}")
        res = self.put(
            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/checkpoint"
@@ -1814,6 +1807,10 @@ class NeonPageserver(PgProtocol):
    ):
        self.stop(immediate=True)

    def is_testing_enabled_or_skip(self):
        if '"testing"' not in self.version:
            pytest.skip("pageserver was built without 'testing' feature")

    def is_profiling_enabled_or_skip(self):
        if '"profiling"' not in self.version:
            pytest.skip("pageserver was built without 'profiling' feature")
@@ -1822,6 +1819,7 @@ class NeonPageserver(PgProtocol):
        return PageserverHttpClient(
            port=self.service_port.http,
            auth_token=auth_token,
            is_testing_enabled_or_skip=self.is_testing_enabled_or_skip,
        )

    def assert_no_errors(self):
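To make the constructor change above concrete, here is a minimal sketch (not part of the diff; `make_client` and `testing_built` are made-up names) of how the updated `PageserverHttpClient` can be constructed outside the `NeonEnv` fixtures, as the compatibility and relocation tests below do:

```python
import pytest

from fixtures.neon_fixtures import PageserverHttpClient


def make_client(port: int, testing_built: bool) -> PageserverHttpClient:
    # Callback contract: return normally when testing-only endpoints are
    # allowed, otherwise call pytest.skip() so the calling test is skipped.
    def guard():
        if not testing_built:
            pytest.skip("pageserver was built without 'testing' feature")

    # is_testing_enabled_or_skip is now a required constructor argument;
    # it is invoked before failpoint, gc, compact and checkpoint requests.
    return PageserverHttpClient(port=port, is_testing_enabled_or_skip=guard, auth_token=None)
```

Standalone callers that cannot easily check the build pass a `lambda: True` no-op instead, flagged with a TODO in the hunks below.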
@@ -3,7 +3,7 @@
First make a release build. The profiling flag is optional, used only for tests that
generate flame graphs. The `-s` flag just silences a lot of output and makes it
easier to see compile errors without scrolling up.
`BUILD_TYPE=release CARGO_BUILD_FLAGS="--features=profiling" make -s -j8`
`BUILD_TYPE=release CARGO_BUILD_FLAGS="--features=testing,profiling" make -s -j8`

NOTE: the `profiling` flag only works on Linux because we use Linux-specific
libc APIs like `libc::timer_t`.
@@ -42,8 +42,7 @@ def test_bulk_update(neon_env_builder: NeonEnvBuilder, zenbenchmark, fillfactor)

            cur.execute("drop table t")
            cur.execute("set enable_seqscan_prefetch=on")
            cur.execute("set effective_io_concurrency=32")
            cur.execute("set maintenance_io_concurrency=32")
            cur.execute("set seqscan_prefetch_buffers=100")

            cur.execute(f"create table t2(x integer) WITH (fillfactor={fillfactor})")
@@ -1,14 +1,10 @@
from contextlib import closing

from fixtures.neon_fixtures import NeonEnvBuilder, wait_for_last_record_lsn
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import query_scalar
from fixtures.neon_fixtures import NeonEnvBuilder


# This test demonstrates how to collect a read trace. It's useful until
# it gets replaced by a test that actually does stuff with the trace.
#
# Additionally, tests that pageserver is able to create tenants with custom configs.
def test_read_request_tracing(neon_env_builder: NeonEnvBuilder):
    neon_env_builder.num_safekeepers = 1
    env = neon_env_builder.init_start()
@@ -27,12 +23,6 @@ def test_read_request_tracing(neon_env_builder: NeonEnvBuilder):
        cur.execute("create table t (i integer);")
        cur.execute(f"insert into t values (generate_series(1,{10000}));")
        cur.execute("select count(*) from t;")
        tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
        timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])
        current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
        # wait until pageserver receives that data
        pageserver_http = env.pageserver.http_client()
        wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id, current_lsn)

    # Stop pg so we drop the connection and flush the traces
    pg.stop()
@@ -327,6 +327,7 @@ def check_neon_works(
    auth_token = snapshot_config["pageserver"]["auth_token"]
    pageserver_http = PageserverHttpClient(
        port=pageserver_port,
        is_testing_enabled_or_skip=lambda: True,  # TODO: check if testing really enabled
        auth_token=auth_token,
    )
@@ -13,6 +13,7 @@ def test_pageserver_recovery(neon_env_builder: NeonEnvBuilder):
    neon_env_builder.pageserver_config_override = "tenant_config={checkpoint_distance = 1048576}"

    env = neon_env_builder.init()
    env.pageserver.is_testing_enabled_or_skip()

    neon_env_builder.start()
@@ -71,10 +71,8 @@ def test_remote_storage_backup_and_restore(
    # FIXME retry downloads without throwing errors
    env.pageserver.allowed_errors.append(".*failed to load remote timeline.*")
    # we have a bunch of pytest.raises for these below
    env.pageserver.allowed_errors.append(".*tenant .*? already exists, state:.*")
    env.pageserver.allowed_errors.append(
        ".*Cannot attach tenant .*?, local tenant directory already exists.*"
    )
    env.pageserver.allowed_errors.append(".*tenant already exists.*")
    env.pageserver.allowed_errors.append(".*attach is already in progress.*")

    pageserver_http = env.pageserver.http_client()
    pg = env.postgres.create_start("main")
@@ -138,7 +136,7 @@ def test_remote_storage_backup_and_restore(

    # assert cannot attach timeline that is scheduled for download
    # FIXME implement layer download retries
    with pytest.raises(Exception, match=f"tenant {tenant_id} already exists, state: Broken"):
    with pytest.raises(Exception, match="tenant already exists, current state: Broken"):
        client.tenant_attach(tenant_id)

    tenant_status = client.tenant_status(tenant_id)
@@ -151,7 +149,9 @@ def test_remote_storage_backup_and_restore(
    env.pageserver.start()

    # ensure that an initiated attach operation survives pageserver restart
    with pytest.raises(Exception, match=f"tenant {tenant_id} already exists, state:"):
    with pytest.raises(
        Exception, match=r".*(tenant already exists|attach is already in progress).*"
    ):
        client.tenant_attach(tenant_id)
    log.info("waiting for timeline redownload")
    wait_until(
@@ -165,6 +165,7 @@ def test_remote_storage_backup_and_restore(
    assert (
        Lsn(detail["last_record_lsn"]) >= current_lsn
    ), "current db Lsn should not be less than the one stored on remote storage"
    assert not detail["awaits_download"]

    pg = env.postgres.create_start("main")
    with pg.cursor() as cur:
@@ -190,7 +191,7 @@ def test_remote_storage_upload_queue_retries(

    neon_env_builder.enable_remote_storage(
        remote_storage_kind=remote_storage_kind,
        test_name="test_remote_storage_upload_queue_retries",
        test_name="test_remote_storage_backup_and_restore",
    )

    env = neon_env_builder.init_start()
@@ -352,7 +353,7 @@ def test_timeline_deletion_with_files_stuck_in_upload_queue(
):
    neon_env_builder.enable_remote_storage(
        remote_storage_kind=remote_storage_kind,
        test_name="test_timeline_deletion_with_files_stuck_in_upload_queue",
        test_name="test_remote_storage_backup_and_restore",
    )

    env = neon_env_builder.init_start()
@@ -7,7 +7,6 @@ from fixtures.neon_fixtures import (
    NeonEnvBuilder,
    PageserverApiException,
    PageserverHttpClient,
    Postgres,
    RemoteStorageKind,
    available_remote_storages,
    wait_for_last_record_lsn,
@@ -168,337 +167,3 @@ def test_detach_while_attaching(

    with pg.cursor() as cur:
        cur.execute("SELECT COUNT(*) FROM foo")


# Tests that the combination of `ignore` and `load` operations is able to remove and restore the tenant in pageserver's memory.
# * writes some data into tenant's timeline
# * ensures it's synced with the remote storage
# * `ignore` the tenant
# * verify that ignored tenant files are generally unchanged, only an ignore mark has appeared
# * verify the ignored tenant is gone from pageserver's memory
# * restart the pageserver and verify that ignored tenant is still not loaded
# * `load` the same tenant
# * ensure that its status is `Active` and it's present in pageserver's memory with all timelines
@pytest.mark.parametrize("remote_storage_kind", [RemoteStorageKind.NOOP, RemoteStorageKind.MOCK_S3])
def test_ignored_tenant_reattach(
    neon_env_builder: NeonEnvBuilder, remote_storage_kind: RemoteStorageKind
):
    neon_env_builder.enable_remote_storage(
        remote_storage_kind=remote_storage_kind,
        test_name="test_remote_storage_backup_and_restore",
    )
    env = neon_env_builder.init_start()
    pageserver_http = env.pageserver.http_client()

    ignored_tenant_id, _ = env.neon_cli.create_tenant()
    tenant_dir = env.repo_dir / "tenants" / str(ignored_tenant_id)
    tenants_before_ignore = [tenant["id"] for tenant in pageserver_http.tenant_list()]
    tenants_before_ignore.sort()
    timelines_before_ignore = [
        timeline["timeline_id"]
        for timeline in pageserver_http.timeline_list(tenant_id=ignored_tenant_id)
    ]
    files_before_ignore = [tenant_path for tenant_path in tenant_dir.glob("**/*")]

    # ignore the tenant and verify it's not present in pageserver replies, with its files still on disk
    pageserver_http.tenant_ignore(ignored_tenant_id)

    files_after_ignore_with_retain = [tenant_path for tenant_path in tenant_dir.glob("**/*")]
    new_files = set(files_after_ignore_with_retain) - set(files_before_ignore)
    disappeared_files = set(files_before_ignore) - set(files_after_ignore_with_retain)
    assert (
        len(disappeared_files) == 0
    ), f"Tenant ignore should not remove files from disk, missing: {disappeared_files}"
    assert (
        len(new_files) == 1
    ), f"Only tenant ignore file should appear on disk but got: {new_files}"

    tenants_after_ignore = [tenant["id"] for tenant in pageserver_http.tenant_list()]
    assert ignored_tenant_id not in tenants_after_ignore, "Ignored tenant should be missing"
    assert len(tenants_after_ignore) + 1 == len(
        tenants_before_ignore
    ), "Only ignored tenant should be missing"

    # restart the pageserver to ensure we don't load the ignored tenant
    env.pageserver.stop()
    env.pageserver.start()
    tenants_after_restart = [tenant["id"] for tenant in pageserver_http.tenant_list()]
    tenants_after_restart.sort()
    assert (
        tenants_after_restart == tenants_after_ignore
    ), "Ignored tenant should not be reloaded after pageserver restart"

    # now, load it from the local files and expect it to work
    pageserver_http.tenant_load(tenant_id=ignored_tenant_id)
    wait_until_tenant_status(pageserver_http, ignored_tenant_id, "Active", 5)

    tenants_after_attach = [tenant["id"] for tenant in pageserver_http.tenant_list()]
    tenants_after_attach.sort()
    assert tenants_after_attach == tenants_before_ignore, "Should have all tenants back"

    timelines_after_ignore = [
        timeline["timeline_id"]
        for timeline in pageserver_http.timeline_list(tenant_id=ignored_tenant_id)
    ]
    assert timelines_before_ignore == timelines_after_ignore, "Should have all timelines back"


# Tests that it's possible to `load` tenants with missing layers and get them restored:
# * writes some data into tenant's timeline
# * ensures it's synced with the remote storage
# * `ignore` the tenant
# * removes all timeline's local layers
# * `load` the same tenant
# * ensure that its status is `Active`
# * check that timeline data is restored
@pytest.mark.parametrize("remote_storage_kind", [RemoteStorageKind.LOCAL_FS])
def test_ignored_tenant_download_missing_layers(
    neon_env_builder: NeonEnvBuilder, remote_storage_kind: RemoteStorageKind
):
    neon_env_builder.enable_remote_storage(
        remote_storage_kind=remote_storage_kind,
        test_name="test_ignored_tenant_download_and_attach",
    )
    env = neon_env_builder.init_start()
    pageserver_http = env.pageserver.http_client()
    pg = env.postgres.create_start("main")

    tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
    timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])

    data_id = 1
    data_secret = "very secret secret"
    insert_test_data(pageserver_http, tenant_id, timeline_id, data_id, data_secret, pg)

    tenants_before_ignore = [tenant["id"] for tenant in pageserver_http.tenant_list()]
    tenants_before_ignore.sort()
    timelines_before_ignore = [
        timeline["timeline_id"] for timeline in pageserver_http.timeline_list(tenant_id=tenant_id)
    ]

    # ignore the tenant and remove its layers
    pageserver_http.tenant_ignore(tenant_id)
    tenant_timeline_dir = env.repo_dir / "tenants" / str(tenant_id) / "timelines" / str(timeline_id)
    layers_removed = False
    for dir_entry in tenant_timeline_dir.iterdir():
        if dir_entry.name.startswith("00000"):
            # Looks like a layer file. Remove it
            dir_entry.unlink()
            layers_removed = True
    assert layers_removed, f"Found no layers for tenant {tenant_timeline_dir}"

    # now, load it from the local files and expect it to work due to remote storage restoration
    pageserver_http.tenant_load(tenant_id=tenant_id)
    wait_until_tenant_status(pageserver_http, tenant_id, "Active", 5)

    tenants_after_attach = [tenant["id"] for tenant in pageserver_http.tenant_list()]
    tenants_after_attach.sort()
    assert tenants_after_attach == tenants_before_ignore, "Should have all tenants back"

    timelines_after_ignore = [
        timeline["timeline_id"] for timeline in pageserver_http.timeline_list(tenant_id=tenant_id)
    ]
    assert timelines_before_ignore == timelines_after_ignore, "Should have all timelines back"

    pg.stop()
    pg.start()
    ensure_test_data(data_id, data_secret, pg)


# Tests that it's possible to `load` broken tenants:
# * `ignore` a tenant
# * removes its `metadata` file locally
# * `load` the same tenant
# * ensure that its status is `Broken`
@pytest.mark.parametrize("remote_storage_kind", [RemoteStorageKind.LOCAL_FS])
def test_ignored_tenant_stays_broken_without_metadata(
    neon_env_builder: NeonEnvBuilder, remote_storage_kind: RemoteStorageKind
):
    neon_env_builder.enable_remote_storage(
        remote_storage_kind=remote_storage_kind,
        test_name="test_ignored_tenant_stays_broken_without_metadata",
    )
    env = neon_env_builder.init_start()
    pageserver_http = env.pageserver.http_client()
    pg = env.postgres.create_start("main")

    tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
    timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])

    # ignore the tenant and remove its metadata
    pageserver_http.tenant_ignore(tenant_id)
    tenant_timeline_dir = env.repo_dir / "tenants" / str(tenant_id) / "timelines" / str(timeline_id)
    metadata_removed = False
    for dir_entry in tenant_timeline_dir.iterdir():
        if dir_entry.name == "metadata":
            # Found the metadata file. Remove it
            dir_entry.unlink()
            metadata_removed = True
    assert metadata_removed, f"Failed to find metadata file in {tenant_timeline_dir}"

    env.pageserver.allowed_errors.append(".*could not load tenant .*?: failed to load metadata.*")

    # now, load it from the local files and expect it to be broken due to inability to load tenant files into memory
    pageserver_http.tenant_load(tenant_id=tenant_id)
    wait_until_tenant_status(pageserver_http, tenant_id, "Broken", 5)


# Tests that attach never works on a tenant, ignored or not, as long as its files are still present locally.
# Similarly, tests that it's not possible to schedule a `load` for a tenant that's not ignored.
@pytest.mark.parametrize("remote_storage_kind", [RemoteStorageKind.LOCAL_FS])
def test_load_attach_negatives(
    neon_env_builder: NeonEnvBuilder, remote_storage_kind: RemoteStorageKind
):
    neon_env_builder.enable_remote_storage(
        remote_storage_kind=remote_storage_kind,
        test_name="test_load_attach_negatives",
    )
    env = neon_env_builder.init_start()
    pageserver_http = env.pageserver.http_client()
    pg = env.postgres.create_start("main")

    tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])

    env.pageserver.allowed_errors.append(".*tenant .*? already exists, state:.*")
    with pytest.raises(
        expected_exception=PageserverApiException,
        match=f"tenant {tenant_id} already exists, state: Active",
    ):
        pageserver_http.tenant_load(tenant_id)

    with pytest.raises(
        expected_exception=PageserverApiException,
        match=f"tenant {tenant_id} already exists, state: Active",
    ):
        pageserver_http.tenant_attach(tenant_id)

    pageserver_http.tenant_ignore(tenant_id)

    env.pageserver.allowed_errors.append(
        ".*Cannot attach tenant .*?, local tenant directory already exists.*"
    )
    with pytest.raises(
        expected_exception=PageserverApiException,
        match=f"Cannot attach tenant {tenant_id}, local tenant directory already exists",
    ):
        pageserver_http.tenant_attach(tenant_id)


@pytest.mark.parametrize("remote_storage_kind", [RemoteStorageKind.LOCAL_FS])
def test_ignore_while_attaching(
    neon_env_builder: NeonEnvBuilder,
    remote_storage_kind: RemoteStorageKind,
):
    neon_env_builder.enable_remote_storage(
        remote_storage_kind=remote_storage_kind,
        test_name="test_ignore_while_attaching",
    )

    env = neon_env_builder.init_start()
    pageserver_http = env.pageserver.http_client()
    pg = env.postgres.create_start("main")

    pageserver_http = env.pageserver.http_client()

    tenant_id = TenantId(pg.safe_psql("show neon.tenant_id")[0][0])
    timeline_id = TimelineId(pg.safe_psql("show neon.timeline_id")[0][0])

    data_id = 1
    data_secret = "very secret secret"
    insert_test_data(pageserver_http, tenant_id, timeline_id, data_id, data_secret, pg)

    tenants_before_ignore = [tenant["id"] for tenant in pageserver_http.tenant_list()]

    # Detach it
    pageserver_http.tenant_detach(tenant_id)
    # And re-attach, but stop attach task_mgr task from completing
    pageserver_http.configure_failpoints([("attach-before-activate", "return(5000)")])
    pageserver_http.tenant_attach(tenant_id)
    # Run ignore on the task, thereby cancelling the attach.
    # XXX This should take priority over attach, i.e., it should cancel the attach task.
    # But neither the failpoint, nor the proper storage_sync2 download functions,
    # are sensitive to task_mgr::shutdown.
    # This problem is tracked in https://github.com/neondatabase/neon/issues/2996 .
    # So, for now, effectively, this ignore here will block until attach task completes.
    pageserver_http.tenant_ignore(tenant_id)

    # Cannot attach it due to some local files existing
    env.pageserver.allowed_errors.append(
        ".*Cannot attach tenant .*?, local tenant directory already exists.*"
    )
    with pytest.raises(
        expected_exception=PageserverApiException,
        match=f"Cannot attach tenant {tenant_id}, local tenant directory already exists",
    ):
        pageserver_http.tenant_attach(tenant_id)

    tenants_after_ignore = [tenant["id"] for tenant in pageserver_http.tenant_list()]
    assert tenant_id not in tenants_after_ignore, "Ignored tenant should be missing"
    assert len(tenants_after_ignore) + 1 == len(
        tenants_before_ignore
    ), "Only ignored tenant should be missing"

    # But we can load it from local files, which will restore the attach.
    pageserver_http.tenant_load(tenant_id)

    wait_until_tenant_status(pageserver_http, tenant_id, "Active", 5)

    pg.stop()
    pg.start()
    ensure_test_data(data_id, data_secret, pg)


def insert_test_data(
    pageserver_http: PageserverHttpClient,
    tenant_id: TenantId,
    timeline_id: TimelineId,
    data_id: int,
    data: str,
    pg: Postgres,
):
    with pg.cursor() as cur:
        cur.execute(
            f"""
            CREATE TABLE test(id int primary key, secret text);
            INSERT INTO test VALUES ({data_id}, '{data}');
            """
        )
        current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))

    # wait until pageserver receives that data
    wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id, current_lsn)

    # run checkpoint manually to be sure that data landed in remote storage
    pageserver_http.timeline_checkpoint(tenant_id, timeline_id)

    # wait until pageserver successfully uploaded a checkpoint to remote storage
    log.info("waiting for the to-be-ignored tenant's checkpoint upload")
    wait_for_upload(pageserver_http, tenant_id, timeline_id, current_lsn)


def ensure_test_data(data_id: int, data: str, pg: Postgres):
    with pg.cursor() as cur:
        assert (
            query_scalar(cur, f"SELECT secret FROM test WHERE id = {data_id};") == data
        ), "Should have timeline data back"


# Does not use `wait_until` for debugging purposes
def wait_until_tenant_status(
    pageserver_http: PageserverHttpClient,
    tenant_id: TenantId,
    expected_status: str,
    iterations: int,
) -> bool:
    for _ in range(iterations):
        try:
            tenant = pageserver_http.tenant_status(tenant_id=tenant_id)
            log.debug(f"Tenant {tenant_id} status: {tenant}")
            if tenant["state"] == expected_status:
                return True
        except Exception as e:
            log.debug(f"Tenant {tenant_id} status retrieval failure: {e}")

        time.sleep(1)

    raise Exception(f"Tenant {tenant_id} did not become {expected_status} in {iterations} seconds")
@@ -58,6 +58,7 @@ def new_pageserver_service(
    pageserver_client = PageserverHttpClient(
        port=http_port,
        auth_token=None,
        is_testing_enabled_or_skip=lambda: True,  # TODO: check if testing really enabled
    )
    try:
        pageserver_process = start_in_background(
@@ -359,6 +360,7 @@ def test_tenant_relocation(
    new_pageserver_http = PageserverHttpClient(
        port=new_pageserver_http_port,
        auth_token=None,
        is_testing_enabled_or_skip=env.pageserver.is_testing_enabled_or_skip,
    )

    with new_pageserver_service(
Submodule vendor/postgres-v14 updated: 06edb5af61...da50d99db5
Submodule vendor/postgres-v15 updated: edf4c161dd...780c3f8e35
@@ -19,6 +19,7 @@ bytes = { version = "1", features = ["serde", "std"] }
clap = { version = "4", features = ["color", "derive", "error-context", "help", "std", "string", "suggestions", "usage"] }
crossbeam-utils = { version = "0.8", features = ["once_cell", "std"] }
either = { version = "1", features = ["use_std"] }
fail = { version = "0.5", default-features = false, features = ["failpoints"] }
futures-channel = { version = "0.3", features = ["alloc", "futures-sink", "sink", "std"] }
futures-task = { version = "0.3", default-features = false, features = ["alloc", "std"] }
futures-util = { version = "0.3", features = ["alloc", "async-await", "async-await-macro", "channel", "futures-channel", "futures-io", "futures-macro", "futures-sink", "io", "memchr", "sink", "slab", "std"] }
@@ -36,16 +37,16 @@ prost-a6292c17cd707f01 = { package = "prost", version = "0.11", features = ["pro
rand = { version = "0.8", features = ["alloc", "getrandom", "libc", "rand_chacha", "rand_hc", "small_rng", "std", "std_rng"] }
regex = { version = "1", features = ["aho-corasick", "memchr", "perf", "perf-cache", "perf-dfa", "perf-inline", "perf-literal", "std", "unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] }
regex-syntax = { version = "0.6", features = ["unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] }
reqwest = { version = "0.11", default-features = false, features = ["__rustls", "__tls", "blocking", "default-tls", "hyper-rustls", "hyper-tls", "json", "native-tls-crate", "rustls", "rustls-pemfile", "rustls-tls", "rustls-tls-webpki-roots", "serde_json", "tokio-native-tls", "tokio-rustls", "webpki-roots"] }
reqwest = { version = "0.11", default-features = false, features = ["__rustls", "__tls", "blocking", "hyper-rustls", "json", "rustls", "rustls-pemfile", "rustls-tls", "rustls-tls-webpki-roots", "serde_json", "tokio-rustls", "webpki-roots"] }
scopeguard = { version = "1", features = ["use_std"] }
serde = { version = "1", features = ["alloc", "derive", "serde_derive", "std"] }
stable_deref_trait = { version = "1", features = ["alloc", "std"] }
time = { version = "0.3", features = ["alloc", "formatting", "itoa", "macros", "parsing", "std", "time-macros"] }
tokio = { version = "1", features = ["bytes", "fs", "io-std", "io-util", "libc", "macros", "memchr", "mio", "net", "num_cpus", "once_cell", "process", "rt", "rt-multi-thread", "signal-hook-registry", "socket2", "sync", "time", "tokio-macros"] }
tokio-util = { version = "0.7", features = ["codec", "io", "io-util", "tracing"] }
tower = { version = "0.4", features = ["__common", "balance", "buffer", "discover", "futures-core", "futures-util", "indexmap", "limit", "load", "log", "make", "pin-project", "pin-project-lite", "rand", "ready-cache", "retry", "slab", "timeout", "tokio", "tokio-util", "tracing", "util"] }
tracing = { version = "0.1", features = ["attributes", "log", "std", "tracing-attributes"] }
tracing-core = { version = "0.1", features = ["once_cell", "std"] }
url = { version = "2", features = ["serde"] }

[build-dependencies]
ahash = { version = "0.7", features = ["std"] }