mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-15 04:00:38 +00:00
Compare commits
1 Commits
unlogged_b
...
arthur/rep
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f1fcb4d0d7 |
@@ -24,3 +24,4 @@
|
||||
!storage_controller/
|
||||
!vendor/postgres-*/
|
||||
!workspace_hack/
|
||||
!debug-oom/
|
||||
|
||||
4
Cargo.lock
generated
4
Cargo.lock
generated
@@ -4494,9 +4494,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
|
||||
|
||||
[[package]]
|
||||
name = "pq-sys"
|
||||
version = "0.6.3"
|
||||
version = "0.4.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6cc05d7ea95200187117196eee9edd0644424911821aeb28a18ce60ea0b8793"
|
||||
checksum = "31c0052426df997c0cbd30789eb44ca097e3541717a7b8fa36b1c464ee7edebd"
|
||||
dependencies = [
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -68,6 +68,7 @@ use compute_tools::spec::*;
|
||||
use compute_tools::swap::resize_swap;
|
||||
use rlimit::{setrlimit, Resource};
|
||||
use utils::failpoint_support;
|
||||
use utils::id::{TenantId, TimelineId};
|
||||
|
||||
// this is an arbitrary build tag. Fine as a default / for testing purposes
|
||||
// in case the environment variable is not set
|
||||
@@ -87,9 +88,9 @@ fn main() -> Result<()> {
|
||||
|
||||
let cli_args = process_cli(&clap_args)?;
|
||||
|
||||
let cli_spec = try_spec_from_cli(&clap_args, &cli_args)?;
|
||||
// let cli_spec = try_spec_from_cli(&clap_args, &cli_args)?;
|
||||
|
||||
let wait_spec_result = wait_spec(build_tag, cli_args, cli_spec)?;
|
||||
let wait_spec_result = wait_spec(build_tag, cli_args)?;
|
||||
|
||||
start_postgres(&clap_args, wait_spec_result)?
|
||||
|
||||
@@ -313,14 +314,41 @@ fn wait_spec(
|
||||
http_port,
|
||||
..
|
||||
}: ProcessCliResult,
|
||||
CliSpecParams {
|
||||
spec,
|
||||
live_config_allowed,
|
||||
}: CliSpecParams,
|
||||
) -> Result<WaitSpecResult> {
|
||||
let mut new_state = ComputeState::new();
|
||||
let spec_set;
|
||||
|
||||
let live_config_allowed = true;
|
||||
|
||||
let spec = Some(ComputeSpec {
|
||||
// format_version: todo!(),
|
||||
// operation_uuid: todo!(),
|
||||
// features: todo!(),
|
||||
// swap_size_bytes: todo!(),
|
||||
// disk_quota_bytes: todo!(),
|
||||
// disable_lfc_resizing: todo!(),
|
||||
// cluster: todo!(),
|
||||
// delta_operations: todo!(),
|
||||
// skip_pg_catalog_updates: todo!(),
|
||||
// tenant_id: todo!(),
|
||||
// timeline_id: todo!(),
|
||||
// pageserver_connstring: todo!(),
|
||||
// safekeeper_connstrings: todo!(),
|
||||
// mode: todo!(),
|
||||
// storage_auth_token: todo!(),
|
||||
// remote_extensions: todo!(),
|
||||
// pgbouncer_settings: todo!(),
|
||||
// shard_stripe_size: todo!(),
|
||||
// local_proxy_config: todo!(),
|
||||
// reconfigure_concurrency: todo!(),
|
||||
pageserver_connstring: Some("pageserver-1.example.com:5432".to_string()),
|
||||
safekeeper_connstrings: vec!["safekeeper-1.example.com:5432".to_string()],
|
||||
tenant_id: Some(TenantId::generate()),
|
||||
timeline_id: Some(TimelineId::generate()),
|
||||
|
||||
..Default::default()
|
||||
});
|
||||
|
||||
if let Some(spec) = spec {
|
||||
let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow::anyhow!(msg))?;
|
||||
info!("new pspec.spec: {:?}", pspec.spec);
|
||||
@@ -355,9 +383,7 @@ fn wait_spec(
|
||||
// available for binding. Prewarming helps Postgres start quicker later,
|
||||
// because QEMU will already have its memory allocated from the host, and
|
||||
// the necessary binaries will already be cached.
|
||||
if !spec_set {
|
||||
compute.prewarm_postgres()?;
|
||||
}
|
||||
compute.prewarm_postgres()?;
|
||||
|
||||
// Launch http service first, so that we can serve control-plane requests
|
||||
// while configuration is still in progress.
|
||||
|
||||
@@ -358,64 +358,22 @@ impl ComputeNode {
|
||||
let spec = compute_state.pspec.as_ref().expect("spec must be set");
|
||||
let start_time = Instant::now();
|
||||
|
||||
let shard0_connstr = spec.pageserver_connstr.split(',').next().unwrap();
|
||||
let mut config = postgres::Config::from_str(shard0_connstr)?;
|
||||
|
||||
// Use the storage auth token from the config file, if given.
|
||||
// Note: this overrides any password set in the connection string.
|
||||
if let Some(storage_auth_token) = &spec.storage_auth_token {
|
||||
info!("Got storage auth token from spec file");
|
||||
config.password(storage_auth_token);
|
||||
} else {
|
||||
info!("Storage auth token not set");
|
||||
}
|
||||
|
||||
// Connect to pageserver
|
||||
let mut client = config.connect(NoTls)?;
|
||||
let pageserver_connect_micros = start_time.elapsed().as_micros() as u64;
|
||||
|
||||
let basebackup_cmd = match lsn {
|
||||
Lsn(0) => {
|
||||
if spec.spec.mode != ComputeMode::Primary {
|
||||
format!(
|
||||
"basebackup {} {} --gzip --replica",
|
||||
spec.tenant_id, spec.timeline_id
|
||||
)
|
||||
} else {
|
||||
format!("basebackup {} {} --gzip", spec.tenant_id, spec.timeline_id)
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
if spec.spec.mode != ComputeMode::Primary {
|
||||
format!(
|
||||
"basebackup {} {} {} --gzip --replica",
|
||||
spec.tenant_id, spec.timeline_id, lsn
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"basebackup {} {} {} --gzip",
|
||||
spec.tenant_id, spec.timeline_id, lsn
|
||||
)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let copyreader = client.copy_out(basebackup_cmd.as_str())?;
|
||||
let mut measured_reader = MeasuredReader::new(copyreader);
|
||||
// Open backup file directly
|
||||
let backup_file = std::fs::File::open("/var/db/backups/backup.tar.gz")?;
|
||||
let mut measured_reader = MeasuredReader::new(backup_file);
|
||||
let mut bufreader = std::io::BufReader::new(&mut measured_reader);
|
||||
|
||||
// Read the archive directly from the `CopyOutReader`
|
||||
// Read the archive directly from the file
|
||||
//
|
||||
// Set `ignore_zeros` so that unpack() reads all the Copy data and
|
||||
// doesn't stop at the end-of-archive marker. Otherwise, if the server
|
||||
// sends an Error after finishing the tarball, we will not notice it.
|
||||
// doesn't stop at the end-of-archive marker.
|
||||
let mut ar = tar::Archive::new(flate2::read::GzDecoder::new(&mut bufreader));
|
||||
ar.set_ignore_zeros(true);
|
||||
ar.unpack(&self.pgdata)?;
|
||||
|
||||
// Report metrics
|
||||
let mut state = self.state.lock().unwrap();
|
||||
state.metrics.pageserver_connect_micros = pageserver_connect_micros;
|
||||
state.metrics.pageserver_connect_micros = 0;
|
||||
state.metrics.basebackup_bytes = measured_reader.get_byte_count() as u64;
|
||||
state.metrics.basebackup_ms = start_time.elapsed().as_millis() as u64;
|
||||
Ok(())
|
||||
@@ -628,32 +586,7 @@ impl ComputeNode {
|
||||
self.http_port,
|
||||
)?;
|
||||
|
||||
// Syncing safekeepers is only safe with primary nodes: if a primary
|
||||
// is already connected it will be kicked out, so a secondary (standby)
|
||||
// cannot sync safekeepers.
|
||||
let lsn = match spec.mode {
|
||||
ComputeMode::Primary => {
|
||||
info!("checking if safekeepers are synced");
|
||||
let lsn = if let Ok(Some(lsn)) = self.check_safekeepers_synced(compute_state) {
|
||||
lsn
|
||||
} else {
|
||||
info!("starting safekeepers syncing");
|
||||
self.sync_safekeepers(pspec.storage_auth_token.clone())
|
||||
.with_context(|| "failed to sync safekeepers")?
|
||||
};
|
||||
info!("safekeepers synced at LSN {}", lsn);
|
||||
lsn
|
||||
}
|
||||
ComputeMode::Static(lsn) => {
|
||||
info!("Starting read-only node at static LSN {}", lsn);
|
||||
lsn
|
||||
}
|
||||
ComputeMode::Replica => {
|
||||
info!("Initializing standby from latest Pageserver LSN");
|
||||
Lsn(0)
|
||||
}
|
||||
};
|
||||
|
||||
let lsn = Lsn(0);
|
||||
info!(
|
||||
"getting basebackup@{} from pageserver {}",
|
||||
lsn, &pspec.pageserver_connstr
|
||||
|
||||
1
debug-oom/.gitignore
vendored
Normal file
1
debug-oom/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
backup.tar.gz
|
||||
21
debug-oom/README.md
Normal file
21
debug-oom/README.md
Normal file
@@ -0,0 +1,21 @@
|
||||
To build a compute image and load it into the kind cluster (run from the `debug-oom/` directory):
|
||||
```
|
||||
docker build --build-arg GIT_VERSION=custombuild --build-arg PG_VERSION=v16 -t neon-local-v16 -f ../compute/compute-node.Dockerfile .. && \
|
||||
../../autoscaling/bin/vm-builder \
|
||||
-spec=../compute/vm-image-spec-bullseye.yaml \
|
||||
-src=neon-local-v16:latest \
|
||||
-dst=vm-neon-local-v16:latest \
|
||||
-target-arch=linux/amd64 \
|
||||
-size 2G && \
|
||||
../../autoscaling/bin/kind load docker-image vm-neon-local-v16:latest --name neonvm-arthur
|
||||
```
|
||||
|
||||
To start a compute node:
|
||||
```
|
||||
kubectl apply -f ./spec.yml
|
||||
```
|
||||
|
||||
To destroy the compute node:
|
||||
```
|
||||
kubectl delete -f ./spec.yml
|
||||
```
|
||||
99
debug-oom/spec.yml
Normal file
99
debug-oom/spec.yml
Normal file
@@ -0,0 +1,99 @@
|
||||
apiVersion: vm.neon.tech/v1
|
||||
kind: VirtualMachine
|
||||
metadata:
|
||||
annotations:
|
||||
autoscaling.neon.tech/bounds: '{"min":{"cpu":"250m","mem":"1Gi"},"max":{"cpu":"2","mem":"8Gi"}}'
|
||||
autoscaling.neon.tech/config: '{"enableLFCMetrics":true}'
|
||||
creationTimestamp: "2025-01-04T18:37:29Z"
|
||||
finalizers:
|
||||
- vm.neon.tech/finalizer
|
||||
generation: 1
|
||||
labels:
|
||||
autoscaling.neon.tech/enabled: "true"
|
||||
neon/component: compute-node
|
||||
neon/compute-id: compute-purple-art-unreal
|
||||
neon/endpoint-id: ep-unreal
|
||||
name: compute-purple-art-unreal
|
||||
namespace: default
|
||||
spec:
|
||||
cpuScalingMode: QmpScaling
|
||||
disks:
|
||||
- emptyDisk:
|
||||
discard: true
|
||||
size: 36096Mi
|
||||
mountPath: /neonvm/cache
|
||||
name: cache
|
||||
readOnly: false
|
||||
- emptyDisk:
|
||||
discard: true
|
||||
enableQuotas: true
|
||||
size: 150Gi
|
||||
mountPath: /var/db/postgres/compute
|
||||
name: pgdata
|
||||
readOnly: false
|
||||
enableAcceleration: true
|
||||
enableNetworkMonitoring: false
|
||||
enableSSH: true
|
||||
guest:
|
||||
args:
|
||||
- -c
|
||||
- /usr/local/bin/compute_ctl -D /var/db/postgres/compute/pgdata -b /usr/local/bin/postgres
|
||||
-C postgresql://cloud_admin@127.0.0.1/postgres?options=-c%20default_transaction_read_only%3Dfalse
|
||||
--compute-id compute-purple-art-unreal --control-plane-uri http://dontexist.local:9096
|
||||
--resize-swap-on-bind --set-disk-quota-for-fs /var/db/postgres/compute 2>&1
|
||||
command:
|
||||
- /bin/sh
|
||||
cpus:
|
||||
max: 10
|
||||
min: 250m
|
||||
use: 500m
|
||||
env:
|
||||
- name: RUST_LOG
|
||||
value: info
|
||||
- name: OTEL_SDK_DISABLED
|
||||
value: "true"
|
||||
- name: AUTOSCALING
|
||||
value: "true"
|
||||
memorySlotSize: 1Gi
|
||||
memorySlots:
|
||||
max: 40
|
||||
min: 1
|
||||
use: 2
|
||||
ports:
|
||||
- name: postgres
|
||||
port: 5432
|
||||
protocol: TCP
|
||||
- name: control
|
||||
port: 3080
|
||||
protocol: TCP
|
||||
- name: pooler
|
||||
port: 6432
|
||||
protocol: TCP
|
||||
- name: host-metrics
|
||||
port: 9100
|
||||
protocol: TCP
|
||||
- name: metrics
|
||||
port: 9187
|
||||
protocol: TCP
|
||||
- name: sql-exporter
|
||||
port: 9399
|
||||
protocol: TCP
|
||||
- name: sql-exporter-2
|
||||
port: 9499
|
||||
protocol: TCP
|
||||
- name: vm-monitor
|
||||
port: 10301
|
||||
protocol: TCP
|
||||
- name: local-proxy
|
||||
port: 10432
|
||||
protocol: TCP
|
||||
rootDisk:
|
||||
image: vm-neon-local-v16
|
||||
imagePullPolicy: IfNotPresent
|
||||
size: 20Gi
|
||||
settings:
|
||||
swap: 40Gi
|
||||
sysctl:
|
||||
- vm.overcommit_memory=2
|
||||
restartPolicy: Always
|
||||
schedulerName: autoscale-scheduler
|
||||
Reference in New Issue
Block a user