mirror of
https://github.com/neondatabase/neon.git
synced 2025-12-23 06:09:59 +00:00
fast_import: Make CPU & memory size configurable (#10709)
The old values assumed that you have at least about 18 GB of RAM available (shared_buffers=10GB and maintenance_work_mem=8GB). That's a lot when testing locally. Make it configurable, and make the default assumption much smaller: 256 MB. This is nice for local testing, but it's also in preparation for starting to use VMs to run these jobs. When launched in a VM, the control plane can set these env variables according to the max size of the VM. Also change the formula for how RAM is distributed: use 10% of RAM for shared_buffers, and 70% for maintenance_work_mem. That leaves a good amount for misc. other stuff and the OS. A very large shared_buffers setting won't typically help with bulk loading. It won't help with the network and I/O of processing all the tables, unless maybe if the whole database fits in shared buffers, but even then it's not much faster than using local disk. Bulk loading is all sequential I/O. It also won't help much with index creation, which is also sequential I/O. A large maintenance_work_mem can be quite useful, however, so that's where we put most of the RAM.
This commit is contained in:
committed by
GitHub
parent
2c4c6e6330
commit
9537829ccd
@@ -60,6 +60,16 @@ struct Args {
|
||||
pg_lib_dir: Utf8PathBuf,
|
||||
#[clap(long)]
|
||||
pg_port: Option<u16>, // port to run postgres on, 5432 is default
|
||||
|
||||
/// Number of CPUs in the system. This is used to configure # of
|
||||
/// parallel worker processes, for index creation.
|
||||
#[clap(long, env = "NEON_IMPORTER_NUM_CPUS")]
|
||||
num_cpus: Option<usize>,
|
||||
|
||||
/// Amount of RAM in the system. This is used to configure shared_buffers
|
||||
/// and maintenance_work_mem.
|
||||
#[clap(long, env = "NEON_IMPORTER_MEMORY_MB")]
|
||||
memory_mb: Option<usize>,
|
||||
}
|
||||
|
||||
#[serde_with::serde_as]
|
||||
@@ -202,7 +212,16 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
.await
|
||||
.context("initdb")?;
|
||||
|
||||
let nproc = num_cpus::get();
|
||||
// If the caller didn't specify CPU / RAM to use for sizing, default to
|
||||
// number of CPUs in the system, and pretty arbitrarily, 256 MB of RAM.
|
||||
let nproc = args.num_cpus.unwrap_or_else(num_cpus::get);
|
||||
let memory_mb = args.memory_mb.unwrap_or(256);
|
||||
|
||||
// Somewhat arbitrarily, use 10 % of memory for shared buffer cache, 70% for
|
||||
// maintenance_work_mem (i.e. for sorting during index creation), and leave the rest
|
||||
// available for misc other stuff that PostgreSQL uses memory for.
|
||||
let shared_buffers_mb = ((memory_mb as f32) * 0.10) as usize;
|
||||
let maintenance_work_mem_mb = ((memory_mb as f32) * 0.70) as usize;
|
||||
|
||||
//
|
||||
// Launch postgres process
|
||||
@@ -212,12 +231,15 @@ pub(crate) async fn main() -> anyhow::Result<()> {
|
||||
.arg(&pgdata_dir)
|
||||
.args(["-p", &format!("{pg_port}")])
|
||||
.args(["-c", "wal_level=minimal"])
|
||||
.args(["-c", "shared_buffers=10GB"])
|
||||
.args(["-c", &format!("shared_buffers={shared_buffers_mb}MB")])
|
||||
.args(["-c", "max_wal_senders=0"])
|
||||
.args(["-c", "fsync=off"])
|
||||
.args(["-c", "full_page_writes=off"])
|
||||
.args(["-c", "synchronous_commit=off"])
|
||||
.args(["-c", "maintenance_work_mem=8388608"])
|
||||
.args([
|
||||
"-c",
|
||||
&format!("maintenance_work_mem={maintenance_work_mem_mb}MB"),
|
||||
])
|
||||
.args(["-c", &format!("max_parallel_maintenance_workers={nproc}")])
|
||||
.args(["-c", &format!("max_parallel_workers={nproc}")])
|
||||
.args(["-c", &format!("max_parallel_workers_per_gather={nproc}")])
|
||||
|
||||
Reference in New Issue
Block a user