diff --git a/compute/manifest.yaml b/compute/manifest.yaml
new file mode 100644
index 0000000000..f1cd20c497
--- /dev/null
+++ b/compute/manifest.yaml
@@ -0,0 +1,121 @@
+pg_settings:
+  # Common settings for primaries and replicas of all versions.
+  common:
+    # Check for client disconnection every 1 minute. By default, Postgres detects the
+    # loss of the connection only at the next interaction with the socket, when it waits
+    # for, receives, or sends data, so it would likely waste resources until the end of the
+    # query execution. There should be no drawbacks in setting this for everyone, so enable
+    # it by default. If anyone complains, we can allow editing it.
+    # https://www.postgresql.org/docs/16/runtime-config-connection.html#GUC-CLIENT-CONNECTION-CHECK-INTERVAL
+    client_connection_check_interval: "60000" # 1 minute
+    # ---- IO ----
+    effective_io_concurrency: "20"
+    maintenance_io_concurrency: "100"
+    fsync: "off"
+    hot_standby: "off"
+    # We allow users to change this if needed, but by default we
+    # just don't want to see long-lasting idle transactions, as they
+    # prevent the activity monitor from suspending projects.
+    idle_in_transaction_session_timeout: "300000" # 5 minutes
+    listen_addresses: "*"
+    # ---- LOGGING ---- helps investigations
+    log_connections: "on"
+    log_disconnections: "on"
+    # 1 GB, unit is KB
+    log_temp_files: "1048576"
+    # Disable dumping customer data to logs, both to increase data privacy
+    # and to reduce the volume of logs.
+    log_error_verbosity: "terse"
+    log_min_error_statement: "panic"
+    max_connections: "100"
+    # ---- WAL ----
+    # - flush lag is the max amount of WAL that has been generated but not yet stored
+    #   to disk in the pageserver. A smaller value means less delay after a pageserver
+    #   restart, but if you set it too small you might again need to slow down writes if the
+    #   pageserver cannot flush incoming WAL to disk fast enough. This must be larger
+    #   than the pageserver's checkpoint interval, currently 1 GB! Otherwise you get a
+    #   deadlock where the compute node refuses to generate more WAL before the
+    #   old WAL has been uploaded to S3, but the pageserver is waiting for more WAL
+    #   to be generated before it is uploaded to S3.
+    max_replication_flush_lag: "10GB"
+    max_replication_slots: "10"
+    # Backpressure configuration:
+    # - write lag is the max amount of WAL that has been generated by Postgres but not yet
+    #   processed by the pageserver. Making this smaller reduces the worst-case latency
+    #   of a GetPage request if you request a page that was recently modified. On the other
+    #   hand, if this is too small, the compute node might need to wait on a write if there is a
+    #   hiccup in the network or pageserver such that the pageserver has temporarily fallen
+    #   behind.
+    #
+    # Previously it was set to 500 MB, but that caused the compute to become unresponsive under load:
+    # https://github.com/neondatabase/neon/issues/2028
+    max_replication_write_lag: "500MB"
+    max_wal_senders: "10"
+    # A Postgres checkpoint is cheap storage-wise, as it doesn't involve any significant amount
+    # of real I/O. Only the SLRU buffers and some other small files are flushed to disk.
+    # However, as long as we have full_page_writes=on, page updates after a checkpoint
+    # include full-page images, which bloats the WAL. So we may want to bump max_wal_size to
+    # reduce the WAL bloat, but at the same time it will increase the pg_wal directory size on
+    # compute and can lead to out-of-disk errors on k8s nodes.
+ max_wal_size: "1024" + wal_keep_size: "0" + wal_level: "replica" + # Reduce amount of WAL generated by default. + wal_log_hints: "off" + # - without wal_sender_timeout set we don't get feedback messages, + # required for backpressure. + wal_sender_timeout: "10000" + # We have some experimental extensions, which we don't want users to install unconsciously. + # To install them, users would need to set the `neon.allow_unstable_extensions` setting. + # There are two of them currently: + # - `pgrag` - https://github.com/neondatabase-labs/pgrag - extension is actually called just `rag`, + # and two dependencies: + # - `rag_bge_small_en_v15` + # - `rag_jina_reranker_v1_tiny_en` + # - `pg_mooncake` - https://github.com/Mooncake-Labs/pg_mooncake/ + neon.unstable_extensions: "rag,rag_bge_small_en_v15,rag_jina_reranker_v1_tiny_en,pg_mooncake,anon" + neon.protocol_version: "3" + password_encryption: "scram-sha-256" + # This is important to prevent Postgres from trying to perform + # a local WAL redo after backend crash. It should exit and let + # the systemd or k8s to do a fresh startup with compute_ctl. + restart_after_crash: "off" + # By default 3. We have the following persistent connections in the VM: + # * compute_activity_monitor (from compute_ctl) + # * postgres-exporter (metrics collector; it has 2 connections) + # * sql_exporter (metrics collector; we have 2 instances [1 for us & users; 1 for autoscaling]) + # * vm-monitor (to query & change file cache size) + # i.e. total of 6. Let's reserve 7, so there's still at least one left over. + superuser_reserved_connections: "7" + synchronous_standby_names: "walproposer" + + replica: + hot_standby: "on" + + per_version: + 17: + common: + # PostgreSQL 17 has a new IO system called "read stream", which can combine IOs up to some + # size. It still has some issues with readahead, though, so we default to disabled/ + # "no combining of IOs" to make sure we get the maximum prefetch depth. + # See also: https://github.com/neondatabase/neon/pull/9860 + io_combine_limit: "1" + replica: + # prefetching of blocks referenced in WAL doesn't make sense for us + # Neon hot standby ignores pages that are not in the shared_buffers + recovery_prefetch: "off" + 16: + common: + replica: + # prefetching of blocks referenced in WAL doesn't make sense for us + # Neon hot standby ignores pages that are not in the shared_buffers + recovery_prefetch: "off" + 15: + common: + replica: + # prefetching of blocks referenced in WAL doesn't make sense for us + # Neon hot standby ignores pages that are not in the shared_buffers + recovery_prefetch: "off" + 14: + common: + replica: