# neon/compute/manifest.yaml

# Default Postgres settings, grouped into `common` (all computes), `replica`
# and `per_version` (keyed by Postgres major version) sections.
# NOTE(review): how the sections are layered on top of each other is decided by the
# consumer of this file — confirm there.
pg_settings:
  # Common settings for primaries and replicas of all versions.
  common:
    # Check for client disconnection every 1 minute. By default, Postgres detects the
    # loss of the connection only at the next interaction with the socket (when it waits
    # for, receives or sends data), so it is likely to waste resources until the end of
    # the query execution. There should be no drawbacks to setting this for everyone, so
    # it is enabled by default. If anyone complains, we can allow editing it.
    # https://www.postgresql.org/docs/16/runtime-config-connection.html#GUC-CLIENT-CONNECTION-CHECK-INTERVAL
    client_connection_check_interval: "60000" # 1 minute, unit is ms
    # ---- IO ----
    effective_io_concurrency: "20"
    maintenance_io_concurrency: "100"
    fsync: "off"
    # Default for all computes; the `replica` section of this file sets it to "on".
    hot_standby: "off"
    # We allow users to change this if needed, but by default we
    # just don't want to see long-lasting idle transactions, as they
    # prevent the activity monitor from suspending projects.
    idle_in_transaction_session_timeout: "300000" # 5 minutes, unit is ms
    listen_addresses: "*"
    # --- LOGGING ---- helps investigations
    log_connections: "on"
    log_disconnections: "on"
    # Only log temporary files of at least 1GB; the unit is kB.
    log_temp_files: "1048576"
    # Disable dumping customer data to logs, both to increase data privacy
    # and to reduce the amount of logs.
    log_error_verbosity: "terse"
    log_min_error_statement: "panic"
    max_connections: "100"
    # --- WAL ---
    # - flush lag is the max amount of WAL that has been generated but not yet stored
    # to disk in the page server. A smaller value means less delay after a pageserver
    # restart, but if you set it too small you might again need to slow down writes if the
    # pageserver cannot flush incoming WAL to disk fast enough. This must be larger
    # than the pageserver's checkpoint interval, currently 1 GB! Otherwise you get
    # a deadlock where the compute node refuses to generate more WAL before the
    # old WAL has been uploaded to S3, but the pageserver is waiting for more WAL
    # to be generated before it is uploaded to S3.
    max_replication_flush_lag: "10GB"
    max_replication_slots: "10"
    # Backpressure configuration:
    # - write lag is the max amount of WAL that has been generated by Postgres but not yet
    # processed by the page server. Making this smaller reduces the worst case latency
    # of a GetPage request, if you request a page that was recently modified. On the other
    # hand, if this is too small, the compute node might need to wait on a write if there is a
    # hiccup in the network or page server so that the page server has temporarily fallen
    # behind.
    #
    # Previously it was set to 500 MB, but that caused compute to become unresponsive under
    # load (https://github.com/neondatabase/neon/issues/2028), so keep it well below that.
    max_replication_write_lag: "15MB"
    max_wal_senders: "10"
    # A Postgres checkpoint is cheap in storage, as it doesn't involve any significant amount
    # of real I/O. Only the SLRU buffers and some other small files are flushed to disk.
    # However, as long as we have full_page_writes=on, page updates after a checkpoint
    # include full-page images, which bloat the WAL. So we may want to bump max_wal_size to
    # reduce the WAL bloat, but at the same time it will increase the pg_wal directory size
    # on compute and can lead to out-of-disk errors on k8s nodes.
    max_wal_size: "1024" # unit is MB, i.e. 1GB
    wal_keep_size: "0"
    wal_level: "replica"
    # Reduce the amount of WAL generated by default.
    wal_log_hints: "off"
    # - without wal_sender_timeout set we don't get feedback messages,
    # required for backpressure.
    wal_sender_timeout: "10000" # unit is ms, i.e. 10 seconds
    # We have some experimental extensions, which we don't want users to install unknowingly.
    # To install them, users would need to set the `neon.allow_unstable_extensions` setting.
    # The list below currently covers:
    # - `pgrag` - https://github.com/neondatabase-labs/pgrag - extension is actually called just `rag`,
    #                                                          and two dependencies:
    #                                                          - `rag_bge_small_en_v15`
    #                                                          - `rag_jina_reranker_v1_tiny_en`
    # - `pg_mooncake` - https://github.com/Mooncake-Labs/pg_mooncake/
    # - `anon` - NOTE(review): present in the value below but previously undocumented here;
    #            presumably the PostgreSQL Anonymizer extension — confirm.
    neon.unstable_extensions: "rag,rag_bge_small_en_v15,rag_jina_reranker_v1_tiny_en,pg_mooncake,anon"
    neon.protocol_version: "3"
    password_encryption: "scram-sha-256"
    # This is important to prevent Postgres from trying to perform
    # a local WAL redo after a backend crash. It should exit and let
    # systemd or k8s do a fresh startup with compute_ctl.
    restart_after_crash: "off"
    # The Postgres default is 3. We have the following persistent connections in the VM:
    # * compute_activity_monitor (from compute_ctl)
    # * postgres-exporter (metrics collector; it has 2 connections)
    # * sql_exporter (metrics collector; we have 2 instances [1 for us & users; 1 for autoscaling])
    # * vm-monitor (to query & change file cache size)
    # i.e. a total of 6. Let's reserve 7, so there's still at least one left over.
    superuser_reserved_connections: "7"
    synchronous_standby_names: "walproposer"

  # Settings for replicas of all versions. `hot_standby` is "off" in `common`, so this
  # presumably overrides it for replicas — confirm the merge order in the consumer.
  replica:
    hot_standby: "on"

  # Settings specific to a Postgres major version. Each version has its own `common`
  # and `replica` sub-sections, mirroring the top-level sections of the same names.
  per_version:
    17:
      common:
        # PostgreSQL 17 has a new IO system called "read stream", which can combine IOs up to some
        # size. It still has some issues with readahead, though, so we default to disabled/
        # "no combining of IOs" to make sure we get the maximum prefetch depth.
        # See also: https://github.com/neondatabase/neon/pull/9860
        io_combine_limit: "1"
      replica:
        # Prefetching of blocks referenced in WAL doesn't make sense for us:
        # Neon hot standby ignores pages that are not in the shared_buffers.
        recovery_prefetch: "off"
    16:
      common: {}
      replica:
        # Prefetching of blocks referenced in WAL doesn't make sense for us:
        # Neon hot standby ignores pages that are not in the shared_buffers.
        recovery_prefetch: "off"
    15:
      common: {}
      replica:
        # Prefetching of blocks referenced in WAL doesn't make sense for us:
        # Neon hot standby ignores pages that are not in the shared_buffers.
        recovery_prefetch: "off"
    14:
      common: {}
      # No `recovery_prefetch` override here: that setting was introduced in PostgreSQL 15.
      replica: {}