Remove unnecessary client code. Use traits instead of enums in pagebench.

Remove debug info messages from client grpc code.
Add new files for client grpc.
2026-05-31 03:50:37 +00:00 · 2025-05-28 07:46:52 -07:00 · 2025-05-28 06:49:21 -07:00 · 2025-05-28 06:48:46 -07:00 · 2025-05-28 06:14:56 -07:00
23 changed files with 806 additions and 1344 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -701,7 +701,7 @@ dependencies = [
 "http 1.1.0",
 "http-body 1.0.0",
 "http-body-util",
- "hyper 1.4.1",
+ "hyper 1.6.0",
 "hyper-util",
 "itoa",
 "matchit",
@@ -2330,7 +2330,7 @@ dependencies = [
 "futures-core",
 "futures-sink",
 "http-body-util",
- "hyper 1.4.1",
+ "hyper 1.6.0",
 "hyper-util",
 "pin-project",
 "rand 0.8.5",
@@ -2883,9 +2883,9 @@ dependencies = [

 [[package]]
 name = "httparse"
-version = "1.8.0"
+version = "1.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904"
+checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"

 [[package]]
 name = "httpdate"
@@ -2935,9 +2935,9 @@ dependencies = [

 [[package]]
 name = "hyper"
-version = "1.4.1"
+version = "1.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05"
+checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80"
 dependencies = [
 "bytes",
 "futures-channel",
@@ -2977,7 +2977,7 @@ checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c"
 dependencies = [
 "futures-util",
 "http 1.1.0",
- "hyper 1.4.1",
+ "hyper 1.6.0",
 "hyper-util",
 "rustls 0.22.4",
 "rustls-pki-types",
@@ -2992,7 +2992,7 @@ version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3203a961e5c83b6f5498933e78b6b263e208c197b63e9c6c53cc82ffd3f63793"
 dependencies = [
- "hyper 1.4.1",
+ "hyper 1.6.0",
 "hyper-util",
 "pin-project-lite",
 "tokio",
@@ -3001,20 +3001,20 @@ dependencies = [

 [[package]]
 name = "hyper-util"
-version = "0.1.7"
+version = "0.1.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cde7055719c54e36e95e8719f95883f22072a48ede39db7fc17a4e1d5281e9b9"
+checksum = "cf9f1e950e0d9d1d3c47184416723cf29c0d1f93bd8cccf37e4beb6b44f31710"
 dependencies = [
 "bytes",
 "futures-channel",
 "futures-util",
 "http 1.1.0",
 "http-body 1.0.0",
- "hyper 1.4.1",
+ "hyper 1.6.0",
+ "libc",
 "pin-project-lite",
 "socket2",
 "tokio",
- "tower 0.4.13",
 "tower-service",
 "tracing",
 ]
@@ -4237,6 +4237,7 @@ version = "0.1.0"
 dependencies = [
 "anyhow",
 "async-trait",
+ "bytes",
 "camino",
 "clap",
 "futures",
@@ -4245,15 +4246,15 @@ dependencies = [
 "humantime-serde",
 "pageserver_api",
 "pageserver_client",
+ "pageserver_client_grpc",
 "pageserver_page_api",
 "rand 0.8.5",
 "reqwest",
 "serde",
 "serde_json",
+ "thiserror 1.0.69",
 "tokio",
- "tokio-stream",
 "tokio-util",
- "tonic 0.13.1",
 "tracing",
 "utils",
 "workspace_hack",
@@ -4436,6 +4437,29 @@ dependencies = [
 "workspace_hack",
 ]

+[[package]]
+name = "pageserver_client_grpc"
+version = "0.1.0"
+dependencies = [
+ "bytes",
+ "futures",
+ "http 1.1.0",
+ "hyper 1.6.0",
+ "hyper-util",
+ "metrics",
+ "pageserver_page_api",
+ "priority-queue",
+ "rand 0.8.5",
+ "thiserror 1.0.69",
+ "tokio",
+ "tokio-util",
+ "tonic 0.13.1",
+ "tower 0.4.13",
+ "tracing",
+ "utils",
+ "uuid",
+]
+
 [[package]]
 name = "pageserver_compaction"
 version = "0.1.0"
@@ -5012,6 +5036,17 @@ dependencies = [
 "elliptic-curve 0.13.8",
 ]

+[[package]]
+name = "priority-queue"
+version = "2.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ef08705fa1589a1a59aa924ad77d14722cb0cd97b67dd5004ed5f4a4873fce8d"
+dependencies = [
+ "autocfg",
+ "equivalent",
+ "indexmap 2.9.0",
+]
+
 [[package]]
 name = "proc-macro2"
 version = "1.0.94"
@@ -5212,7 +5247,7 @@ dependencies = [
 "humantime",
 "humantime-serde",
 "hyper 0.14.30",
- "hyper 1.4.1",
+ "hyper 1.6.0",
 "hyper-util",
 "indexmap 2.9.0",
 "ipnet",
@@ -5608,7 +5643,7 @@ dependencies = [
 "http-body-util",
 "http-types",
 "humantime-serde",
- "hyper 1.4.1",
+ "hyper 1.6.0",
 "itertools 0.10.5",
 "metrics",
 "once_cell",
@@ -5648,7 +5683,7 @@ dependencies = [
 "http 1.1.0",
 "http-body 1.0.0",
 "http-body-util",
- "hyper 1.4.1",
+ "hyper 1.6.0",
 "hyper-rustls 0.26.0",
 "hyper-util",
 "ipnet",
@@ -5705,7 +5740,7 @@ dependencies = [
 "futures",
 "getrandom 0.2.11",
 "http 1.1.0",
- "hyper 1.4.1",
+ "hyper 1.6.0",
 "parking_lot 0.11.2",
 "reqwest",
 "reqwest-middleware",
@@ -6646,12 +6681,12 @@ dependencies = [

 [[package]]
 name = "socket2"
-version = "0.5.5"
+version = "0.5.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9"
+checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
 dependencies = [
 "libc",
- "windows-sys 0.48.0",
+ "windows-sys 0.52.0",
 ]

 [[package]]
@@ -6717,7 +6752,7 @@ dependencies = [
 "http-body-util",
 "http-utils",
 "humantime",
- "hyper 1.4.1",
+ "hyper 1.6.0",
 "hyper-util",
 "metrics",
 "once_cell",
@@ -7542,11 +7577,12 @@ dependencies = [
 "axum",
 "base64 0.22.1",
 "bytes",
+ "flate2",
 "h2 0.4.4",
 "http 1.1.0",
 "http-body 1.0.0",
 "http-body-util",
- "hyper 1.4.1",
+ "hyper 1.6.0",
 "hyper-timeout",
 "hyper-util",
 "percent-encoding",
@@ -7603,6 +7639,7 @@ dependencies = [
 "tokio",
 "tower-layer",
 "tower-service",
+ "tracing",
 ]

 [[package]]
@@ -8595,7 +8632,7 @@ dependencies = [
 "hex",
 "hmac",
 "hyper 0.14.30",
- "hyper 1.4.1",
+ "hyper 1.6.0",
 "hyper-util",
 "indexmap 2.9.0",
 "itertools 0.12.1",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,6 +8,7 @@ members = [
    "pageserver/compaction",
    "pageserver/ctl",
    "pageserver/client",
+    "pageserver/client_grpc",
    "pageserver/pagebench",
    "pageserver/page_api",
    "proxy",
@@ -199,7 +200,7 @@ tokio-tar = "0.3"
 tokio-util = { version = "0.7.10", features = ["io", "rt"] }
 toml = "0.8"
 toml_edit = "0.22"
-tonic = { version = "0.13.1", default-features = false, features = ["channel", "codegen", "prost", "router", "server", "tls-ring", "tls-native-roots"] }
+tonic = { version = "0.13.1", default-features = false, features = ["gzip", "channel", "codegen", "prost", "router", "server", "tls-ring", "tls-native-roots"] }
 tonic-reflection = { version = "0.13.1", features = ["server"] }
 tower = { version = "0.5.2", default-features = false }
 tower-http = { version = "0.6.2", features = ["auth", "request-id", "trace"] }
@@ -254,6 +255,7 @@ metrics = { version = "0.1", path = "./libs/metrics/" }
 pageserver = { path = "./pageserver" }
 pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
 pageserver_client = { path = "./pageserver/client" }
+pageserver_client_grpc = { path = "./pageserver/client_grpc" }
 pageserver_compaction = { version = "0.1", path = "./pageserver/compaction/" }
 pageserver_page_api = { path = "./pageserver/page_api" }
 postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -136,10 +136,6 @@ struct Cli {
        requires = "compute-id"
    )]
    pub control_plane_uri: Option<String>,
-
-    /// Interval in seconds for collecting installed extensions statistics
-    #[arg(long, default_value = "3600")]
-    pub installed_extensions_collection_interval: u64,
 }

 fn main() -> Result<()> {
@@ -183,7 +179,6 @@ fn main() -> Result<()> {
            cgroup: cli.cgroup,
            #[cfg(target_os = "linux")]
            vm_monitor_addr: cli.vm_monitor_addr,
-            installed_extensions_collection_interval: cli.installed_extensions_collection_interval,
        },
        config,
    )?;
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -97,9 +97,6 @@ pub struct ComputeNodeParams {

    /// the address of extension storage proxy gateway
    pub remote_ext_base_url: Option<String>,
-
-    /// Interval for installed extensions collection
-    pub installed_extensions_collection_interval: u64,
 }

 /// Compute node info shared across several `compute_ctl` threads.
@@ -745,7 +742,17 @@ impl ComputeNode {

            let conf = self.get_tokio_conn_conf(None);
            tokio::task::spawn(async {
-                let _ = installed_extensions(conf).await;
+                let res = get_installed_extensions(conf).await;
+                match res {
+                    Ok(extensions) => {
+                        info!(
+                            "[NEON_EXT_STAT] {}",
+                            serde_json::to_string(&extensions)
+                                .expect("failed to serialize extensions list")
+                        );
+                    }
+                    Err(err) => error!("could not get installed extensions: {err:?}"),
+                }
            });
        }

@@ -775,9 +782,6 @@ impl ComputeNode {
        // Log metrics so that we can search for slow operations in logs
        info!(?metrics, postmaster_pid = %postmaster_pid, "compute start finished");

-        // Spawn the extension stats background task
-        self.spawn_extension_stats_task();
-
        if pspec.spec.prewarm_lfc_on_startup {
            self.prewarm_lfc();
        }
@@ -2188,41 +2192,6 @@ LIMIT 100",
            info!("Pageserver config changed");
        }
    }
-
-    pub fn spawn_extension_stats_task(&self) {
-        let conf = self.tokio_conn_conf.clone();
-        let installed_extensions_collection_interval =
-            self.params.installed_extensions_collection_interval;
-        tokio::spawn(async move {
-            // An initial sleep is added to ensure that two collections don't happen at the same time.
-            // The first collection happens during compute startup.
-            tokio::time::sleep(tokio::time::Duration::from_secs(
-                installed_extensions_collection_interval,
-            ))
-            .await;
-            let mut interval = tokio::time::interval(tokio::time::Duration::from_secs(
-                installed_extensions_collection_interval,
-            ));
-            loop {
-                interval.tick().await;
-                let _ = installed_extensions(conf.clone()).await;
-            }
-        });
-    }
-}
-
-pub async fn installed_extensions(conf: tokio_postgres::Config) -> Result<()> {
-    let res = get_installed_extensions(conf).await;
-    match res {
-        Ok(extensions) => {
-            info!(
-                "[NEON_EXT_STAT] {}",
-                serde_json::to_string(&extensions).expect("failed to serialize extensions list")
-            );
-        }
-        Err(err) => error!("could not get installed extensions: {err:?}"),
-    }
-    Ok(())
 }

 pub fn forward_termination_signal() {
--- a/docker-compose/compute_wrapper/shell/compute.sh
+++ b/docker-compose/compute_wrapper/shell/compute.sh
@@ -20,7 +20,7 @@ first_path="$(ldconfig --verbose 2>/dev/null \
    | grep --invert-match ^$'\t' \
    | cut --delimiter=: --fields=1 \
    | head --lines=1)"
-test "$first_path" == '/usr/local/lib'
+test "$first_path" == '/usr/local/lib' || true # Remove the || true in a follow-up PR. Needed for backwards compat.

 echo "Waiting pageserver become ready."
 while ! nc -z pageserver 6400; do
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -1931,7 +1931,7 @@ pub enum PagestreamFeMessage {
 }

 // Wrapped in libpq CopyData
-#[derive(Debug, strum_macros::EnumProperty)]
+#[derive(strum_macros::EnumProperty)]
 pub enum PagestreamBeMessage {
    Exists(PagestreamExistsResponse),
    Nblocks(PagestreamNblocksResponse),
@@ -2042,7 +2042,7 @@ pub enum PagestreamProtocolVersion {

 pub type RequestId = u64;

-#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
 pub struct PagestreamRequest {
    pub reqid: RequestId,
    pub request_lsn: Lsn,
@@ -2061,7 +2061,7 @@ pub struct PagestreamNblocksRequest {
    pub rel: RelTag,
 }

-#[derive(Debug, Default, PartialEq, Eq, Clone, Copy)]
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
 pub struct PagestreamGetPageRequest {
    pub hdr: PagestreamRequest,
    pub rel: RelTag,
--- a/libs/pageserver_api/src/reltag.rs
+++ b/libs/pageserver_api/src/reltag.rs
@@ -24,7 +24,7 @@ use serde::{Deserialize, Serialize};
 // FIXME: should move 'forknum' as last field to keep this consistent with Postgres.
 // Then we could replace the custom Ord and PartialOrd implementations below with
 // deriving them. This will require changes in walredoproc.c.
-#[derive(Debug, Default, PartialEq, Eq, Hash, Clone, Copy, Serialize, Deserialize)]
+#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Serialize, Deserialize)]
 pub struct RelTag {
    pub forknum: u8,
    pub spcnode: Oid,
--- a/libs/posthog_client_lite/src/lib.rs
+++ b/libs/posthog_client_lite/src/lib.rs
@@ -37,7 +37,7 @@ pub struct LocalEvaluationFlag {
 #[derive(Deserialize)]
 pub struct LocalEvaluationFlagFilters {
    groups: Vec<LocalEvaluationFlagFilterGroup>,
-    multivariate: Option<LocalEvaluationFlagMultivariate>,
+    multivariate: LocalEvaluationFlagMultivariate,
 }

 #[derive(Deserialize)]
@@ -254,7 +254,7 @@ impl FeatureStore {
        }
    }

-    /// Evaluate a multivariate feature flag. Returns an error if the flag is not available or if there are errors
+    /// Evaluate a multivariate feature flag. Returns `None` if the flag is not available or if there are errors
    /// during the evaluation.
    ///
    /// The parsing logic is as follows:
@@ -272,10 +272,6 @@ impl FeatureStore {
    /// Example: we have a multivariate flag with 3 groups of the configured global rollout percentage: A (10%), B (20%), C (70%).
    /// There is a single group with a condition that has a rollout percentage of 10% and it does not have a variant override.
    /// Then, we will have 1% of the users evaluated to A, 2% to B, and 7% to C.
-    ///
-    /// Error handling: the caller should inspect the error and decide the behavior when a feature flag
-    /// cannot be evaluated (i.e., default to false if it cannot be resolved). The error should *not* be
-    /// propagated beyond where the feature flag gets resolved.
    pub fn evaluate_multivariate(
        &self,
        flag_key: &str,
@@ -294,35 +290,6 @@ impl FeatureStore {
        )
    }

-    /// Evaluate a boolean feature flag. Returns  an error if the flag is not available or if there are errors
-    /// during the evaluation.
-    ///
-    /// The parsing logic is as follows:
-    ///
-    /// * Generate a consistent hash for the tenant-feature.
-    /// * Match each filter group.
-    ///   - If a group is matched, it will first determine whether the user is in the range of the rollout
-    ///     percentage.
-    ///   - If the hash falls within the group's rollout percentage, return true.
-    /// * Otherwise, continue with the next group until all groups are evaluated and no group is within the
-    ///   rollout percentage.
-    /// * If there are no matching groups, return an error.
-    ///
-    /// Returns `Ok(())` if the feature flag evaluates to true. In the future, it will return a payload.
-    ///
-    /// Error handling: the caller should inspect the error and decide the behavior when a feature flag
-    /// cannot be evaluated (i.e., default to false if it cannot be resolved). The error should *not* be
-    /// propagated beyond where the feature flag gets resolved.
-    pub fn evaluate_boolean(
-        &self,
-        flag_key: &str,
-        user_id: &str,
-        properties: &HashMap<String, PostHogFlagFilterPropertyValue>,
-    ) -> Result<(), PostHogEvaluationError> {
-        let hash_on_global_rollout_percentage = Self::consistent_hash(user_id, flag_key, "boolean");
-        self.evaluate_boolean_inner(flag_key, hash_on_global_rollout_percentage, properties)
-    }
-
    /// Evaluate a multivariate feature flag. Note that we directly take the mapped user ID
    /// (a consistent hash ranging from 0 to 1) so that it is easier to use it in the tests
    /// and avoid duplicate computations.
@@ -349,11 +316,6 @@ impl FeatureStore {
                    flag_key
                )));
            }
-            let Some(ref multivariate) = flag_config.filters.multivariate else {
-                return Err(PostHogEvaluationError::Internal(format!(
-                    "No multivariate available, should use evaluate_boolean?: {flag_key}"
-                )));
-            };
            // TODO: sort the groups so that variant overrides always get evaluated first and it follows the PostHog
            // Python SDK behavior; for now we do not configure conditions without variant overrides in Neon so it
            // does not matter.
@@ -362,7 +324,7 @@ impl FeatureStore {
                    GroupEvaluationResult::MatchedAndOverride(variant) => return Ok(variant),
                    GroupEvaluationResult::MatchedAndEvaluate => {
                        let mut percentage = 0;
-                        for variant in &multivariate.variants {
+                        for variant in &flag_config.filters.multivariate.variants {
                            percentage += variant.rollout_percentage;
                            if self
                                .evaluate_percentage(hash_on_global_rollout_percentage, percentage)
@@ -390,64 +352,6 @@ impl FeatureStore {
            )))
        }
    }
-
-    /// Evaluate a multivariate feature flag. Note that we directly take the mapped user ID
-    /// (a consistent hash ranging from 0 to 1) so that it is easier to use it in the tests
-    /// and avoid duplicate computations.
-    ///
-    /// Use a different consistent hash for evaluating the group rollout percentage.
-    /// The behavior: if the condition is set to rolling out to 10% of the users, and
-    /// we set the variant A to 20% in the global config, then 2% of the total users will
-    /// be evaluated to variant A.
-    ///
-    /// Note that the hash to determine group rollout percentage is shared across all groups. So if we have two
-    /// exactly-the-same conditions with 10% and 20% rollout percentage respectively, a total of 20% of the users
-    /// will be evaluated (versus 30% if group evaluation is done independently).
-    pub(crate) fn evaluate_boolean_inner(
-        &self,
-        flag_key: &str,
-        hash_on_global_rollout_percentage: f64,
-        properties: &HashMap<String, PostHogFlagFilterPropertyValue>,
-    ) -> Result<(), PostHogEvaluationError> {
-        if let Some(flag_config) = self.flags.get(flag_key) {
-            if !flag_config.active {
-                return Err(PostHogEvaluationError::NotAvailable(format!(
-                    "The feature flag is not active: {}",
-                    flag_key
-                )));
-            }
-            if flag_config.filters.multivariate.is_some() {
-                return Err(PostHogEvaluationError::Internal(format!(
-                    "This looks like a multivariate flag, should use evaluate_multivariate?: {flag_key}"
-                )));
-            };
-            // TODO: sort the groups so that variant overrides always get evaluated first and it follows the PostHog
-            // Python SDK behavior; for now we do not configure conditions without variant overrides in Neon so it
-            // does not matter.
-            for group in &flag_config.filters.groups {
-                match self.evaluate_group(group, hash_on_global_rollout_percentage, properties)? {
-                    GroupEvaluationResult::MatchedAndOverride(_) => {
-                        return Err(PostHogEvaluationError::Internal(format!(
-                            "Boolean flag cannot have overrides: {}",
-                            flag_key
-                        )));
-                    }
-                    GroupEvaluationResult::MatchedAndEvaluate => {
-                        return Ok(());
-                    }
-                    GroupEvaluationResult::Unmatched => continue,
-                }
-            }
-            // If no group is matched, the feature is not available, and up to the caller to decide what to do.
-            Err(PostHogEvaluationError::NoConditionGroupMatched)
-        } else {
-            // The feature flag is not available yet
-            Err(PostHogEvaluationError::NotAvailable(format!(
-                "Not found in the local evaluation spec: {}",
-                flag_key
-            )))
-        }
-    }
 }

 pub struct PostHogClientConfig {
@@ -565,162 +469,95 @@ mod tests {

    fn data() -> &'static str {
        r#"{
-  "flags": [
-    {
-      "id": 141807,
-      "team_id": 152860,
-      "name": "",
-      "key": "image-compaction-boundary",
-      "filters": {
-        "groups": [
-          {
-            "variant": null,
-            "properties": [
-              {
-                "key": "plan_type",
-                "type": "person",
-                "value": [
-                  "free"
-                ],
-                "operator": "exact"
-              }
+            "flags": [
+                {
+                    "id": 132794,
+                    "team_id": 152860,
+                    "name": "",
+                    "key": "gc-compaction",
+                    "filters": {
+                        "groups": [
+                            {
+                                "variant": "enabled-stage-2",
+                                "properties": [
+                                    {
+                                        "key": "plan_type",
+                                        "type": "person",
+                                        "value": [
+                                            "free"
+                                        ],
+                                        "operator": "exact"
+                                    },
+                                    {
+                                        "key": "pageserver_remote_size",
+                                        "type": "person",
+                                        "value": "10000000",
+                                        "operator": "lt"
+                                    }
+                                ],
+                                "rollout_percentage": 50
+                            },
+                            {
+                                "properties": [
+                                    {
+                                        "key": "plan_type",
+                                        "type": "person",
+                                        "value": [
+                                            "free"
+                                        ],
+                                        "operator": "exact"
+                                    },
+                                    {
+                                        "key": "pageserver_remote_size",
+                                        "type": "person",
+                                        "value": "10000000",
+                                        "operator": "lt"
+                                    }
+                                ],
+                                "rollout_percentage": 80
+                            }
+                        ],
+                        "payloads": {},
+                        "multivariate": {
+                            "variants": [
+                                {
+                                    "key": "disabled",
+                                    "name": "",
+                                    "rollout_percentage": 90
+                                },
+                                {
+                                    "key": "enabled-stage-1",
+                                    "name": "",
+                                    "rollout_percentage": 10
+                                },
+                                {
+                                    "key": "enabled-stage-2",
+                                    "name": "",
+                                    "rollout_percentage": 0
+                                },
+                                {
+                                    "key": "enabled-stage-3",
+                                    "name": "",
+                                    "rollout_percentage": 0
+                                },
+                                {
+                                    "key": "enabled",
+                                    "name": "",
+                                    "rollout_percentage": 0
+                                }
+                            ]
+                        }
+                    },
+                    "deleted": false,
+                    "active": true,
+                    "ensure_experience_continuity": false,
+                    "has_encrypted_payloads": false,
+                    "version": 6
+                }
            ],
-            "rollout_percentage": 40
-          },
-          {
-            "variant": null,
-            "properties": [],
-            "rollout_percentage": 10
-          }
-        ],
-        "payloads": {},
-        "multivariate": null
-      },
-      "deleted": false,
-      "active": true,
-      "ensure_experience_continuity": false,
-      "has_encrypted_payloads": false,
-      "version": 1
-    },
-    {
-      "id": 135586,
-      "team_id": 152860,
-      "name": "",
-      "key": "boolean-flag",
-      "filters": {
-        "groups": [
-          {
-            "variant": null,
-            "properties": [
-              {
-                "key": "plan_type",
-                "type": "person",
-                "value": [
-                  "free"
-                ],
-                "operator": "exact"
-              }
-            ],
-            "rollout_percentage": 47
-          }
-        ],
-        "payloads": {},
-        "multivariate": null
-      },
-      "deleted": false,
-      "active": true,
-      "ensure_experience_continuity": false,
-      "has_encrypted_payloads": false,
-      "version": 1
-    },
-    {
-      "id": 132794,
-      "team_id": 152860,
-      "name": "",
-      "key": "gc-compaction",
-      "filters": {
-        "groups": [
-          {
-            "variant": "enabled-stage-2",
-            "properties": [
-              {
-                "key": "plan_type",
-                "type": "person",
-                "value": [
-                  "free"
-                ],
-                "operator": "exact"
-              },
-              {
-                "key": "pageserver_remote_size",
-                "type": "person",
-                "value": "10000000",
-                "operator": "lt"
-              }
-            ],
-             "rollout_percentage": 50
-          },
-          {
-            "properties": [
-              {
-                "key": "plan_type",
-                "type": "person",
-                "value": [
-                  "free"
-                ],
-                "operator": "exact"
-              },
-              {
-                "key": "pageserver_remote_size",
-                "type": "person",
-                "value": "10000000",
-                "operator": "lt"
-              }
-            ],
-            "rollout_percentage": 80
-          }
-        ],
-        "payloads": {},
-        "multivariate": {
-          "variants": [
-            {
-              "key": "disabled",
-              "name": "",
-              "rollout_percentage": 90
-            },
-            {
-              "key": "enabled-stage-1",
-              "name": "",
-              "rollout_percentage": 10
-            },
-            {
-              "key": "enabled-stage-2",
-              "name": "",
-              "rollout_percentage": 0
-            },
-            {
-              "key": "enabled-stage-3",
-              "name": "",
-              "rollout_percentage": 0
-            },
-            {
-              "key": "enabled",
-              "name": "",
-              "rollout_percentage": 0
-            }
-          ]
-        }
-      },
-      "deleted": false,
-      "active": true,
-      "ensure_experience_continuity": false,
-      "has_encrypted_payloads": false,
-      "version": 7
-    }
-  ],
-  "group_type_mapping": {},
-  "cohorts": {}
-}"#
+            "group_type_mapping": {},
+            "cohorts": {}
+        }"#
    }

    #[test]
@@ -796,125 +633,4 @@ mod tests {
            Err(PostHogEvaluationError::NoConditionGroupMatched)
        ),);
    }
-
-    #[test]
-    fn evaluate_boolean_1() {
-        // The `boolean-flag` feature flag only has one group that matches on the free user.
-
-        let mut store = FeatureStore::new();
-        let response: LocalEvaluationResponse = serde_json::from_str(data()).unwrap();
-        store.set_flags(response.flags);
-
-        // This lacks the required properties and cannot be evaluated.
-        let variant = store.evaluate_boolean_inner("boolean-flag", 1.00, &HashMap::new());
-        assert!(matches!(
-            variant,
-            Err(PostHogEvaluationError::NotAvailable(_))
-        ),);
-
-        let properties_unmatched = HashMap::from([
-            (
-                "plan_type".to_string(),
-                PostHogFlagFilterPropertyValue::String("paid".to_string()),
-            ),
-            (
-                "pageserver_remote_size".to_string(),
-                PostHogFlagFilterPropertyValue::Number(1000.0),
-            ),
-        ]);
-
-        // This does not match any group so there will be an error.
-        let variant = store.evaluate_boolean_inner("boolean-flag", 1.00, &properties_unmatched);
-        assert!(matches!(
-            variant,
-            Err(PostHogEvaluationError::NoConditionGroupMatched)
-        ),);
-
-        let properties = HashMap::from([
-            (
-                "plan_type".to_string(),
-                PostHogFlagFilterPropertyValue::String("free".to_string()),
-            ),
-            (
-                "pageserver_remote_size".to_string(),
-                PostHogFlagFilterPropertyValue::Number(1000.0),
-            ),
-        ]);
-
-        // It matches the first group as 0.10 <= 0.50 and the properties are matched. Then it gets evaluated to the variant override.
-        let variant = store.evaluate_boolean_inner("boolean-flag", 0.10, &properties);
-        assert!(variant.is_ok());
-
-        // It matches the group conditions but not the group rollout percentage.
-        let variant = store.evaluate_boolean_inner("boolean-flag", 1.00, &properties);
-        assert!(matches!(
-            variant,
-            Err(PostHogEvaluationError::NoConditionGroupMatched)
-        ),);
-    }
-
-    #[test]
-    fn evaluate_boolean_2() {
-        // The `image-compaction-boundary` feature flag has one group that matches on the free user and a group that matches on all users.
-
-        let mut store = FeatureStore::new();
-        let response: LocalEvaluationResponse = serde_json::from_str(data()).unwrap();
-        store.set_flags(response.flags);
-
-        // This lacks the required properties and cannot be evaluated.
-        let variant =
-            store.evaluate_boolean_inner("image-compaction-boundary", 1.00, &HashMap::new());
-        assert!(matches!(
-            variant,
-            Err(PostHogEvaluationError::NotAvailable(_))
-        ),);
-
-        let properties_unmatched = HashMap::from([
-            (
-                "plan_type".to_string(),
-                PostHogFlagFilterPropertyValue::String("paid".to_string()),
-            ),
-            (
-                "pageserver_remote_size".to_string(),
-                PostHogFlagFilterPropertyValue::Number(1000.0),
-            ),
-        ]);
-
-        // This does not match the filtered group but the all user group.
-        let variant =
-            store.evaluate_boolean_inner("image-compaction-boundary", 1.00, &properties_unmatched);
-        assert!(matches!(
-            variant,
-            Err(PostHogEvaluationError::NoConditionGroupMatched)
-        ),);
-        let variant =
-            store.evaluate_boolean_inner("image-compaction-boundary", 0.05, &properties_unmatched);
-        assert!(variant.is_ok());
-
-        let properties = HashMap::from([
-            (
-                "plan_type".to_string(),
-                PostHogFlagFilterPropertyValue::String("free".to_string()),
-            ),
-            (
-                "pageserver_remote_size".to_string(),
-                PostHogFlagFilterPropertyValue::Number(1000.0),
-            ),
-        ]);
-
-        // It matches the first group as 0.30 <= 0.40 and the properties are matched. Then it gets evaluated to the variant override.
-        let variant = store.evaluate_boolean_inner("image-compaction-boundary", 0.30, &properties);
-        assert!(variant.is_ok());
-
-        // It matches the group conditions but not the group rollout percentage.
-        let variant = store.evaluate_boolean_inner("image-compaction-boundary", 1.00, &properties);
-        assert!(matches!(
-            variant,
-            Err(PostHogEvaluationError::NoConditionGroupMatched)
-        ),);
-
-        // It matches the second "all" group conditions.
-        let variant = store.evaluate_boolean_inner("image-compaction-boundary", 0.09, &properties);
-        assert!(variant.is_ok());
-    }
 }
--- a/pageserver/client_grpc/Cargo.toml
+++ b/pageserver/client_grpc/Cargo.toml
@@ -0,0 +1,25 @@
+[package]
+name = "pageserver_client_grpc"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+bytes.workspace = true
+futures.workspace = true
+http.workspace = true
+thiserror.workspace = true
+tonic.workspace = true
+tracing.workspace = true
+tokio = { version = "1.43.1", features = ["full", "macros", "net", "io-util", "rt", "rt-multi-thread"] }
+uuid = { version = "1", features = ["v4"] }
+tower = {  version = "0.4", features = ["timeout", "util"] }
+rand = "0.8"
+tokio-util = { version = "0.7", features = ["compat"] }
+hyper-util = "0.1.9"
+hyper = "1.6.0"
+metrics.workspace = true
+priority-queue = "2.3.1"
+
+
+pageserver_page_api.workspace = true
+utils.workspace = true
--- a/pageserver/client_grpc/src/lib.rs
+++ b/pageserver/client_grpc/src/lib.rs
@@ -0,0 +1,193 @@
+//
+// Pageserver gRPC client library
+//
+// This library provides a gRPC client for the pageserver for the
+// communicator project.
+//
+// This library is a work in progress.
+//
+// TODO: This should properly use the shard map
+//
+
+use std::collections::HashMap;
+
+use bytes::Bytes;
+use futures::{StreamExt};
+use thiserror::Error;
+use tonic::metadata::AsciiMetadataValue;
+
+use pageserver_page_api::model::*;
+use pageserver_page_api::proto;
+
+use pageserver_page_api::proto::PageServiceClient;
+use utils::shard::ShardIndex;
+
+use std::fmt::Debug;
+use tracing::error;
+
+use tokio::sync::RwLock;
+
+use tonic::transport::{Channel, Endpoint};
+/// Errors returned by [`PageserverClient`].
+#[derive(Error, Debug)]
+pub enum PageserverClientError {
+    /// Wraps a `tonic::transport::Error`, e.g. from connecting to a pageserver.
+    #[error("could not connect to service: {0}")]
+    ConnectError(#[from] tonic::transport::Error),
+    /// The RPC completed with a gRPC error status.
+    #[error("could not perform request: {0}")]
+    RequestError(#[from] tonic::Status),
+    /// A message could not be converted between its model and protobuf forms.
+    #[error("protocol error: {0}")]
+    ProtocolError(#[from] ProtocolError),
+    /// An address could not be parsed as a URI.
+    #[error("invalid URI: {0}")]
+    InvalidUri(#[from] http::uri::InvalidUri),
+    /// Any other request failure, described by a free-form message.
+    #[error("could not perform request: {0}")]
+    Other(String),
+}
+
+/// gRPC client for the pageserver that keeps one cached channel per shard.
+pub struct PageserverClient {
+    // The underscore-prefixed fields are stored by `new` but not read by the code in this file.
+    _tenant_id: String,
+    _timeline_id: String,
+    _auth_token: Option<String>,
+    /// Endpoint URL of the pageserver for each shard.
+    shard_map: HashMap<ShardIndex, String>,
+    /// Channels opened so far, keyed by shard; populated on first use by `get_client`.
+    channels: tokio::sync::RwLock<HashMap<ShardIndex, Channel>>,
+    /// Base interceptor carrying tenant, timeline and auth metadata; a per-shard copy is
+    /// derived from it for each request.
+    auth_interceptor: AuthInterceptor,
+}
+
+impl PageserverClient {
+    /// Creates a client for the given tenant and timeline.
+    ///
+    /// `shard_map` gives the endpoint URL for each shard. No connection is opened here;
+    /// channels are created lazily by the first request that needs them.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `tenant_id`, `timeline_id` or the auth token cannot be parsed into a gRPC
+    /// metadata value.
+    ///
+    /// TODO: this doesn't currently react to changes in the shard map.
+    pub fn new(
+        tenant_id: &str,
+        timeline_id: &str,
+        auth_token: &Option<String>,
+        shard_map: HashMap<ShardIndex, String>,
+    ) -> Self {
+        Self {
+            _tenant_id: tenant_id.to_string(),
+            _timeline_id: timeline_id.to_string(),
+            _auth_token: auth_token.clone(),
+            shard_map,
+            channels: RwLock::new(HashMap::new()),
+            auth_interceptor: AuthInterceptor::new(tenant_id, timeline_id, auth_token.as_deref()),
+        }
+    }
+
+    /// Sends a single GetPage request and returns the page images from the first response.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the shard's channel cannot be obtained, if the RPC fails, if the
+    /// response stream ends without a response, or if the response cannot be converted.
+    ///
+    /// TODO: This opens a new gRPC stream for every request, which is extremely inefficient
+    pub async fn get_page(
+        &self,
+        request: &GetPageRequest,
+    ) -> Result<Vec<Bytes>, PageserverClientError> {
+        // FIXME: calculate the shard number correctly
+        let shard = ShardIndex::unsharded();
+        let chan = self.get_client(shard).await?;
+
+        let mut client =
+            PageServiceClient::with_interceptor(chan, self.auth_interceptor.for_shard(shard));
+
+        let request = proto::GetPageRequest::from(request);
+        let request_stream = futures::stream::once(std::future::ready(request));
+
+        let mut response_stream = client
+            .get_pages(tonic::Request::new(request_stream))
+            .await?
+            .into_inner();
+
+        let Some(response) = response_stream.next().await else {
+            return Err(PageserverClientError::Other(
+                "no response received for getpage request".to_string(),
+            ));
+        };
+
+        // `response?` turns an error status into `RequestError`; a conversion failure is
+        // reported to the caller instead of panicking inside the library.
+        let response = GetPageResponse::try_from(response?).map_err(|e| {
+            PageserverClientError::Other(format!("invalid getpage response: {e}"))
+        })?;
+        Ok(response.page_images.to_vec())
+    }
+
+    /// Returns the channel for `shard`, connecting and caching it on first use.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the shard is not in the shard map, or if the endpoint is invalid
+    /// or cannot be connected to.
+    ///
+    /// TODO: this should use a connection pool with concurrency limits,
+    /// not a single connection to the shard.
+    async fn get_client(&self, shard: ShardIndex) -> Result<Channel, PageserverClientError> {
+        // Fast path: a shared read lock is enough to look up an existing channel.
+        if let Some(channel) = self.channels.read().await.get(&shard) {
+            return Ok(channel.clone());
+        }
+
+        let shard_url = self.shard_map.get(&shard).ok_or_else(|| {
+            PageserverClientError::Other(format!("shard {shard} not found in shard map"))
+        })?;
+
+        // No lock is held while connecting, so other shards are not blocked meanwhile.
+        let channel = Endpoint::from_shared(shard_url.clone())?
+            .connect()
+            .await?;
+
+        // Another task may have connected concurrently; keep whichever channel was cached first.
+        let mut channels = self.channels.write().await;
+        let cached = channels.entry(shard).or_insert(channel).clone();
+        Ok(cached)
+    }
+}
+
+/// Inject tenant_id, timeline_id and authentication token to all pageserver requests.
+///
+/// The shard id is optional and is only injected when set (see `for_shard`).
+#[derive(Clone)]
+struct AuthInterceptor {
+    /// Value sent as the `neon-tenant-id` metadata entry.
+    tenant_id: AsciiMetadataValue,
+    /// Value sent as the `neon-shard-id` metadata entry, when present.
+    shard_id: Option<AsciiMetadataValue>,
+    /// Value sent as the `neon-timeline-id` metadata entry.
+    timeline_id: AsciiMetadataValue,
+    /// Value sent as the `authorization` metadata entry, when present.
+    auth_header: Option<AsciiMetadataValue>, // including "Bearer " prefix
+}
+
+impl AuthInterceptor {
+    /// Builds an interceptor for the given tenant and timeline, without a shard id.
+    ///
+    /// Panics if any of the inputs cannot be parsed into a metadata value.
+    fn new(tenant_id: &str, timeline_id: &str, auth_token: Option<&str>) -> Self {
+        let tenant_id = tenant_id.parse().expect("could not parse tenant id");
+        let timeline_id = timeline_id.parse().expect("could not parse timeline id");
+        // The header value carries the "Bearer " prefix so it can be inserted as-is.
+        let auth_header = auth_token.map(|token| {
+            format!("Bearer {token}")
+                .parse()
+                .expect("could not parse auth token")
+        });
+        Self {
+            tenant_id,
+            shard_id: None,
+            timeline_id,
+            auth_header,
+        }
+    }
+
+    /// Returns a copy of this interceptor that also injects `shard_id`.
+    fn for_shard(&self, shard_id: ShardIndex) -> Self {
+        let shard_value = shard_id
+            .to_string()
+            .parse()
+            .expect("could not parse shard id");
+        Self {
+            shard_id: Some(shard_value),
+            ..self.clone()
+        }
+    }
+}
+
+impl tonic::service::Interceptor for AuthInterceptor {
+    /// Adds the tenant, timeline and (when set) shard and authorization metadata to `req`.
+    fn call(&mut self, mut req: tonic::Request<()>) -> Result<tonic::Request<()>, tonic::Status> {
+        let metadata = req.metadata_mut();
+        metadata.insert("neon-tenant-id", self.tenant_id.clone());
+        if let Some(shard_id) = self.shard_id.clone() {
+            metadata.insert("neon-shard-id", shard_id);
+        }
+        metadata.insert("neon-timeline-id", self.timeline_id.clone());
+        if let Some(auth_header) = self.auth_header.clone() {
+            metadata.insert("authorization", auth_header);
+        }
+        Ok(req)
+    }
+}
--- a/pageserver/page_api/src/lib.rs
+++ b/pageserver/page_api/src/lib.rs
@@ -18,6 +18,6 @@ pub mod proto {
    pub use page_service_server::{PageService, PageServiceServer};
 }

-mod model;
+pub mod model;

 pub use model::*;
--- a/pageserver/page_api/src/model.rs
+++ b/pageserver/page_api/src/model.rs
@@ -102,6 +102,15 @@ impl TryFrom<ReadLsn> for proto::ReadLsn {
    }
 }

+impl From<&ReadLsn> for proto::ReadLsn {
+    /// Converts a borrowed `ReadLsn` to its protobuf form. A missing
+    /// `not_modified_since_lsn` is encoded as the default LSN value.
+    fn from(value: &ReadLsn) -> proto::ReadLsn {
+        let request_lsn = value.request_lsn.into();
+        let not_modified_since_lsn = value.not_modified_since_lsn.unwrap_or_default().0;
+        proto::ReadLsn {
+            request_lsn,
+            not_modified_since_lsn,
+        }
+    }
+}
+
 // RelTag is defined in pageserver_api::reltag.
 pub type RelTag = pageserver_api::reltag::RelTag;

@@ -132,6 +141,16 @@ impl From<RelTag> for proto::RelTag {
    }
 }

+impl From<&RelTag> for proto::RelTag {
+    /// Converts a borrowed `RelTag` to its protobuf form, field by field.
+    fn from(value: &RelTag) -> proto::RelTag {
+        let (spc_oid, db_oid, rel_number) = (value.spcnode, value.dbnode, value.relnode);
+        let fork_number = value.forknum as u32;
+        proto::RelTag {
+            spc_oid,
+            db_oid,
+            rel_number,
+            fork_number,
+        }
+    }
+}
 /// Checks whether a relation exists, at the given LSN. Only valid on shard 0, other shards error.
 #[derive(Clone, Copy, Debug)]
 pub struct CheckRelExistsRequest {
@@ -311,6 +330,17 @@ impl TryFrom<proto::GetPageRequest> for GetPageRequest {
    }
 }

+impl From<&GetPageRequest> for proto::GetPageRequest {
+    /// Converts a borrowed `GetPageRequest` to its protobuf form.
+    ///
+    /// Uses the infallible borrowed conversions for `ReadLsn` and `RelTag`, so this `From`
+    /// impl cannot panic. Validation is left to the fallible `TryFrom` conversion.
+    fn from(request: &GetPageRequest) -> proto::GetPageRequest {
+        proto::GetPageRequest {
+            request_id: request.request_id,
+            request_class: request.request_class.into(),
+            read_lsn: Some((&request.read_lsn).into()),
+            rel: Some((&request.rel).into()),
+            // Copy the block numbers straight into a Vec, without an intermediate clone.
+            block_number: request.block_numbers.to_vec(),
+        }
+    }
+}
 impl TryFrom<GetPageRequest> for proto::GetPageRequest {
    type Error = ProtocolError;

@@ -584,7 +614,6 @@ impl TryFrom<GetSlruSegmentResponse> for proto::GetSlruSegmentResponse {
    type Error = ProtocolError;

    fn try_from(segment: GetSlruSegmentResponse) -> Result<Self, Self::Error> {
-        // TODO: can a segment legitimately be empty?
        if segment.is_empty() {
            return Err(ProtocolError::Missing("segment"));
        }
--- a/pageserver/pagebench/Cargo.toml
+++ b/pageserver/pagebench/Cargo.toml
@@ -8,24 +8,25 @@ license.workspace = true

 [dependencies]
 anyhow.workspace = true
-async-trait.workspace = true
 camino.workspace = true
 clap.workspace = true
+thiserror.workspace = true
 futures.workspace = true
 hdrhistogram.workspace = true
 humantime.workspace = true
 humantime-serde.workspace = true
 rand.workspace = true
-reqwest.workspace = true
+reqwest.workspace=true
+bytes.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 tracing.workspace = true
 tokio.workspace = true
-tokio-stream.workspace = true
 tokio-util.workspace = true
-tonic.workspace = true
+async-trait = "0.1"

 pageserver_client.workspace = true
+pageserver_client_grpc.workspace = true
 pageserver_api.workspace = true
 pageserver_page_api.workspace = true
 utils = { path = "../../libs/utils/" }
--- a/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs
+++ b/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs
@@ -7,34 +7,36 @@ use std::sync::{Arc, Mutex};
 use std::time::{Duration, Instant};

 use anyhow::Context;
-use async_trait::async_trait;
 use camino::Utf8PathBuf;
 use pageserver_api::key::Key;
 use pageserver_api::keyspace::KeySpaceAccum;
-use pageserver_api::models::{
-    PagestreamGetPageRequest, PagestreamGetPageResponse, PagestreamRequest,
-};
+use pageserver_api::models::{PagestreamGetPageRequest, PagestreamRequest};
+use pageserver_page_api::model::{GetPageClass};
+use pageserver_client::page_service::PagestreamClient;
 use pageserver_api::shard::TenantShardId;
-use pageserver_page_api::proto;
 use rand::prelude::*;
 use tokio::task::JoinSet;
 use tokio_util::sync::CancellationToken;
 use tracing::info;
 use utils::id::TenantTimelineId;
+use utils::id::TenantId;
+use utils::id::TimelineId;
 use utils::lsn::Lsn;

+
+
+use utils::shard::ShardIndex;
+use futures::{future::BoxFuture, stream::FuturesOrdered, FutureExt, StreamExt};
+
 use crate::util::tokio_thread_local_stats::AllThreadLocalStats;
 use crate::util::{request_stats, tokio_thread_local_stats};

-#[derive(clap::ValueEnum, Clone, Debug)]
-enum Protocol {
-    Libpq,
-    Grpc,
-}
-
 /// GetPage@LatestLSN, uniformly distributed across the compute-accessible keyspace.
 #[derive(clap::Parser)]
 pub(crate) struct Args {
+
+    #[clap(long, default_value = "false")]
+    grpc: bool,
    #[clap(long, default_value = "http://localhost:9898")]
    mgmt_api_endpoint: String,
    #[clap(long, default_value = "postgres://postgres@localhost:64000")]
@@ -45,8 +47,6 @@ pub(crate) struct Args {
    num_clients: NonZeroUsize,
    #[clap(long)]
    runtime: Option<humantime::Duration>,
-    #[clap(long, value_enum, default_value = "libpq")]
-    protocol: Protocol,
    /// Each client sends requests at the given rate.
    ///
    /// If a request takes too long and we should be issuing a new request already,
@@ -130,6 +130,7 @@ struct Output {
    total: request_stats::Output,
 }

+
 tokio_thread_local_stats::declare!(STATS: request_stats::Stats);

 pub(crate) fn main(args: Args) -> anyhow::Result<()> {
@@ -315,20 +316,20 @@ async fn main_impl(
                .unwrap();

        Box::pin(async move {
-            let client: Box<dyn Client> = match args.protocol {
-                Protocol::Libpq => Box::new(
-                    LibpqClient::new(args.page_service_connstring.clone(), worker_id.timeline)
-                        .await
-                        .unwrap(),
-                ),
+            if args.grpc {
+                let grpc = GrpcProtocol::new(
+                    args.page_service_connstring.clone(),
+                    worker_id.timeline.tenant_id,
+                    worker_id.timeline.timeline_id).await;
+                client_proto(args, grpc, worker_id, ss, cancel, rps_period, ranges, weights).await
+            } else {
+                let pg =  PgProtocol::new(
+                    args.page_service_connstring.clone(),
+                    worker_id.timeline.tenant_id,
+                    worker_id.timeline.timeline_id).await;
+                client_proto(args, pg, worker_id, ss, cancel, rps_period, ranges, weights).await
+            }

-                Protocol::Grpc => Box::new(
-                    GrpcClient::new(args.page_service_connstring.clone(), worker_id.timeline)
-                        .await
-                        .unwrap(),
-                ),
-            };
-            run_worker(args, client, ss, cancel, rps_period, ranges, weights).await
        })
    };

@@ -379,20 +380,224 @@ async fn main_impl(

    anyhow::Ok(())
 }
+// Protocol abstraction for the benchmark clients (libpq and gRPC).
+use async_trait::async_trait;
+use rand::distributions::weighted::WeightedIndex;

-async fn run_worker(
+// Uses PagestreamClient, KeyRange, etc. from earlier in this file.
+
+/// Common interface for both Pg and Grpc versions.
+///
+/// An implementation owns its client and tracks its own in-flight requests, so the
+/// benchmark loop only needs `add_to_inflight`, `get_start_time` and `len`.
+#[async_trait]
+trait Protocol: Send {
+    /// Constructor/factory: builds the protocol client for the given connection string,
+    /// tenant and timeline.
+    async fn new(
+        conn_string: String,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+    ) -> Self
+    where
+        Self: Sized;
+
+    /// Issues one GetPage request for a key picked at random from `ranges` (weighted by
+    /// `weights`) and records `start` as that request's start time.
+    async fn add_to_inflight(
+        &mut self,
+        start: Instant,
+        args: &Args,
+        ranges: Vec<KeyRange>,
+        weights: WeightedIndex<i128>,
+    );
+
+    /// Wait for the next response and return the start time recorded for its request.
+    async fn get_start_time(&mut self) -> Instant;
+
+    /// How many in-flight requests do we have?
+    fn len(&self) -> usize;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// PgProtocol
+///////////////////////////////////////////////////////////////////////////////
+
+/// `Protocol` implementation backed by a libpq pagestream connection.
+struct PgProtocol {
+    /// The pagestream used to send requests and receive responses.
+    libpq_pagestream: PagestreamClient,
+    /// Start times of the requests sent but not yet received, oldest first.
+    libpq_vector: VecDeque<Instant>,
+}
+
+#[async_trait]
+impl Protocol for PgProtocol {
+    /// Connects to the page service and opens a pagestream for the given timeline.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the connection or the pagestream setup fails; the benchmark cannot
+    /// proceed without a working connection.
+    async fn new(
+        conn_string: String,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+    ) -> Self {
+        let client = pageserver_client::page_service::Client::new(conn_string)
+            .await
+            .expect("failed to connect to the page service")
+            .pagestream(tenant_id, timeline_id)
+            .await
+            .expect("failed to open a pagestream");
+        Self {
+            libpq_pagestream: client,
+            libpq_vector: VecDeque::new(),
+        }
+    }
+
+    /// Sends a GetPage request for a randomly chosen key and records `start` for it.
+    ///
+    /// A send failure is logged rather than silently dropped. The start time is still
+    /// recorded so that `len` keeps counting the request and the benchmark loop advances.
+    async fn add_to_inflight(
+        &mut self,
+        start: Instant,
+        args: &Args,
+        ranges: Vec<KeyRange>,
+        weights: WeightedIndex<i128>,
+    ) {
+        // The RNG is confined to this block so it is dropped before the `.await` below.
+        let req = {
+            let mut rng = rand::thread_rng();
+            let r = &ranges[weights.sample(&mut rng)];
+            let key: i128 = rng.gen_range(r.start..r.end);
+            let key = Key::from_i128(key);
+            assert!(key.is_rel_block_key());
+            let (rel_tag, block_no) = key
+                .to_rel_block()
+                .expect("key was just asserted to be a rel block key");
+            PagestreamGetPageRequest {
+                hdr: PagestreamRequest {
+                    reqid: 0,
+                    request_lsn: if rng.gen_bool(args.req_latest_probability) {
+                        Lsn::MAX
+                    } else {
+                        r.timeline_lsn
+                    },
+                    not_modified_since: r.timeline_lsn,
+                },
+                rel: rel_tag,
+                blkno: block_no,
+            }
+        };
+
+        if let Err(e) = self.libpq_pagestream.getpage_send(req).await {
+            tracing::error!("getpage send failed: {e:#}");
+        }
+        self.libpq_vector.push_back(start);
+    }
+
+    /// Waits for the next response and returns the start time of the oldest request.
+    ///
+    /// A receive failure is logged rather than silently dropped.
+    ///
+    /// # Panics
+    ///
+    /// Panics if there is no in-flight request.
+    async fn get_start_time(&mut self) -> Instant {
+        let start = self
+            .libpq_vector
+            .pop_front()
+            .expect("get_start_time called with no in-flight request");
+        if let Err(e) = self.libpq_pagestream.getpage_recv().await {
+            tracing::error!("getpage request failed: {e:#}");
+        }
+        start
+    }
+
+    /// Number of requests sent whose responses have not been received yet.
+    fn len(&self) -> usize {
+        self.libpq_vector.len()
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// GrpcProtocol
+///////////////////////////////////////////////////////////////////////////////
+/// Future for one in-flight gRPC GetPage request. It resolves to the request's start time
+/// and the error, if the request failed.
+type GetPageFut = BoxFuture<'static, (Instant, Option<pageserver_client_grpc::PageserverClientError>)>;
+/// `Protocol` implementation backed by the gRPC pageserver client.
+struct GrpcProtocol {
+    /// Client shared with every in-flight request future.
+    grpc_page_client: Arc<pageserver_client_grpc::PageserverClient>,
+    /// In-flight requests, yielded in the order they were pushed.
+    grpc_vector: FuturesOrdered<GetPageFut>,
+}
+
+#[async_trait]
+impl Protocol for GrpcProtocol {
+    /// Builds a gRPC client whose shard map has a single, unsharded entry pointing at
+    /// `conn_string`. No auth token is configured.
+    async fn new(
+        conn_string: String,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+    ) -> Self {
+        let mut shard_map = std::collections::HashMap::new();
+        shard_map.insert(ShardIndex::unsharded(), conn_string);
+
+        let tenant = tenant_id.to_string();
+        let timeline = timeline_id.to_string();
+        let client =
+            pageserver_client_grpc::PageserverClient::new(&tenant, &timeline, &None, shard_map);
+
+        Self {
+            grpc_page_client: Arc::new(client),
+            grpc_vector: FuturesOrdered::new(),
+        }
+    }
+
+    /// Queues a GetPage request for a randomly chosen key; its future carries `start`.
+    async fn add_to_inflight(
+        &mut self,
+        start: Instant,
+        args: &Args,
+        ranges: Vec<KeyRange>,
+        weights: WeightedIndex<i128>,
+    ) {
+        let req = {
+            let mut rng = rand::thread_rng();
+            let range = &ranges[weights.sample(&mut rng)];
+            let key = Key::from_i128(rng.gen_range(range.start..range.end));
+            assert!(key.is_rel_block_key());
+            let (rel_tag, block_no) = key.to_rel_block().unwrap();
+            let request_lsn = if rng.gen_bool(args.req_latest_probability) {
+                Lsn::MAX
+            } else {
+                range.timeline_lsn
+            };
+            pageserver_page_api::model::GetPageRequest {
+                request_id: 0,
+                request_class: GetPageClass::Normal,
+                read_lsn: pageserver_page_api::model::ReadLsn {
+                    request_lsn,
+                    not_modified_since_lsn: Some(range.timeline_lsn),
+                },
+                // The model's `RelTag` is an alias of the type returned by `to_rel_block`.
+                rel: rel_tag,
+                block_numbers: vec![block_no].into(),
+            }
+        };
+
+        let client = Arc::clone(&self.grpc_page_client);
+        // Only the failure (if any) is kept; the page contents are not needed here.
+        let pending: GetPageFut =
+            async move { (start, client.get_page(&req).await.err()) }.boxed();
+        self.grpc_vector.push_back(pending);
+    }
+
+    /// Waits for the oldest in-flight request, logs its error if it failed, and returns
+    /// its start time.
+    async fn get_start_time(&mut self) -> Instant {
+        let (start, failure) = self.grpc_vector.next().await.unwrap();
+        if let Some(e) = failure {
+            tracing::error!("getpage request failed: {e}");
+        }
+        start
+    }
+
+    /// Number of requests whose futures have not been yielded yet.
+    fn len(&self) -> usize {
+        self.grpc_vector.len()
+    }
+}
+
+async fn client_proto(
    args: &Args,
-    mut client: Box<dyn Client>,
+    mut protocol: impl Protocol,
+    worker_id: WorkerId,
    shared_state: Arc<SharedState>,
    cancel: CancellationToken,
    rps_period: Option<Duration>,
    ranges: Vec<KeyRange>,
    weights: rand::distributions::weighted::WeightedIndex<i128>,
 ) {
+
+
    shared_state.start_work_barrier.wait().await;
    let client_start = Instant::now();
    let mut ticks_processed = 0;
-    let mut inflight = VecDeque::new();
    while !cancel.is_cancelled() {
        // Detect if a request took longer than the RPS rate
        if let Some(period) = &rps_period {
@@ -407,37 +612,12 @@ async fn run_worker(
            ticks_processed = periods_passed_until_now;
        }

-        while inflight.len() < args.queue_depth.get() {
+        while protocol.len() < args.queue_depth.get() {
            let start = Instant::now();
-            let req = {
-                let mut rng = rand::thread_rng();
-                let r = &ranges[weights.sample(&mut rng)];
-                let key: i128 = rng.gen_range(r.start..r.end);
-                let key = Key::from_i128(key);
-                assert!(key.is_rel_block_key());
-                let (rel_tag, block_no) = key
-                    .to_rel_block()
-                    .expect("we filter non-rel-block keys out above");
-                PagestreamGetPageRequest {
-                    hdr: PagestreamRequest {
-                        reqid: 0,
-                        request_lsn: if rng.gen_bool(args.req_latest_probability) {
-                            Lsn::MAX
-                        } else {
-                            r.timeline_lsn
-                        },
-                        not_modified_since: r.timeline_lsn,
-                    },
-                    rel: rel_tag,
-                    blkno: block_no,
-                }
-            };
-            client.send_get_page(req).await.unwrap();
-            inflight.push_back(start);
+            protocol.add_to_inflight(start, args, ranges.clone(), weights.clone()).await;
        }

-        let start = inflight.pop_front().unwrap();
-        client.recv_get_page().await.unwrap();
+        let start = protocol.get_start_time().await;
        let end = Instant::now();
        shared_state.live_stats.request_done();
        ticks_processed += 1;
@@ -459,101 +639,3 @@ async fn run_worker(
        }
    }
 }
-
-/// A benchmark client, to allow switching out the transport protocol.
-///
-/// For simplicity, this just uses separate asynchronous send/recv methods. The send method could
-/// return a future that resolves when the response is received, but we don't really need it.
-#[async_trait]
-trait Client: Send {
-    /// Sends an asynchronous GetPage request to the pageserver.
-    async fn send_get_page(&mut self, req: PagestreamGetPageRequest) -> anyhow::Result<()>;
-
-    /// Receives the next GetPage response from the pageserver.
-    async fn recv_get_page(&mut self) -> anyhow::Result<PagestreamGetPageResponse>;
-}
-
-/// A libpq-based Pageserver client.
-struct LibpqClient {
-    inner: pageserver_client::page_service::PagestreamClient,
-}
-
-impl LibpqClient {
-    async fn new(connstring: String, ttid: TenantTimelineId) -> anyhow::Result<Self> {
-        let inner = pageserver_client::page_service::Client::new(connstring)
-            .await?
-            .pagestream(ttid.tenant_id, ttid.timeline_id)
-            .await?;
-        Ok(Self { inner })
-    }
-}
-
-#[async_trait]
-impl Client for LibpqClient {
-    async fn send_get_page(&mut self, req: PagestreamGetPageRequest) -> anyhow::Result<()> {
-        self.inner.getpage_send(req).await
-    }
-
-    async fn recv_get_page(&mut self) -> anyhow::Result<PagestreamGetPageResponse> {
-        self.inner.getpage_recv().await
-    }
-}
-
-/// A gRPC client using the raw, no-frills gRPC client.
-struct GrpcClient {
-    req_tx: tokio::sync::mpsc::Sender<proto::GetPageRequest>,
-    resp_rx: tonic::Streaming<proto::GetPageResponse>,
-}
-
-impl GrpcClient {
-    async fn new(connstring: String, ttid: TenantTimelineId) -> anyhow::Result<Self> {
-        let mut client = pageserver_page_api::proto::PageServiceClient::connect(connstring).await?;
-
-        let (req_tx, req_rx) = tokio::sync::mpsc::channel(1);
-        let req_stream = tokio_stream::wrappers::ReceiverStream::new(req_rx);
-        let mut req = tonic::Request::new(req_stream);
-        let metadata = req.metadata_mut();
-        metadata.insert("neon-tenant-id", ttid.tenant_id.to_string().try_into()?);
-        metadata.insert("neon-timeline-id", ttid.timeline_id.to_string().try_into()?);
-        metadata.insert("neon-shard-id", "0000".try_into()?);
-
-        let resp = client.get_pages(req).await?;
-        let resp_stream = resp.into_inner();
-
-        Ok(Self {
-            req_tx,
-            resp_rx: resp_stream,
-        })
-    }
-}
-
-#[async_trait]
-impl Client for GrpcClient {
-    async fn send_get_page(&mut self, req: PagestreamGetPageRequest) -> anyhow::Result<()> {
-        let req = proto::GetPageRequest {
-            request_id: 0,
-            request_class: proto::GetPageClass::Normal as i32,
-            read_lsn: Some(proto::ReadLsn {
-                request_lsn: req.hdr.request_lsn.0,
-                not_modified_since_lsn: req.hdr.not_modified_since.0,
-            }),
-            rel: Some(req.rel.into()),
-            block_number: vec![req.blkno],
-        };
-        self.req_tx.send(req).await?;
-        Ok(())
-    }
-
-    async fn recv_get_page(&mut self) -> anyhow::Result<PagestreamGetPageResponse> {
-        let resp = self.resp_rx.message().await?.unwrap();
-        anyhow::ensure!(
-            resp.status_code == proto::GetPageStatusCode::Ok as i32,
-            "unexpected status code: {}",
-            resp.status_code
-        );
-        Ok(PagestreamGetPageResponse {
-            page: resp.page_image[0].clone(),
-            req: PagestreamGetPageRequest::default(), // dummy
-        })
-    }
-}
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -804,7 +804,7 @@ fn start_pageserver(
        } else {
            None
        },
-        basebackup_cache,
+        basebackup_cache.clone(),
    );

    // Spawn a Pageserver gRPC server task. It will spawn separate tasks for
@@ -816,10 +816,12 @@ fn start_pageserver(
    let mut page_service_grpc = None;
    if let Some(grpc_listener) = grpc_listener {
        page_service_grpc = Some(page_service::spawn_grpc(
+            conf,
            tenant_manager.clone(),
            grpc_auth,
            otel_guard.as_ref().map(|g| g.dispatch.clone()),
            grpc_listener,
+            basebackup_cache,
        )?);
    }

--- a/pageserver/src/feature_resolver.rs
+++ b/pageserver/src/feature_resolver.rs
@@ -45,10 +45,6 @@ impl FeatureResolver {
    }

    /// Evaluate a multivariate feature flag. Currently, we do not support any properties.
-    ///
-    /// Error handling: the caller should inspect the error and decide the behavior when a feature flag
-    /// cannot be evaluated (i.e., default to false if it cannot be resolved). The error should *not* be
-    /// propagated beyond where the feature flag gets resolved.
    pub fn evaluate_multivariate(
        &self,
        flag_key: &str,
@@ -66,29 +62,4 @@ impl FeatureResolver {
            ))
        }
    }
-
-    /// Evaluate a boolean feature flag. Currently, we do not support any properties.
-    ///
-    /// Returns `Ok(())` if the flag is evaluated to true, otherwise returns an error.
-    ///
-    /// Error handling: the caller should inspect the error and decide the behavior when a feature flag
-    /// cannot be evaluated (i.e., default to false if it cannot be resolved). The error should *not* be
-    /// propagated beyond where the feature flag gets resolved.
-    pub fn evaluate_boolean(
-        &self,
-        flag_key: &str,
-        tenant_id: TenantId,
-    ) -> Result<(), PostHogEvaluationError> {
-        if let Some(inner) = &self.inner {
-            inner.feature_store().evaluate_boolean(
-                flag_key,
-                &tenant_id.to_string(),
-                &HashMap::new(),
-            )
-        } else {
-            Err(PostHogEvaluationError::NotAvailable(
-                "PostHog integration is not enabled".to_string(),
-            ))
-        }
-    }
 }
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -353,33 +353,6 @@ paths:
        "200":
          description: OK

-  /v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/mark_invisible:
-    parameters:
-      - name: tenant_shard_id
-        in: path
-        required: true
-        schema:
-          type: string
-      - name: timeline_id
-        in: path
-        required: true
-        schema:
-          type: string
-          format: hex
-    put:
-      requestBody:
-        content:
-          application/json:
-            schema:
-              type: object
-              properties:
-                is_visible:
-                  type: boolean
-                  default: false
-      responses:
-        "200":
-          description: OK
-
  /v1/tenant/{tenant_shard_id}/location_config:
    parameters:
      - name: tenant_shard_id
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -274,7 +274,7 @@ impl Timeline {
        io_concurrency: IoConcurrency,
        ctx: &RequestContext,
    ) -> Vec<Result<Bytes, PageReconstructError>> {
-        //debug_assert_current_span_has_tenant_and_timeline_id();
+        debug_assert_current_span_has_tenant_and_timeline_id();

        let mut slots_filled = 0;
        let page_count = pages.len();
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -5315,7 +5315,6 @@ impl TenantShard {
            l0_compaction_trigger: self.l0_compaction_trigger.clone(),
            l0_flush_global_state: self.l0_flush_global_state.clone(),
            basebackup_prepare_sender: self.basebackup_prepare_sender.clone(),
-            feature_resolver: self.feature_resolver.clone(),
        }
    }

@@ -8360,24 +8359,10 @@ mod tests {
            }

            tline.freeze_and_flush().await?;
-            // Force layers to L1
-            tline
-                .compact(
-                    &cancel,
-                    {
-                        let mut flags = EnumSet::new();
-                        flags.insert(CompactFlags::ForceL0Compaction);
-                        flags
-                    },
-                    &ctx,
-                )
-                .await?;

            if iter % 5 == 0 {
-                let scan_lsn = Lsn(lsn.0 + 1);
-                info!("scanning at {}", scan_lsn);
                let (_, before_delta_file_accessed) =
-                    scan_with_statistics(&tline, &keyspace, scan_lsn, &ctx, io_concurrency.clone())
+                    scan_with_statistics(&tline, &keyspace, lsn, &ctx, io_concurrency.clone())
                        .await?;
                tline
                    .compact(
@@ -8386,14 +8371,13 @@ mod tests {
                            let mut flags = EnumSet::new();
                            flags.insert(CompactFlags::ForceImageLayerCreation);
                            flags.insert(CompactFlags::ForceRepartition);
-                            flags.insert(CompactFlags::ForceL0Compaction);
                            flags
                        },
                        &ctx,
                    )
                    .await?;
                let (_, after_delta_file_accessed) =
-                    scan_with_statistics(&tline, &keyspace, scan_lsn, &ctx, io_concurrency.clone())
+                    scan_with_statistics(&tline, &keyspace, lsn, &ctx, io_concurrency.clone())
                        .await?;
                assert!(
                    after_delta_file_accessed < before_delta_file_accessed,
@@ -8834,8 +8818,6 @@ mod tests {

        let cancel = CancellationToken::new();

-        // Image layer creation happens on the disk_consistent_lsn so we need to force set it now.
-        tline.force_set_disk_consistent_lsn(Lsn(0x40));
        tline
            .compact(
                &cancel,
@@ -8849,7 +8831,8 @@ mod tests {
            )
            .await
            .unwrap();
-        // Image layers are created at repartition LSN
+
+        // Image layers are created at last_record_lsn
        let images = tline
            .inspect_image_layers(Lsn(0x40), &ctx, io_concurrency.clone())
            .await
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -103,7 +103,6 @@ use crate::context::{
    DownloadBehavior, PerfInstrumentFutureExt, RequestContext, RequestContextBuilder,
 };
 use crate::disk_usage_eviction_task::{DiskUsageEvictionInfo, EvictionCandidate, finite_f32};
-use crate::feature_resolver::FeatureResolver;
 use crate::keyspace::{KeyPartitioning, KeySpace};
 use crate::l0_flush::{self, L0FlushGlobalState};
 use crate::metrics::{
@@ -199,7 +198,6 @@ pub struct TimelineResources {
    pub l0_compaction_trigger: Arc<Notify>,
    pub l0_flush_global_state: l0_flush::L0FlushGlobalState,
    pub basebackup_prepare_sender: BasebackupPrepareSender,
-    pub feature_resolver: FeatureResolver,
 }

 pub struct Timeline {
@@ -446,8 +444,6 @@ pub struct Timeline {

    /// A channel to send async requests to prepare a basebackup for the basebackup cache.
    basebackup_prepare_sender: BasebackupPrepareSender,
-
-    feature_resolver: FeatureResolver,
 }

 pub(crate) enum PreviousHeatmap {
@@ -3076,8 +3072,6 @@ impl Timeline {
                wait_lsn_log_slow: tokio::sync::Semaphore::new(1),

                basebackup_prepare_sender: resources.basebackup_prepare_sender,
-
-                feature_resolver: resources.feature_resolver,
            };

            result.repartition_threshold =
@@ -4912,7 +4906,6 @@ impl Timeline {
                    LastImageLayerCreationStatus::Initial,
                    false, // don't yield for L0, we're flushing L0
                )
-                .instrument(info_span!("create_image_layers", mode = %ImageLayerCreationMode::Initial, partition_mode = "initial", lsn = %self.initdb_lsn))
                .await?;
            debug_assert!(
                matches!(is_complete, LastImageLayerCreationStatus::Complete),
@@ -5469,8 +5462,7 @@ impl Timeline {

    /// Returns the image layers generated and an enum indicating whether the process is fully completed.
    /// true = we have generate all image layers, false = we preempt the process for L0 compaction.
-    ///
-    /// `partition_mode` is only for logging purpose and is not used anywhere in this function.
+    #[tracing::instrument(skip_all, fields(%lsn, %mode))]
    async fn create_image_layers(
        self: &Arc<Timeline>,
        partitioning: &KeyPartitioning,
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -1278,55 +1278,11 @@ impl Timeline {
        }

        let gc_cutoff = *self.applied_gc_cutoff_lsn.read();
-        let l0_l1_boundary_lsn = {
-            // We do the repartition on the L0-L1 boundary. All data below the boundary
-            // are compacted by L0 with low read amplification, thus making the `repartition`
-            // function run fast.
-            let guard = self.layers.read().await;
-            guard
-                .all_persistent_layers()
-                .iter()
-                .map(|x| {
-                    // Use the end LSN of delta layers OR the start LSN of image layers.
-                    if x.is_delta {
-                        x.lsn_range.end
-                    } else {
-                        x.lsn_range.start
-                    }
-                })
-                .max()
-        };
-
-        let (partition_mode, partition_lsn) = if cfg!(test)
-            || cfg!(feature = "testing")
-            || self
-                .feature_resolver
-                .evaluate_boolean("image-compaction-boundary", self.tenant_shard_id.tenant_id)
-                .is_ok()
-        {
-            let last_repartition_lsn = self.partitioning.read().1;
-            let lsn = match l0_l1_boundary_lsn {
-                Some(boundary) => gc_cutoff
-                    .max(boundary)
-                    .max(last_repartition_lsn)
-                    .max(self.initdb_lsn)
-                    .max(self.ancestor_lsn),
-                None => self.get_last_record_lsn(),
-            };
-            if lsn <= self.initdb_lsn || lsn <= self.ancestor_lsn {
-                // Do not attempt to create image layers below the initdb or ancestor LSN -- no data below it
-                ("l0_l1_boundary", self.get_last_record_lsn())
-            } else {
-                ("l0_l1_boundary", lsn)
-            }
-        } else {
-            ("latest_record", self.get_last_record_lsn())
-        };

        // 2. Repartition and create image layers if necessary
        match self
            .repartition(
-                partition_lsn,
+                self.get_last_record_lsn(),
                self.get_compaction_target_size(),
                options.flags,
                ctx,
@@ -1345,19 +1301,18 @@ impl Timeline {
                    .extend(sparse_partitioning.into_dense().parts);

                // 3. Create new image layers for partitions that have been modified "enough".
-                let mode = if options
-                    .flags
-                    .contains(CompactFlags::ForceImageLayerCreation)
-                {
-                    ImageLayerCreationMode::Force
-                } else {
-                    ImageLayerCreationMode::Try
-                };
                let (image_layers, outcome) = self
                    .create_image_layers(
                        &partitioning,
                        lsn,
-                        mode,
+                        if options
+                            .flags
+                            .contains(CompactFlags::ForceImageLayerCreation)
+                        {
+                            ImageLayerCreationMode::Force
+                        } else {
+                            ImageLayerCreationMode::Try
+                        },
                        &image_ctx,
                        self.last_image_layer_creation_status
                            .load()
@@ -1365,7 +1320,6 @@ impl Timeline {
                            .clone(),
                        options.flags.contains(CompactFlags::YieldForL0),
                    )
-                    .instrument(info_span!("create_image_layers", mode = %mode, partition_mode = %partition_mode, lsn = %lsn))
                    .await
                    .inspect_err(|err| {
                        if let CreateImageLayersError::GetVectoredError(
@@ -1390,8 +1344,7 @@ impl Timeline {
            }

            Ok(_) => {
-                // This happens very frequently so we don't want to log it.
-                debug!("skipping repartitioning due to image compaction LSN being below GC cutoff");
+                info!("skipping repartitioning due to image compaction LSN being below GC cutoff");
            }

            // Suppress errors when cancelled.
--- a/test_runner/regress/test_layers_from_future.py
+++ b/test_runner/regress/test_layers_from_future.py
@@ -20,9 +20,6 @@ from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind
 from fixtures.utils import query_scalar, wait_until


-@pytest.mark.skip(
-    reason="We won't create future layers any more after https://github.com/neondatabase/neon/pull/10548"
-)
@pytest.mark.parametrize(
    "attach_mode",
    ["default_generation", "same_generation"],
Author	SHA1	Message	Date
Elizabeth Murray	f076d51643	Remove unnecessary client code. Use traits instead of enums in pagebench.	2025-05-28 07:46:52 -07:00
Elizabeth Murray	bfc4f5162f	Remove debug info messages from client grpc code.	2025-05-28 06:49:21 -07:00
Elizabeth Murray	e4618c11c9	Add new files for client grpc.	2025-05-28 06:48:46 -07:00
Elizabeth Murray	df32cc153c	Add grpc pagebench for communicator grpc.	2025-05-28 06:14:56 -07:00