diff --git a/.circleci/config.yml b/.circleci/config.yml index 2f92095a83..68317e62ef 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -7,7 +7,7 @@ executors: zenith-build-executor: resource_class: xlarge docker: - - image: cimg/rust:1.52.1 + - image: cimg/rust:1.55.0 jobs: check-codestyle: @@ -110,7 +110,7 @@ jobs: # Require an exact match. While an out of date cache might speed up the build, # there's no way to clean out old packages, so the cache grows every time something # changes. - - v03-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }} + - v04-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }} # Build the rust code, including test binaries - run: @@ -128,7 +128,7 @@ jobs: - save_cache: name: Save rust cache - key: v03-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }} + key: v04-rust-cache-deps-<< parameters.build_type >>-{{ checksum "Cargo.lock" }} paths: - ~/.cargo/registry - ~/.cargo/git @@ -182,6 +182,21 @@ jobs: paths: - "*" + check-python: + executor: python/default + steps: + - checkout + - run: + name: Install pipenv & deps + working_directory: test_runner + command: | + pip install pipenv + pipenv install --dev + - run: + name: Run yapf to ensure code format + working_directory: test_runner + command: pipenv run yapf --recursive --diff . + run-pytest: #description: "Run pytest" executor: python/default @@ -337,6 +352,7 @@ workflows: build_and_test: jobs: - check-codestyle + - check-python - build-postgres: name: build-postgres-<< matrix.build_type >> matrix: diff --git a/Dockerfile b/Dockerfile index 0bbf03d13c..528f29597f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,8 +38,6 @@ RUN apt-get update && apt-get -yq install libreadline-dev libseccomp-dev openssl COPY --from=build /zenith/target/release/pageserver /usr/local/bin COPY --from=build /zenith/target/release/safekeeper /usr/local/bin -# TODO: temporary alias for compatibility, see https://github.com/zenithdb/zenith/pull/740 -RUN ln -s /usr/local/bin/safekeeper /usr/local/bin/wal_acceptor COPY --from=build /zenith/target/release/proxy /usr/local/bin COPY --from=pg-build /zenith/tmp_install postgres_install COPY docker-entrypoint.sh /docker-entrypoint.sh diff --git a/Dockerfile.alpine b/Dockerfile.alpine index e0b569de22..dafb7eaf6b 100644 --- a/Dockerfile.alpine +++ b/Dockerfile.alpine @@ -82,8 +82,6 @@ RUN apk add --update openssl build-base libseccomp-dev RUN apk --no-cache --update --repository https://dl-cdn.alpinelinux.org/alpine/edge/testing add rocksdb COPY --from=build /zenith/target/release/pageserver /usr/local/bin COPY --from=build /zenith/target/release/safekeeper /usr/local/bin -# TODO: temporary alias for compatibility, see https://github.com/zenithdb/zenith/pull/740 -RUN ln -s /usr/local/bin/safekeeper /usr/local/bin/wal_acceptor COPY --from=build /zenith/target/release/proxy /usr/local/bin COPY --from=pg-build /zenith/tmp_install /usr/local COPY docker-entrypoint.sh /docker-entrypoint.sh diff --git a/README.md b/README.md index 1e0f20fd45..977d015bfc 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libsec libssl-dev clang pkg-config libpq-dev ``` -[Rust] 1.52 or later is also required. +[Rust] 1.55 or later is also required. To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `tmp_install/bin` and `tmp_install/lib`, respectively. 
diff --git a/control_plane/src/compute.rs b/control_plane/src/compute.rs index f33d77720c..c0107a431e 100644 --- a/control_plane/src/compute.rs +++ b/control_plane/src/compute.rs @@ -84,25 +84,53 @@ impl ComputeControlPlane { } } + // FIXME: see also parse_point_in_time in branches.rs. + fn parse_point_in_time( + &self, + tenantid: ZTenantId, + s: &str, + ) -> Result<(ZTimelineId, Option)> { + let mut strings = s.split('@'); + let name = strings.next().unwrap(); + + let lsn: Option; + if let Some(lsnstr) = strings.next() { + lsn = Some( + Lsn::from_str(lsnstr) + .with_context(|| "invalid LSN in point-in-time specification")?, + ); + } else { + lsn = None + } + + // Resolve the timeline ID, given the human-readable branch name + let timeline_id = self + .pageserver + .branch_get_by_name(&tenantid, name)? + .timeline_id; + + Ok((timeline_id, lsn)) + } + pub fn new_node( &mut self, tenantid: ZTenantId, - branch_name: &str, + name: &str, + timeline_spec: &str, port: Option, ) -> Result> { - let timeline_id = self - .pageserver - .branch_get_by_name(&tenantid, branch_name)? - .timeline_id; + // Resolve the human-readable timeline spec into timeline ID and LSN + let (timelineid, lsn) = self.parse_point_in_time(tenantid, timeline_spec)?; let port = port.unwrap_or_else(|| self.get_port()); let node = Arc::new(PostgresNode { - name: branch_name.to_owned(), + name: name.to_owned(), address: SocketAddr::new("127.0.0.1".parse().unwrap(), port), env: self.env.clone(), pageserver: Arc::clone(&self.pageserver), is_test: false, - timelineid: timeline_id, + timelineid, + lsn, tenantid, uses_wal_proposer: false, }); @@ -127,6 +155,7 @@ pub struct PostgresNode { pageserver: Arc, is_test: bool, pub timelineid: ZTimelineId, + pub lsn: Option, // if it's a read-only node. 
None for primary pub tenantid: ZTenantId, uses_wal_proposer: bool, } @@ -161,9 +190,12 @@ impl PostgresNode { let port: u16 = conf.parse_field("port", &context)?; let timelineid: ZTimelineId = conf.parse_field("zenith.zenith_timeline", &context)?; let tenantid: ZTenantId = conf.parse_field("zenith.zenith_tenant", &context)?; - let uses_wal_proposer = conf.get("wal_acceptors").is_some(); + // parse recovery_target_lsn, if any + let recovery_target_lsn: Option = + conf.parse_field_optional("recovery_target_lsn", &context)?; + // ok now Ok(PostgresNode { address: SocketAddr::new("127.0.0.1".parse().unwrap(), port), @@ -172,6 +204,7 @@ impl PostgresNode { pageserver: Arc::clone(pageserver), is_test: false, timelineid, + lsn: recovery_target_lsn, tenantid, uses_wal_proposer, }) @@ -233,7 +266,7 @@ impl PostgresNode { // Read the archive directly from the `CopyOutReader` tar::Archive::new(copyreader) .unpack(&self.pgdata()) - .with_context(|| "extracting page backup failed")?; + .with_context(|| "extracting base backup failed")?; Ok(()) } @@ -301,6 +334,9 @@ impl PostgresNode { conf.append("zenith.page_server_connstring", &pageserver_connstr); conf.append("zenith.zenith_tenant", &self.tenantid.to_string()); conf.append("zenith.zenith_timeline", &self.timelineid.to_string()); + if let Some(lsn) = self.lsn { + conf.append("recovery_target_lsn", &lsn.to_string()); + } conf.append_line(""); // Configure the node to stream WAL directly to the pageserver @@ -314,7 +350,9 @@ impl PostgresNode { } fn load_basebackup(&self) -> Result<()> { - let lsn = if self.uses_wal_proposer { + let backup_lsn = if let Some(lsn) = self.lsn { + Some(lsn) + } else if self.uses_wal_proposer { // LSN 0 means that it is bootstrap and we need to download just // latest data from the pageserver. That is a bit clumsy but whole bootstrap // procedure evolves quite actively right now, so let's think about it again @@ -329,7 +367,7 @@ impl PostgresNode { None }; - self.do_basebackup(lsn)?; + self.do_basebackup(backup_lsn)?; Ok(()) } @@ -406,6 +444,10 @@ impl PostgresNode { // 3. Load basebackup self.load_basebackup()?; + if self.lsn.is_some() { + File::create(self.pgdata().join("standby.signal"))?; + } + // 4. Finally start the compute node postgres println!("Starting postgres node at '{}'", self.connstr()); self.pg_ctl(&["start"], auth_token) diff --git a/control_plane/src/postgresql_conf.rs b/control_plane/src/postgresql_conf.rs index bcd463999b..7f50fe9c2f 100644 --- a/control_plane/src/postgresql_conf.rs +++ b/control_plane/src/postgresql_conf.rs @@ -83,6 +83,22 @@ impl PostgresConf { .with_context(|| format!("could not parse '{}' option {}", field_name, context)) } + pub fn parse_field_optional(&self, field_name: &str, context: &str) -> Result> + where + T: FromStr, + ::Err: std::error::Error + Send + Sync + 'static, + { + if let Some(val) = self.get(field_name) { + let result = val + .parse::() + .with_context(|| format!("could not parse '{}' option {}", field_name, context))?; + + Ok(Some(result)) + } else { + Ok(None) + } + } + /// /// Note: if you call this multiple times for the same option, the config /// file will a line for each call. 
It would be nice to have a function diff --git a/pageserver/src/basebackup.rs b/pageserver/src/basebackup.rs index a4ee89918c..def815a32d 100644 --- a/pageserver/src/basebackup.rs +++ b/pageserver/src/basebackup.rs @@ -13,6 +13,7 @@ use anyhow::Result; use bytes::{BufMut, BytesMut}; use log::*; +use std::fmt::Write as FmtWrite; use std::io; use std::io::Write; use std::sync::Arc; @@ -83,7 +84,7 @@ impl<'a> Basebackup<'a> { info!( "taking basebackup lsn={}, prev_lsn={}", - backup_prev, backup_lsn + backup_lsn, backup_prev ); Ok(Basebackup { @@ -248,13 +249,7 @@ impl<'a> Basebackup<'a> { let mut pg_control = ControlFileData::decode(&pg_control_bytes)?; let mut checkpoint = CheckPoint::decode(&checkpoint_bytes)?; - // Generate new pg_control and WAL needed for bootstrap - let checkpoint_segno = self.lsn.segment_number(pg_constants::WAL_SEGMENT_SIZE); - let checkpoint_lsn = XLogSegNoOffsetToRecPtr( - checkpoint_segno, - XLOG_SIZE_OF_XLOG_LONG_PHD as u32, - pg_constants::WAL_SEGMENT_SIZE, - ); + // Generate new pg_control needed for bootstrap checkpoint.redo = normalize_lsn(self.lsn, pg_constants::WAL_SEGMENT_SIZE).0; //reset some fields we don't want to preserve @@ -263,19 +258,24 @@ impl<'a> Basebackup<'a> { checkpoint.oldestActiveXid = 0; //save new values in pg_control - pg_control.checkPoint = checkpoint_lsn; + pg_control.checkPoint = 0; pg_control.checkPointCopy = checkpoint; pg_control.state = pg_constants::DB_SHUTDOWNED; // add zenith.signal file - let xl_prev = if self.prev_record_lsn == Lsn(0) { - 0xBAD0 // magic value to indicate that we don't know prev_lsn + let mut zenith_signal = String::new(); + if self.prev_record_lsn == Lsn(0) { + if self.lsn == self.timeline.get_ancestor_lsn() { + write!(zenith_signal, "PREV LSN: none")?; + } else { + write!(zenith_signal, "PREV LSN: invalid")?; + } } else { - self.prev_record_lsn.0 - }; + write!(zenith_signal, "PREV LSN: {}", self.prev_record_lsn)?; + } self.ar.append( - &new_tar_header("zenith.signal", 8)?, - &xl_prev.to_le_bytes()[..], + &new_tar_header("zenith.signal", zenith_signal.len() as u64)?, + zenith_signal.as_bytes(), )?; //send pg_control @@ -284,14 +284,15 @@ impl<'a> Basebackup<'a> { self.ar.append(&header, &pg_control_bytes[..])?; //send wal segment + let segno = self.lsn.segment_number(pg_constants::WAL_SEGMENT_SIZE); let wal_file_name = XLogFileName( 1, // FIXME: always use Postgres timeline 1 - checkpoint_segno, + segno, pg_constants::WAL_SEGMENT_SIZE, ); let wal_file_path = format!("pg_wal/{}", wal_file_name); let header = new_tar_header(&wal_file_path, pg_constants::WAL_SEGMENT_SIZE as u64)?; - let wal_seg = generate_wal_segment(&pg_control); + let wal_seg = generate_wal_segment(segno, pg_control.system_identifier); assert!(wal_seg.len() == pg_constants::WAL_SEGMENT_SIZE); self.ar.append(&header, &wal_seg[..])?; Ok(()) diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index 4e2c02a83a..3a577476dc 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -2,7 +2,6 @@ // Main entry point for the Page Server executable // -use pageserver::defaults::*; use serde::{Deserialize, Serialize}; use std::{ env, @@ -28,13 +27,8 @@ use clap::{App, Arg, ArgMatches}; use daemonize::Daemonize; use pageserver::{ - branches, - defaults::{ - DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_PG_LISTEN_ADDR, - DEFAULT_RELISH_STORAGE_MAX_CONCURRENT_SYNC_LIMITS, - }, - http, page_service, relish_storage, tenant_mgr, PageServerConf, RelishStorageConfig, - RelishStorageKind, S3Config, LOG_FILE_NAME, + 
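A note on the `zenith.signal` change in `basebackup.rs` above: the file used to carry eight little-endian bytes (with `0xBAD0` as the "unknown" magic); it now carries a single human-readable line, one of `PREV LSN: none` (the backup is taken exactly at the branch point, so there is genuinely no previous record), `PREV LSN: invalid` (the previous record LSN is not known), or `PREV LSN: <lsn>`. The real consumer of this file lives on the compute side and is not part of this diff; the sketch below is purely illustrative of the format, and all names in it are hypothetical.

```rust
/// Illustrative only: the three payloads zenith.signal can now carry.
/// The actual reader is in the compute-side code, not in this patch.
enum PrevLsn<'a> {
    None,         // backup taken exactly at the branch point
    Invalid,      // previous record LSN unknown
    Lsn(&'a str), // e.g. "0/16B3748"
}

fn parse_zenith_signal(contents: &str) -> Option<PrevLsn<'_>> {
    match contents.trim().strip_prefix("PREV LSN: ")? {
        "none" => Some(PrevLsn::None),
        "invalid" => Some(PrevLsn::Invalid),
        lsn => Some(PrevLsn::Lsn(lsn)),
    }
}
```

Presumably the switch from a fixed-width binary value to a text line also makes the file easier to extend later and to inspect by hand in a backup tarball.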
branches, defaults::*, http, page_service, relish_storage, tenant_mgr, PageServerConf, + RelishStorageConfig, RelishStorageKind, S3Config, LOG_FILE_NAME, }; use zenith_utils::http::endpoint; use zenith_utils::postgres_backend; @@ -42,7 +36,7 @@ use zenith_utils::postgres_backend; use const_format::formatcp; /// String arguments that can be declared via CLI or config file -#[derive(Serialize, Deserialize)] +#[derive(Serialize, Deserialize, PartialEq, Eq, Clone)] struct CfgFileParams { listen_pg_addr: Option, listen_http_addr: Option, @@ -53,12 +47,21 @@ struct CfgFileParams { pg_distrib_dir: Option, auth_validation_public_key_path: Option, auth_type: Option, - // see https://github.com/alexcrichton/toml-rs/blob/6c162e6562c3e432bf04c82a3d1d789d80761a86/examples/enum_external.rs for enum deserialisation examples - relish_storage: Option, relish_storage_max_concurrent_sync: Option, + ///////////////////////////////// + //// Don't put `Option` and other "simple" values below. + //// + /// `Option` is a table in TOML. + /// Values in TOML cannot be defined after tables (other tables can), + /// and [`toml`] crate serializes all fields in the order of their appearance. + //////////////////////////////// + relish_storage: Option, } -#[derive(Serialize, Deserialize, Clone)] +#[derive(Serialize, Deserialize, PartialEq, Eq, Clone)] +// Without this attribute, enums with values won't be serialized by the `toml` library (but can be deserialized nonetheless!). +// See https://github.com/alexcrichton/toml-rs/blob/6c162e6562c3e432bf04c82a3d1d789d80761a86/examples/enum_external.rs for the examples +#[serde(untagged)] enum RelishStorage { Local { local_path: String, @@ -552,7 +555,7 @@ fn start_pageserver(conf: &'static PageServerConf) -> Result<()> { info!("Got SIGQUIT. Terminate pageserver in immediate shutdown mode"); exit(111); } - SIGTERM => { + SIGINT | SIGTERM => { info!("Got SIGINT/SIGTERM. 
Terminate gracefully in fast shutdown mode"); // Terminate postgres backends postgres_backend::set_pgbackend_shutdown_requested(); @@ -577,11 +580,142 @@ fn start_pageserver(conf: &'static PageServerConf) -> Result<()> { info!("Pageserver shut down successfully completed"); exit(0); } - _ => { - debug!("Unknown signal."); + unknown_signal => { + debug!("Unknown signal {}", unknown_signal); } } } Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn page_server_conf_toml_serde() { + let params = CfgFileParams { + listen_pg_addr: Some("listen_pg_addr_VALUE".to_string()), + listen_http_addr: Some("listen_http_addr_VALUE".to_string()), + checkpoint_distance: Some("checkpoint_distance_VALUE".to_string()), + checkpoint_period: Some("checkpoint_period_VALUE".to_string()), + gc_horizon: Some("gc_horizon_VALUE".to_string()), + gc_period: Some("gc_period_VALUE".to_string()), + pg_distrib_dir: Some("pg_distrib_dir_VALUE".to_string()), + auth_validation_public_key_path: Some( + "auth_validation_public_key_path_VALUE".to_string(), + ), + auth_type: Some("auth_type_VALUE".to_string()), + relish_storage: Some(RelishStorage::Local { + local_path: "relish_storage_local_VALUE".to_string(), + }), + relish_storage_max_concurrent_sync: Some( + "relish_storage_max_concurrent_sync_VALUE".to_string(), + ), + }; + + let toml_string = toml::to_string(¶ms).expect("Failed to serialize correct config"); + let toml_pretty_string = + toml::to_string_pretty(¶ms).expect("Failed to serialize correct config"); + assert_eq!( + r#"listen_pg_addr = 'listen_pg_addr_VALUE' +listen_http_addr = 'listen_http_addr_VALUE' +checkpoint_distance = 'checkpoint_distance_VALUE' +checkpoint_period = 'checkpoint_period_VALUE' +gc_horizon = 'gc_horizon_VALUE' +gc_period = 'gc_period_VALUE' +pg_distrib_dir = 'pg_distrib_dir_VALUE' +auth_validation_public_key_path = 'auth_validation_public_key_path_VALUE' +auth_type = 'auth_type_VALUE' +relish_storage_max_concurrent_sync = 'relish_storage_max_concurrent_sync_VALUE' + +[relish_storage] +local_path = 'relish_storage_local_VALUE' +"#, + toml_pretty_string + ); + + let params_from_serialized: CfgFileParams = toml::from_str(&toml_string) + .expect("Failed to deserialize the serialization result of the config"); + let params_from_serialized_pretty: CfgFileParams = toml::from_str(&toml_pretty_string) + .expect("Failed to deserialize the prettified serialization result of the config"); + assert!( + params_from_serialized == params, + "Expected the same config in the end of config -> serialize -> deserialize chain" + ); + assert!( + params_from_serialized_pretty == params, + "Expected the same config in the end of config -> serialize pretty -> deserialize chain" + ); + } + + #[test] + fn credentials_omitted_during_serialization() { + let params = CfgFileParams { + listen_pg_addr: Some("listen_pg_addr_VALUE".to_string()), + listen_http_addr: Some("listen_http_addr_VALUE".to_string()), + checkpoint_distance: Some("checkpoint_distance_VALUE".to_string()), + checkpoint_period: Some("checkpoint_period_VALUE".to_string()), + gc_horizon: Some("gc_horizon_VALUE".to_string()), + gc_period: Some("gc_period_VALUE".to_string()), + pg_distrib_dir: Some("pg_distrib_dir_VALUE".to_string()), + auth_validation_public_key_path: Some( + "auth_validation_public_key_path_VALUE".to_string(), + ), + auth_type: Some("auth_type_VALUE".to_string()), + relish_storage: Some(RelishStorage::AwsS3 { + bucket_name: "bucket_name_VALUE".to_string(), + bucket_region: "bucket_region_VALUE".to_string(), + access_key_id: 
Some("access_key_id_VALUE".to_string()), + secret_access_key: Some("secret_access_key_VALUE".to_string()), + }), + relish_storage_max_concurrent_sync: Some( + "relish_storage_max_concurrent_sync_VALUE".to_string(), + ), + }; + + let toml_string = toml::to_string(¶ms).expect("Failed to serialize correct config"); + let toml_pretty_string = + toml::to_string_pretty(¶ms).expect("Failed to serialize correct config"); + assert_eq!( + r#"listen_pg_addr = 'listen_pg_addr_VALUE' +listen_http_addr = 'listen_http_addr_VALUE' +checkpoint_distance = 'checkpoint_distance_VALUE' +checkpoint_period = 'checkpoint_period_VALUE' +gc_horizon = 'gc_horizon_VALUE' +gc_period = 'gc_period_VALUE' +pg_distrib_dir = 'pg_distrib_dir_VALUE' +auth_validation_public_key_path = 'auth_validation_public_key_path_VALUE' +auth_type = 'auth_type_VALUE' +relish_storage_max_concurrent_sync = 'relish_storage_max_concurrent_sync_VALUE' + +[relish_storage] +bucket_name = 'bucket_name_VALUE' +bucket_region = 'bucket_region_VALUE' +"#, + toml_pretty_string + ); + + let params_from_serialized: CfgFileParams = toml::from_str(&toml_string) + .expect("Failed to deserialize the serialization result of the config"); + let params_from_serialized_pretty: CfgFileParams = toml::from_str(&toml_pretty_string) + .expect("Failed to deserialize the prettified serialization result of the config"); + + let mut expected_params = params; + expected_params.relish_storage = Some(RelishStorage::AwsS3 { + bucket_name: "bucket_name_VALUE".to_string(), + bucket_region: "bucket_region_VALUE".to_string(), + access_key_id: None, + secret_access_key: None, + }); + assert!( + params_from_serialized == expected_params, + "Expected the config without credentials in the end of a 'config -> serialize -> deserialize' chain" + ); + assert!( + params_from_serialized_pretty == expected_params, + "Expected the config without credentials in the end of a 'config -> serialize pretty -> deserialize' chain" + ); + } +} diff --git a/pageserver/src/layered_repository.rs b/pageserver/src/layered_repository.rs index 7c992ddb0c..b17d08a33a 100644 --- a/pageserver/src/layered_repository.rs +++ b/pageserver/src/layered_repository.rs @@ -678,6 +678,10 @@ pub struct LayeredTimeline { /// Public interface functions impl Timeline for LayeredTimeline { + fn get_ancestor_lsn(&self) -> Lsn { + self.ancestor_lsn + } + /// Wait until WAL has been received up to the given LSN. fn wait_lsn(&self, lsn: Lsn) -> Result<()> { // This should never be called from the WAL receiver thread, because that could lead @@ -1340,50 +1344,54 @@ impl LayeredTimeline { timeline_dir.sync_all()?; } - // Save the metadata, with updated 'disk_consistent_lsn', to a - // file in the timeline dir. After crash, we will restart WAL - // streaming and processing from that point. + // If we were able to advance 'disk_consistent_lsn', save it the metadata file. + // After crash, we will restart WAL streaming and processing from that point. + let old_disk_consistent_lsn = self.disk_consistent_lsn.load(); + if disk_consistent_lsn != old_disk_consistent_lsn { + assert!(disk_consistent_lsn > old_disk_consistent_lsn); - // We can only save a valid 'prev_record_lsn' value on disk if we - // flushed *all* in-memory changes to disk. We only track - // 'prev_record_lsn' in memory for the latest processed record, so we - // don't remember what the correct value that corresponds to some old - // LSN is. But if we flush everything, then the value corresponding - // current 'last_record_lsn' is correct and we can store it on disk. 
- let ondisk_prev_record_lsn = if disk_consistent_lsn == last_record_lsn { - Some(prev_record_lsn) - } else { - None - }; + // We can only save a valid 'prev_record_lsn' value on disk if we + // flushed *all* in-memory changes to disk. We only track + // 'prev_record_lsn' in memory for the latest processed record, so we + // don't remember what the correct value that corresponds to some old + // LSN is. But if we flush everything, then the value corresponding + // current 'last_record_lsn' is correct and we can store it on disk. + let ondisk_prev_record_lsn = if disk_consistent_lsn == last_record_lsn { + Some(prev_record_lsn) + } else { + None + }; - let ancestor_timelineid = self.ancestor_timeline.as_ref().map(|x| x.timelineid); + let ancestor_timelineid = self.ancestor_timeline.as_ref().map(|x| x.timelineid); - let metadata = TimelineMetadata { - disk_consistent_lsn, - prev_record_lsn: ondisk_prev_record_lsn, - ancestor_timeline: ancestor_timelineid, - ancestor_lsn: self.ancestor_lsn, - }; - LayeredRepository::save_metadata( - self.conf, - self.timelineid, - self.tenantid, - &metadata, - false, - )?; - if self.upload_relishes { - schedule_timeline_upload(()) - // schedule_timeline_upload( - // self.tenantid, - // self.timelineid, - // layer_uploads, - // disk_consistent_lsn, - // }); + let metadata = TimelineMetadata { + disk_consistent_lsn, + prev_record_lsn: ondisk_prev_record_lsn, + ancestor_timeline: ancestor_timelineid, + ancestor_lsn: self.ancestor_lsn, + }; + LayeredRepository::save_metadata( + self.conf, + self.timelineid, + self.tenantid, + &metadata, + false, + )?; + + // Also update the in-memory copy + self.disk_consistent_lsn.store(disk_consistent_lsn); + + if self.upload_relishes { + schedule_timeline_upload(()) + // schedule_timeline_upload( + // self.tenantid, + // self.timelineid, + // layer_uploads, + // disk_consistent_lsn, + // }); + } } - // Also update the in-memory copy - self.disk_consistent_lsn.store(disk_consistent_lsn); - Ok(()) } diff --git a/pageserver/src/relish_storage/rust_s3.rs b/pageserver/src/relish_storage/rust_s3.rs index d32d357b27..5dddaa36ca 100644 --- a/pageserver/src/relish_storage/rust_s3.rs +++ b/pageserver/src/relish_storage/rust_s3.rs @@ -115,9 +115,9 @@ impl RelishStorage for RustS3 { .delete_object(path.key()) .await .with_context(|| format!("Failed to delete s3 object with key {}", path.key()))?; - if code != 200 { + if code != 204 { Err(anyhow::format_err!( - "Received non-200 exit code during deleting object with key '{}', code: {}", + "Received non-204 exit code during deleting object with key '{}', code: {}", path.key(), code )) diff --git a/pageserver/src/repository.rs b/pageserver/src/repository.rs index 56e551a275..3009d51352 100644 --- a/pageserver/src/repository.rs +++ b/pageserver/src/repository.rs @@ -119,6 +119,9 @@ pub trait Timeline: Send + Sync { /// Get a list of all existing non-relational objects fn list_nonrels(&self, lsn: Lsn) -> Result>; + /// Get the LSN where this branch was created + fn get_ancestor_lsn(&self) -> Lsn; + //------------------------------------------------------------------------------ // Public PUT functions, to update the repository with new page versions. 
// diff --git a/pageserver/src/restore_local_repo.rs b/pageserver/src/restore_local_repo.rs index 60eb9ce278..8afa2676e2 100644 --- a/pageserver/src/restore_local_repo.rs +++ b/pageserver/src/restore_local_repo.rs @@ -7,10 +7,10 @@ use postgres_ffi::nonrelfile_utils::slru_may_delete_clogsegment; use std::cmp::min; use std::fs; use std::fs::File; -use std::io::Read; -use std::path::Path; +use std::io::{Read, Seek, SeekFrom}; +use std::path::{Path, PathBuf}; -use anyhow::{bail, Result}; +use anyhow::{anyhow, bail, Result}; use bytes::{Buf, Bytes}; use tracing::*; @@ -37,6 +37,8 @@ pub fn import_timeline_from_postgres_datadir( writer: &dyn TimelineWriter, lsn: Lsn, ) -> Result<()> { + let mut pg_control: Option = None; + // Scan 'global' for direntry in fs::read_dir(path.join("global"))? { let direntry = direntry?; @@ -44,7 +46,7 @@ pub fn import_timeline_from_postgres_datadir( None => continue, Some("pg_control") => { - import_control_file(writer, lsn, &direntry.path())?; + pg_control = Some(import_control_file(writer, lsn, &direntry.path())?); } Some("pg_filenode.map") => import_nonrel_file( writer, @@ -127,6 +129,18 @@ pub fn import_timeline_from_postgres_datadir( writer.advance_last_record_lsn(lsn); + // Import WAL. This is needed even when starting from a shutdown checkpoint, because + // this reads the checkpoint record itself, advancing the tip of the timeline to + // *after* the checkpoint record. And crucially, it initializes the 'prev_lsn' + let pg_control = pg_control.ok_or_else(|| anyhow!("pg_control file not found"))?; + import_wal( + &path.join("pg_wal"), + writer, + Lsn(pg_control.checkPointCopy.redo), + lsn, + &mut pg_control.checkPointCopy.clone(), + )?; + Ok(()) } @@ -212,7 +226,11 @@ fn import_nonrel_file( /// /// The control file is imported as is, but we also extract the checkpoint record /// from it and store it separated. -fn import_control_file(timeline: &dyn TimelineWriter, lsn: Lsn, path: &Path) -> Result<()> { +fn import_control_file( + timeline: &dyn TimelineWriter, + lsn: Lsn, + path: &Path, +) -> Result { let mut file = File::open(path)?; let mut buffer = Vec::new(); // read the whole file @@ -233,7 +251,7 @@ fn import_control_file(timeline: &dyn TimelineWriter, lsn: Lsn, path: &Path) -> let checkpoint_bytes = pg_control.checkPointCopy.encode(); timeline.put_page_image(RelishTag::Checkpoint, 0, lsn, checkpoint_bytes)?; - Ok(()) + Ok(pg_control) } /// @@ -285,6 +303,104 @@ fn import_slru_file( Ok(()) } +/// Scan PostgreSQL WAL files in given directory and load all records between +/// 'startpoint' and 'endpoint' into the repository. 
+fn import_wal( + walpath: &Path, + timeline: &dyn TimelineWriter, + startpoint: Lsn, + endpoint: Lsn, + checkpoint: &mut CheckPoint, +) -> Result<()> { + let mut waldecoder = WalStreamDecoder::new(startpoint); + + let mut segno = startpoint.segment_number(pg_constants::WAL_SEGMENT_SIZE); + let mut offset = startpoint.segment_offset(pg_constants::WAL_SEGMENT_SIZE); + let mut last_lsn = startpoint; + + while last_lsn <= endpoint { + // FIXME: assume postgresql tli 1 for now + let filename = XLogFileName(1, segno, pg_constants::WAL_SEGMENT_SIZE); + let mut buf = Vec::new(); + + // Read local file + let mut path = walpath.join(&filename); + + // It could be as .partial + if !PathBuf::from(&path).exists() { + path = walpath.join(filename + ".partial"); + } + + // Slurp the WAL file + let mut file = File::open(&path)?; + + if offset > 0 { + file.seek(SeekFrom::Start(offset as u64))?; + } + + let nread = file.read_to_end(&mut buf)?; + if nread != pg_constants::WAL_SEGMENT_SIZE - offset as usize { + // Maybe allow this for .partial files? + error!("read only {} bytes from WAL file", nread); + } + + waldecoder.feed_bytes(&buf); + + let mut nrecords = 0; + while last_lsn <= endpoint { + if let Some((lsn, recdata)) = waldecoder.poll_decode()? { + let mut checkpoint_modified = false; + + let decoded = decode_wal_record(recdata.clone()); + save_decoded_record( + checkpoint, + &mut checkpoint_modified, + timeline, + &decoded, + recdata, + lsn, + )?; + last_lsn = lsn; + + if checkpoint_modified { + let checkpoint_bytes = checkpoint.encode(); + timeline.put_page_image( + RelishTag::Checkpoint, + 0, + last_lsn, + checkpoint_bytes, + )?; + } + + // Now that this record has been fully handled, including updating the + // checkpoint data, let the repository know that it is up-to-date to this LSN + timeline.advance_last_record_lsn(last_lsn); + nrecords += 1; + + trace!("imported record at {} (end {})", lsn, endpoint); + } + } + + debug!("imported {} records up to {}", nrecords, last_lsn); + + segno += 1; + offset = 0; + } + + if last_lsn != startpoint { + debug!( + "reached end of WAL at {}, updating checkpoint info", + last_lsn + ); + + timeline.advance_last_record_lsn(last_lsn); + } else { + info!("no WAL to import at {}", last_lsn); + } + + Ok(()) +} + /// /// Helper function to parse a WAL record and call the Timeline's PUT functions for all the /// relations/pages that the record affects. diff --git a/pageserver/src/tenant_mgr.rs b/pageserver/src/tenant_mgr.rs index 1712cf1b8a..be3a36fda4 100644 --- a/pageserver/src/tenant_mgr.rs +++ b/pageserver/src/tenant_mgr.rs @@ -9,8 +9,8 @@ use crate::PageServerConf; use anyhow::{anyhow, bail, Context, Result}; use lazy_static::lazy_static; use log::{debug, info}; -use std::collections::hash_map::Entry; use std::collections::HashMap; +use std::fmt; use std::fs; use std::str::FromStr; use std::sync::atomic::{AtomicBool, Ordering}; @@ -19,13 +19,47 @@ use std::thread::JoinHandle; use zenith_utils::zid::{ZTenantId, ZTimelineId}; lazy_static! { - static ref REPOSITORY: Mutex>> = - Mutex::new(HashMap::new()); + static ref TENANTS: Mutex> = Mutex::new(HashMap::new()); } -fn access_repository() -> MutexGuard<'static, HashMap>> { - REPOSITORY.lock().unwrap() +struct Tenant { + state: TenantState, + repo: Option>, } + +#[derive(Debug)] +enum TenantState { + // This tenant only exists in cloud storage. It cannot be accessed. + CloudOnly, + // This tenant exists in cloud storage, and we are currently downloading it to local disk. 
+ // It cannot be accessed yet, not until it's been fully downloaded to local disk. + Downloading, + // All data for this tenant is complete on local disk, but we haven't loaded the Repository, + // Timeline and Layer structs into memory yet, so it cannot be accessed yet. + //Ready, + // This tenant exists on local disk, and the layer map has been loaded into memory. + // The local disk might have some newer files that don't exist in cloud storage yet. + Active, + // This tenant exists on local disk, and the layer map has been loaded into memory. + // The local disk might have some newer files that don't exist in cloud storage yet. + // The tenant cannot be accessed anymore for any reason, but graceful shutdown. + //Stopping, +} + +impl fmt::Display for TenantState { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TenantState::CloudOnly => f.write_str("CloudOnly"), + TenantState::Downloading => f.write_str("Downloading"), + TenantState::Active => f.write_str("Active"), + } + } +} + +fn access_tenants() -> MutexGuard<'static, HashMap> { + TENANTS.lock().unwrap() +} + struct TenantHandleEntry { checkpointer_handle: Option>, gc_handle: Option>, @@ -41,17 +75,25 @@ lazy_static! { static SHUTDOWN_REQUESTED: AtomicBool = AtomicBool::new(false); pub fn init(conf: &'static PageServerConf) { - let mut m = access_repository(); for dir_entry in fs::read_dir(conf.tenants_path()).unwrap() { let tenantid = ZTenantId::from_str(dir_entry.unwrap().file_name().to_str().unwrap()).unwrap(); - let repo = init_repo(conf, tenantid); + + { + let mut m = access_tenants(); + let tenant = Tenant { + state: TenantState::CloudOnly, + repo: None, + }; + m.insert(tenantid, tenant); + } + + init_repo(conf, tenantid); info!("initialized storage for tenant: {}", &tenantid); - m.insert(tenantid, repo); } } -fn init_repo(conf: &'static PageServerConf, tenant_id: ZTenantId) -> Arc { +fn init_repo(conf: &'static PageServerConf, tenant_id: ZTenantId) { // Set up a WAL redo manager, for applying WAL records. 
let walredo_mgr = PostgresRedoManager::new(conf, tenant_id); @@ -74,7 +116,10 @@ fn init_repo(conf: &'static PageServerConf, tenant_id: ZTenantId) -> Arc init_timeline(o.get().as_ref(), timeline_id), - Entry::Vacant(v) => { - log::info!("New repo initialized"); - let new_repo = init_repo(conf, tenant_id); - init_timeline(new_repo.as_ref(), timeline_id); - v.insert(new_repo); + + { + let mut m = access_tenants(); + let mut tenant = m.get_mut(&tenant_id).unwrap(); + tenant.state = TenantState::Downloading; + match &tenant.repo { + Some(repo) => init_timeline(repo.as_ref(), timeline_id), + None => { + log::info!("Initialize new repo"); + } } } + + // init repo updates Tenant state + init_repo(conf, tenant_id); + let new_repo = get_repository_for_tenant(tenant_id).unwrap(); + init_timeline(new_repo.as_ref(), timeline_id); } fn init_timeline(repo: &dyn Repository, timeline_id: ZTimelineId) { @@ -125,8 +178,8 @@ pub fn stop_tenant_threads(tenantid: ZTenantId) { pub fn shutdown_all_tenants() -> Result<()> { SHUTDOWN_REQUESTED.swap(true, Ordering::Relaxed); - let tenants = list_tenants()?; - for tenantid in tenants { + let tenantids = list_tenantids()?; + for tenantid in tenantids { stop_tenant_threads(tenantid); let repo = get_repository_for_tenant(tenantid)?; debug!("shutdown tenant {}", tenantid); @@ -140,25 +193,40 @@ pub fn create_repository_for_tenant( conf: &'static PageServerConf, tenantid: ZTenantId, ) -> Result<()> { - let mut m = access_repository(); - - // First check that the tenant doesn't exist already - if m.get(&tenantid).is_some() { - bail!("tenant {} already exists", tenantid); + { + let mut m = access_tenants(); + // First check that the tenant doesn't exist already + if m.get(&tenantid).is_some() { + bail!("tenant {} already exists", tenantid); + } + let tenant = Tenant { + state: TenantState::CloudOnly, + repo: None, + }; + m.insert(tenantid, tenant); } + let wal_redo_manager = Arc::new(PostgresRedoManager::new(conf, tenantid)); let repo = branches::create_repo(conf, tenantid, wal_redo_manager)?; - m.insert(tenantid, repo); + let mut m = access_tenants(); + let tenant = m.get_mut(&tenantid).unwrap(); + tenant.repo = Some(repo); + tenant.state = TenantState::Active; Ok(()) } pub fn get_repository_for_tenant(tenantid: ZTenantId) -> Result> { - access_repository() + let m = access_tenants(); + let tenant = m .get(&tenantid) - .map(Arc::clone) - .ok_or_else(|| anyhow!("repository not found for tenant name {}", tenantid)) + .ok_or_else(|| anyhow!("Tenant not found for tenant {}", tenantid)); + + match &tenant.unwrap().repo { + Some(repo) => Ok(Arc::clone(repo)), + None => anyhow::bail!("Repository for tenant {} is not yet valid", tenantid), + } } pub fn get_timeline_for_tenant( @@ -170,12 +238,11 @@ pub fn get_timeline_for_tenant( .with_context(|| format!("cannot fetch timeline {}", timelineid)) } -fn list_tenants() -> Result> { - let o = &mut REPOSITORY.lock().unwrap(); - - o.iter() - .map(|tenant| { - let (tenantid, _) = tenant; +fn list_tenantids() -> Result> { + let m = access_tenants(); + m.iter() + .map(|v| { + let (tenantid, _) = v; Ok(*tenantid) }) .collect() diff --git a/postgres_ffi/src/xlog_utils.rs b/postgres_ffi/src/xlog_utils.rs index 7826630a78..7f88de4c85 100644 --- a/postgres_ffi/src/xlog_utils.rs +++ b/postgres_ffi/src/xlog_utils.rs @@ -9,7 +9,6 @@ use crate::pg_constants; use crate::CheckPoint; -use crate::ControlFileData; use crate::FullTransactionId; use crate::XLogLongPageHeaderData; use crate::XLogPageHeaderData; @@ -18,8 +17,8 @@ use crate::XLOG_PAGE_MAGIC; 
use anyhow::{bail, Result}; use byteorder::{ByteOrder, LittleEndian}; +use bytes::BytesMut; use bytes::{Buf, Bytes}; -use bytes::{BufMut, BytesMut}; use crc32c::*; use log::*; use std::cmp::max; @@ -410,27 +409,25 @@ impl CheckPoint { } // -// Generate new WAL segment with single XLOG_CHECKPOINT_SHUTDOWN record. +// Generate new, empty WAL segment. // We need this segment to start compute node. -// In order to minimize changes in Postgres core, we prefer to -// provide WAL segment from which is can extract checkpoint record in standard way, -// rather then implement some alternative mechanism. // -pub fn generate_wal_segment(pg_control: &ControlFileData) -> Bytes { +pub fn generate_wal_segment(segno: u64, system_id: u64) -> Bytes { let mut seg_buf = BytesMut::with_capacity(pg_constants::WAL_SEGMENT_SIZE as usize); + let pageaddr = XLogSegNoOffsetToRecPtr(segno, 0, pg_constants::WAL_SEGMENT_SIZE); let hdr = XLogLongPageHeaderData { std: { XLogPageHeaderData { xlp_magic: XLOG_PAGE_MAGIC as u16, xlp_info: pg_constants::XLP_LONG_HEADER, xlp_tli: 1, // FIXME: always use Postgres timeline 1 - xlp_pageaddr: pg_control.checkPoint - XLOG_SIZE_OF_XLOG_LONG_PHD as u64, + xlp_pageaddr: pageaddr, xlp_rem_len: 0, ..Default::default() // Put 0 in padding fields. } }, - xlp_sysid: pg_control.system_identifier, + xlp_sysid: system_id, xlp_seg_size: pg_constants::WAL_SEGMENT_SIZE as u32, xlp_xlog_blcksz: XLOG_BLCKSZ as u32, }; @@ -438,36 +435,6 @@ pub fn generate_wal_segment(pg_control: &ControlFileData) -> Bytes { let hdr_bytes = hdr.encode(); seg_buf.extend_from_slice(&hdr_bytes); - let rec_hdr = XLogRecord { - xl_tot_len: (XLOG_SIZE_OF_XLOG_RECORD - + SIZE_OF_XLOG_RECORD_DATA_HEADER_SHORT - + SIZEOF_CHECKPOINT) as u32, - xl_xid: 0, //0 is for InvalidTransactionId - xl_prev: 0, - xl_info: pg_constants::XLOG_CHECKPOINT_SHUTDOWN, - xl_rmid: pg_constants::RM_XLOG_ID, - xl_crc: 0, - ..Default::default() // Put 0 in padding fields. 
- }; - - let mut rec_shord_hdr_bytes = BytesMut::new(); - rec_shord_hdr_bytes.put_u8(pg_constants::XLR_BLOCK_ID_DATA_SHORT); - rec_shord_hdr_bytes.put_u8(SIZEOF_CHECKPOINT as u8); - - let rec_bytes = rec_hdr.encode(); - let checkpoint_bytes = pg_control.checkPointCopy.encode(); - - //calculate record checksum - let mut crc = 0; - crc = crc32c_append(crc, &rec_shord_hdr_bytes[..]); - crc = crc32c_append(crc, &checkpoint_bytes[..]); - crc = crc32c_append(crc, &rec_bytes[0..XLOG_RECORD_CRC_OFFS]); - - seg_buf.extend_from_slice(&rec_bytes[0..XLOG_RECORD_CRC_OFFS]); - seg_buf.put_u32_le(crc); - seg_buf.extend_from_slice(&rec_shord_hdr_bytes); - seg_buf.extend_from_slice(&checkpoint_bytes); - //zero out the rest of the file seg_buf.resize(pg_constants::WAL_SEGMENT_SIZE, 0); seg_buf.freeze() diff --git a/proxy/src/cplane_api.rs b/proxy/src/cplane_api.rs index 2b3b8a64bd..3435dca7b2 100644 --- a/proxy/src/cplane_api.rs +++ b/proxy/src/cplane_api.rs @@ -12,7 +12,7 @@ pub struct DatabaseInfo { pub port: u16, pub dbname: String, pub user: String, - pub password: String, + pub password: Option, } impl DatabaseInfo { @@ -24,12 +24,23 @@ impl DatabaseInfo { .next() .ok_or_else(|| anyhow::Error::msg("cannot resolve at least one SocketAddr")) } +} - pub fn conn_string(&self) -> String { - format!( - "dbname={} user={} password={}", - self.dbname, self.user, self.password - ) +impl From for tokio_postgres::Config { + fn from(db_info: DatabaseInfo) -> Self { + let mut config = tokio_postgres::Config::new(); + + config + .host(&db_info.host) + .port(db_info.port) + .dbname(&db_info.dbname) + .user(&db_info.user); + + if let Some(password) = db_info.password { + config.password(password); + } + + config } } diff --git a/proxy/src/main.rs b/proxy/src/main.rs index bbabab52f7..c183785635 100644 --- a/proxy/src/main.rs +++ b/proxy/src/main.rs @@ -145,18 +145,18 @@ fn main() -> anyhow::Result<()> { println!("Starting mgmt on {}", state.conf.mgmt_address); let mgmt_listener = TcpListener::bind(state.conf.mgmt_address)?; - let threads = vec![ + let threads = [ // Spawn a thread to listen for connections. It will spawn further threads // for each connection. 
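Stepping back to `generate_wal_segment` and its `basebackup.rs` caller above: the new signature takes a segment number and a system id, and the page address is now derived purely from the segment number. The arithmetic behind `segment_number` and `XLogSegNoOffsetToRecPtr(segno, 0, ..)` is just integer division and multiplication by the segment size; a small self-contained illustration (assuming the stock 16 MB `wal_segment_size`; the constant and sample LSN below are made up for the example):

```rust
const WAL_SEGMENT_SIZE: u64 = 16 * 1024 * 1024; // stock wal_segment_size

// Segment that contains a given LSN, and the LSN of that segment's first
// byte -- i.e. what XLogSegNoOffsetToRecPtr(segno, 0, ..) evaluates to.
fn segment_number(lsn: u64) -> u64 {
    lsn / WAL_SEGMENT_SIZE
}

fn segment_start(segno: u64) -> u64 {
    segno * WAL_SEGMENT_SIZE
}

fn main() {
    let lsn: u64 = 0x0000_0001_0169_C3C8; // i.e. LSN "1/169C3C8"
    let segno = segment_number(lsn);
    assert_eq!(segno, 0x101);
    // The header-only segment shipped in the basebackup starts at "1/1000000".
    assert_eq!(segment_start(segno), 0x0000_0001_0100_0000);
}
```

With the synthetic shutdown-checkpoint record removed, the segment the basebackup ships now contains only this long page header followed by zeros.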
thread::Builder::new() - .name("Proxy thread".into()) + .name("Listener thread".into()) .spawn(move || proxy::thread_main(state, pageserver_listener))?, thread::Builder::new() .name("Mgmt thread".into()) .spawn(move || mgmt::thread_main(state, mgmt_listener))?, ]; - for t in threads.into_iter() { + for t in threads { t.join().unwrap()?; } diff --git a/proxy/src/proxy.rs b/proxy/src/proxy.rs index 61a742cf38..1debabae9c 100644 --- a/proxy/src/proxy.rs +++ b/proxy/src/proxy.rs @@ -6,7 +6,6 @@ use anyhow::bail; use tokio_postgres::NoTls; use rand::Rng; -use std::io::Write; use std::{io, sync::mpsc::channel, thread}; use zenith_utils::postgres_backend::Stream; use zenith_utils::postgres_backend::{PostgresBackend, ProtoState}; @@ -28,11 +27,13 @@ pub fn thread_main( println!("accepted connection from {}", peer_addr); socket.set_nodelay(true).unwrap(); - thread::spawn(move || { - if let Err(err) = proxy_conn_main(state, socket) { - println!("error: {}", err); - } - }); + thread::Builder::new() + .name("Proxy thread".into()) + .spawn(move || { + if let Err(err) = proxy_conn_main(state, socket) { + println!("error: {}", err); + } + })?; } } @@ -158,6 +159,7 @@ impl ProxyConnection { fn handle_existing_user(&mut self) -> anyhow::Result { // ask password rand::thread_rng().fill(&mut self.md5_salt); + self.pgb .write_message(&BeMessage::AuthenticationMD5Password(&self.md5_salt))?; self.pgb.state = ProtoState::Authentication; // XXX @@ -250,51 +252,68 @@ databases without opening the browser. /// Create a TCP connection to a postgres database, authenticate with it, and receive the ReadyForQuery message async fn connect_to_db(db_info: DatabaseInfo) -> anyhow::Result { let mut socket = tokio::net::TcpStream::connect(db_info.socket_addr()?).await?; - let config = db_info.conn_string().parse::()?; + let config = tokio_postgres::Config::from(db_info); let _ = config.connect_raw(&mut socket, NoTls).await?; Ok(socket) } /// Concurrently proxy both directions of the client and server connections fn proxy( - client_read: ReadStream, - client_write: WriteStream, - server_read: ReadStream, - server_write: WriteStream, + (client_read, client_write): (ReadStream, WriteStream), + (server_read, server_write): (ReadStream, WriteStream), ) -> anyhow::Result<()> { - fn do_proxy(mut reader: ReadStream, mut writer: WriteStream) -> io::Result<()> { - std::io::copy(&mut reader, &mut writer)?; - writer.flush()?; - writer.shutdown(std::net::Shutdown::Both) + fn do_proxy(mut reader: impl io::Read, mut writer: WriteStream) -> io::Result { + /// FlushWriter will make sure that every message is sent as soon as possible + struct FlushWriter(W); + + impl io::Write for FlushWriter { + fn write(&mut self, buf: &[u8]) -> io::Result { + // `std::io::copy` is guaranteed to exit if we return an error, + // so we can afford to lose `res` in case `flush` fails + let res = self.0.write(buf); + if res.is_ok() { + self.0.flush()?; + } + res + } + + fn flush(&mut self) -> io::Result<()> { + self.0.flush() + } + } + + let res = std::io::copy(&mut reader, &mut FlushWriter(&mut writer)); + writer.shutdown(std::net::Shutdown::Both)?; + res } let client_to_server_jh = thread::spawn(move || do_proxy(client_read, server_write)); - let res1 = do_proxy(server_read, client_write); - let res2 = client_to_server_jh.join().unwrap(); - res1?; - res2?; + do_proxy(server_read, client_write)?; + client_to_server_jh.join().unwrap()?; Ok(()) } /// Proxy a client connection to a postgres database fn proxy_pass(pgb: PostgresBackend, db_info: DatabaseInfo) -> 
anyhow::Result<()> { - let runtime = tokio::runtime::Builder::new_current_thread() - .enable_all() - .build()?; - let db_stream = runtime.block_on(connect_to_db(db_info))?; - let db_stream = db_stream.into_std()?; - db_stream.set_nonblocking(false)?; + let db_stream = { + // We'll get rid of this once migration to async is complete + let runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build()?; - let db_stream = zenith_utils::sock_split::BidiStream::from_tcp(db_stream); - let (db_read, db_write) = db_stream.split(); + let stream = runtime.block_on(connect_to_db(db_info))?.into_std()?; + stream.set_nonblocking(false)?; + stream + }; - let stream = match pgb.into_stream() { + let db = zenith_utils::sock_split::BidiStream::from_tcp(db_stream); + + let client = match pgb.into_stream() { Stream::Bidirectional(bidi_stream) => bidi_stream, _ => bail!("invalid stream"), }; - let (client_read, client_write) = stream.split(); - proxy(client_read, client_write, db_read, db_write) + proxy(client.split(), db.split()) } diff --git a/test_runner/Pipfile b/test_runner/Pipfile index f5ff0d7e2b..a98acc5718 100644 --- a/test_runner/Pipfile +++ b/test_runner/Pipfile @@ -11,11 +11,14 @@ pyjwt = {extras = ["crypto"], version = "*"} requests = "*" pytest-xdist = "*" asyncpg = "*" +cached-property = "*" [dev-packages] -yapf = "*" flake8 = "*" mypy = "*" +# Behavior may change slightly between versions. These are run continuously, +# so we pin exact versions to avoid suprising breaks. Update if comfortable. +yapf = "==0.31.0" [requires] # we need at least 3.6, but pipenv doesn't allow to say this directly diff --git a/test_runner/Pipfile.lock b/test_runner/Pipfile.lock index 3c68c0ff3a..75fc17ffad 100644 --- a/test_runner/Pipfile.lock +++ b/test_runner/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "3cdc048691824d0b93912b6b78a0aa01dc98f278212c1badb0cc2edbd2103c3a" + "sha256": "3645ae8d2dcf55bd2a54963c44cfeedf577f3b289d1077365214a80a7f36e643" }, "pipfile-spec": 6, "requires": { @@ -43,94 +43,108 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", "version": "==21.2.0" }, + "cached-property": { + "hashes": [ + "sha256:9fa5755838eecbb2d234c3aa390bd80fbd3ac6b6869109bfc1b499f7bd89a130", + "sha256:df4f613cf7ad9a588cc381aaf4a512d26265ecebd5eb9e1ba12f1319eb85a6a0" + ], + "index": "pypi", + "version": "==1.5.2" + }, "certifi": { "hashes": [ - "sha256:2bbf76fd432960138b3ef6dda3dde0544f27cbf8546c458e60baf371917ba9ee", - "sha256:50b1e4f8446b06f41be7dd6338db18e0990601dce795c2b1686458aa7e8fa7d8" + "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872", + "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569" ], - "version": "==2021.5.30" + "version": "==2021.10.8" }, "cffi": { "hashes": [ - "sha256:06c54a68935738d206570b20da5ef2b6b6d92b38ef3ec45c5422c0ebaf338d4d", - "sha256:0c0591bee64e438883b0c92a7bed78f6290d40bf02e54c5bf0978eaf36061771", - "sha256:19ca0dbdeda3b2615421d54bef8985f72af6e0c47082a8d26122adac81a95872", - "sha256:22b9c3c320171c108e903d61a3723b51e37aaa8c81255b5e7ce102775bd01e2c", - "sha256:26bb2549b72708c833f5abe62b756176022a7b9a7f689b571e74c8478ead51dc", - "sha256:33791e8a2dc2953f28b8d8d300dde42dd929ac28f974c4b4c6272cb2955cb762", - "sha256:3c8d896becff2fa653dc4438b54a5a25a971d1f4110b32bd3068db3722c80202", - "sha256:4373612d59c404baeb7cbd788a18b2b2a8331abcc84c3ba40051fcd18b17a4d5", - "sha256:487d63e1454627c8e47dd230025780e91869cfba4c753a74fda196a1f6ad6548", - 
"sha256:48916e459c54c4a70e52745639f1db524542140433599e13911b2f329834276a", - "sha256:4922cd707b25e623b902c86188aca466d3620892db76c0bdd7b99a3d5e61d35f", - "sha256:55af55e32ae468e9946f741a5d51f9896da6b9bf0bbdd326843fec05c730eb20", - "sha256:57e555a9feb4a8460415f1aac331a2dc833b1115284f7ded7278b54afc5bd218", - "sha256:5d4b68e216fc65e9fe4f524c177b54964af043dde734807586cf5435af84045c", - "sha256:64fda793737bc4037521d4899be780534b9aea552eb673b9833b01f945904c2e", - "sha256:6d6169cb3c6c2ad50db5b868db6491a790300ade1ed5d1da29289d73bbe40b56", - "sha256:7bcac9a2b4fdbed2c16fa5681356d7121ecabf041f18d97ed5b8e0dd38a80224", - "sha256:80b06212075346b5546b0417b9f2bf467fea3bfe7352f781ffc05a8ab24ba14a", - "sha256:818014c754cd3dba7229c0f5884396264d51ffb87ec86e927ef0be140bfdb0d2", - "sha256:8eb687582ed7cd8c4bdbff3df6c0da443eb89c3c72e6e5dcdd9c81729712791a", - "sha256:99f27fefe34c37ba9875f224a8f36e31d744d8083e00f520f133cab79ad5e819", - "sha256:9f3e33c28cd39d1b655ed1ba7247133b6f7fc16fa16887b120c0c670e35ce346", - "sha256:a8661b2ce9694ca01c529bfa204dbb144b275a31685a075ce123f12331be790b", - "sha256:a9da7010cec5a12193d1af9872a00888f396aba3dc79186604a09ea3ee7c029e", - "sha256:aedb15f0a5a5949ecb129a82b72b19df97bbbca024081ed2ef88bd5c0a610534", - "sha256:b315d709717a99f4b27b59b021e6207c64620790ca3e0bde636a6c7f14618abb", - "sha256:ba6f2b3f452e150945d58f4badd92310449876c4c954836cfb1803bdd7b422f0", - "sha256:c33d18eb6e6bc36f09d793c0dc58b0211fccc6ae5149b808da4a62660678b156", - "sha256:c9a875ce9d7fe32887784274dd533c57909b7b1dcadcc128a2ac21331a9765dd", - "sha256:c9e005e9bd57bc987764c32a1bee4364c44fdc11a3cc20a40b93b444984f2b87", - "sha256:d2ad4d668a5c0645d281dcd17aff2be3212bc109b33814bbb15c4939f44181cc", - "sha256:d950695ae4381ecd856bcaf2b1e866720e4ab9a1498cba61c602e56630ca7195", - "sha256:e22dcb48709fc51a7b58a927391b23ab37eb3737a98ac4338e2448bef8559b33", - "sha256:e8c6a99be100371dbb046880e7a282152aa5d6127ae01783e37662ef73850d8f", - "sha256:e9dc245e3ac69c92ee4c167fbdd7428ec1956d4e754223124991ef29eb57a09d", - "sha256:eb687a11f0a7a1839719edd80f41e459cc5366857ecbed383ff376c4e3cc6afd", - "sha256:eb9e2a346c5238a30a746893f23a9535e700f8192a68c07c0258e7ece6ff3728", - "sha256:ed38b924ce794e505647f7c331b22a693bee1538fdf46b0222c4717b42f744e7", - "sha256:f0010c6f9d1a4011e429109fda55a225921e3206e7f62a0c22a35344bfd13cca", - "sha256:f0c5d1acbfca6ebdd6b1e3eded8d261affb6ddcf2186205518f1428b8569bb99", - "sha256:f10afb1004f102c7868ebfe91c28f4a712227fe4cb24974350ace1f90e1febbf", - "sha256:f174135f5609428cc6e1b9090f9268f5c8935fddb1b25ccb8255a2d50de6789e", - "sha256:f3ebe6e73c319340830a9b2825d32eb6d8475c1dac020b4f0aa774ee3b898d1c", - "sha256:f627688813d0a4140153ff532537fbe4afea5a3dffce1f9deb7f91f848a832b5", - "sha256:fd4305f86f53dfd8cd3522269ed7fc34856a8ee3709a5e28b2836b2db9d4cd69" + "sha256:00c878c90cb53ccfaae6b8bc18ad05d2036553e6d9d1d9dbcf323bbe83854ca3", + "sha256:0104fb5ae2391d46a4cb082abdd5c69ea4eab79d8d44eaaf79f1b1fd806ee4c2", + "sha256:06c48159c1abed75c2e721b1715c379fa3200c7784271b3c46df01383b593636", + "sha256:0808014eb713677ec1292301ea4c81ad277b6cdf2fdd90fd540af98c0b101d20", + "sha256:10dffb601ccfb65262a27233ac273d552ddc4d8ae1bf93b21c94b8511bffe728", + "sha256:14cd121ea63ecdae71efa69c15c5543a4b5fbcd0bbe2aad864baca0063cecf27", + "sha256:17771976e82e9f94976180f76468546834d22a7cc404b17c22df2a2c81db0c66", + "sha256:181dee03b1170ff1969489acf1c26533710231c58f95534e3edac87fff06c443", + "sha256:23cfe892bd5dd8941608f93348c0737e369e51c100d03718f108bf1add7bd6d0", + "sha256:263cc3d821c4ab2213cbe8cd8b355a7f72a8324577dc865ef98487c1aeee2bc7", + 
"sha256:2756c88cbb94231c7a147402476be2c4df2f6078099a6f4a480d239a8817ae39", + "sha256:27c219baf94952ae9d50ec19651a687b826792055353d07648a5695413e0c605", + "sha256:2a23af14f408d53d5e6cd4e3d9a24ff9e05906ad574822a10563efcef137979a", + "sha256:31fb708d9d7c3f49a60f04cf5b119aeefe5644daba1cd2a0fe389b674fd1de37", + "sha256:3415c89f9204ee60cd09b235810be700e993e343a408693e80ce7f6a40108029", + "sha256:3773c4d81e6e818df2efbc7dd77325ca0dcb688116050fb2b3011218eda36139", + "sha256:3b96a311ac60a3f6be21d2572e46ce67f09abcf4d09344c49274eb9e0bf345fc", + "sha256:3f7d084648d77af029acb79a0ff49a0ad7e9d09057a9bf46596dac9514dc07df", + "sha256:41d45de54cd277a7878919867c0f08b0cf817605e4eb94093e7516505d3c8d14", + "sha256:4238e6dab5d6a8ba812de994bbb0a79bddbdf80994e4ce802b6f6f3142fcc880", + "sha256:45db3a33139e9c8f7c09234b5784a5e33d31fd6907800b316decad50af323ff2", + "sha256:45e8636704eacc432a206ac7345a5d3d2c62d95a507ec70d62f23cd91770482a", + "sha256:4958391dbd6249d7ad855b9ca88fae690783a6be9e86df65865058ed81fc860e", + "sha256:4a306fa632e8f0928956a41fa8e1d6243c71e7eb59ffbd165fc0b41e316b2474", + "sha256:57e9ac9ccc3101fac9d6014fba037473e4358ef4e89f8e181f8951a2c0162024", + "sha256:59888172256cac5629e60e72e86598027aca6bf01fa2465bdb676d37636573e8", + "sha256:5e069f72d497312b24fcc02073d70cb989045d1c91cbd53979366077959933e0", + "sha256:64d4ec9f448dfe041705426000cc13e34e6e5bb13736e9fd62e34a0b0c41566e", + "sha256:6dc2737a3674b3e344847c8686cf29e500584ccad76204efea14f451d4cc669a", + "sha256:74fdfdbfdc48d3f47148976f49fab3251e550a8720bebc99bf1483f5bfb5db3e", + "sha256:75e4024375654472cc27e91cbe9eaa08567f7fbdf822638be2814ce059f58032", + "sha256:786902fb9ba7433aae840e0ed609f45c7bcd4e225ebb9c753aa39725bb3e6ad6", + "sha256:8b6c2ea03845c9f501ed1313e78de148cd3f6cad741a75d43a29b43da27f2e1e", + "sha256:91d77d2a782be4274da750752bb1650a97bfd8f291022b379bb8e01c66b4e96b", + "sha256:91ec59c33514b7c7559a6acda53bbfe1b283949c34fe7440bcf917f96ac0723e", + "sha256:920f0d66a896c2d99f0adbb391f990a84091179542c205fa53ce5787aff87954", + "sha256:a5263e363c27b653a90078143adb3d076c1a748ec9ecc78ea2fb916f9b861962", + "sha256:abb9a20a72ac4e0fdb50dae135ba5e77880518e742077ced47eb1499e29a443c", + "sha256:c2051981a968d7de9dd2d7b87bcb9c939c74a34626a6e2f8181455dd49ed69e4", + "sha256:c21c9e3896c23007803a875460fb786118f0cdd4434359577ea25eb556e34c55", + "sha256:c2502a1a03b6312837279c8c1bd3ebedf6c12c4228ddbad40912d671ccc8a962", + "sha256:d4d692a89c5cf08a8557fdeb329b82e7bf609aadfaed6c0d79f5a449a3c7c023", + "sha256:da5db4e883f1ce37f55c667e5c0de439df76ac4cb55964655906306918e7363c", + "sha256:e7022a66d9b55e93e1a845d8c9eba2a1bebd4966cd8bfc25d9cd07d515b33fa6", + "sha256:ef1f279350da2c586a69d32fc8733092fd32cc8ac95139a00377841f59a3f8d8", + "sha256:f54a64f8b0c8ff0b64d18aa76675262e1700f3995182267998c31ae974fbc382", + "sha256:f5c7150ad32ba43a07c4479f40241756145a1f03b43480e058cfd862bf5041c7", + "sha256:f6f824dc3bce0edab5f427efcfb1d63ee75b6fcb7282900ccaf925be84efb0fc", + "sha256:fd8a250edc26254fe5b33be00402e6d287f562b6a5b2152dec302fa15bb3e997", + "sha256:ffaa5c925128e29efbde7301d8ecaf35c8c60ffbcd6a1ffd3a552177c8e5e796" ], - "version": "==1.14.6" + "version": "==1.15.0" }, "charset-normalizer": { "hashes": [ - "sha256:5d209c0a931f215cee683b6445e2d77677e7e75e159f78def0db09d68fafcaa6", - "sha256:5ec46d183433dcbd0ab716f2d7f29d8dee50505b3fdb40c6b985c7c4f5a3591f" + "sha256:e019de665e2bcf9c2b64e2e5aa025fa991da8720daa3c1138cadd2fd1856aed0", + "sha256:f7af805c321bfa1ce6714c51f254e0d5bb5e5834039bc17db7ebe3a4cec9492b" ], "markers": "python_version >= '3'", - "version": "==2.0.6" + "version": 
"==2.0.7" }, "cryptography": { "hashes": [ - "sha256:0a7dcbcd3f1913f664aca35d47c1331fce738d44ec34b7be8b9d332151b0b01e", - "sha256:1eb7bb0df6f6f583dd8e054689def236255161ebbcf62b226454ab9ec663746b", - "sha256:21ca464b3a4b8d8e86ba0ee5045e103a1fcfac3b39319727bc0fc58c09c6aff7", - "sha256:34dae04a0dce5730d8eb7894eab617d8a70d0c97da76b905de9efb7128ad7085", - "sha256:3520667fda779eb788ea00080124875be18f2d8f0848ec00733c0ec3bb8219fc", - "sha256:3c4129fc3fdc0fa8e40861b5ac0c673315b3c902bbdc05fc176764815b43dd1d", - "sha256:3fa3a7ccf96e826affdf1a0a9432be74dc73423125c8f96a909e3835a5ef194a", - "sha256:5b0fbfae7ff7febdb74b574055c7466da334a5371f253732d7e2e7525d570498", - "sha256:695104a9223a7239d155d7627ad912953b540929ef97ae0c34c7b8bf30857e89", - "sha256:8695456444f277af73a4877db9fc979849cd3ee74c198d04fc0776ebc3db52b9", - "sha256:94cc5ed4ceaefcbe5bf38c8fba6a21fc1d365bb8fb826ea1688e3370b2e24a1c", - "sha256:94fff993ee9bc1b2440d3b7243d488c6a3d9724cc2b09cdb297f6a886d040ef7", - "sha256:9965c46c674ba8cc572bc09a03f4c649292ee73e1b683adb1ce81e82e9a6a0fb", - "sha256:a00cf305f07b26c351d8d4e1af84ad7501eca8a342dedf24a7acb0e7b7406e14", - "sha256:a305600e7a6b7b855cd798e00278161b681ad6e9b7eca94c721d5f588ab212af", - "sha256:cd65b60cfe004790c795cc35f272e41a3df4631e2fb6b35aa7ac6ef2859d554e", - "sha256:d2a6e5ef66503da51d2110edf6c403dc6b494cc0082f85db12f54e9c5d4c3ec5", - "sha256:d9ec0e67a14f9d1d48dd87a2531009a9b251c02ea42851c060b25c782516ff06", - "sha256:f44d141b8c4ea5eb4dbc9b3ad992d45580c1d22bf5e24363f2fbf50c2d7ae8a7" + "sha256:07bb7fbfb5de0980590ddfc7f13081520def06dc9ed214000ad4372fb4e3c7f6", + "sha256:18d90f4711bf63e2fb21e8c8e51ed8189438e6b35a6d996201ebd98a26abbbe6", + "sha256:1ed82abf16df40a60942a8c211251ae72858b25b7421ce2497c2eb7a1cee817c", + "sha256:22a38e96118a4ce3b97509443feace1d1011d0571fae81fc3ad35f25ba3ea999", + "sha256:2d69645f535f4b2c722cfb07a8eab916265545b3475fdb34e0be2f4ee8b0b15e", + "sha256:4a2d0e0acc20ede0f06ef7aa58546eee96d2592c00f450c9acb89c5879b61992", + "sha256:54b2605e5475944e2213258e0ab8696f4f357a31371e538ef21e8d61c843c28d", + "sha256:7075b304cd567694dc692ffc9747f3e9cb393cc4aa4fb7b9f3abd6f5c4e43588", + "sha256:7b7ceeff114c31f285528ba8b390d3e9cfa2da17b56f11d366769a807f17cbaa", + "sha256:7eba2cebca600a7806b893cb1d541a6e910afa87e97acf2021a22b32da1df52d", + "sha256:928185a6d1ccdb816e883f56ebe92e975a262d31cc536429041921f8cb5a62fd", + "sha256:9933f28f70d0517686bd7de36166dda42094eac49415459d9bdf5e7df3e0086d", + "sha256:a688ebcd08250eab5bb5bca318cc05a8c66de5e4171a65ca51db6bd753ff8953", + "sha256:abb5a361d2585bb95012a19ed9b2c8f412c5d723a9836418fab7aaa0243e67d2", + "sha256:c10c797ac89c746e488d2ee92bd4abd593615694ee17b2500578b63cad6b93a8", + "sha256:ced40344e811d6abba00295ced98c01aecf0c2de39481792d87af4fa58b7b4d6", + "sha256:d57e0cdc1b44b6cdf8af1d01807db06886f10177469312fbde8f44ccbb284bc9", + "sha256:d99915d6ab265c22873f1b4d6ea5ef462ef797b4140be4c9d8b179915e0985c6", + "sha256:eb80e8a1f91e4b7ef8b33041591e6d89b2b8e122d787e87eeb2b08da71bb16ad", + "sha256:ebeddd119f526bcf323a89f853afb12e225902a24d29b55fe18dd6fcb2838a76" ], - "version": "==3.4.8" + "version": "==35.0.0" }, "execnet": { "hashes": [ @@ -142,11 +156,11 @@ }, "idna": { "hashes": [ - "sha256:14475042e284991034cb48e06f6851428fb14c4dc953acd9be9a5e95c7b6dd7a", - "sha256:467fbad99067910785144ce333826c71fb0e63a425657295239737f7ecd125f3" + "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff", + "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d" ], "markers": "python_version >= '3'", - "version": "==3.2" + "version": 
"==3.3" }, "iniconfig": { "hashes": [ @@ -207,11 +221,11 @@ "crypto" ], "hashes": [ - "sha256:934d73fbba91b0483d3857d1aff50e96b2a892384ee2c17417ed3203f173fca1", - "sha256:fba44e7898bbca160a2b2b501f492824fc8382485d3a6f11ba5d0c1937ce6130" + "sha256:b888b4d56f06f6dcd777210c334e69c737be74755d3e5e9ee3fe67dc18a0ee41", + "sha256:e0c4bb8d9f0af0c7f5b1ec4c5036309617d03d56932877f2f7a0beeb5318322f" ], "index": "pypi", - "version": "==2.1.0" + "version": "==2.3.0" }, "pyparsing": { "hashes": [ @@ -272,21 +286,21 @@ }, "urllib3": { "hashes": [ - "sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4", - "sha256:f57b4c16c62fa2760b7e3d97c35b255512fb6b59a259730f36ba32ce9f8e342f" + "sha256:4987c65554f7a2dbf30c18fd48778ef124af6fab771a377103da0585e2336ece", + "sha256:c4fdf4019605b6e5423637e01bc9fe4daef873709a7973e195ceba0a62bbc844" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", - "version": "==1.26.6" + "version": "==1.26.7" } }, "develop": { "flake8": { "hashes": [ - "sha256:07528381786f2a6237b061f6e96610a4167b226cb926e2aa2b6b1d78057c576b", - "sha256:bf8fd333346d844f616e8d47905ef3a3384edae6b4e9beb0c5101e25e3110907" + "sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d", + "sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d" ], "index": "pypi", - "version": "==3.9.2" + "version": "==4.0.1" }, "mccabe": { "hashes": [ @@ -333,19 +347,19 @@ }, "pycodestyle": { "hashes": [ - "sha256:514f76d918fcc0b55c6680472f0a37970994e07bbb80725808c17089be302068", - "sha256:c389c1d06bf7904078ca03399a4816f974a1d590090fecea0c63ec26ebaf1cef" + "sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20", + "sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==2.7.0" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==2.8.0" }, "pyflakes": { "hashes": [ - "sha256:7893783d01b8a89811dd72d7dfd4d84ff098e5eed95cfa8905b22bbffe52efc3", - "sha256:f5bc8ecabc05bb9d291eb5203d6810b49040f6ff446a756326104746cc00c1db" + "sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c", + "sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==2.3.1" + "version": "==2.4.0" }, "toml": { "hashes": [ diff --git a/test_runner/README.md b/test_runner/README.md index 62a95350aa..cdbf7e988d 100644 --- a/test_runner/README.md +++ b/test_runner/README.md @@ -53,8 +53,8 @@ Useful environment variables: should go. `TEST_SHARED_FIXTURES`: Try to re-use a single pageserver for all the tests. -Let stdout and stderr go to the terminal instead of capturing them: -`pytest -s ...` +Let stdout, stderr and `INFO` log messages go to the terminal instead of capturing them: +`pytest -s --log-cli-level=INFO ...` (Note many tests capture subprocess outputs separately, so this may not show much.) @@ -95,11 +95,13 @@ Python destructors, e.g. `__del__()` aren't recommended for cleanup. ### Code quality +We force code formatting via yapf: + +1. Install `yapf` and other tools (`flake8`, `mypy`) with `pipenv install --dev`. +1. Reformat all your code by running `pipenv run yapf -ri .` in the `test_runner/` directory. 
+ Before submitting a patch, please consider: * Writing a couple of docstrings to clarify the reasoning behind a new test. * Running `flake8` (or a linter of your choice, e.g. `pycodestyle`) and fixing possible defects, if any. -* Formatting the code with `yapf -r -i .` (TODO: implement an opt-in pre-commit hook for that). * (Optional) Typechecking the code with `mypy .`. Currently this mostly affects `fixtures/zenith_fixtures.py`. - -The tools can be installed with `pipenv install --dev`. diff --git a/test_runner/batch_others/test_auth.py b/test_runner/batch_others/test_auth.py index 614883d4b8..9fe7567902 100644 --- a/test_runner/batch_others/test_auth.py +++ b/test_runner/batch_others/test_auth.py @@ -1,4 +1,3 @@ - from contextlib import closing from typing import Iterator from uuid import uuid4 @@ -6,7 +5,6 @@ import psycopg2 from fixtures.zenith_fixtures import PortDistributor, Postgres, ZenithCli, ZenithPageserver, PgBin import pytest - pytest_plugins = ("fixtures.zenith_fixtures") @@ -35,7 +33,9 @@ def test_pageserver_auth(pageserver_auth_enabled: ZenithPageserver): ps.safe_psql(f"tenant_create {uuid4().hex}", password=management_token) # fail to create tenant using tenant token - with pytest.raises(psycopg2.DatabaseError, match='Attempt to access management api with tenant scope. Permission denied'): + with pytest.raises( + psycopg2.DatabaseError, + match='Attempt to access management api with tenant scope. Permission denied'): ps.safe_psql(f"tenant_create {uuid4().hex}", password=tenant_token) @@ -60,14 +60,14 @@ def test_compute_auth_to_pageserver( wa_factory.start_n_new(3, management_token) with Postgres( - zenith_cli=zenith_cli, - repo_dir=repo_dir, - pg_bin=pg_bin, - tenant_id=ps.initial_tenant, - port=port_distributor.get_port(), + zenith_cli=zenith_cli, + repo_dir=repo_dir, + pg_bin=pg_bin, + tenant_id=ps.initial_tenant, + port=port_distributor.get_port(), ).create_start( - branch, - wal_acceptors=wa_factory.get_connstrs() if with_wal_acceptors else None, + branch, + wal_acceptors=wa_factory.get_connstrs() if with_wal_acceptors else None, ) as pg: with closing(pg.connect()) as conn: with conn.cursor() as cur: diff --git a/test_runner/batch_others/test_branch_behind.py b/test_runner/batch_others/test_branch_behind.py index 47c2f0b2f9..887671bf99 100644 --- a/test_runner/batch_others/test_branch_behind.py +++ b/test_runner/batch_others/test_branch_behind.py @@ -86,7 +86,10 @@ def test_branch_behind(zenith_cli, pageserver: ZenithPageserver, postgres: Postg assert cur.fetchone() == (1, ) # branch at pre-initdb lsn + # + # FIXME: This works currently, but probably shouldn't be allowed try: zenith_cli.run(["branch", "test_branch_preinitdb", "test_branch_behind@0/42"]) + # FIXME: assert false, "branch with invalid LSN should have failed" except subprocess.CalledProcessError: log.info("Branch creation with pre-initdb LSN failed (as expected)") diff --git a/test_runner/batch_others/test_clog_truncate.py b/test_runner/batch_others/test_clog_truncate.py index 8ad3c22732..a70e14d9a9 100644 --- a/test_runner/batch_others/test_clog_truncate.py +++ b/test_runner/batch_others/test_clog_truncate.py @@ -18,9 +18,12 @@ def test_clog_truncate(zenith_cli, pageserver: ZenithPageserver, postgres: Postg # set agressive autovacuum to make sure that truncation will happen config = [ - 'autovacuum_max_workers=10', 'autovacuum_vacuum_threshold=0', - 'autovacuum_vacuum_insert_threshold=0', 'autovacuum_vacuum_cost_delay=0', - 'autovacuum_vacuum_cost_limit=10000', 'autovacuum_naptime =1s', + 
'autovacuum_max_workers=10', + 'autovacuum_vacuum_threshold=0', + 'autovacuum_vacuum_insert_threshold=0', + 'autovacuum_vacuum_cost_delay=0', + 'autovacuum_vacuum_cost_limit=10000', + 'autovacuum_naptime =1s', 'autovacuum_freeze_max_age=100000' ] diff --git a/test_runner/batch_others/test_createdropdb.py b/test_runner/batch_others/test_createdropdb.py index 4194538556..5fe103496d 100644 --- a/test_runner/batch_others/test_createdropdb.py +++ b/test_runner/batch_others/test_createdropdb.py @@ -41,6 +41,7 @@ def test_createdb( for db in (pg, pg2): db.connect(dbname='foodb').close() + # # Test DROP DATABASE # @@ -49,7 +50,7 @@ def test_dropdb( pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin, - test_output_dir + test_output_dir, ): zenith_cli.run(["branch", "test_dropdb", "empty"]) @@ -66,7 +67,6 @@ def test_dropdb( cur.execute("SELECT oid FROM pg_database WHERE datname='foodb';") dboid = cur.fetchone()[0] - with closing(pg.connect()) as conn: with conn.cursor() as cur: cur.execute('DROP DATABASE foodb') @@ -76,7 +76,6 @@ def test_dropdb( cur.execute('SELECT pg_current_wal_insert_lsn()') lsn_after_drop = cur.fetchone()[0] - # Create two branches before and after database drop. zenith_cli.run(["branch", "test_before_dropdb", "test_dropdb@" + lsn_before_drop]) pg_before = postgres.create_start('test_before_dropdb') diff --git a/test_runner/batch_others/test_multixact.py b/test_runner/batch_others/test_multixact.py index 403eee9974..78504b95ed 100644 --- a/test_runner/batch_others/test_multixact.py +++ b/test_runner/batch_others/test_multixact.py @@ -10,8 +10,12 @@ pytest_plugins = ("fixtures.zenith_fixtures") # it only checks next_multixact_id field in restored pg_control, # since we don't have functions to check multixact internals. # -def test_multixact(pageserver: ZenithPageserver, postgres: PostgresFactory, - pg_bin, zenith_cli, base_dir, test_output_dir): +def test_multixact(pageserver: ZenithPageserver, + postgres: PostgresFactory, + pg_bin, + zenith_cli, + base_dir, + test_output_dir): # Create a branch for us zenith_cli.run(["branch", "test_multixact", "empty"]) pg = postgres.create_start('test_multixact') diff --git a/test_runner/batch_others/test_old_request_lsn.py b/test_runner/batch_others/test_old_request_lsn.py index 49e87210e4..6cc5c01b83 100644 --- a/test_runner/batch_others/test_old_request_lsn.py +++ b/test_runner/batch_others/test_old_request_lsn.py @@ -5,6 +5,7 @@ from fixtures.log_helper import log pytest_plugins = ("fixtures.zenith_fixtures") + # # Test where Postgres generates a lot of WAL, and it's garbage collected away, but # no pages are evicted so that Postgres uses an old LSN in a GetPage request. @@ -15,7 +16,10 @@ pytest_plugins = ("fixtures.zenith_fixtures") # just a hint that the page hasn't been modified since that LSN, and the page # server should return the latest page version regardless of the LSN. 
# -def test_old_request_lsn(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin): +def test_old_request_lsn(zenith_cli, + pageserver: ZenithPageserver, + postgres: PostgresFactory, + pg_bin): # Create a branch for us zenith_cli.run(["branch", "test_old_request_lsn", "empty"]) pg = postgres.create_start('test_old_request_lsn') @@ -47,20 +51,20 @@ def test_old_request_lsn(zenith_cli, pageserver: ZenithPageserver, postgres: Pos from pg_settings where name = 'shared_buffers' ''') row = cur.fetchone() - log.info(f'shared_buffers is {row[0]}, table size {row[1]}'); + log.info(f'shared_buffers is {row[0]}, table size {row[1]}') assert int(row[0]) < int(row[1]) - cur.execute('VACUUM foo'); + cur.execute('VACUUM foo') # Make a lot of updates on a single row, generating a lot of WAL. Trigger # garbage collections so that the page server will remove old page versions. for i in range(10): pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0") for j in range(100): - cur.execute('UPDATE foo SET val = val + 1 WHERE id = 1;'); + cur.execute('UPDATE foo SET val = val + 1 WHERE id = 1;') # All (or at least most of) the updates should've been on the same page, so # that we haven't had to evict any dirty pages for a long time. Now run # a query that sends GetPage@LSN requests with the old LSN. - cur.execute("SELECT COUNT(*), SUM(val) FROM foo"); + cur.execute("SELECT COUNT(*), SUM(val) FROM foo") assert cur.fetchone() == (100000, 101000) diff --git a/test_runner/batch_others/test_pageserver_api.py b/test_runner/batch_others/test_pageserver_api.py index 8d0f92a263..95b0172e4c 100644 --- a/test_runner/batch_others/test_pageserver_api.py +++ b/test_runner/batch_others/test_pageserver_api.py @@ -63,7 +63,8 @@ def test_tenant_list_psql(pageserver: ZenithPageserver, zenith_cli): cur = conn.cursor() # check same tenant cannot be created twice - with pytest.raises(psycopg2.DatabaseError, match=f'tenant {pageserver.initial_tenant} already exists'): + with pytest.raises(psycopg2.DatabaseError, + match=f'tenant {pageserver.initial_tenant} already exists'): cur.execute(f'tenant_create {pageserver.initial_tenant}') # create one more tenant @@ -102,5 +103,6 @@ def test_pageserver_http_api_client(pageserver: ZenithPageserver): def test_pageserver_http_api_client_auth_enabled(pageserver_auth_enabled: ZenithPageserver): - client = pageserver_auth_enabled.http_client(auth_token=pageserver_auth_enabled.auth_keys.generate_management_token()) + client = pageserver_auth_enabled.http_client( + auth_token=pageserver_auth_enabled.auth_keys.generate_management_token()) check_client(client, pageserver_auth_enabled.initial_tenant) diff --git a/test_runner/batch_others/test_pageserver_restart.py b/test_runner/batch_others/test_pageserver_restart.py index 0656b7c6e8..5b4943aa27 100644 --- a/test_runner/batch_others/test_pageserver_restart.py +++ b/test_runner/batch_others/test_pageserver_restart.py @@ -9,17 +9,20 @@ from fixtures.log_helper import log pytest_plugins = ("fixtures.zenith_fixtures") + # Check that dead minority doesn't prevent the commits: execute insert n_inserts # times, with fault_probability chance of getting a wal acceptor down or up # along the way. 2 of 3 are always alive, so the work keeps going. 
-def test_pageserver_restart(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory: WalAcceptorFactory): +def test_pageserver_restart(zenith_cli, + pageserver: ZenithPageserver, + postgres: PostgresFactory, + wa_factory: WalAcceptorFactory): # One safekeeper is enough for this test. wa_factory.start_n_new(1) zenith_cli.run(["branch", "test_pageserver_restart", "empty"]) - pg = postgres.create_start('test_pageserver_restart', - wal_acceptors=wa_factory.get_connstrs()) + pg = postgres.create_start('test_pageserver_restart', wal_acceptors=wa_factory.get_connstrs()) pg_conn = pg.connect() cur = pg_conn.cursor() @@ -41,14 +44,14 @@ def test_pageserver_restart(zenith_cli, pageserver: ZenithPageserver, postgres: from pg_settings where name = 'shared_buffers' ''') row = cur.fetchone() - log.info(f"shared_buffers is {row[0]}, table size {row[1]}"); + log.info(f"shared_buffers is {row[0]}, table size {row[1]}") assert int(row[0]) < int(row[1]) # Stop and restart pageserver. This is a more or less graceful shutdown, although # the page server doesn't currently have a shutdown routine so there's no difference # between stopping and crashing. - pageserver.stop(); - pageserver.start(); + pageserver.stop() + pageserver.start() # Stopping the pageserver breaks the connection from the postgres backend to # the page server, and causes the next query on the connection to fail. Start a new @@ -62,6 +65,5 @@ def test_pageserver_restart(zenith_cli, pageserver: ZenithPageserver, postgres: assert cur.fetchone() == (100000, ) # Stop the page server by force, and restart it - pageserver.stop(); - pageserver.start(); - + pageserver.stop() + pageserver.start() diff --git a/test_runner/batch_others/test_readonly_node.py b/test_runner/batch_others/test_readonly_node.py new file mode 100644 index 0000000000..cc6c11caad --- /dev/null +++ b/test_runner/batch_others/test_readonly_node.py @@ -0,0 +1,89 @@ +import subprocess +from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver + +pytest_plugins = ("fixtures.zenith_fixtures") + + +# +# Create read-only compute nodes, anchored at historical points in time. +# +# This is very similar to the 'test_branch_behind' test, but instead of +# creating branches, creates read-only nodes. +# +def test_readonly_node(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin): + zenith_cli.run(["branch", "test_readonly_node", "empty"]) + + pgmain = postgres.create_start('test_readonly_node') + print("postgres is running on 'test_readonly_node' branch") + + main_pg_conn = pgmain.connect() + main_cur = main_pg_conn.cursor() + + # Create table, and insert the first 100 rows + main_cur.execute('CREATE TABLE foo (t text)') + main_cur.execute(''' + INSERT INTO foo + SELECT 'long string to consume some space' || g + FROM generate_series(1, 100) g + ''') + main_cur.execute('SELECT pg_current_wal_insert_lsn()') + lsn_a = main_cur.fetchone()[0] + print('LSN after 100 rows: ' + lsn_a) + + # Insert some more rows. (This generates enough WAL to fill a few segments.) + main_cur.execute(''' + INSERT INTO foo + SELECT 'long string to consume some space' || g + FROM generate_series(1, 200000) g + ''') + main_cur.execute('SELECT pg_current_wal_insert_lsn()') + lsn_b = main_cur.fetchone()[0] + print('LSN after 200100 rows: ' + lsn_b) + + # Insert many more rows. This generates enough WAL to fill a few segments. 
+ main_cur.execute(''' + INSERT INTO foo + SELECT 'long string to consume some space' || g + FROM generate_series(1, 200000) g + ''') + + main_cur.execute('SELECT pg_current_wal_insert_lsn()') + lsn_c = main_cur.fetchone()[0] + print('LSN after 400100 rows: ' + lsn_c) + + # Create first read-only node at the point where only 100 rows were inserted + pg_hundred = postgres.create_start("test_readonly_node_hundred", + branch=f'test_readonly_node@{lsn_a}') + + # And another at the point where 200100 rows were inserted + pg_more = postgres.create_start("test_readonly_node_more", branch=f'test_readonly_node@{lsn_b}') + + # On the 'hundred' node, we should see only 100 rows + hundred_pg_conn = pg_hundred.connect() + hundred_cur = hundred_pg_conn.cursor() + hundred_cur.execute('SELECT count(*) FROM foo') + assert hundred_cur.fetchone() == (100, ) + + # On the 'more' node, we should see 200100 rows + more_pg_conn = pg_more.connect() + more_cur = more_pg_conn.cursor() + more_cur.execute('SELECT count(*) FROM foo') + assert more_cur.fetchone() == (200100, ) + + # All the rows are visible on the main branch + main_cur.execute('SELECT count(*) FROM foo') + assert main_cur.fetchone() == (400100, ) + + # Check creating a node at segment boundary + pg = postgres.create_start("test_branch_segment_boundary", + branch="test_readonly_node@0/3000000") + cur = pg.connect().cursor() + cur.execute('SELECT 1') + assert cur.fetchone() == (1, ) + + # Create node at pre-initdb lsn + try: + zenith_cli.run(["pg", "start", "test_branch_preinitdb", "test_readonly_node@0/42"]) + assert False, "compute node startup with invalid LSN should have failed" + except AssertionError: + raise + except Exception: + print("Node creation with pre-initdb LSN failed (as expected)") diff --git a/test_runner/batch_others/test_restart_compute.py b/test_runner/batch_others/test_restart_compute.py index 48a19b490b..5d47d32aac 100644 --- a/test_runner/batch_others/test_restart_compute.py +++ b/test_runner/batch_others/test_restart_compute.py @@ -12,13 +12,13 @@ pytest_plugins = ("fixtures.zenith_fixtures") # @pytest.mark.parametrize('with_wal_acceptors', [False, True]) def test_restart_compute( - zenith_cli, - pageserver: ZenithPageserver, - postgres: PostgresFactory, - pg_bin, - wa_factory, - with_wal_acceptors: bool, - ): + zenith_cli, + pageserver: ZenithPageserver, + postgres: PostgresFactory, + pg_bin, + wa_factory, + with_wal_acceptors: bool, +): wal_acceptor_connstrs = None zenith_cli.run(["branch", "test_restart_compute", "empty"]) @@ -26,8 +26,7 @@ def test_restart_compute( wa_factory.start_n_new(3) wal_acceptor_connstrs = wa_factory.get_connstrs() - pg = postgres.create_start('test_restart_compute', - wal_acceptors=wal_acceptor_connstrs) + pg = postgres.create_start('test_restart_compute', wal_acceptors=wal_acceptor_connstrs) log.info("postgres is running on 'test_restart_compute' branch") with closing(pg.connect()) as conn: @@ -40,9 +39,7 @@ def test_restart_compute( log.info(f"res = {r}") # Remove data directory and restart - pg.stop_and_destroy().create_start('test_restart_compute', - wal_acceptors=wal_acceptor_connstrs) - + pg.stop_and_destroy().create_start('test_restart_compute', wal_acceptors=wal_acceptor_connstrs) with closing(pg.connect()) as conn: with conn.cursor() as cur: @@ -61,8 +58,7 @@ def test_restart_compute( log.info(f"res = {r}") # Again remove data directory and restart - pg.stop_and_destroy().create_start('test_restart_compute', - wal_acceptors=wal_acceptor_connstrs) + pg.stop_and_destroy().create_start('test_restart_compute',
wal_acceptors=wal_acceptor_connstrs) # That select causes lots of FPI's and increases probability of wakeepers # lagging behind after query completion @@ -76,8 +72,7 @@ def test_restart_compute( log.info(f"res = {r}") # And again remove data directory and restart - pg.stop_and_destroy().create_start('test_restart_compute', - wal_acceptors=wal_acceptor_connstrs) + pg.stop_and_destroy().create_start('test_restart_compute', wal_acceptors=wal_acceptor_connstrs) with closing(pg.connect()) as conn: with conn.cursor() as cur: diff --git a/test_runner/batch_others/test_snapfiles_gc.py b/test_runner/batch_others/test_snapfiles_gc.py index 9cd01ca42e..a799b34aa6 100644 --- a/test_runner/batch_others/test_snapfiles_gc.py +++ b/test_runner/batch_others/test_snapfiles_gc.py @@ -5,10 +5,15 @@ from fixtures.log_helper import log pytest_plugins = ("fixtures.zenith_fixtures") + def print_gc_result(row): - log.info("GC duration {elapsed} ms".format_map(row)); - log.info(" REL total: {layer_relfiles_total}, needed_by_cutoff {layer_relfiles_needed_by_cutoff}, needed_by_branches: {layer_relfiles_needed_by_branches}, not_updated: {layer_relfiles_not_updated}, needed_as_tombstone {layer_relfiles_needed_as_tombstone}, removed: {layer_relfiles_removed}, dropped: {layer_relfiles_dropped}".format_map(row)) - log.info(" NONREL total: {layer_nonrelfiles_total}, needed_by_cutoff {layer_nonrelfiles_needed_by_cutoff}, needed_by_branches: {layer_nonrelfiles_needed_by_branches}, not_updated: {layer_nonrelfiles_not_updated}, needed_as_tombstone {layer_nonrelfiles_needed_as_tombstone}, removed: {layer_nonrelfiles_removed}, dropped: {layer_nonrelfiles_dropped}".format_map(row)) + log.info("GC duration {elapsed} ms".format_map(row)) + log.info( + " REL total: {layer_relfiles_total}, needed_by_cutoff {layer_relfiles_needed_by_cutoff}, needed_by_branches: {layer_relfiles_needed_by_branches}, not_updated: {layer_relfiles_not_updated}, needed_as_tombstone {layer_relfiles_needed_as_tombstone}, removed: {layer_relfiles_removed}, dropped: {layer_relfiles_dropped}" + .format_map(row)) + log.info( + " NONREL total: {layer_nonrelfiles_total}, needed_by_cutoff {layer_nonrelfiles_needed_by_cutoff}, needed_by_branches: {layer_nonrelfiles_needed_by_branches}, not_updated: {layer_nonrelfiles_not_updated}, needed_as_tombstone {layer_nonrelfiles_needed_as_tombstone}, removed: {layer_nonrelfiles_removed}, dropped: {layer_nonrelfiles_dropped}" + .format_map(row)) # @@ -24,7 +29,7 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin): with closing(pg.connect()) as conn: with conn.cursor() as cur: with closing(pageserver.connect()) as psconn: - with psconn.cursor(cursor_factory = psycopg2.extras.DictCursor) as pscur: + with psconn.cursor(cursor_factory=psycopg2.extras.DictCursor) as pscur: # Get the timeline ID of our branch. We need it for the 'do_gc' command cur.execute("SHOW zenith.zenith_timeline") @@ -34,9 +39,9 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin): cur.execute("CREATE TABLE foo(x integer)") cur.execute("INSERT INTO foo VALUES (1)") - cur.execute("select relfilenode from pg_class where oid = 'foo'::regclass"); - row = cur.fetchone(); - log.info(f"relfilenode is {row[0]}"); + cur.execute("select relfilenode from pg_class where oid = 'foo'::regclass") + row = cur.fetchone() + log.info(f"relfilenode is {row[0]}") # Run GC, to clear out any garbage left behind in the catalogs by # the CREATE TABLE command. 
We want to have a clean slate with no garbage @@ -54,9 +59,10 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin): log.info("Running GC before test") pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0") row = pscur.fetchone() - print_gc_result(row); + print_gc_result(row) # remember the number of files - layer_relfiles_remain = row['layer_relfiles_total'] - row['layer_relfiles_removed'] + layer_relfiles_remain = (row['layer_relfiles_total'] - + row['layer_relfiles_removed']) assert layer_relfiles_remain > 0 # Insert a row and run GC. Checkpoint should freeze the layer @@ -66,7 +72,7 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin): cur.execute("INSERT INTO foo VALUES (1)") pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0") row = pscur.fetchone() - print_gc_result(row); + print_gc_result(row) assert row['layer_relfiles_total'] == layer_relfiles_remain + 2 assert row['layer_relfiles_removed'] == 2 assert row['layer_relfiles_dropped'] == 0 @@ -80,7 +86,7 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin): pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0") row = pscur.fetchone() - print_gc_result(row); + print_gc_result(row) assert row['layer_relfiles_total'] == layer_relfiles_remain + 2 assert row['layer_relfiles_removed'] == 2 assert row['layer_relfiles_dropped'] == 0 @@ -92,7 +98,7 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin): pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0") row = pscur.fetchone() - print_gc_result(row); + print_gc_result(row) assert row['layer_relfiles_total'] == layer_relfiles_remain + 2 assert row['layer_relfiles_removed'] == 2 assert row['layer_relfiles_dropped'] == 0 @@ -101,7 +107,7 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin): log.info("Run GC again, with nothing to do") pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0") row = pscur.fetchone() - print_gc_result(row); + print_gc_result(row) assert row['layer_relfiles_total'] == layer_relfiles_remain assert row['layer_relfiles_removed'] == 0 assert row['layer_relfiles_dropped'] == 0 @@ -109,12 +115,12 @@ def test_layerfiles_gc(zenith_cli, pageserver, postgres, pg_bin): # # Test DROP TABLE checks that relation data and metadata was deleted by GC from object storage # - log.info("Drop table and run GC again"); + log.info("Drop table and run GC again") cur.execute("DROP TABLE foo") pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0") row = pscur.fetchone() - print_gc_result(row); + print_gc_result(row) # We still cannot remove the latest layers # because they serve as tombstones for earlier layers. 
diff --git a/test_runner/batch_others/test_tenants.py b/test_runner/batch_others/test_tenants.py index ee6bb0bfd3..d646f10666 100644 --- a/test_runner/batch_others/test_tenants.py +++ b/test_runner/batch_others/test_tenants.py @@ -21,18 +21,30 @@ def test_tenants_normal_work( tenant_1 = tenant_factory.create() tenant_2 = tenant_factory.create() - zenith_cli.run(["branch", f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}", "main", f"--tenantid={tenant_1}"]) - zenith_cli.run(["branch", f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}", "main", f"--tenantid={tenant_2}"]) + zenith_cli.run([ + "branch", + f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}", + "main", + f"--tenantid={tenant_1}" + ]) + zenith_cli.run([ + "branch", + f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}", + "main", + f"--tenantid={tenant_2}" + ]) if with_wal_acceptors: wa_factory.start_n_new(3) pg_tenant1 = postgres.create_start( f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}", + None, # branch name, None means same as node name tenant_1, wal_acceptors=wa_factory.get_connstrs() if with_wal_acceptors else None, ) pg_tenant2 = postgres.create_start( f"test_tenants_normal_work_with_wal_acceptors{with_wal_acceptors}", + None, # branch name, None means same as node name tenant_2, wal_acceptors=wa_factory.get_connstrs() if with_wal_acceptors else None, ) @@ -45,4 +57,4 @@ def test_tenants_normal_work( cur.execute("CREATE TABLE t(key int primary key, value text)") cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'") cur.execute("SELECT sum(key) FROM t") - assert cur.fetchone() == (5000050000,) + assert cur.fetchone() == (5000050000, ) diff --git a/test_runner/batch_others/test_timeline_size.py b/test_runner/batch_others/test_timeline_size.py index acc2394306..819edc26b4 100644 --- a/test_runner/batch_others/test_timeline_size.py +++ b/test_runner/batch_others/test_timeline_size.py @@ -4,9 +4,8 @@ import psycopg2.extras from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver from fixtures.log_helper import log -def test_timeline_size( - zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin -): + +def test_timeline_size(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin): # Branch at the point where only 100 rows were inserted zenith_cli.run(["branch", "test_timeline_size", "empty"]) @@ -23,13 +22,11 @@ def test_timeline_size( # Create table, and insert the first 100 rows cur.execute("CREATE TABLE foo (t text)") - cur.execute( - """ + cur.execute(""" INSERT INTO foo SELECT 'long string to consume some space' || g FROM generate_series(1, 10) g - """ - ) + """) res = client.branch_detail(UUID(pageserver.initial_tenant), "test_timeline_size") assert res["current_logical_size"] == res["current_logical_size_non_incremental"] diff --git a/test_runner/batch_others/test_twophase.py b/test_runner/batch_others/test_twophase.py index a6315fed15..bc6ee076c1 100644 --- a/test_runner/batch_others/test_twophase.py +++ b/test_runner/batch_others/test_twophase.py @@ -9,7 +9,10 @@ pytest_plugins = ("fixtures.zenith_fixtures") # # Test branching, when a transaction is in prepared state # -def test_twophase(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin: PgBin): +def test_twophase(zenith_cli, + pageserver: ZenithPageserver, + postgres: PostgresFactory, + pg_bin: PgBin): zenith_cli.run(["branch", "test_twophase", "empty"]) pg = 
postgres.create_start('test_twophase', config_lines=['max_prepared_transactions=5']) @@ -79,8 +82,8 @@ def test_twophase(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFa cur2.execute("ROLLBACK PREPARED 'insert_two'") cur2.execute('SELECT * FROM foo') - assert cur2.fetchall() == [('one',), ('three',)] + assert cur2.fetchall() == [('one', ), ('three', )] # Only one committed insert is visible on the original branch cur.execute('SELECT * FROM foo') - assert cur.fetchall() == [('three',)] + assert cur.fetchall() == [('three', )] diff --git a/test_runner/batch_others/test_vm_bits.py b/test_runner/batch_others/test_vm_bits.py index 86c56ddb9c..6f19940f2f 100644 --- a/test_runner/batch_others/test_vm_bits.py +++ b/test_runner/batch_others/test_vm_bits.py @@ -3,11 +3,16 @@ from fixtures.log_helper import log pytest_plugins = ("fixtures.zenith_fixtures") + # # Test that the VM bit is cleared correctly at a HEAP_DELETE and # HEAP_UPDATE record. # -def test_vm_bit_clear(pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin, zenith_cli, base_dir): +def test_vm_bit_clear(pageserver: ZenithPageserver, + postgres: PostgresFactory, + pg_bin, + zenith_cli, + base_dir): # Create a branch for us zenith_cli.run(["branch", "test_vm_bit_clear", "empty"]) pg = postgres.create_start('test_vm_bit_clear') @@ -49,13 +54,12 @@ def test_vm_bit_clear(pageserver: ZenithPageserver, postgres: PostgresFactory, p ''') cur.execute('SELECT * FROM vmtest_delete WHERE id = 1') - assert(cur.fetchall() == []); + assert (cur.fetchall() == []) cur.execute('SELECT * FROM vmtest_update WHERE id = 1') - assert(cur.fetchall() == []); + assert (cur.fetchall() == []) cur.close() - # Check the same thing on the branch that we created right after the DELETE # # As of this writing, the code in smgrwrite() creates a full-page image whenever @@ -75,6 +79,6 @@ def test_vm_bit_clear(pageserver: ZenithPageserver, postgres: PostgresFactory, p ''') cur_new.execute('SELECT * FROM vmtest_delete WHERE id = 1') - assert(cur_new.fetchall() == []); + assert (cur_new.fetchall() == []) cur_new.execute('SELECT * FROM vmtest_update WHERE id = 1') - assert(cur_new.fetchall() == []); + assert (cur_new.fetchall() == []) diff --git a/test_runner/batch_others/test_wal_acceptor.py b/test_runner/batch_others/test_wal_acceptor.py index 263757e2e7..3eaadc78a6 100644 --- a/test_runner/batch_others/test_wal_acceptor.py +++ b/test_runner/batch_others/test_wal_acceptor.py @@ -16,7 +16,10 @@ pytest_plugins = ("fixtures.zenith_fixtures") # basic test, write something in setup with wal acceptors, ensure that commits # succeed and data is written -def test_normal_work(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory): +def test_normal_work(zenith_cli, + pageserver: ZenithPageserver, + postgres: PostgresFactory, + wa_factory): zenith_cli.run(["branch", "test_wal_acceptors_normal_work", "empty"]) wa_factory.start_n_new(3) pg = postgres.create_start('test_wal_acceptors_normal_work', @@ -34,7 +37,10 @@ def test_normal_work(zenith_cli, pageserver: ZenithPageserver, postgres: Postgre # Run page server and multiple acceptors, and multiple compute nodes running # against different timelines. 
-def test_many_timelines(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory): +def test_many_timelines(zenith_cli, + pageserver: ZenithPageserver, + postgres: PostgresFactory, + wa_factory): n_timelines = 2 wa_factory.start_n_new(3) @@ -66,7 +72,10 @@ def test_many_timelines(zenith_cli, pageserver: ZenithPageserver, postgres: Post # Check that dead minority doesn't prevent the commits: execute insert n_inserts # times, with fault_probability chance of getting a wal acceptor down or up # along the way. 2 of 3 are always alive, so the work keeps going. -def test_restarts(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory: WalAcceptorFactory): +def test_restarts(zenith_cli, + pageserver: ZenithPageserver, + postgres: PostgresFactory, + wa_factory: WalAcceptorFactory): fault_probability = 0.01 n_inserts = 1000 n_acceptors = 3 @@ -177,7 +186,11 @@ def stop_value(): # do inserts while concurrently getting up/down subsets of acceptors -def test_race_conditions(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, wa_factory, stop_value): +def test_race_conditions(zenith_cli, + pageserver: ZenithPageserver, + postgres: PostgresFactory, + wa_factory, + stop_value): wa_factory.start_n_new(3) @@ -318,6 +331,5 @@ def test_timeline_status(zenith_cli, pageserver, postgres, wa_factory: WalAccept pg.stop().start() pg.safe_psql("insert into t values(10)") - epoch_after_reboot = wa_http_cli.timeline_status(tenant_id, - timeline_id).acceptor_epoch + epoch_after_reboot = wa_http_cli.timeline_status(tenant_id, timeline_id).acceptor_epoch assert epoch_after_reboot > epoch diff --git a/test_runner/batch_others/test_wal_acceptor_async.py b/test_runner/batch_others/test_wal_acceptor_async.py index b2faa9b985..a86b17c946 100644 --- a/test_runner/batch_others/test_wal_acceptor_async.py +++ b/test_runner/batch_others/test_wal_acceptor_async.py @@ -1,9 +1,11 @@ import asyncio import asyncpg import random +import time from fixtures.zenith_fixtures import WalAcceptor, WalAcceptorFactory, ZenithPageserver, PostgresFactory, Postgres from fixtures.log_helper import getLogger +from fixtures.utils import lsn_from_hex, lsn_to_hex from typing import List log = getLogger('root.wal_acceptor_async') @@ -19,13 +21,16 @@ class BankClient(object): async def initdb(self): await self.conn.execute('DROP TABLE IF EXISTS bank_accs') await self.conn.execute('CREATE TABLE bank_accs(uid int primary key, amount int)') - await self.conn.execute(''' + await self.conn.execute( + ''' INSERT INTO bank_accs SELECT *, $1 FROM generate_series(0, $2) - ''', self.init_amount, self.n_accounts - 1) + ''', + self.init_amount, + self.n_accounts - 1) await self.conn.execute('DROP TABLE IF EXISTS bank_log') await self.conn.execute('CREATE TABLE bank_log(from_uid int, to_uid int, amount int)') - + # TODO: Remove when https://github.com/zenithdb/zenith/issues/644 is fixed await self.conn.execute('ALTER TABLE bank_accs SET (autovacuum_enabled = false)') await self.conn.execute('ALTER TABLE bank_log SET (autovacuum_enabled = false)') @@ -34,6 +39,7 @@ class BankClient(object): row = await self.conn.fetchrow('SELECT sum(amount) AS sum FROM bank_accs') assert row['sum'] == self.n_accounts * self.init_amount + async def bank_transfer(conn: asyncpg.Connection, from_uid, to_uid, amount): # avoid deadlocks by sorting uids if from_uid > to_uid: @@ -42,16 +48,22 @@ async def bank_transfer(conn: asyncpg.Connection, from_uid, to_uid, amount): async with conn.transaction(): await conn.execute( 
'UPDATE bank_accs SET amount = amount + ($1) WHERE uid = $2', - amount, to_uid, + amount, + to_uid, ) await conn.execute( 'UPDATE bank_accs SET amount = amount - ($1) WHERE uid = $2', - amount, from_uid, + amount, + from_uid, ) - await conn.execute('INSERT INTO bank_log VALUES ($1, $2, $3)', - from_uid, to_uid, amount, + await conn.execute( + 'INSERT INTO bank_log VALUES ($1, $2, $3)', + from_uid, + to_uid, + amount, ) + class WorkerStats(object): def __init__(self, n_workers): self.counters = [0] * n_workers @@ -92,6 +104,38 @@ async def run_random_worker(stats: WorkerStats, pg: Postgres, worker_id, n_accou await pg_conn.close() +async def wait_for_lsn(safekeeper: WalAcceptor, + tenant_id: str, + timeline_id: str, + wait_lsn: str, + polling_interval=1, + timeout=600): + """ + Poll flush_lsn from safekeeper until it's greater or equal than + provided wait_lsn. To do that, timeline_status is fetched from + safekeeper every polling_interval seconds. + """ + + started_at = time.time() + client = safekeeper.http_client() + + flush_lsn = client.timeline_status(tenant_id, timeline_id).flush_lsn + log.info( + f'Safekeeper at port {safekeeper.port.pg} has flush_lsn {flush_lsn}, waiting for lsn {wait_lsn}' + ) + + while lsn_from_hex(wait_lsn) > lsn_from_hex(flush_lsn): + elapsed = time.time() - started_at + if elapsed > timeout: + raise RuntimeError( + f"timed out waiting for safekeeper at port {safekeeper.port.pg} to reach {wait_lsn}, current lsn is {flush_lsn}" + ) + + await asyncio.sleep(polling_interval) + flush_lsn = client.timeline_status(tenant_id, timeline_id).flush_lsn + log.debug(f'safekeeper port={safekeeper.port.pg} flush_lsn={flush_lsn} wait_lsn={wait_lsn}') + + # This test will run several iterations and check progress in each of them. # On each iteration 1 acceptor is stopped, and 2 others should allow # background workers execute transactions. In the end, state should remain @@ -104,6 +148,9 @@ async def run_restarts_under_load(pg: Postgres, acceptors: List[WalAcceptor], n_ iterations = 6 pg_conn = await pg.connect_async() + tenant_id = await pg_conn.fetchval("show zenith.zenith_tenant") + timeline_id = await pg_conn.fetchval("show zenith.zenith_timeline") + bank = BankClient(pg_conn, n_accounts=n_accounts, init_amount=init_amount) # create tables and initial balances await bank.initdb() @@ -114,19 +161,19 @@ async def run_restarts_under_load(pg: Postgres, acceptors: List[WalAcceptor], n_ worker = run_random_worker(stats, pg, worker_id, bank.n_accounts, max_transfer) workers.append(asyncio.create_task(worker)) - for it in range(iterations): - victim = acceptors[it % len(acceptors)] + victim_idx = it % len(acceptors) + victim = acceptors[victim_idx] victim.stop() - # Wait till previous victim recovers so it is ready for the next - # iteration by making any writing xact. 
- conn = await pg.connect_async() - await conn.execute( - 'UPDATE bank_accs SET amount = amount WHERE uid = 1', - timeout=120 - ) - await conn.close() + flush_lsn = await pg_conn.fetchval('SELECT pg_current_wal_flush_lsn()') + flush_lsn = lsn_to_hex(flush_lsn) + log.info(f'Postgres flush_lsn {flush_lsn}') + + # Wait until alive safekeepers catch up with postgres + for idx, safekeeper in enumerate(acceptors): + if idx != victim_idx: + await wait_for_lsn(safekeeper, tenant_id, timeline_id, flush_lsn) stats.reset() await asyncio.sleep(period_time) @@ -145,7 +192,9 @@ async def run_restarts_under_load(pg: Postgres, acceptors: List[WalAcceptor], n_ # restart acceptors one by one, while executing and validating bank transactions -def test_restarts_under_load(zenith_cli, pageserver: ZenithPageserver, postgres: PostgresFactory, +def test_restarts_under_load(zenith_cli, + pageserver: ZenithPageserver, + postgres: PostgresFactory, wa_factory: WalAcceptorFactory): wa_factory.start_n_new(3) diff --git a/test_runner/batch_others/test_zenith_cli.py b/test_runner/batch_others/test_zenith_cli.py index be9e2b07fd..7379cf2981 100644 --- a/test_runner/batch_others/test_zenith_cli.py +++ b/test_runner/batch_others/test_zenith_cli.py @@ -23,8 +23,11 @@ def helper_compare_branch_list(page_server_cur, zenith_cli, initial_tenant: str) res = zenith_cli.run(["branch", f"--tenantid={initial_tenant}"]) res.check_returncode() - branches_cli_with_tenant_arg = sorted(map(lambda b: b.split(':')[-1].strip(), res.stdout.strip().split("\n"))) - branches_cli_with_tenant_arg = [b for b in branches_cli if b.startswith('test_cli_') or b in ('empty', 'main')] + branches_cli_with_tenant_arg = sorted( + map(lambda b: b.split(':')[-1].strip(), res.stdout.strip().split("\n"))) + branches_cli_with_tenant_arg = [ + b for b in branches_cli if b.startswith('test_cli_') or b in ('empty', 'main') + ] assert branches_api == branches_cli == branches_cli_with_tenant_arg @@ -54,6 +57,7 @@ def test_cli_branch_list(pageserver: ZenithPageserver, zenith_cli): assert 'test_cli_branch_list_main' in branches_cli assert 'test_cli_branch_list_nested' in branches_cli + def helper_compare_tenant_list(page_server_cur, zenith_cli: ZenithCli): page_server_cur.execute(f'tenant_list') tenants_api = sorted(json.loads(page_server_cur.fetchone()[0])) diff --git a/test_runner/batch_pg_regress/test_isolation.py b/test_runner/batch_pg_regress/test_isolation.py index ae654401cc..0f215337be 100644 --- a/test_runner/batch_pg_regress/test_isolation.py +++ b/test_runner/batch_pg_regress/test_isolation.py @@ -6,8 +6,14 @@ from fixtures.zenith_fixtures import ZenithPageserver, PostgresFactory pytest_plugins = ("fixtures.zenith_fixtures") -def test_isolation(pageserver: ZenithPageserver, postgres: PostgresFactory, pg_bin, zenith_cli, test_output_dir, pg_distrib_dir, - base_dir, capsys): +def test_isolation(pageserver: ZenithPageserver, + postgres: PostgresFactory, + pg_bin, + zenith_cli, + test_output_dir, + pg_distrib_dir, + base_dir, + capsys): # Create a branch for us zenith_cli.run(["branch", "test_isolation", "empty"]) diff --git a/test_runner/batch_pg_regress/test_pg_regress.py b/test_runner/batch_pg_regress/test_pg_regress.py index 6f61b77ebc..2fd7fee314 100644 --- a/test_runner/batch_pg_regress/test_pg_regress.py +++ b/test_runner/batch_pg_regress/test_pg_regress.py @@ -6,8 +6,14 @@ from fixtures.zenith_fixtures import PostgresFactory, ZenithPageserver, check_re pytest_plugins = ("fixtures.zenith_fixtures") -def test_pg_regress(pageserver: ZenithPageserver, 
postgres: PostgresFactory, pg_bin, zenith_cli, test_output_dir, pg_distrib_dir, - base_dir, capsys): +def test_pg_regress(pageserver: ZenithPageserver, + postgres: PostgresFactory, + pg_bin, + zenith_cli, + test_output_dir, + pg_distrib_dir, + base_dir, + capsys): # Create a branch for us zenith_cli.run(["branch", "test_pg_regress", "empty"]) diff --git a/test_runner/batch_pg_regress/test_zenith_regress.py b/test_runner/batch_pg_regress/test_zenith_regress.py index ab43f511ef..ca1422388e 100644 --- a/test_runner/batch_pg_regress/test_zenith_regress.py +++ b/test_runner/batch_pg_regress/test_zenith_regress.py @@ -7,8 +7,14 @@ from fixtures.log_helper import log pytest_plugins = ("fixtures.zenith_fixtures") -def test_zenith_regress(postgres: PostgresFactory, pg_bin, zenith_cli, test_output_dir, pg_distrib_dir, - base_dir, capsys, pageserver_port: PageserverPort): +def test_zenith_regress(postgres: PostgresFactory, + pg_bin, + zenith_cli, + test_output_dir, + pg_distrib_dir, + base_dir, + capsys, + pageserver_port: PageserverPort): # Create a branch for us zenith_cli.run(["branch", "test_zenith_regress", "empty"]) diff --git a/test_runner/fixtures/benchmark_fixture.py b/test_runner/fixtures/benchmark_fixture.py index c70ee36256..f41d66674d 100644 --- a/test_runner/fixtures/benchmark_fixture.py +++ b/test_runner/fixtures/benchmark_fixture.py @@ -24,7 +24,6 @@ from typing import Any, Callable, Dict, Iterator, List, Optional, TypeVar, cast from typing_extensions import Literal from .utils import (get_self_dir, mkdir_if_needed, subprocess_capture) - """ This file contains fixtures for micro-benchmarks. @@ -55,7 +54,6 @@ in the test initialization, or measure disk usage after the test query. """ - # All the results are collected in this list, as a tuple: # (test_name: str, metric_name: str, metric_value: float, unit: str) # @@ -65,6 +63,7 @@ in the test initialization, or measure disk usage after the test query. global zenbenchmark_results zenbenchmark_results = [] + class ZenithBenchmarkResults: """ An object for recording benchmark results. """ def __init__(self): @@ -77,6 +76,7 @@ class ZenithBenchmarkResults: self.results.append((test_name, metric_name, metric_value, unit)) + # Session scope fixture that initializes the results object @pytest.fixture(autouse=True, scope='session') def zenbenchmark_global(request) -> Iterator[ZenithBenchmarkResults]: @@ -88,6 +88,7 @@ def zenbenchmark_global(request) -> Iterator[ZenithBenchmarkResults]: yield zenbenchmark_results + class ZenithBenchmarker: """ An object for recording benchmark results. This is created for each test @@ -103,7 +104,6 @@ class ZenithBenchmarker: """ self.results.record(self.request.node.name, metric_name, metric_value, unit) - @contextmanager def record_duration(self, metric_name): """ @@ -134,7 +134,8 @@ class ZenithBenchmarker: # The metric should be an integer, as it's a number of bytes. But in general # all prometheus metrics are floats. So to be pedantic, read it as a float # and round to integer. 
- matches = re.search(r'^pageserver_disk_io_bytes{io_operation="write"} (\S+)$', all_metrics, + matches = re.search(r'^pageserver_disk_io_bytes{io_operation="write"} (\S+)$', + all_metrics, re.MULTILINE) return int(round(float(matches.group(1)))) @@ -145,8 +146,7 @@ class ZenithBenchmarker: # Fetch all the exposed prometheus metrics from page server all_metrics = pageserver.http_client().get_metrics() # See comment in get_io_writes() - matches = re.search(r'^pageserver_maxrss_kb (\S+)$', all_metrics, - re.MULTILINE) + matches = re.search(r'^pageserver_maxrss_kb (\S+)$', all_metrics, re.MULTILINE) return int(round(float(matches.group(1)))) def get_timeline_size(self, repo_dir: str, tenantid: str, timelineid: str): @@ -171,7 +171,11 @@ class ZenithBenchmarker: yield after = self.get_io_writes(pageserver) - self.results.record(self.request.node.name, metric_name, round((after - before) / (1024 * 1024)), 'MB') + self.results.record(self.request.node.name, + metric_name, + round((after - before) / (1024 * 1024)), + 'MB') + @pytest.fixture(scope='function') def zenbenchmark(zenbenchmark_global, request) -> Iterator[ZenithBenchmarker]: @@ -185,9 +189,7 @@ def zenbenchmark(zenbenchmark_global, request) -> Iterator[ZenithBenchmarker]: # Hook to print the results at the end @pytest.hookimpl(hookwrapper=True) -def pytest_terminal_summary( - terminalreporter: TerminalReporter, exitstatus: int, config: Config -): +def pytest_terminal_summary(terminalreporter: TerminalReporter, exitstatus: int, config: Config): yield global zenbenchmark_results diff --git a/test_runner/fixtures/log_helper.py b/test_runner/fixtures/log_helper.py index f253576e7b..cab7462a51 100644 --- a/test_runner/fixtures/log_helper.py +++ b/test_runner/fixtures/log_helper.py @@ -1,6 +1,5 @@ import logging import logging.config - """ This file configures logging to use in python tests. Logs are automatically captured and shown in their @@ -27,17 +26,19 @@ LOGGING = { "level": "INFO" }, "root.wal_acceptor_async": { - "level": "INFO" # a lot of logs on DEBUG level + "level": "INFO" # a lot of logs on DEBUG level } } } + def getLogger(name='root') -> logging.Logger: """Method to get logger for tests. Should be used to get correctly initialized logger. """ return logging.getLogger(name) + # default logger for tests log = getLogger() diff --git a/test_runner/fixtures/utils.py b/test_runner/fixtures/utils.py index dd90a36dbb..51005e3c48 100644 --- a/test_runner/fixtures/utils.py +++ b/test_runner/fixtures/utils.py @@ -4,6 +4,7 @@ import subprocess from typing import Any, List from fixtures.log_helper import log + def get_self_dir() -> str: """ Get the path to the directory where this script lives. """ return os.path.dirname(os.path.abspath(__file__)) @@ -58,6 +59,13 @@ def global_counter() -> int: _global_counter += 1 return _global_counter + def lsn_to_hex(num: int) -> str: """ Convert lsn from int to standard hex notation. """ return "{:X}/{:X}".format(num >> 32, num & 0xffffffff) + + +def lsn_from_hex(lsn_hex: str) -> int: + """ Convert lsn from hex notation to int. 
""" + l, r = lsn_hex.split('/') + return (int(l, 16) << 32) + int(r, 16) diff --git a/test_runner/fixtures/zenith_fixtures.py b/test_runner/fixtures/zenith_fixtures.py index 80246193fa..b343828006 100644 --- a/test_runner/fixtures/zenith_fixtures.py +++ b/test_runner/fixtures/zenith_fixtures.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from functools import cached_property +from cached_property import cached_property import asyncpg import os import pathlib @@ -13,9 +13,8 @@ import signal import subprocess import time import filecmp -import difflib -from contextlib import closing +from contextlib import closing, suppress from pathlib import Path from dataclasses import dataclass @@ -28,7 +27,6 @@ import requests from .utils import (get_self_dir, mkdir_if_needed, subprocess_capture) from fixtures.log_helper import log - """ This file contains pytest fixtures. A fixture is a test resource that can be summoned by placing its name in the test's arguments. @@ -56,14 +54,15 @@ DEFAULT_POSTGRES_DIR = 'tmp_install' BASE_PORT = 15000 WORKER_PORT_NUM = 100 + def pytest_configure(config): """ Ensure that no unwanted daemons are running before we start testing. Check that we do not owerflow available ports range. """ numprocesses = config.getoption('numprocesses') - if numprocesses is not None and BASE_PORT + numprocesses * WORKER_PORT_NUM > 32768: # do not use ephemeral ports - raise Exception('Too many workers configured. Cannot distrubute ports for services.') + if numprocesses is not None and BASE_PORT + numprocesses * WORKER_PORT_NUM > 32768: # do not use ephemeral ports + raise Exception('Too many workers configured. Cannot distrubute ports for services.') # does not use -c as it is not supported on macOS cmd = ['pgrep', 'pageserver|postgres|safekeeper'] @@ -107,7 +106,11 @@ class PgProtocol: self.port = port self.username = username or "zenith_admin" - def connstr(self, *, dbname: str = 'postgres', username: Optional[str] = None, password: Optional[str] = None) -> str: + def connstr(self, + *, + dbname: str = 'postgres', + username: Optional[str] = None, + password: Optional[str] = None) -> str: """ Build a libpq connection string for the Postgres instance. """ @@ -119,7 +122,12 @@ class PgProtocol: return f'{res} password={password}' # autocommit=True here by default because that's what we need most of the time - def connect(self, *, autocommit=True, dbname: str = 'postgres', username: Optional[str] = None, password: Optional[str] = None) -> PgConnection: + def connect(self, + *, + autocommit=True, + dbname: str = 'postgres', + username: Optional[str] = None, + password: Optional[str] = None) -> PgConnection: """ Connect to the node. Returns psycopg2's connection object. @@ -135,7 +143,11 @@ class PgProtocol: conn.autocommit = autocommit return conn - async def connect_async(self, *, dbname: str = 'postgres', username: Optional[str] = None, password: Optional[str] = None) -> asyncpg.Connection: + async def connect_async(self, + *, + dbname: str = 'postgres', + username: Optional[str] = None, + password: Optional[str] = None) -> asyncpg.Connection: """ Connect to the node from async python. Returns asyncpg's connection object. 
@@ -201,11 +213,11 @@ class ZenithCli: # Interceipt CalledProcessError and print more info try: res = subprocess.run(args, - env=self.env, - check=True, - universal_newlines=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + env=self.env, + check=True, + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) except subprocess.CalledProcessError as exc: # this way command output will be in recorded and shown in CI in failure message msg = f"""\ @@ -243,21 +255,17 @@ class ZenithPageserverHttpClient(requests.Session): return res.json() def branch_create(self, tenant_id: uuid.UUID, name: str, start_point: str) -> Dict: - res = self.post( - f"http://localhost:{self.port}/v1/branch", - json={ - 'tenant_id': tenant_id.hex, - 'name': name, - 'start_point': start_point, - } - ) + res = self.post(f"http://localhost:{self.port}/v1/branch", + json={ + 'tenant_id': tenant_id.hex, + 'name': name, + 'start_point': start_point, + }) res.raise_for_status() return res.json() def branch_detail(self, tenant_id: uuid.UUID, name: str) -> Dict: - res = self.get( - f"http://localhost:{self.port}/v1/branch/{tenant_id.hex}/{name}", - ) + res = self.get(f"http://localhost:{self.port}/v1/branch/{tenant_id.hex}/{name}", ) res.raise_for_status() return res.json() @@ -299,7 +307,11 @@ class AuthKeys: return token def generate_tenant_token(self, tenant_id): - token = jwt.encode({"scope": "tenant", "tenant_id": tenant_id}, self.priv, algorithm="RS256") + token = jwt.encode({ + "scope": "tenant", "tenant_id": tenant_id + }, + self.priv, + algorithm="RS256") if isinstance(token, bytes): token = token.decode() @@ -324,6 +336,7 @@ def worker_base_port(worker_seq_no: int): # so workers have disjoint set of ports for services return BASE_PORT + worker_seq_no * WORKER_PORT_NUM + class PortDistributor: def __init__(self, base_port: int, port_number: int) -> None: self.iterator = iter(range(base_port, base_port + port_number)) @@ -332,13 +345,15 @@ class PortDistributor: try: return next(self.iterator) except StopIteration: - raise RuntimeError('port range configured for test is exhausted, consider enlarging the range') + raise RuntimeError( + 'port range configured for test is exhausted, consider enlarging the range') @zenfixture def port_distributor(worker_base_port): return PortDistributor(base_port=worker_base_port, port_number=WORKER_PORT_NUM) + @dataclass class PageserverPort: pg: int @@ -353,14 +368,18 @@ class ZenithPageserver(PgProtocol): self.running = False self.initial_tenant = None self.repo_dir = repo_dir - self.service_port = port # do not shadow PgProtocol.port which is just int + self.service_port = port # do not shadow PgProtocol.port which is just int def init(self, enable_auth: bool = False) -> 'ZenithPageserver': """ Initialize the repository, i.e. run "zenith init". Returns self. 
""" - cmd = ['init', f'--pageserver-pg-port={self.service_port.pg}', f'--pageserver-http-port={self.service_port.http}'] + cmd = [ + 'init', + f'--pageserver-pg-port={self.service_port.pg}', + f'--pageserver-http-port={self.service_port.http}' + ] if enable_auth: cmd.append('--enable-auth') self.zenith_cli.run(cmd) @@ -420,8 +439,6 @@ class ZenithPageserver(PgProtocol): ) - - @zenfixture def pageserver_port(port_distributor: PortDistributor) -> PageserverPort: pg = port_distributor.get_port() @@ -431,7 +448,8 @@ def pageserver_port(port_distributor: PortDistributor) -> PageserverPort: @zenfixture -def pageserver(zenith_cli: ZenithCli, repo_dir: str, pageserver_port: PageserverPort) -> Iterator[ZenithPageserver]: +def pageserver(zenith_cli: ZenithCli, repo_dir: str, + pageserver_port: PageserverPort) -> Iterator[ZenithPageserver]: """ The 'pageserver' fixture provides a Page Server that's up and running. @@ -443,7 +461,8 @@ def pageserver(zenith_cli: ZenithCli, repo_dir: str, pageserver_port: Pageserver By convention, the test branches are named after the tests. For example, test called 'test_foo' would create and use branches with the 'test_foo' prefix. """ - ps = ZenithPageserver(zenith_cli=zenith_cli, repo_dir=repo_dir, port=pageserver_port).init().start() + ps = ZenithPageserver(zenith_cli=zenith_cli, repo_dir=repo_dir, + port=pageserver_port).init().start() # For convenience in tests, create a branch from the freshly-initialized cluster. zenith_cli.run(["branch", "empty", "main"]) @@ -453,6 +472,7 @@ def pageserver(zenith_cli: ZenithCli, repo_dir: str, pageserver_port: Pageserver log.info('Starting pageserver cleanup') ps.stop(True) + class PgBin: """ A helper class for executing postgres binaries """ def __init__(self, log_dir: str, pg_distrib_dir: str): @@ -514,9 +534,11 @@ class PgBin: def pg_bin(test_output_dir: str, pg_distrib_dir: str) -> PgBin: return PgBin(test_output_dir, pg_distrib_dir) + @pytest.fixture def pageserver_auth_enabled(zenith_cli: ZenithCli, repo_dir: str, pageserver_port: PageserverPort): - with ZenithPageserver(zenith_cli=zenith_cli, repo_dir=repo_dir, port=pageserver_port).init(enable_auth=True).start() as ps: + with ZenithPageserver(zenith_cli=zenith_cli, repo_dir=repo_dir, + port=pageserver_port).init(enable_auth=True).start() as ps: # For convenience in tests, create a branch from the freshly-initialized cluster. zenith_cli.run(["branch", "empty", "main"]) yield ps @@ -524,21 +546,27 @@ def pageserver_auth_enabled(zenith_cli: ZenithCli, repo_dir: str, pageserver_por class Postgres(PgProtocol): """ An object representing a running postgres daemon. 
""" - def __init__(self, zenith_cli: ZenithCli, repo_dir: str, pg_bin: PgBin, tenant_id: str, port: int): + def __init__(self, + zenith_cli: ZenithCli, + repo_dir: str, + pg_bin: PgBin, + tenant_id: str, + port: int): super().__init__(host='localhost', port=port) self.zenith_cli = zenith_cli self.running = False self.repo_dir = repo_dir - self.branch: Optional[str] = None # dubious, see asserts below - self.pgdata_dir: Optional[str] = None # Path to computenode PGDATA + self.node_name: Optional[str] = None # dubious, see asserts below + self.pgdata_dir: Optional[str] = None # Path to computenode PGDATA self.tenant_id = tenant_id self.pg_bin = pg_bin - # path to conf is /pgdatadirs/tenants///postgresql.conf + # path to conf is /pgdatadirs/tenants///postgresql.conf def create( self, - branch: str, + node_name: str, + branch: Optional[str] = None, wal_acceptors: Optional[str] = None, config_lines: Optional[List[str]] = None, ) -> 'Postgres': @@ -552,9 +580,19 @@ class Postgres(PgProtocol): if not config_lines: config_lines = [] - self.zenith_cli.run(['pg', 'create', branch, f'--tenantid={self.tenant_id}', f'--port={self.port}']) - self.branch = branch - path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id / self.branch + if branch is None: + branch = node_name + + self.zenith_cli.run([ + 'pg', + 'create', + f'--tenantid={self.tenant_id}', + f'--port={self.port}', + node_name, + branch + ]) + self.node_name = node_name + path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id / self.node_name self.pgdata_dir = os.path.join(self.repo_dir, path) if wal_acceptors is not None: @@ -571,11 +609,12 @@ class Postgres(PgProtocol): Returns self. """ - assert self.branch is not None + assert self.node_name is not None - log.info(f"Starting postgres on branch {self.branch}") + log.info(f"Starting postgres node {self.node_name}") - run_result = self.zenith_cli.run(['pg', 'start', self.branch, f'--tenantid={self.tenant_id}', f'--port={self.port}']) + run_result = self.zenith_cli.run( + ['pg', 'start', f'--tenantid={self.tenant_id}', f'--port={self.port}', self.node_name]) self.running = True log.info(f"stdout: {run_result.stdout}") @@ -584,7 +623,7 @@ class Postgres(PgProtocol): def pg_data_dir_path(self) -> str: """ Path to data directory """ - path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id / self.branch + path = pathlib.Path('pgdatadirs') / 'tenants' / self.tenant_id / self.node_name return os.path.join(self.repo_dir, path) def pg_xact_dir_path(self) -> str: @@ -641,8 +680,8 @@ class Postgres(PgProtocol): """ if self.running: - assert self.branch is not None - self.zenith_cli.run(['pg', 'stop', self.branch, f'--tenantid={self.tenant_id}']) + assert self.node_name is not None + self.zenith_cli.run(['pg', 'stop', self.node_name, f'--tenantid={self.tenant_id}']) self.running = False return self @@ -653,15 +692,17 @@ class Postgres(PgProtocol): Returns self. 
""" - assert self.branch is not None + assert self.node_name is not None assert self.tenant_id is not None - self.zenith_cli.run(['pg', 'stop', '--destroy', self.branch, f'--tenantid={self.tenant_id}']) + self.zenith_cli.run( + ['pg', 'stop', '--destroy', self.node_name, f'--tenantid={self.tenant_id}']) return self def create_start( self, - branch: str, + node_name: str, + branch: Optional[str] = None, wal_acceptors: Optional[str] = None, config_lines: Optional[List[str]] = None, ) -> 'Postgres': @@ -672,6 +713,7 @@ class Postgres(PgProtocol): """ self.create( + node_name=node_name, branch=branch, wal_acceptors=wal_acceptors, config_lines=config_lines, @@ -685,9 +727,15 @@ class Postgres(PgProtocol): def __exit__(self, exc_type, exc, tb): self.stop() + class PostgresFactory: """ An object representing multiple running postgres daemons. """ - def __init__(self, zenith_cli: ZenithCli, repo_dir: str, pg_bin: PgBin, initial_tenant: str, port_distributor: PortDistributor): + def __init__(self, + zenith_cli: ZenithCli, + repo_dir: str, + pg_bin: PgBin, + initial_tenant: str, + port_distributor: PortDistributor): self.zenith_cli = zenith_cli self.repo_dir = repo_dir self.num_instances = 0 @@ -696,13 +744,13 @@ class PostgresFactory: self.port_distributor = port_distributor self.pg_bin = pg_bin - def create_start( - self, - branch: str = "main", - tenant_id: Optional[str] = None, - wal_acceptors: Optional[str] = None, - config_lines: Optional[List[str]] = None - ) -> Postgres: + def create_start(self, + node_name: str = "main", + branch: Optional[str] = None, + tenant_id: Optional[str] = None, + wal_acceptors: Optional[str] = None, + config_lines: Optional[List[str]] = None) -> Postgres: + pg = Postgres( zenith_cli=self.zenith_cli, repo_dir=self.repo_dir, @@ -714,18 +762,18 @@ class PostgresFactory: self.instances.append(pg) return pg.create_start( + node_name=node_name, branch=branch, wal_acceptors=wal_acceptors, config_lines=config_lines, ) - def create( - self, - branch: str = "main", - tenant_id: Optional[str] = None, - wal_acceptors: Optional[str] = None, - config_lines: Optional[List[str]] = None - ) -> Postgres: + def create(self, + node_name: str = "main", + branch: Optional[str] = None, + tenant_id: Optional[str] = None, + wal_acceptors: Optional[str] = None, + config_lines: Optional[List[str]] = None) -> Postgres: pg = Postgres( zenith_cli=self.zenith_cli, @@ -739,18 +787,17 @@ class PostgresFactory: self.instances.append(pg) return pg.create( + node_name=node_name, branch=branch, wal_acceptors=wal_acceptors, config_lines=config_lines, ) - def config( - self, - branch: str = "main", - tenant_id: Optional[str] = None, - wal_acceptors: Optional[str] = None, - config_lines: Optional[List[str]] = None - ) -> Postgres: + def config(self, + node_name: str = "main", + tenant_id: Optional[str] = None, + wal_acceptors: Optional[str] = None, + config_lines: Optional[List[str]] = None) -> Postgres: pg = Postgres( zenith_cli=self.zenith_cli, @@ -764,7 +811,7 @@ class PostgresFactory: self.instances.append(pg) return pg.config( - branch=branch, + node_name=node_name, wal_acceptors=wal_acceptors, config_lines=config_lines, ) @@ -775,13 +822,18 @@ class PostgresFactory: return self + @zenfixture def initial_tenant(pageserver: ZenithPageserver): return pageserver.initial_tenant @zenfixture -def postgres(zenith_cli: ZenithCli, initial_tenant: str, repo_dir: str, pg_bin: PgBin, port_distributor: PortDistributor) -> Iterator[PostgresFactory]: +def postgres(zenith_cli: ZenithCli, + initial_tenant: str, + 
repo_dir: str, + pg_bin: PgBin, + port_distributor: PortDistributor) -> Iterator[PostgresFactory]: pgfactory = PostgresFactory( zenith_cli=zenith_cli, repo_dir=repo_dir, @@ -796,6 +848,7 @@ def postgres(zenith_cli: ZenithCli, initial_tenant: str, repo_dir: str, pg_bin: log.info('Starting postgres cleanup') pgfactory.stop_all() + def read_pid(path: Path): """ Read content of file into number """ return int(path.read_text()) @@ -806,20 +859,22 @@ class WalAcceptorPort: pg: int http: int + @dataclass class WalAcceptor: """ An object representing a running wal acceptor daemon. """ wa_bin_path: Path data_dir: Path port: WalAcceptorPort - num: int # identifier for logging + num: int # identifier for logging pageserver_port: int auth_token: Optional[str] = None def start(self) -> 'WalAcceptor': # create data directory if not exists self.data_dir.mkdir(parents=True, exist_ok=True) - self.pidfile.unlink(missing_ok=True) + with suppress(FileNotFoundError): + self.pidfile.unlink() cmd = [str(self.wa_bin_path)] cmd.extend(["-D", str(self.data_dir)]) @@ -876,10 +931,11 @@ class WalAcceptor: os.kill(pid, signal.SIGTERM) except Exception: # TODO: cleanup pid file on exit in wal acceptor - pass # pidfile might be obsolete + pass # pidfile might be obsolete return self - def append_logical_message(self, tenant_id: str, timeline_id: str, request: Dict[str, Any]) -> Dict[str, Any]: + def append_logical_message(self, tenant_id: str, timeline_id: str, + request: Dict[str, Any]) -> Dict[str, Any]: """ Send JSON_CTRL query to append LogicalMessage to WAL and modify safekeeper state. It will construct LogicalMessage from provided @@ -907,7 +963,11 @@ class WalAcceptor: class WalAcceptorFactory: """ An object representing multiple running wal acceptors. """ - def __init__(self, zenith_binpath: Path, data_dir: Path, pageserver_port: int, port_distributor: PortDistributor): + def __init__(self, + zenith_binpath: Path, + data_dir: Path, + pageserver_port: int, + port_distributor: PortDistributor): self.wa_bin_path = zenith_binpath / 'safekeeper' self.data_dir = data_dir self.instances: List[WalAcceptor] = [] @@ -953,7 +1013,10 @@ class WalAcceptorFactory: @zenfixture -def wa_factory(zenith_binpath: str, repo_dir: str, pageserver_port: PageserverPort, port_distributor: PortDistributor) -> Iterator[WalAcceptorFactory]: +def wa_factory(zenith_binpath: str, + repo_dir: str, + pageserver_port: PageserverPort, + port_distributor: PortDistributor) -> Iterator[WalAcceptorFactory]: """ Gives WalAcceptorFactory providing wal acceptors. 
""" wafactory = WalAcceptorFactory( zenith_binpath=Path(zenith_binpath), @@ -966,9 +1029,12 @@ def wa_factory(zenith_binpath: str, repo_dir: str, pageserver_port: PageserverPo log.info('Starting wal acceptors cleanup') wafactory.stop_all() + @dataclass -class PageserverTimelineStatus: +class SafekeeperTimelineStatus: acceptor_epoch: int + flush_lsn: str + class WalAcceptorHttpClient(requests.Session): def __init__(self, port: int) -> None: @@ -978,11 +1044,12 @@ class WalAcceptorHttpClient(requests.Session): def check_status(self): self.get(f"http://localhost:{self.port}/v1/status").raise_for_status() - def timeline_status(self, tenant_id: str, timeline_id: str) -> PageserverTimelineStatus: + def timeline_status(self, tenant_id: str, timeline_id: str) -> SafekeeperTimelineStatus: res = self.get(f"http://localhost:{self.port}/v1/timeline/{tenant_id}/{timeline_id}") res.raise_for_status() resj = res.json() - return PageserverTimelineStatus(acceptor_epoch=resj['acceptor_state']['epoch']) + return SafekeeperTimelineStatus(acceptor_epoch=resj['acceptor_state']['epoch'], + flush_lsn=resj['flush_lsn']) @zenfixture @@ -1083,6 +1150,7 @@ class TenantFactory: def tenant_factory(zenith_cli: ZenithCli): return TenantFactory(zenith_cli) + # # Test helpers # @@ -1093,8 +1161,15 @@ def list_files_to_compare(pgdata_dir: str): rel_dir = os.path.relpath(root, pgdata_dir) # Skip some dirs and files we don't want to compare skip_dirs = ['pg_wal', 'pg_stat', 'pg_stat_tmp', 'pg_subtrans', 'pg_logical'] - skip_files = ['pg_internal.init', 'pg.log', 'zenith.signal', 'postgresql.conf', - 'postmaster.opts', 'postmaster.pid', 'pg_control'] + skip_files = [ + 'pg_internal.init', + 'pg.log', + 'zenith.signal', + 'postgresql.conf', + 'postmaster.opts', + 'postmaster.pid', + 'pg_control' + ] if rel_dir not in skip_dirs and filename not in skip_files: rel_file = os.path.join(rel_dir, filename) pgdata_files.append(rel_file) @@ -1103,8 +1178,12 @@ def list_files_to_compare(pgdata_dir: str): log.info(pgdata_files) return pgdata_files + # pg is the existing and running compute node, that we want to compare with a basebackup -def check_restored_datadir_content(zenith_cli: ZenithCli, test_output_dir: str, pg: Postgres, pageserver_pg_port: int): +def check_restored_datadir_content(zenith_cli: ZenithCli, + test_output_dir: str, + pg: Postgres, + pageserver_pg_port: int): # Get the timeline ID of our branch. We need it for the 'basebackup' command with closing(pg.connect()) as conn: @@ -1116,7 +1195,7 @@ def check_restored_datadir_content(zenith_cli: ZenithCli, test_output_dir: str, pg.stop() # Take a basebackup from pageserver - restored_dir_path = os.path.join(test_output_dir, f"{pg.branch}_restored_datadir") + restored_dir_path = os.path.join(test_output_dir, f"{pg.node_name}_restored_datadir") mkdir_if_needed(restored_dir_path) psql_path = os.path.join(pg.pg_bin.pg_bin_path, 'psql') diff --git a/test_runner/performance/test_bulk_insert.py b/test_runner/performance/test_bulk_insert.py index 1effa56ee9..cf6fa03703 100644 --- a/test_runner/performance/test_bulk_insert.py +++ b/test_runner/performance/test_bulk_insert.py @@ -5,6 +5,7 @@ from fixtures.log_helper import log pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture") + # # Run bulk INSERT test. # @@ -15,7 +16,12 @@ pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture") # 3. Disk space used # 4. 
Peak memory usage # -def test_bulk_insert(postgres: PostgresFactory, pageserver: ZenithPageserver, pg_bin, zenith_cli, zenbenchmark, repo_dir: str): +def test_bulk_insert(postgres: PostgresFactory, + pageserver: ZenithPageserver, + pg_bin, + zenith_cli, + zenbenchmark, + repo_dir: str): # Create a branch for us zenith_cli.run(["branch", "test_bulk_insert", "empty"]) @@ -24,7 +30,7 @@ def test_bulk_insert(postgres: PostgresFactory, pageserver: ZenithPageserver, pg # Open a connection directly to the page server that we'll use to force # flushing the layers to disk - psconn = pageserver.connect(); + psconn = pageserver.connect() pscur = psconn.cursor() # Get the timeline ID of our branch. We need it for the 'do_gc' command @@ -48,5 +54,7 @@ def test_bulk_insert(postgres: PostgresFactory, pageserver: ZenithPageserver, pg zenbenchmark.record("peak_mem", zenbenchmark.get_peak_mem(pageserver) / 1024, 'MB') # Report disk space used by the repository - timeline_size = zenbenchmark.get_timeline_size(repo_dir, pageserver.initial_tenant, timeline) - zenbenchmark.record('size', timeline_size / (1024*1024), 'MB') + timeline_size = zenbenchmark.get_timeline_size(repo_dir, + pageserver.initial_tenant, + timeline) + zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB') diff --git a/test_runner/performance/test_bulk_tenant_create.py b/test_runner/performance/test_bulk_tenant_create.py index e1de1dd014..1e2a17c2c9 100644 --- a/test_runner/performance/test_bulk_tenant_create.py +++ b/test_runner/performance/test_bulk_tenant_create.py @@ -37,7 +37,9 @@ def test_bulk_tenant_create( tenant = tenant_factory.create() zenith_cli.run([ - "branch", f"test_bulk_tenant_create_{tenants_count}_{i}_{use_wal_acceptors}", "main", + "branch", + f"test_bulk_tenant_create_{tenants_count}_{i}_{use_wal_acceptors}", + "main", f"--tenantid={tenant}" ]) @@ -46,6 +48,7 @@ def test_bulk_tenant_create( pg_tenant = postgres.create_start( f"test_bulk_tenant_create_{tenants_count}_{i}_{use_wal_acceptors}", + None, # branch name, None means same as node name tenant, wal_acceptors=wa_factory.get_connstrs() if use_wal_acceptors == 'with_wa' else None, ) diff --git a/test_runner/performance/test_gist_build.py b/test_runner/performance/test_gist_build.py index b9ef0a3d4b..5a80978cf0 100644 --- a/test_runner/performance/test_gist_build.py +++ b/test_runner/performance/test_gist_build.py @@ -5,12 +5,18 @@ from fixtures.log_helper import log pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture") + # # Test buffering GisT build. It WAL-logs the whole relation, in 32-page chunks. # As of this writing, we're duplicate those giant WAL records for each page, # which makes the delta layer about 32x larger than it needs to be. # -def test_gist_buffering_build(postgres: PostgresFactory, pageserver: ZenithPageserver, pg_bin, zenith_cli, zenbenchmark, repo_dir: str): +def test_gist_buffering_build(postgres: PostgresFactory, + pageserver: ZenithPageserver, + pg_bin, + zenith_cli, + zenbenchmark, + repo_dir: str): # Create a branch for us zenith_cli.run(["branch", "test_gist_buffering_build", "empty"]) @@ -19,7 +25,7 @@ def test_gist_buffering_build(postgres: PostgresFactory, pageserver: ZenithPages # Open a connection directly to the page server that we'll use to force # flushing the layers to disk - psconn = pageserver.connect(); + psconn = pageserver.connect() pscur = psconn.cursor() # Get the timeline ID of our branch. 
We need it for the 'do_gc' command @@ -29,13 +35,17 @@ def test_gist_buffering_build(postgres: PostgresFactory, pageserver: ZenithPages timeline = cur.fetchone()[0] # Create test table. - cur.execute("create table gist_point_tbl(id int4, p point)"); - cur.execute("insert into gist_point_tbl select g, point(g, g) from generate_series(1, 1000000) g;"); + cur.execute("create table gist_point_tbl(id int4, p point)") + cur.execute( + "insert into gist_point_tbl select g, point(g, g) from generate_series(1, 1000000) g;" + ) # Build the index. with zenbenchmark.record_pageserver_writes(pageserver, 'pageserver_writes'): with zenbenchmark.record_duration('build'): - cur.execute("create index gist_pointidx2 on gist_point_tbl using gist(p) with (buffering = on)"); + cur.execute( + "create index gist_pointidx2 on gist_point_tbl using gist(p) with (buffering = on)" + ) # Flush the layers from memory to disk. This is included in the reported # time and I/O @@ -45,5 +55,7 @@ def test_gist_buffering_build(postgres: PostgresFactory, pageserver: ZenithPages zenbenchmark.record("peak_mem", zenbenchmark.get_peak_mem(pageserver) / 1024, 'MB') # Report disk space used by the repository - timeline_size = zenbenchmark.get_timeline_size(repo_dir, pageserver.initial_tenant, timeline) - zenbenchmark.record('size', timeline_size / (1024*1024), 'MB') + timeline_size = zenbenchmark.get_timeline_size(repo_dir, + pageserver.initial_tenant, + timeline) + zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB') diff --git a/test_runner/performance/test_perf_pgbench.py b/test_runner/performance/test_perf_pgbench.py index 22e8f2aee3..388ac4314c 100644 --- a/test_runner/performance/test_perf_pgbench.py +++ b/test_runner/performance/test_perf_pgbench.py @@ -5,6 +5,7 @@ from fixtures.log_helper import log pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture") + # # Run a very short pgbench test. # @@ -14,7 +15,12 @@ pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture") # 2. Time to run 5000 pgbench transactions # 3. Disk space used # -def test_pgbench(postgres: PostgresFactory, pageserver: ZenithPageserver, pg_bin, zenith_cli, zenbenchmark, repo_dir: str): +def test_pgbench(postgres: PostgresFactory, + pageserver: ZenithPageserver, + pg_bin, + zenith_cli, + zenbenchmark, + repo_dir: str): # Create a branch for us zenith_cli.run(["branch", "test_pgbench_perf", "empty"]) @@ -23,7 +29,7 @@ def test_pgbench(postgres: PostgresFactory, pageserver: ZenithPageserver, pg_bin # Open a connection directly to the page server that we'll use to force # flushing the layers to disk - psconn = pageserver.connect(); + psconn = pageserver.connect() pscur = psconn.cursor() # Get the timeline ID of our branch. 
We need it for the 'do_gc' command @@ -53,4 +59,4 @@ def test_pgbench(postgres: PostgresFactory, pageserver: ZenithPageserver, pg_bin # Report disk space used by the repository timeline_size = zenbenchmark.get_timeline_size(repo_dir, pageserver.initial_tenant, timeline) - zenbenchmark.record('size', timeline_size / (1024*1024), 'MB') + zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB') diff --git a/test_runner/performance/test_write_amplification.py b/test_runner/performance/test_write_amplification.py index 8410499bd2..1a1cc7bf21 100644 --- a/test_runner/performance/test_write_amplification.py +++ b/test_runner/performance/test_write_amplification.py @@ -17,7 +17,13 @@ from fixtures.log_helper import log pytest_plugins = ("fixtures.zenith_fixtures", "fixtures.benchmark_fixture") -def test_write_amplification(postgres: PostgresFactory, pageserver: ZenithPageserver, pg_bin, zenith_cli, zenbenchmark, repo_dir: str): + +def test_write_amplification(postgres: PostgresFactory, + pageserver: ZenithPageserver, + pg_bin, + zenith_cli, + zenbenchmark, + repo_dir: str): # Create a branch for us zenith_cli.run(["branch", "test_write_amplification", "empty"]) @@ -26,7 +32,7 @@ def test_write_amplification(postgres: PostgresFactory, pageserver: ZenithPagese # Open a connection directly to the page server that we'll use to force # flushing the layers to disk - psconn = pageserver.connect(); + psconn = pageserver.connect() pscur = psconn.cursor() with closing(pg.connect()) as conn: @@ -71,5 +77,7 @@ def test_write_amplification(postgres: PostgresFactory, pageserver: ZenithPagese pscur.execute(f"do_gc {pageserver.initial_tenant} {timeline} 0") # Report disk space used by the repository - timeline_size = zenbenchmark.get_timeline_size(repo_dir, pageserver.initial_tenant, timeline) - zenbenchmark.record('size', timeline_size / (1024*1024), 'MB') + timeline_size = zenbenchmark.get_timeline_size(repo_dir, + pageserver.initial_tenant, + timeline) + zenbenchmark.record('size', timeline_size / (1024 * 1024), 'MB') diff --git a/test_runner/setup.cfg b/test_runner/setup.cfg index 578cb28efc..cff4c7f86e 100644 --- a/test_runner/setup.cfg +++ b/test_runner/setup.cfg @@ -10,6 +10,7 @@ max-line-length = 100 [yapf] based_on_style = pep8 column_limit = 100 +split_all_top_level_comma_separated_values = true [mypy] # some tests don't typecheck when this flag is set diff --git a/vendor/postgres b/vendor/postgres index 8e82cb4a36..9160deb05a 160000 --- a/vendor/postgres +++ b/vendor/postgres @@ -1 +1 @@ -Subproject commit 8e82cb4a36c385d2a65af11c55fd63d011bd79af +Subproject commit 9160deb05a08986354721173ba36e3ebc50a9e21 diff --git a/walkeeper/src/http/routes.rs b/walkeeper/src/http/routes.rs index 8ab405508e..f5a69d3522 100644 --- a/walkeeper/src/http/routes.rs +++ b/walkeeper/src/http/routes.rs @@ -49,6 +49,8 @@ struct TimelineStatus { commit_lsn: Lsn, #[serde(serialize_with = "display_serialize")] truncate_lsn: Lsn, + #[serde(serialize_with = "display_serialize")] + flush_lsn: Lsn, } /// Report info about timeline. @@ -64,6 +66,7 @@ async fn timeline_status_handler(request: Request) -> Result) -> Result Result<()> { - let timeline_arg = Arg::with_name("timeline") - .short("n") + let node_arg = Arg::with_name("node") .index(1) - .help("Timeline name") + .help("Node name") .required(true); + let timeline_arg = Arg::with_name("timeline") + .index(2) + .help("Branch name or a point-in time specification") + .required(false); + let tenantid_arg = Arg::with_name("tenantid") .long("tenantid") .help("Tenant id. 
Represented as a hexadecimal string 32 symbols length") @@ -102,7 +106,10 @@ fn main() -> Result<()> { .subcommand(SubCommand::with_name("list").arg(tenantid_arg.clone())) .subcommand(SubCommand::with_name("create") .about("Create a postgres compute node") - .arg(timeline_arg.clone()).arg(tenantid_arg.clone()).arg(port_arg.clone()) + .arg(node_arg.clone()) + .arg(timeline_arg.clone()) + .arg(tenantid_arg.clone()) + .arg(port_arg.clone()) .arg( Arg::with_name("config-only") .help("Don't do basebackup, create compute node with only config files") @@ -111,13 +118,13 @@ fn main() -> Result<()> { )) .subcommand(SubCommand::with_name("start") .about("Start a postgres compute node.\n This command actually creates new node from scratch, but preserves existing config files") - .arg( - timeline_arg.clone() - ).arg( - tenantid_arg.clone() - ).arg(port_arg.clone())) + .arg(node_arg.clone()) + .arg(timeline_arg.clone()) + .arg(tenantid_arg.clone()) + .arg(port_arg.clone())) .subcommand( SubCommand::with_name("stop") + .arg(node_arg.clone()) .arg(timeline_arg.clone()) .arg(tenantid_arg.clone()) .arg( @@ -430,25 +437,32 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { let tenantid: ZTenantId = list_match .value_of("tenantid") .map_or(Ok(env.tenantid), |value| value.parse())?; + let branch_infos = get_branch_infos(env, &tenantid).unwrap_or_else(|e| { eprintln!("Failed to load branch info: {}", e); HashMap::new() }); - println!("BRANCH\tADDRESS\t\tLSN\t\tSTATUS"); - for ((_, timeline_name), node) in cplane + println!("NODE\tADDRESS\t\tBRANCH\tLSN\t\tSTATUS"); + for ((_, node_name), node) in cplane .nodes .iter() .filter(|((node_tenantid, _), _)| node_tenantid == &tenantid) { + // FIXME: This shows the LSN at the end of the timeline. It's not the + // right thing to do for read-only nodes that might be anchored at an + // older point in time, or following but lagging behind the primary. 
+ let lsn_str = branch_infos + .get(&node.timelineid) + .map(|bi| bi.latest_valid_lsn.to_string()) + .unwrap_or_else(|| "?".to_string()); + println!( - "{}\t{}\t{}\t{}", - timeline_name, + "{}\t{}\t{}\t{}\t{}", + node_name, node.address, - branch_infos - .get(&node.timelineid) - .map(|bi| bi.latest_valid_lsn.to_string()) - .unwrap_or_else(|| "?".to_string()), + node.timelineid, // FIXME: resolve human-friendly branch name + lsn_str, node.status(), ); } @@ -457,26 +471,28 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { let tenantid: ZTenantId = create_match .value_of("tenantid") .map_or(Ok(env.tenantid), |value| value.parse())?; - let timeline_name = create_match.value_of("timeline").unwrap_or("main"); + let node_name = create_match.value_of("node").unwrap_or("main"); + let timeline_name = create_match.value_of("timeline").unwrap_or(node_name); let port: Option = match create_match.value_of("port") { Some(p) => Some(p.parse()?), None => None, }; - cplane.new_node(tenantid, timeline_name, port)?; + cplane.new_node(tenantid, node_name, timeline_name, port)?; } ("start", Some(start_match)) => { let tenantid: ZTenantId = start_match .value_of("tenantid") .map_or(Ok(env.tenantid), |value| value.parse())?; - let timeline_name = start_match.value_of("timeline").unwrap_or("main"); + let node_name = start_match.value_of("node").unwrap_or("main"); + let timeline_name = start_match.value_of("timeline"); let port: Option = match start_match.value_of("port") { Some(p) => Some(p.parse()?), None => None, }; - let node = cplane.nodes.get(&(tenantid, timeline_name.to_owned())); + let node = cplane.nodes.get(&(tenantid, node_name.to_owned())); let auth_token = if matches!(env.auth_type, AuthType::ZenithJWT) { let claims = Claims::new(Some(tenantid), Scope::Tenant); @@ -485,12 +501,11 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { None }; - println!( - "Starting {} postgres on timeline {}...", - if node.is_some() { "existing" } else { "new" }, - timeline_name - ); if let Some(node) = node { + if timeline_name.is_some() { + println!("timeline name ignored because node exists already"); + } + println!("Starting existing postgres {}...", node_name); node.start(&auth_token)?; } else { // when used with custom port this results in non obvious behaviour @@ -498,12 +513,17 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { // start --port X // stop // start <-- will also use port X even without explicit port argument - let node = cplane.new_node(tenantid, timeline_name, port)?; + let timeline_name = timeline_name.unwrap_or(node_name); + println!( + "Starting new postgres {} on {}...", + node_name, timeline_name + ); + let node = cplane.new_node(tenantid, node_name, timeline_name, port)?; node.start(&auth_token)?; } } ("stop", Some(stop_match)) => { - let timeline_name = stop_match.value_of("timeline").unwrap_or("main"); + let node_name = stop_match.value_of("node").unwrap_or("main"); let destroy = stop_match.is_present("destroy"); let tenantid: ZTenantId = stop_match .value_of("tenantid") @@ -511,8 +531,8 @@ fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> { let node = cplane .nodes - .get(&(tenantid, timeline_name.to_owned())) - .ok_or_else(|| anyhow!("postgres {} is not found", timeline_name))?; + .get(&(tenantid, node_name.to_owned())) + .ok_or_else(|| anyhow!("postgres {} is not found", node_name))?; node.stop(destroy)?; } diff --git a/zenith_utils/src/sock_split.rs 
b/zenith_utils/src/sock_split.rs index f7078dafc2..c62963e113 100644 --- a/zenith_utils/src/sock_split.rs +++ b/zenith_utils/src/sock_split.rs @@ -107,21 +107,12 @@ impl io::Write for WriteStream { } } -pub struct TlsBoxed { - stream: BufStream, - session: rustls::ServerSession, -} - -impl TlsBoxed { - fn rustls_stream(&mut self) -> rustls::Stream { - rustls::Stream::new(&mut self.session, &mut self.stream) - } -} +type TlsStream = rustls::StreamOwned; pub enum BidiStream { Tcp(BufStream), /// This variant is boxed, because [`rustls::ServerSession`] is quite larger than [`BufStream`]. - Tls(Box), + Tls(Box>), } impl BidiStream { @@ -134,11 +125,11 @@ impl BidiStream { Self::Tcp(stream) => stream.get_ref().shutdown(how), Self::Tls(tls_boxed) => { if how == Shutdown::Read { - tls_boxed.stream.get_ref().shutdown(how) + tls_boxed.sock.get_ref().shutdown(how) } else { - tls_boxed.session.send_close_notify(); - let res = tls_boxed.rustls_stream().flush(); - tls_boxed.stream.get_ref().shutdown(how)?; + tls_boxed.sess.send_close_notify(); + let res = tls_boxed.flush(); + tls_boxed.sock.get_ref().shutdown(how)?; res } } @@ -155,7 +146,7 @@ impl BidiStream { (ReadStream::Tcp(reader), WriteStream::Tcp(stream)) } Self::Tls(tls_boxed) => { - let reader = tls_boxed.stream.into_reader(); + let reader = tls_boxed.sock.into_reader(); let buffer_data = reader.buffer().to_owned(); let read_buf_cfg = rustls_split::BufCfg::with_data(buffer_data, 8192); let write_buf_cfg = rustls_split::BufCfg::with_capacity(8192); @@ -164,7 +155,7 @@ impl BidiStream { let socket = Arc::try_unwrap(reader.into_inner().0).unwrap(); let (read_half, write_half) = - rustls_split::split(socket, tls_boxed.session, read_buf_cfg, write_buf_cfg); + rustls_split::split(socket, tls_boxed.sess, read_buf_cfg, write_buf_cfg); (ReadStream::Tls(read_half), WriteStream::Tls(write_half)) } } @@ -175,7 +166,7 @@ impl BidiStream { Self::Tcp(mut stream) => { session.complete_io(&mut stream)?; assert!(!session.is_handshaking()); - Ok(Self::Tls(Box::new(TlsBoxed { stream, session }))) + Ok(Self::Tls(Box::new(TlsStream::new(session, stream)))) } Self::Tls { .. } => Err(io::Error::new( io::ErrorKind::InvalidInput, @@ -189,7 +180,7 @@ impl io::Read for BidiStream { fn read(&mut self, buf: &mut [u8]) -> io::Result { match self { Self::Tcp(stream) => stream.read(buf), - Self::Tls(tls_boxed) => tls_boxed.rustls_stream().read(buf), + Self::Tls(tls_boxed) => tls_boxed.read(buf), } } } @@ -198,14 +189,14 @@ impl io::Write for BidiStream { fn write(&mut self, buf: &[u8]) -> io::Result { match self { Self::Tcp(stream) => stream.write(buf), - Self::Tls(tls_boxed) => tls_boxed.rustls_stream().write(buf), + Self::Tls(tls_boxed) => tls_boxed.write(buf), } } fn flush(&mut self) -> io::Result<()> { match self { Self::Tcp(stream) => stream.flush(), - Self::Tls(tls_boxed) => tls_boxed.rustls_stream().flush(), + Self::Tls(tls_boxed) => tls_boxed.flush(), } } }
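
To make the sock_split.rs change above concrete: the hand-rolled TlsBoxed wrapper is replaced by rustls::StreamOwned, which already bundles the session and the socket and implements Read and Write itself, so BidiStream::Tls can forward read/write/flush directly instead of rebuilding a rustls::Stream on every call. Below is a minimal sketch of that pattern, not code from this repository: it assumes the rustls 0.19-era API (StreamOwned::new(sess, sock), public sess/sock fields, the Session trait providing send_close_notify), and a plain TcpStream stands in for the BufStream that sock_split.rs actually wraps.

// Minimal sketch of the StreamOwned pattern adopted in sock_split.rs above.
// Assumption: rustls 0.19-era API; TcpStream used here instead of BufStream.
use std::io::{Read, Write};
use std::net::{Shutdown, TcpStream};

use rustls::Session; // brings send_close_notify() into scope

type TlsStream = rustls::StreamOwned<rustls::ServerSession, TcpStream>;

// StreamOwned implements Read + Write, so plain I/O needs no extra wrapper.
fn echo_once(tls: &mut TlsStream) -> std::io::Result<()> {
    let mut buf = [0u8; 512];
    let n = tls.read(&mut buf)?; // decrypts via tls.sess, pulls bytes from tls.sock
    tls.write_all(&buf[..n])?;   // encrypts and queues the reply
    tls.flush()
}

// Mirrors the write half of BidiStream::shutdown: announce close_notify,
// flush the remaining TLS records, then shut down the underlying socket.
fn shutdown_write(tls: &mut TlsStream) -> std::io::Result<()> {
    tls.sess.send_close_notify();
    tls.flush()?;
    tls.sock.shutdown(Shutdown::Write)
}

Keeping the Box around StreamOwned in the enum preserves the original intent of the Tls variant: the server session state is much larger than the buffered socket, so boxing keeps BidiStream itself small.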