diff --git a/Cargo.lock b/Cargo.lock
index ef1da386e4..672a443792 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -740,6 +740,9 @@ name = "cc"
 version = "1.0.79"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
+dependencies = [
+ "jobserver",
+]
 
 [[package]]
 name = "cexpr"
@@ -922,6 +925,7 @@ dependencies = [
  "url",
  "utils",
  "workspace_hack",
+ "zstd",
 ]
 
 [[package]]
@@ -1975,6 +1979,15 @@ version = "1.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"
 
+[[package]]
+name = "jobserver"
+version = "0.1.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "js-sys"
 version = "0.3.63"
@@ -5296,6 +5309,7 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "bytes",
+ "cc",
  "chrono",
  "clap",
  "clap_builder",
@@ -5396,3 +5410,33 @@ name = "zeroize"
 version = "1.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2a0956f1ba7c7909bfb66c2e9e4124ab6f6482560f6628b5aaeba39207c9aad9"
+
+[[package]]
+name = "zstd"
+version = "0.12.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c"
+dependencies = [
+ "zstd-safe",
+]
+
+[[package]]
+name = "zstd-safe"
+version = "6.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581"
+dependencies = [
+ "libc",
+ "zstd-sys",
+]
+
+[[package]]
+name = "zstd-sys"
+version = "2.0.8+zstd.1.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c"
+dependencies = [
+ "cc",
+ "libc",
+ "pkg-config",
+]
diff --git a/compute_tools/Cargo.toml b/compute_tools/Cargo.toml
index 11121ecc90..08dcc21c7a 100644
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -34,3 +34,4 @@ utils.workspace = true
 workspace_hack.workspace = true
 toml_edit.workspace = true
 remote_storage = { version = "0.1", path = "../libs/remote_storage/" }
+zstd = "0.12.4"
diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs
index fc000bd088..3851af2fdb 100644
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -30,11 +30,10 @@
 //! -C 'postgresql://cloud_admin@localhost/postgres' \
 //! -S /var/db/postgres/specs/current.json \
 //! -b /usr/local/bin/postgres \
-//! -r {"bucket": "my-bucket", "region": "eu-central-1", "endpoint": "http:://localhost:9000",
-//! (optionally) "key": "AWS_SECRET_ACCESS_KEY", "id": "AWS_ACCESS_KEY_ID"}
+//! -r {"bucket": "my-bucket", "region": "eu-central-1", "endpoint": "http://localhost:9000"}
 //! ```
 //!
-use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::fs::File; use std::panic; use std::path::Path; @@ -52,7 +51,6 @@ use compute_api::responses::ComputeStatus; use compute_tools::compute::{ComputeNode, ComputeState, ParsedSpec}; use compute_tools::configurator::launch_configurator; -use compute_tools::extension_server::launch_download_extensions; use compute_tools::extension_server::{get_pg_version, init_remote_storage}; use compute_tools::http::api::launch_http_server; use compute_tools::logger::*; @@ -60,12 +58,14 @@ use compute_tools::monitor::launch_monitor; use compute_tools::params::*; use compute_tools::spec::*; -const BUILD_TAG_DEFAULT: &str = "local"; +const BUILD_TAG_DEFAULT: &str = "111"; // TODO: change back to local; I need 111 for my test fn main() -> Result<()> { init_tracing_and_logging(DEFAULT_LOG_LEVEL)?; - let build_tag = option_env!("BUILD_TAG").unwrap_or(BUILD_TAG_DEFAULT); + let build_tag = option_env!("BUILD_TAG") + .unwrap_or(BUILD_TAG_DEFAULT) + .to_string(); info!("build_tag: {build_tag}"); let matches = cli().get_matches(); @@ -74,8 +74,7 @@ fn main() -> Result<()> { let remote_ext_config = matches.get_one::("remote-ext-config"); let ext_remote_storage = remote_ext_config.map(|x| { - init_remote_storage(x, build_tag) - .expect("cannot initialize remote extension storage from config") + init_remote_storage(x).expect("cannot initialize remote extension storage from config") }); let http_port = *matches @@ -195,7 +194,9 @@ fn main() -> Result<()> { state: Mutex::new(new_state), state_changed: Condvar::new(), ext_remote_storage, - available_extensions: OnceLock::new(), + ext_remote_paths: OnceLock::new(), + already_downloaded_extensions: Mutex::new(HashSet::new()), + build_tag, }; let compute = Arc::new(compute_node); @@ -243,9 +244,6 @@ fn main() -> Result<()> { let _configurator_handle = launch_configurator(&compute).expect("cannot launch configurator thread"); - let _download_extensions_handle = - launch_download_extensions(&compute).expect("cannot launch download extensions thread"); - // Start Postgres let mut delay_exit = false; let mut exit_code = None; diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 63ef984ede..4f227bc8ae 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -1,3 +1,4 @@ +use std::collections::HashMap; use std::collections::HashSet; use std::fs; use std::io::BufRead; @@ -21,7 +22,7 @@ use compute_api::responses::{ComputeMetrics, ComputeStatus}; use compute_api::spec::{ComputeMode, ComputeSpec}; use utils::measured_stream::MeasuredReader; -use remote_storage::GenericRemoteStorage; +use remote_storage::{GenericRemoteStorage, RemotePath}; use crate::pg_helpers::*; use crate::spec::*; @@ -55,8 +56,10 @@ pub struct ComputeNode { pub state_changed: Condvar, /// the S3 bucket that we search for extensions in pub ext_remote_storage: Option, - // cached lists of available extensions and libraries - pub available_extensions: OnceLock>, + // (key: extension name, value: path to extension archive in remote storage) + pub ext_remote_paths: OnceLock>, + pub already_downloaded_extensions: Mutex>, + pub build_tag: String, } #[derive(Clone, Debug)] @@ -735,23 +738,23 @@ LIMIT 100", // If remote extension storage is configured, // download extension control files - #[tokio::main] pub async fn prepare_external_extensions(&self, compute_state: &ComputeState) -> Result<()> { if let Some(ref ext_remote_storage) = self.ext_remote_storage { let pspec = 
compute_state.pspec.as_ref().expect("spec must be set"); let spec = &pspec.spec; - let custom_ext_prefixes = spec.custom_extensions.clone().unwrap_or(Vec::new()); - info!("custom_ext_prefixes: {:?}", &custom_ext_prefixes); - let available_extensions = extension_server::get_available_extensions( + let custom_ext = spec.custom_extensions.clone().unwrap_or(Vec::new()); + info!("custom extensions: {:?}", &custom_ext); + let ext_remote_paths = extension_server::get_available_extensions( ext_remote_storage, &self.pgbin, &self.pgversion, - &custom_ext_prefixes, + &custom_ext, + &self.build_tag, ) .await?; - self.available_extensions - .set(available_extensions) - .expect("available_extensions.set error"); + self.ext_remote_paths + .set(ext_remote_paths) + .expect("ext_remote_paths.set error"); } Ok(()) } @@ -760,11 +763,31 @@ LIMIT 100", match &self.ext_remote_storage { None => anyhow::bail!("No remote extension storage"), Some(remote_storage) => { + // TODO: eliminate useless LOAD Library calls to this function (if possible) + // not clear that we can distinguish between useful and useless + // library calls better than the below code + let ext_name = ext_name.replace(".so", ""); + { + let mut already_downloaded_extensions = + self.already_downloaded_extensions.lock().expect("bad lock"); + if already_downloaded_extensions.contains(&ext_name) { + info!( + "extension {:?} already exists, skipping download", + &ext_name + ); + return Ok(()); + } else { + already_downloaded_extensions.insert(ext_name.clone()); + } + } extension_server::download_extension( - ext_name, + &ext_name, + &self + .ext_remote_paths + .get() + .expect("error accessing ext_remote_paths")[&ext_name], remote_storage, &self.pgbin, - &self.pgversion, ) .await } @@ -809,6 +832,9 @@ LIMIT 100", libs_vec.extend(preload_libs_vec); } + info!("Download ext_index.json, find the extension paths"); + self.prepare_external_extensions(compute_state).await?; + info!("Downloading to shared preload libraries: {:?}", &libs_vec); let mut download_tasks = Vec::new(); for library in &libs_vec { diff --git a/compute_tools/src/extension_server.rs b/compute_tools/src/extension_server.rs index f32fa16990..ee6580ae36 100644 --- a/compute_tools/src/extension_server.rs +++ b/compute_tools/src/extension_server.rs @@ -1,35 +1,75 @@ +// AT LEAST download_extension / get_available_extensions need large changes +// refactor custom_extensions to be clear that we have moved the access logic to control plane + // Download extension files from the extension store // and put them in the right place in the postgres directory /* The layout of the S3 bucket is as follows: +├── 111 + │   ├── v14 + │   │   ├── extensions + │   │   │   ├── anon.tar.zst + │   │   │   └── embedding.tar.zst + │   │   └── ext_index.json + │   └── v15 + │   ├── extensions + │   │   ├── anon.tar.zst + │   │   └── embedding.tar.zst + │   └── ext_index.json + ├── 112 + │   ├── v14 + │   │   ├── extensions + │   │   │   └── anon.tar.zst + │   │   └── ext_index.json + │   └── v15 + │   ├── extensions + │   │   └── anon.tar.zst + │   └── ext_index.json + └── 113 + ├── v14 + │   ├── extensions + │   │   └── embedding.tar.zst + │   └── ext_index.json + └── v15 + ├── extensions + │   └── embedding.tar.zst + └── ext_index.json -v14/ext_index.json - -- this contains information necessary to create control files -v14/extensions/test_ext1.tar.gz - -- this contains the library files and sql files necessary to create this extension -v14/extensions/custom_ext1.tar.gz +Note that build number cannot be part 
of prefix because we might need extensions +from other build numbers. -The difference between a private and public extensions is determined by who can -load the extension this is specified in ext_index.json +ext_index.json stores the control files and location of extension archives -Speicially, ext_index.json has a list of public extensions, and a list of -extensions enabled for specific tenant-ids. +We do not duplicate extension.tar.zst files. +We only upload a new one if it is updated. +*access* is controlled by spec + +More specifically, here is an example ext_index.json +{ + "embedding": { + "control_file_content": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true", + "extension_archive": "111/v14/extensions/embedding.tar.zst" + }, + "anon": { + "control_file_content": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n", + "extension_archive": "111/v14/extensions/anon.tar.zst" + } +} */ -use crate::compute::ComputeNode; use anyhow::Context; use anyhow::{self, Result}; -use flate2::read::GzDecoder; +use futures::future::join_all; use remote_storage::*; use serde_json::{self, Value}; -use std::collections::HashSet; +use std::collections::HashMap; +use std::io::Read; use std::num::{NonZeroU32, NonZeroUsize}; use std::path::Path; use std::str; -use std::sync::Arc; -use std::thread; use tar::Archive; use tokio::io::AsyncReadExt; use tracing::info; +use zstd::stream::read::Decoder; fn get_pg_config(argument: &str, pgbin: &str) -> String { // gives the result of `pg_config [argument]` @@ -61,133 +101,111 @@ pub fn get_pg_version(pgbin: &str) -> String { panic!("Unsuported postgres version {human_version}"); } -// download extension control files -// if custom_ext_prefixes is provided - search also in custom extension paths +// download control files for enabled_extensions +// return the paths in s3 to the archives containing the actual extension files +// for use in creating the extension pub async fn get_available_extensions( remote_storage: &GenericRemoteStorage, pgbin: &str, pg_version: &str, - custom_ext_prefixes: &[String], -) -> Result> { + enabled_extensions: &[String], + build_tag: &str, +) -> Result> { let local_sharedir = Path::new(&get_pg_config("--sharedir", pgbin)).join("extension"); - let index_path = pg_version.to_owned() + "/ext_index.json"; + let index_path = format!("{build_tag}/{pg_version}/ext_index.json"); let index_path = RemotePath::new(Path::new(&index_path)).context("error forming path")?; - info!("download ext_index.json: {:?}", &index_path); + info!("download ext_index.json from: {:?}", &index_path); - // TODO: potential optimization: cache ext_index.json let mut download = remote_storage.download(&index_path).await?; - let mut write_data_buffer = Vec::new(); + let mut ext_idx_buffer = Vec::new(); download .download_stream - .read_to_end(&mut write_data_buffer) + .read_to_end(&mut ext_idx_buffer) .await?; - let ext_index_str = match str::from_utf8(&write_data_buffer) { - Ok(v) => v, - Err(e) => panic!("Invalid UTF-8 sequence: {}", e), - }; - + let ext_index_str = str::from_utf8(&ext_idx_buffer).expect("error parsing json"); let ext_index_full: Value = serde_json::from_str(ext_index_str)?; let ext_index_full = ext_index_full.as_object().context("error parsing json")?; - let 
control_data = ext_index_full["control_data"] - .as_object() - .context("json parse error")?; - let enabled_extensions = ext_index_full["enabled_extensions"] - .as_object() - .context("json parse error")?; - info!("{:?}", control_data.clone()); - info!("{:?}", enabled_extensions.clone()); + info!("ext_index: {:?}", &ext_index_full); - let mut prefixes = vec!["public".to_string()]; - prefixes.extend(custom_ext_prefixes.to_owned()); - info!("{:?}", &prefixes); - let mut all_extensions = HashSet::new(); - for prefix in prefixes { - let prefix_extensions = match enabled_extensions.get(&prefix) { - Some(Value::Array(ext_name)) => ext_name, - _ => { - info!("prefix {} has no extensions", prefix); - continue; - } - }; - info!("{:?}", prefix_extensions); - for ext_name in prefix_extensions { - all_extensions.insert(ext_name.as_str().context("json parse error")?.to_string()); - } + info!("enabled_extensions: {:?}", enabled_extensions); + let mut ext_remote_paths = HashMap::new(); + let mut file_create_tasks = Vec::new(); + for extension in enabled_extensions { + let ext_data = ext_index_full[extension] + .as_object() + .context("error parsing json")?; + let control_contents = ext_data["control_file_content"] + .as_str() + .context("error parsing json")?; + let control_path = local_sharedir.join(extension.to_owned() + ".control"); + info!("writing file {:?}{:?}", control_path, control_contents); + file_create_tasks.push(tokio::fs::write(control_path, control_contents)); + + let ext_archive_path = ext_data["extension_archive"] + .as_str() + .context("error parsing json")?; + ext_remote_paths.insert( + extension.to_string(), + RemotePath::from_string(ext_archive_path)?, + ); } - - for prefix in &all_extensions { - let control_contents = control_data[prefix].as_str().context("json parse error")?; - let control_path = local_sharedir.join(prefix.to_owned() + ".control"); - - info!("WRITING FILE {:?}{:?}", control_path, control_contents); - std::fs::write(control_path, control_contents)?; + let results = join_all(file_create_tasks).await; + for result in results { + result?; } - - Ok(all_extensions.into_iter().collect()) + Ok(ext_remote_paths) } // download all sqlfiles (and possibly data files) for a given extension name pub async fn download_extension( ext_name: &str, + ext_path: &RemotePath, remote_storage: &GenericRemoteStorage, pgbin: &str, - pg_version: &str, ) -> Result<()> { - // TODO: potential optimization: only download the extension if it doesn't exist - // problem: how would we tell if it exists? 
- let ext_name = ext_name.replace(".so", ""); - let ext_name_targz = ext_name.to_owned() + ".tar.gz"; - if Path::new(&ext_name_targz).exists() { - info!("extension {:?} already exists", ext_name_targz); - return Ok(()); - } - let ext_path = RemotePath::new( - &Path::new(pg_version) - .join("extensions") - .join(ext_name_targz.clone()), - )?; - info!( - "Start downloading extension {:?} from {:?}", - ext_name, ext_path - ); - let mut download = remote_storage.download(&ext_path).await?; - let mut write_data_buffer = Vec::new(); + info!("Download extension {:?} from {:?}", ext_name, ext_path); + let mut download = remote_storage.download(ext_path).await?; + let mut download_buffer = Vec::new(); download .download_stream - .read_to_end(&mut write_data_buffer) + .read_to_end(&mut download_buffer) .await?; - let unzip_dest = pgbin.strip_suffix("/bin/postgres").expect("bad pgbin"); - let tar = GzDecoder::new(write_data_buffer.as_slice()); - let mut archive = Archive::new(tar); - archive.unpack(unzip_dest)?; + let mut decoder = Decoder::new(download_buffer.as_slice())?; + let mut decompress_buffer = Vec::new(); + decoder.read_to_end(&mut decompress_buffer)?; + let mut archive = Archive::new(decompress_buffer.as_slice()); + let unzip_dest = pgbin + .strip_suffix("/bin/postgres") + .expect("bad pgbin") + .to_string() + + "/download_extensions"; + archive.unpack(&unzip_dest)?; info!("Download + unzip {:?} completed successfully", &ext_path); - let local_sharedir = Path::new(&get_pg_config("--sharedir", pgbin)).join("extension"); - let zip_sharedir = format!("{unzip_dest}/extensions/{ext_name}/share/extension"); - info!("mv {zip_sharedir:?}/* {local_sharedir:?}"); - for file in std::fs::read_dir(zip_sharedir)? { - let old_file = file?.path(); - let new_file = - Path::new(&local_sharedir).join(old_file.file_name().context("error parsing file")?); - std::fs::rename(old_file, new_file)?; - } - let local_libdir = Path::new(&get_pg_config("--libdir", pgbin)).join("postgresql"); - let zip_libdir = format!("{unzip_dest}/extensions/{ext_name}/lib"); - info!("mv {zip_libdir:?}/* {local_libdir:?}"); - for file in std::fs::read_dir(zip_libdir)? { - let old_file = file?.path(); - let new_file = - Path::new(&local_libdir).join(old_file.file_name().context("error parsing file")?); - std::fs::rename(old_file, new_file)?; + let sharedir_paths = ( + format!("{unzip_dest}/{ext_name}/share/extension"), + Path::new(&get_pg_config("--sharedir", pgbin)).join("extension"), + ); + let libdir_paths = ( + format!("{unzip_dest}/{ext_name}/lib"), + Path::new(&get_pg_config("--libdir", pgbin)).join("postgresql"), + ); + // move contents of the libdir / sharedir in unzipped archive to the correct local paths + for paths in [sharedir_paths, libdir_paths] { + let (zip_dir, real_dir) = paths; + info!("mv {zip_dir:?}/* {real_dir:?}"); + for file in std::fs::read_dir(zip_dir)? 
{ + let old_file = file?.path(); + let new_file = + Path::new(&real_dir).join(old_file.file_name().context("error parsing file")?); + std::fs::rename(old_file, new_file)?; + } } Ok(()) } // This function initializes the necessary structs to use remote storage (should be fairly cheap) -pub fn init_remote_storage( - remote_ext_config: &str, - default_prefix: &str, -) -> anyhow::Result { +pub fn init_remote_storage(remote_ext_config: &str) -> anyhow::Result { let remote_ext_config: serde_json::Value = serde_json::from_str(remote_ext_config)?; let remote_ext_bucket = remote_ext_config["bucket"] @@ -199,19 +217,9 @@ pub fn init_remote_storage( let remote_ext_endpoint = remote_ext_config["endpoint"].as_str(); let remote_ext_prefix = remote_ext_config["prefix"] .as_str() - .unwrap_or(default_prefix) + .unwrap_or_default() .to_string(); - // control plane passes the aws creds via CLI ARGS to compute_ctl - let aws_key = remote_ext_config["key"].as_str(); - let aws_id = remote_ext_config["id"].as_str(); - if let Some(aws_key) = aws_key { - if let Some(aws_id) = aws_id { - std::env::set_var("AWS_SECRET_ACCESS_KEY", aws_key); - std::env::set_var("AWS_ACCESS_KEY_ID", aws_id); - } - } - // If needed, it is easy to allow modification of other parameters // however, default values should be fine for now let config = S3Config { @@ -229,19 +237,3 @@ pub fn init_remote_storage( }; GenericRemoteStorage::from_config(&config) } - -pub fn launch_download_extensions( - compute: &Arc, -) -> Result, std::io::Error> { - let compute = Arc::clone(compute); - thread::Builder::new() - .name("download-extensions".into()) - .spawn(move || { - info!("start download_extension_files"); - let compute_state = compute.state.lock().expect("error unlocking compute.state"); - compute - .prepare_external_extensions(&compute_state) - .expect("error preparing extensions"); - info!("download_extension_files done, exiting thread"); - }) -} diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs index 35e863b98e..dd78da59fb 100644 --- a/control_plane/src/endpoint.rs +++ b/control_plane/src/endpoint.rs @@ -499,9 +499,7 @@ impl Endpoint { // // The proper way to implement this is to pass the custom extension // in spec, but we don't have a way to do that yet in the python tests. - // NEW HACK: we enable the anon custom extension for everyone! this is of course just for testing - // how will we do it for real? 
- custom_extensions: Some(vec!["123454321".to_string(), self.tenant_id.to_string()]), + custom_extensions: Some(vec!["embedding".into(), "anon".into()]), }; let spec_path = self.endpoint_path().join("spec.json"); std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?; diff --git a/libs/remote_storage/src/lib.rs b/libs/remote_storage/src/lib.rs index 5e311b3cdc..1ddd156a08 100644 --- a/libs/remote_storage/src/lib.rs +++ b/libs/remote_storage/src/lib.rs @@ -65,6 +65,10 @@ impl RemotePath { Ok(Self(relative_path.to_path_buf())) } + pub fn from_string(relative_path: &str) -> anyhow::Result { + Self::new(Path::new(relative_path)) + } + pub fn with_base(&self, base_path: &Path) -> PathBuf { base_path.join(&self.0) } diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 0bb40d36c3..fc3ab838a8 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -721,7 +721,10 @@ class NeonEnvBuilder: self.remote_storage = LocalFsStorage(Path(self.repo_dir / "local_fs_remote_storage")) def enable_mock_s3_remote_storage( - self, bucket_name: str, force_enable: bool = True, enable_remote_extensions: bool = False + self, + bucket_name: str, + force_enable: bool = True, + enable_remote_extensions: bool = False, ): """ Sets up the pageserver to use the S3 mock server, creates the bucket, if it's not present already. @@ -763,7 +766,10 @@ class NeonEnvBuilder: ) def enable_real_s3_remote_storage( - self, test_name: str, force_enable: bool = True, enable_remote_extensions: bool = False + self, + test_name: str, + force_enable: bool = True, + enable_remote_extensions: bool = False, ): """ Sets up configuration to use real s3 endpoint without mock server @@ -810,7 +816,7 @@ class NeonEnvBuilder: bucket_region="eu-central-1", access_key=access_key, secret_key=secret_key, - prefix_in_bucket="5555", + prefix_in_bucket="", ) def cleanup_local_storage(self): diff --git a/test_runner/regress/data/extension_test/111/v14/ext_index.json b/test_runner/regress/data/extension_test/111/v14/ext_index.json new file mode 100644 index 0000000000..b5365ceea0 --- /dev/null +++ b/test_runner/regress/data/extension_test/111/v14/ext_index.json @@ -0,0 +1,10 @@ +{ + "embedding": { + "control_file_content": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true", + "extension_archive": "111/v14/extensions/embedding.tar.zst" + }, + "anon": { + "control_file_content": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n", + "extension_archive": "111/v14/extensions/anon.tar.zst" + } +} diff --git a/test_runner/regress/data/extension_test/111/v14/extensions/anon.tar.zst b/test_runner/regress/data/extension_test/111/v14/extensions/anon.tar.zst new file mode 100644 index 0000000000..24a08ec9aa Binary files /dev/null and b/test_runner/regress/data/extension_test/111/v14/extensions/anon.tar.zst differ diff --git a/test_runner/regress/data/extension_test/111/v14/extensions/embedding.tar.zst b/test_runner/regress/data/extension_test/111/v14/extensions/embedding.tar.zst new file mode 100644 index 0000000000..f522afc742 Binary files /dev/null and b/test_runner/regress/data/extension_test/111/v14/extensions/embedding.tar.zst differ diff --git 
a/test_runner/regress/data/extension_test/111/v15/ext_index.json b/test_runner/regress/data/extension_test/111/v15/ext_index.json new file mode 100644 index 0000000000..acd631accb --- /dev/null +++ b/test_runner/regress/data/extension_test/111/v15/ext_index.json @@ -0,0 +1,11 @@ +{ + "embedding": { + "control_file_content": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true", + "extension_archive": "111/v15/extensions/embedding.tar.zst" + }, + "anon": { + "control_file_content": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n", + "extension_archive": "111/v15/extensions/anon.tar.zst" + } +} + diff --git a/test_runner/regress/data/extension_test/111/v15/extensions/anon.tar.zst b/test_runner/regress/data/extension_test/111/v15/extensions/anon.tar.zst new file mode 100644 index 0000000000..045d7be049 Binary files /dev/null and b/test_runner/regress/data/extension_test/111/v15/extensions/anon.tar.zst differ diff --git a/test_runner/regress/data/extension_test/111/v15/extensions/embedding.tar.zst b/test_runner/regress/data/extension_test/111/v15/extensions/embedding.tar.zst new file mode 100644 index 0000000000..f33fd48a66 Binary files /dev/null and b/test_runner/regress/data/extension_test/111/v15/extensions/embedding.tar.zst differ diff --git a/test_runner/regress/data/extension_test/112/v14/ext_index.json b/test_runner/regress/data/extension_test/112/v14/ext_index.json new file mode 100644 index 0000000000..3384d733f1 --- /dev/null +++ b/test_runner/regress/data/extension_test/112/v14/ext_index.json @@ -0,0 +1,10 @@ +{ + "embedding": { + "control_file_content": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true", + "extension_archive": "111/v14/extensions/embedding.tar.zst" + }, + "anon": { + "control_file_content": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n", + "extension_archive": "112/v14/extensions/anon.tar.zst" + } +} diff --git a/test_runner/regress/data/extension_test/112/v14/extensions/anon.tar.zst b/test_runner/regress/data/extension_test/112/v14/extensions/anon.tar.zst new file mode 100644 index 0000000000..24a08ec9aa Binary files /dev/null and b/test_runner/regress/data/extension_test/112/v14/extensions/anon.tar.zst differ diff --git a/test_runner/regress/data/extension_test/112/v15/ext_index.json b/test_runner/regress/data/extension_test/112/v15/ext_index.json new file mode 100644 index 0000000000..a73cba0523 --- /dev/null +++ b/test_runner/regress/data/extension_test/112/v15/ext_index.json @@ -0,0 +1,11 @@ +{ + "embedding": { + "control_file_content": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true", + "extension_archive": "111/v15/extensions/embedding.tar.zst" + }, + "anon": { + "control_file_content": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = 
'$libdir/anon' \ntrusted = true \n", + "extension_archive": "112/v15/extensions/anon.tar.zst" + } +} + diff --git a/test_runner/regress/data/extension_test/112/v15/extensions/anon.tar.zst b/test_runner/regress/data/extension_test/112/v15/extensions/anon.tar.zst new file mode 100644 index 0000000000..045d7be049 Binary files /dev/null and b/test_runner/regress/data/extension_test/112/v15/extensions/anon.tar.zst differ diff --git a/test_runner/regress/data/extension_test/113/v14/ext_index.json b/test_runner/regress/data/extension_test/113/v14/ext_index.json new file mode 100644 index 0000000000..6a54e78322 --- /dev/null +++ b/test_runner/regress/data/extension_test/113/v14/ext_index.json @@ -0,0 +1,10 @@ +{ + "embedding": { + "control_file_content": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true", + "extension_archive": "113/v14/extensions/embedding.tar.zst" + }, + "anon": { + "control_file_content": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n", + "extension_archive": "111/v14/extensions/anon.tar.zst" + } +} diff --git a/test_runner/regress/data/extension_test/113/v14/extensions/embedding.tar.zst b/test_runner/regress/data/extension_test/113/v14/extensions/embedding.tar.zst new file mode 100644 index 0000000000..f522afc742 Binary files /dev/null and b/test_runner/regress/data/extension_test/113/v14/extensions/embedding.tar.zst differ diff --git a/test_runner/regress/data/extension_test/113/v15/ext_index.json b/test_runner/regress/data/extension_test/113/v15/ext_index.json new file mode 100644 index 0000000000..f193cff9d2 --- /dev/null +++ b/test_runner/regress/data/extension_test/113/v15/ext_index.json @@ -0,0 +1,11 @@ +{ + "embedding": { + "control_file_content": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true", + "extension_archive": "113/v15/extensions/embedding.tar.zst" + }, + "anon": { + "control_file_content": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n", + "extension_archive": "111/v15/extensions/anon.tar.zst" + } +} + diff --git a/test_runner/regress/data/extension_test/113/v15/extensions/embedding.tar.zst b/test_runner/regress/data/extension_test/113/v15/extensions/embedding.tar.zst new file mode 100644 index 0000000000..f33fd48a66 Binary files /dev/null and b/test_runner/regress/data/extension_test/113/v15/extensions/embedding.tar.zst differ diff --git a/test_runner/regress/data/extension_test/v14/anon.tar.gz b/test_runner/regress/data/extension_test/v14/anon.tar.gz deleted file mode 100644 index 4c7959fe8b..0000000000 Binary files a/test_runner/regress/data/extension_test/v14/anon.tar.gz and /dev/null differ diff --git a/test_runner/regress/data/extension_test/v14/embedding.tar.gz b/test_runner/regress/data/extension_test/v14/embedding.tar.gz deleted file mode 100644 index 98ba0a5c2c..0000000000 Binary files a/test_runner/regress/data/extension_test/v14/embedding.tar.gz and /dev/null differ diff --git a/test_runner/regress/data/extension_test/v14/ext_index.json 
b/test_runner/regress/data/extension_test/v14/ext_index.json deleted file mode 100644 index dd84369e30..0000000000 --- a/test_runner/regress/data/extension_test/v14/ext_index.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "enabled_extensions": { - "123454321": [ - "anon" - ], - "public": [ - "embedding" - ] - }, - "control_data": { - "embedding": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true", - "anon": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n" - } -} diff --git a/test_runner/regress/data/extension_test/v15/anon.tar.gz b/test_runner/regress/data/extension_test/v15/anon.tar.gz deleted file mode 100644 index 8c4fc44967..0000000000 Binary files a/test_runner/regress/data/extension_test/v15/anon.tar.gz and /dev/null differ diff --git a/test_runner/regress/data/extension_test/v15/embedding.tar.gz b/test_runner/regress/data/extension_test/v15/embedding.tar.gz deleted file mode 100644 index 4fa980ba13..0000000000 Binary files a/test_runner/regress/data/extension_test/v15/embedding.tar.gz and /dev/null differ diff --git a/test_runner/regress/data/extension_test/v15/ext_index.json b/test_runner/regress/data/extension_test/v15/ext_index.json deleted file mode 100644 index 7fa10701f4..0000000000 --- a/test_runner/regress/data/extension_test/v15/ext_index.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "enabled_extensions": { - "123454321": [ - "anon" - ], - "public": [ - "embedding" - ] - }, - "control_data": { - "embedding": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true", - "anon": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n" - } -} \ No newline at end of file diff --git a/test_runner/regress/test_download_extensions.py b/test_runner/regress/test_download_extensions.py index 2ce96eb7c0..57c0f37bf7 100644 --- a/test_runner/regress/test_download_extensions.py +++ b/test_runner/regress/test_download_extensions.py @@ -11,21 +11,16 @@ from fixtures.neon_fixtures import ( ) from fixtures.pg_version import PgVersion -# Generate mock extension files and upload them to the mock bucket. -# -# NOTE: You must have appropriate AWS credentials to run REAL_S3 test. -# It may also be necessary to set the following environment variables for MOCK_S3 test: -# export AWS_ACCESS_KEY_ID='test' # export AWS_SECRET_ACCESS_KEY='test' -# export AWS_SECURITY_TOKEN='test' # export AWS_SESSION_TOKEN='test' -# export AWS_DEFAULT_REGION='us-east-1' - +# Test downloading remote extension. 
@pytest.mark.parametrize("remote_storage_kind", available_s3_storages()) def test_remote_extensions( neon_env_builder: NeonEnvBuilder, remote_storage_kind: RemoteStorageKind, pg_version: PgVersion, ): + if remote_storage_kind == RemoteStorageKind.REAL_S3: + return None neon_env_builder.enable_remote_storage( remote_storage_kind=remote_storage_kind, test_name="test_remote_extensions", @@ -36,87 +31,84 @@ def test_remote_extensions( tenant_id, _ = env.neon_cli.create_tenant() env.neon_cli.create_timeline("test_remote_extensions", tenant_id=tenant_id) + assert env.ext_remote_storage is not None # satisfy mypy + assert env.remote_storage_client is not None # satisfy mypy + # For MOCK_S3 we upload test files. # For REAL_S3 we use the files already in the bucket if remote_storage_kind == RemoteStorageKind.MOCK_S3: log.info("Uploading test files to mock bucket") + os.chdir("test_runner/regress/data/extension_test") + for path in os.walk("."): + prefix, _, files = path + for file in files: + # the [2:] is to remove the leading "./" + full_path = os.path.join(prefix, file)[2:] - def upload_test_file(from_path, to_path): - assert env.ext_remote_storage is not None # satisfy mypy - assert env.remote_storage_client is not None # satisfy mypy - with open( - f"test_runner/regress/data/extension_test/v{pg_version}/{from_path}", "rb" - ) as f: - env.remote_storage_client.upload_fileobj( - f, - env.ext_remote_storage.bucket_name, - f"ext/v{pg_version}/{to_path}", - ) + with open(full_path, "rb") as f: + log.info(f"UPLOAD {full_path} to ext/{full_path}") + env.remote_storage_client.upload_fileobj( + f, + env.ext_remote_storage.bucket_name, + f"ext/{full_path}", + ) + os.chdir("../../../..") + # Start a compute node and check that it can download the extensions + # and use them to CREATE EXTENSION and LOAD + endpoint = env.endpoints.create_start( + "test_remote_extensions", + tenant_id=tenant_id, + remote_ext_config=env.ext_remote_storage.to_string(), + # config_lines=["log_min_messages=debug3"], + ) + with closing(endpoint.connect()) as conn: + with conn.cursor() as cur: + # Check that appropriate control files were downloaded + cur.execute("SELECT * FROM pg_available_extensions") + all_extensions = [x[0] for x in cur.fetchall()] + log.info(all_extensions) + assert "anon" in all_extensions + assert "embedding" in all_extensions + # TODO: check that we cant't download custom extensions for other tenant ids - upload_test_file("ext_index.json", "ext_index.json") - upload_test_file("anon.tar.gz", "extensions/anon.tar.gz") - upload_test_file("embedding.tar.gz", "extensions/embedding.tar.gz") + # check that we can download public extension + cur.execute("CREATE EXTENSION embedding") + cur.execute("SELECT extname FROM pg_extension") + assert "embedding" in [x[0] for x in cur.fetchall()] - assert env.ext_remote_storage is not None # satisfy mypy - assert env.remote_storage_client is not None # satisfy mypy - try: - # Start a compute node and check that it can download the extensions - # and use them to CREATE EXTENSION and LOAD - endpoint = env.endpoints.create_start( - "test_remote_extensions", - tenant_id=tenant_id, - remote_ext_config=env.ext_remote_storage.to_string(), - # config_lines=["log_min_messages=debug3"], - ) - with closing(endpoint.connect()) as conn: - with conn.cursor() as cur: - # Check that appropriate control files were downloaded - cur.execute("SELECT * FROM pg_available_extensions") - all_extensions = [x[0] for x in cur.fetchall()] - log.info(all_extensions) - assert "anon" in all_extensions - 
assert "embedding" in all_extensions - # TODO: check that we cant't download custom extensions for other tenant ids - - # check that we can download public extension - cur.execute("CREATE EXTENSION embedding") - cur.execute("SELECT extname FROM pg_extension") - assert "embedding" in [x[0] for x in cur.fetchall()] - - # check that we can download private extension - try: - cur.execute("CREATE EXTENSION anon") - except Exception as err: - log.info("error creating anon extension") - assert "pgcrypto" in str(err), "unexpected error creating anon extension" - - # TODO: try to load libraries as well - - finally: - # Cleaning up downloaded files is important for local tests - # or else one test could reuse the files from another test or another test run - cleanup_files = [ - "lib/postgresql/anon.so", - "lib/postgresql/embedding.so", - "share/postgresql/extension/anon.control", - "share/postgresql/extension/embedding--0.1.0.sql", - "share/postgresql/extension/embedding.control", - ] - cleanup_files = [f"pg_install/v{pg_version}/" + x for x in cleanup_files] - cleanup_folders = [ - "extensions", - f"pg_install/v{pg_version}/share/postgresql/extension/anon", - f"pg_install/v{pg_version}/extensions", - ] - for file in cleanup_files: + # check that we can download private extension try: - os.remove(file) - log.info(f"removed file {file}") + cur.execute("CREATE EXTENSION anon") except Exception as err: - log.info(f"error removing file {file}: {err}") - for folder in cleanup_folders: - try: - shutil.rmtree(folder) - log.info(f"removed folder {folder}") - except Exception as err: - log.info(f"error removing file {file}: {err}") + log.info("error creating anon extension") + assert "pgcrypto" in str(err), "unexpected error creating anon extension" + + # TODO: try to load libraries as well + + # Cleaning up downloaded files is important for local tests + # or else one test could reuse the files from another test or another test run + cleanup_files = [ + "lib/postgresql/anon.so", + "lib/postgresql/embedding.so", + "share/postgresql/extension/anon.control", + "share/postgresql/extension/embedding--0.1.0.sql", + "share/postgresql/extension/embedding.control", + ] + cleanup_files = [f"pg_install/v{pg_version}/" + x for x in cleanup_files] + cleanup_folders = [ + "extensions", + f"pg_install/v{pg_version}/share/postgresql/extension/anon", + f"pg_install/v{pg_version}/extensions", + ] + for file in cleanup_files: + try: + os.remove(file) + log.info(f"removed file {file}") + except Exception as err: + log.info(f"error removing file {file}: {err}") + for folder in cleanup_folders: + try: + shutil.rmtree(folder) + log.info(f"removed folder {folder}") + except Exception as err: + log.info(f"error removing file {file}: {err}") diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index 3f47ef062f..d79c7a4104 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -60,6 +60,7 @@ url = { version = "2", features = ["serde"] } [build-dependencies] anyhow = { version = "1", features = ["backtrace"] } bytes = { version = "1", features = ["serde"] } +cc = { version = "1", default-features = false, features = ["parallel"] } either = { version = "1" } itertools = { version = "0.10" } libc = { version = "0.2", features = ["extra_traits"] }