Add Versioning + move Access to control plane for remote ext (#4760)

Alek Westover
2023-07-20 12:55:07 -04:00
parent 285f687e1b
commit fcc57f49d1
30 changed files with 373 additions and 276 deletions

Cargo.lock

@@ -740,6 +740,9 @@ name = "cc"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
dependencies = [
"jobserver",
]
[[package]]
name = "cexpr"
@@ -922,6 +925,7 @@ dependencies = [
"url",
"utils",
"workspace_hack",
"zstd",
]
[[package]]
@@ -1975,6 +1979,15 @@ version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"
[[package]]
name = "jobserver"
version = "0.1.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2"
dependencies = [
"libc",
]
[[package]]
name = "js-sys"
version = "0.3.63"
@@ -5296,6 +5309,7 @@ version = "0.1.0"
dependencies = [
"anyhow",
"bytes",
"cc",
"chrono",
"clap",
"clap_builder",
@@ -5396,3 +5410,33 @@ name = "zeroize"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a0956f1ba7c7909bfb66c2e9e4124ab6f6482560f6628b5aaeba39207c9aad9"
[[package]]
name = "zstd"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c"
dependencies = [
"zstd-safe",
]
[[package]]
name = "zstd-safe"
version = "6.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581"
dependencies = [
"libc",
"zstd-sys",
]
[[package]]
name = "zstd-sys"
version = "2.0.8+zstd.1.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c"
dependencies = [
"cc",
"libc",
"pkg-config",
]


@@ -34,3 +34,4 @@ utils.workspace = true
workspace_hack.workspace = true
toml_edit.workspace = true
remote_storage = { version = "0.1", path = "../libs/remote_storage/" }
zstd = "0.12.4"
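
The new zstd dependency is what lets compute_ctl unpack the new .tar.zst extension archives (download_extension below switches from flate2's GzDecoder to zstd's streaming Decoder). A minimal sketch of that decode path, assuming only the zstd and tar crates added here; the buffer and destination directory are hypothetical:

use std::io::Read;
use tar::Archive;
use zstd::stream::read::Decoder;

// Decompress a downloaded .tar.zst buffer in memory, then unpack the
// contained tar archive into dest, mirroring download_extension below.
fn unpack_tar_zst(compressed: &[u8], dest: &str) -> anyhow::Result<()> {
    let mut decoder = Decoder::new(compressed)?;
    let mut tar_bytes = Vec::new();
    decoder.read_to_end(&mut tar_bytes)?;
    Archive::new(tar_bytes.as_slice()).unpack(dest)?;
    Ok(())
}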


@@ -30,11 +30,10 @@
//! -C 'postgresql://cloud_admin@localhost/postgres' \
//! -S /var/db/postgres/specs/current.json \
//! -b /usr/local/bin/postgres \
//! -r {"bucket": "my-bucket", "region": "eu-central-1", "endpoint": "http://localhost:9000",
//! (optionally) "key": "AWS_SECRET_ACCESS_KEY", "id": "AWS_ACCESS_KEY_ID"}
//! -r {"bucket": "my-bucket", "region": "eu-central-1", "endpoint": "http://localhost:9000"}
//! ```
//!
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::fs::File;
use std::panic;
use std::path::Path;
@@ -52,7 +51,6 @@ use compute_api::responses::ComputeStatus;
use compute_tools::compute::{ComputeNode, ComputeState, ParsedSpec};
use compute_tools::configurator::launch_configurator;
use compute_tools::extension_server::launch_download_extensions;
use compute_tools::extension_server::{get_pg_version, init_remote_storage};
use compute_tools::http::api::launch_http_server;
use compute_tools::logger::*;
@@ -60,12 +58,14 @@ use compute_tools::monitor::launch_monitor;
use compute_tools::params::*;
use compute_tools::spec::*;
const BUILD_TAG_DEFAULT: &str = "local";
const BUILD_TAG_DEFAULT: &str = "111"; // TODO: change back to local; I need 111 for my test
fn main() -> Result<()> {
init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
let build_tag = option_env!("BUILD_TAG").unwrap_or(BUILD_TAG_DEFAULT);
let build_tag = option_env!("BUILD_TAG")
.unwrap_or(BUILD_TAG_DEFAULT)
.to_string();
info!("build_tag: {build_tag}");
let matches = cli().get_matches();
@@ -74,8 +74,7 @@ fn main() -> Result<()> {
let remote_ext_config = matches.get_one::<String>("remote-ext-config");
let ext_remote_storage = remote_ext_config.map(|x| {
init_remote_storage(x, build_tag)
.expect("cannot initialize remote extension storage from config")
init_remote_storage(x).expect("cannot initialize remote extension storage from config")
});
let http_port = *matches
@@ -195,7 +194,9 @@ fn main() -> Result<()> {
state: Mutex::new(new_state),
state_changed: Condvar::new(),
ext_remote_storage,
available_extensions: OnceLock::new(),
ext_remote_paths: OnceLock::new(),
already_downloaded_extensions: Mutex::new(HashSet::new()),
build_tag,
};
let compute = Arc::new(compute_node);
@@ -243,9 +244,6 @@ fn main() -> Result<()> {
let _configurator_handle =
launch_configurator(&compute).expect("cannot launch configurator thread");
let _download_extensions_handle =
launch_download_extensions(&compute).expect("cannot launch download extensions thread");
// Start Postgres
let mut delay_exit = false;
let mut exit_code = None;


@@ -1,3 +1,4 @@
use std::collections::HashMap;
use std::collections::HashSet;
use std::fs;
use std::io::BufRead;
@@ -21,7 +22,7 @@ use compute_api::responses::{ComputeMetrics, ComputeStatus};
use compute_api::spec::{ComputeMode, ComputeSpec};
use utils::measured_stream::MeasuredReader;
use remote_storage::GenericRemoteStorage;
use remote_storage::{GenericRemoteStorage, RemotePath};
use crate::pg_helpers::*;
use crate::spec::*;
@@ -55,8 +56,10 @@ pub struct ComputeNode {
pub state_changed: Condvar,
/// the S3 bucket that we search for extensions in
pub ext_remote_storage: Option<GenericRemoteStorage>,
// cached lists of available extensions and libraries
pub available_extensions: OnceLock<HashSet<String>>,
// (key: extension name, value: path to extension archive in remote storage)
pub ext_remote_paths: OnceLock<HashMap<String, RemotePath>>,
pub already_downloaded_extensions: Mutex<HashSet<String>>,
pub build_tag: String,
}
#[derive(Clone, Debug)]
@@ -735,23 +738,23 @@ LIMIT 100",
// If remote extension storage is configured,
// download extension control files
#[tokio::main]
pub async fn prepare_external_extensions(&self, compute_state: &ComputeState) -> Result<()> {
if let Some(ref ext_remote_storage) = self.ext_remote_storage {
let pspec = compute_state.pspec.as_ref().expect("spec must be set");
let spec = &pspec.spec;
let custom_ext_prefixes = spec.custom_extensions.clone().unwrap_or(Vec::new());
info!("custom_ext_prefixes: {:?}", &custom_ext_prefixes);
let available_extensions = extension_server::get_available_extensions(
let custom_ext = spec.custom_extensions.clone().unwrap_or(Vec::new());
info!("custom extensions: {:?}", &custom_ext);
let ext_remote_paths = extension_server::get_available_extensions(
ext_remote_storage,
&self.pgbin,
&self.pgversion,
&custom_ext_prefixes,
&custom_ext,
&self.build_tag,
)
.await?;
self.available_extensions
.set(available_extensions)
.expect("available_extensions.set error");
self.ext_remote_paths
.set(ext_remote_paths)
.expect("ext_remote_paths.set error");
}
Ok(())
}
@@ -760,11 +763,31 @@ LIMIT 100",
match &self.ext_remote_storage {
None => anyhow::bail!("No remote extension storage"),
Some(remote_storage) => {
// TODO: eliminate useless LOAD library calls to this function (if possible);
// it is not clear that we can distinguish between useful and useless
// library calls any better than the code below does
let ext_name = ext_name.replace(".so", "");
{
let mut already_downloaded_extensions =
self.already_downloaded_extensions.lock().expect("bad lock");
if already_downloaded_extensions.contains(&ext_name) {
info!(
"extension {:?} already exists, skipping download",
&ext_name
);
return Ok(());
} else {
already_downloaded_extensions.insert(ext_name.clone());
}
}
extension_server::download_extension(
ext_name,
&ext_name,
&self
.ext_remote_paths
.get()
.expect("error accessing ext_remote_paths")[&ext_name],
remote_storage,
&self.pgbin,
&self.pgversion,
)
.await
}
@@ -809,6 +832,9 @@ LIMIT 100",
libs_vec.extend(preload_libs_vec);
}
info!("Download ext_index.json, find the extension paths");
self.prepare_external_extensions(compute_state).await?;
info!("Downloading to shared preload libraries: {:?}", &libs_vec);
let mut download_tasks = Vec::new();
for library in &libs_vec {

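Two fields added above do the caching: ext_remote_paths is a write-once map (OnceLock) filled after ext_index.json is parsed, and already_downloaded_extensions is a Mutex-guarded set that makes repeated download requests for the same extension a no-op. A small sketch of the pattern, with hypothetical names:

use std::collections::{HashMap, HashSet};
use std::sync::{Mutex, OnceLock};

struct ExtensionCache {
    // Set exactly once, after ext_index.json has been fetched and parsed.
    remote_paths: OnceLock<HashMap<String, String>>,
    // Extensions already downloaded during this compute's lifetime.
    already_downloaded: Mutex<HashSet<String>>,
}

impl ExtensionCache {
    // Returns true if the caller should download, false if it was already
    // done. HashSet::insert returns false when the value was already present,
    // which is exactly the dedup check shown above.
    fn should_download(&self, ext_name: &str) -> bool {
        self.already_downloaded
            .lock()
            .expect("bad lock")
            .insert(ext_name.to_string())
    }
}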

@@ -1,35 +1,75 @@
// AT LEAST download_extension / get_available_extensions need large changes
// refactor custom_extensions to be clear that we have moved the access logic to control plane
// Download extension files from the extension store
// and put them in the right place in the postgres directory
/*
The layout of the S3 bucket is as follows:
├── 111
│   ├── v14
│   │   ├── extensions
│   │   │   ├── anon.tar.zst
│   │   │   └── embedding.tar.zst
│   │   └── ext_index.json
│   └── v15
│       ├── extensions
│       │   ├── anon.tar.zst
│       │   └── embedding.tar.zst
│       └── ext_index.json
├── 112
│   ├── v14
│   │   ├── extensions
│   │   │   └── anon.tar.zst
│   │   └── ext_index.json
│   └── v15
│       ├── extensions
│       │   └── anon.tar.zst
│       └── ext_index.json
└── 113
    ├── v14
    │   ├── extensions
    │   │   └── embedding.tar.zst
    │   └── ext_index.json
    └── v15
        ├── extensions
        │   └── embedding.tar.zst
        └── ext_index.json
v14/ext_index.json
-- this contains information necessary to create control files
v14/extensions/test_ext1.tar.gz
-- this contains the library files and sql files necessary to create this extension
v14/extensions/custom_ext1.tar.gz
Note that the build number cannot be part of the storage prefix, because we might
need extensions from other build numbers.
The difference between a private and a public extension is determined by who can
load the extension; this is specified in ext_index.json.
ext_index.json stores the control file contents and the locations of the
extension archives.
Specifically, ext_index.json has a list of public extensions, and a list of
extensions enabled for specific tenant-ids.
We do not duplicate extension.tar.zst files;
we only upload a new one if it is updated.
*access* is controlled by spec
More specifically, here is an example ext_index.json:
{
"embedding": {
"control_file_content": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true",
"extension_archive": "111/v14/extensions/embedding.tar.zst"
},
"anon": {
"control_file_content": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n",
"extension_archive": "111/v14/extensions/anon.tar.zst"
}
}
*/
use crate::compute::ComputeNode;
use anyhow::Context;
use anyhow::{self, Result};
use flate2::read::GzDecoder;
use futures::future::join_all;
use remote_storage::*;
use serde_json::{self, Value};
use std::collections::HashSet;
use std::collections::HashMap;
use std::io::Read;
use std::num::{NonZeroU32, NonZeroUsize};
use std::path::Path;
use std::str;
use std::sync::Arc;
use std::thread;
use tar::Archive;
use tokio::io::AsyncReadExt;
use tracing::info;
use zstd::stream::read::Decoder;
fn get_pg_config(argument: &str, pgbin: &str) -> String {
// gives the result of `pg_config [argument]`
@@ -61,133 +101,111 @@ pub fn get_pg_version(pgbin: &str) -> String {
panic!("Unsupported postgres version {human_version}");
}
// download extension control files
// if custom_ext_prefixes is provided - search also in custom extension paths
// download control files for enabled_extensions
// return the paths in s3 to the archives containing the actual extension files
// for use in creating the extension
pub async fn get_available_extensions(
remote_storage: &GenericRemoteStorage,
pgbin: &str,
pg_version: &str,
custom_ext_prefixes: &[String],
) -> Result<HashSet<String>> {
enabled_extensions: &[String],
build_tag: &str,
) -> Result<HashMap<String, RemotePath>> {
let local_sharedir = Path::new(&get_pg_config("--sharedir", pgbin)).join("extension");
let index_path = pg_version.to_owned() + "/ext_index.json";
let index_path = format!("{build_tag}/{pg_version}/ext_index.json");
let index_path = RemotePath::new(Path::new(&index_path)).context("error forming path")?;
info!("download ext_index.json: {:?}", &index_path);
info!("download ext_index.json from: {:?}", &index_path);
// TODO: potential optimization: cache ext_index.json
let mut download = remote_storage.download(&index_path).await?;
let mut write_data_buffer = Vec::new();
let mut ext_idx_buffer = Vec::new();
download
.download_stream
.read_to_end(&mut write_data_buffer)
.read_to_end(&mut ext_idx_buffer)
.await?;
let ext_index_str = match str::from_utf8(&write_data_buffer) {
Ok(v) => v,
Err(e) => panic!("Invalid UTF-8 sequence: {}", e),
};
let ext_index_str = str::from_utf8(&ext_idx_buffer).expect("error parsing json");
let ext_index_full: Value = serde_json::from_str(ext_index_str)?;
let ext_index_full = ext_index_full.as_object().context("error parsing json")?;
let control_data = ext_index_full["control_data"]
.as_object()
.context("json parse error")?;
let enabled_extensions = ext_index_full["enabled_extensions"]
.as_object()
.context("json parse error")?;
info!("{:?}", control_data.clone());
info!("{:?}", enabled_extensions.clone());
info!("ext_index: {:?}", &ext_index_full);
let mut prefixes = vec!["public".to_string()];
prefixes.extend(custom_ext_prefixes.to_owned());
info!("{:?}", &prefixes);
let mut all_extensions = HashSet::new();
for prefix in prefixes {
let prefix_extensions = match enabled_extensions.get(&prefix) {
Some(Value::Array(ext_name)) => ext_name,
_ => {
info!("prefix {} has no extensions", prefix);
continue;
}
};
info!("{:?}", prefix_extensions);
for ext_name in prefix_extensions {
all_extensions.insert(ext_name.as_str().context("json parse error")?.to_string());
}
info!("enabled_extensions: {:?}", enabled_extensions);
let mut ext_remote_paths = HashMap::new();
let mut file_create_tasks = Vec::new();
for extension in enabled_extensions {
let ext_data = ext_index_full[extension]
.as_object()
.context("error parsing json")?;
let control_contents = ext_data["control_file_content"]
.as_str()
.context("error parsing json")?;
let control_path = local_sharedir.join(extension.to_owned() + ".control");
info!("writing file {:?}{:?}", control_path, control_contents);
file_create_tasks.push(tokio::fs::write(control_path, control_contents));
let ext_archive_path = ext_data["extension_archive"]
.as_str()
.context("error parsing json")?;
ext_remote_paths.insert(
extension.to_string(),
RemotePath::from_string(ext_archive_path)?,
);
}
for prefix in &all_extensions {
let control_contents = control_data[prefix].as_str().context("json parse error")?;
let control_path = local_sharedir.join(prefix.to_owned() + ".control");
info!("WRITING FILE {:?}{:?}", control_path, control_contents);
std::fs::write(control_path, control_contents)?;
let results = join_all(file_create_tasks).await;
for result in results {
result?;
}
Ok(all_extensions.into_iter().collect())
Ok(ext_remote_paths)
}
// download all sqlfiles (and possibly data files) for a given extension name
pub async fn download_extension(
ext_name: &str,
ext_path: &RemotePath,
remote_storage: &GenericRemoteStorage,
pgbin: &str,
pg_version: &str,
) -> Result<()> {
// TODO: potential optimization: only download the extension if it doesn't exist
// problem: how would we tell if it exists?
let ext_name = ext_name.replace(".so", "");
let ext_name_targz = ext_name.to_owned() + ".tar.gz";
if Path::new(&ext_name_targz).exists() {
info!("extension {:?} already exists", ext_name_targz);
return Ok(());
}
let ext_path = RemotePath::new(
&Path::new(pg_version)
.join("extensions")
.join(ext_name_targz.clone()),
)?;
info!(
"Start downloading extension {:?} from {:?}",
ext_name, ext_path
);
let mut download = remote_storage.download(&ext_path).await?;
let mut write_data_buffer = Vec::new();
info!("Download extension {:?} from {:?}", ext_name, ext_path);
let mut download = remote_storage.download(ext_path).await?;
let mut download_buffer = Vec::new();
download
.download_stream
.read_to_end(&mut write_data_buffer)
.read_to_end(&mut download_buffer)
.await?;
let unzip_dest = pgbin.strip_suffix("/bin/postgres").expect("bad pgbin");
let tar = GzDecoder::new(write_data_buffer.as_slice());
let mut archive = Archive::new(tar);
archive.unpack(unzip_dest)?;
let mut decoder = Decoder::new(download_buffer.as_slice())?;
let mut decompress_buffer = Vec::new();
decoder.read_to_end(&mut decompress_buffer)?;
let mut archive = Archive::new(decompress_buffer.as_slice());
let unzip_dest = pgbin
.strip_suffix("/bin/postgres")
.expect("bad pgbin")
.to_string()
+ "/download_extensions";
archive.unpack(&unzip_dest)?;
info!("Download + unzip {:?} completed successfully", &ext_path);
let local_sharedir = Path::new(&get_pg_config("--sharedir", pgbin)).join("extension");
let zip_sharedir = format!("{unzip_dest}/extensions/{ext_name}/share/extension");
info!("mv {zip_sharedir:?}/* {local_sharedir:?}");
for file in std::fs::read_dir(zip_sharedir)? {
let old_file = file?.path();
let new_file =
Path::new(&local_sharedir).join(old_file.file_name().context("error parsing file")?);
std::fs::rename(old_file, new_file)?;
}
let local_libdir = Path::new(&get_pg_config("--libdir", pgbin)).join("postgresql");
let zip_libdir = format!("{unzip_dest}/extensions/{ext_name}/lib");
info!("mv {zip_libdir:?}/* {local_libdir:?}");
for file in std::fs::read_dir(zip_libdir)? {
let old_file = file?.path();
let new_file =
Path::new(&local_libdir).join(old_file.file_name().context("error parsing file")?);
std::fs::rename(old_file, new_file)?;
let sharedir_paths = (
format!("{unzip_dest}/{ext_name}/share/extension"),
Path::new(&get_pg_config("--sharedir", pgbin)).join("extension"),
);
let libdir_paths = (
format!("{unzip_dest}/{ext_name}/lib"),
Path::new(&get_pg_config("--libdir", pgbin)).join("postgresql"),
);
// move contents of the libdir / sharedir in unzipped archive to the correct local paths
for paths in [sharedir_paths, libdir_paths] {
let (zip_dir, real_dir) = paths;
info!("mv {zip_dir:?}/* {real_dir:?}");
for file in std::fs::read_dir(zip_dir)? {
let old_file = file?.path();
let new_file =
Path::new(&real_dir).join(old_file.file_name().context("error parsing file")?);
std::fs::rename(old_file, new_file)?;
}
}
Ok(())
}
// This function initializes the necessary structs to use remote storage (should be fairly cheap)
pub fn init_remote_storage(
remote_ext_config: &str,
default_prefix: &str,
) -> anyhow::Result<GenericRemoteStorage> {
pub fn init_remote_storage(remote_ext_config: &str) -> anyhow::Result<GenericRemoteStorage> {
let remote_ext_config: serde_json::Value = serde_json::from_str(remote_ext_config)?;
let remote_ext_bucket = remote_ext_config["bucket"]
@@ -199,19 +217,9 @@ pub fn init_remote_storage(
let remote_ext_endpoint = remote_ext_config["endpoint"].as_str();
let remote_ext_prefix = remote_ext_config["prefix"]
.as_str()
.unwrap_or(default_prefix)
.unwrap_or_default()
.to_string();
// control plane passes the aws creds via CLI ARGS to compute_ctl
let aws_key = remote_ext_config["key"].as_str();
let aws_id = remote_ext_config["id"].as_str();
if let Some(aws_key) = aws_key {
if let Some(aws_id) = aws_id {
std::env::set_var("AWS_SECRET_ACCESS_KEY", aws_key);
std::env::set_var("AWS_ACCESS_KEY_ID", aws_id);
}
}
// If needed, it is easy to allow modification of other parameters
// however, default values should be fine for now
let config = S3Config {
@@ -229,19 +237,3 @@ pub fn init_remote_storage(
};
GenericRemoteStorage::from_config(&config)
}
pub fn launch_download_extensions(
compute: &Arc<ComputeNode>,
) -> Result<thread::JoinHandle<()>, std::io::Error> {
let compute = Arc::clone(compute);
thread::Builder::new()
.name("download-extensions".into())
.spawn(move || {
info!("start download_extension_files");
let compute_state = compute.state.lock().expect("error unlocking compute.state");
compute
.prepare_external_extensions(&compute_state)
.expect("error preparing extensions");
info!("download_extension_files done, exiting thread");
})
}
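
Taken together, the versioning scheme uses two kinds of remote keys: the index is always read from the current build's prefix, while archive keys come verbatim from ext_index.json and may point into an earlier build's prefix, which is how archives avoid duplication across builds. A trivial sketch of the index key construction, mirroring the format! call in get_available_extensions:

// Remote key of the per-build, per-Postgres-version extension index.
fn index_key(build_tag: &str, pg_version: &str) -> String {
    format!("{build_tag}/{pg_version}/ext_index.json")
}

fn main() {
    // Prints "111/v14/ext_index.json", matching the bucket layout above.
    println!("{}", index_key("111", "v14"));
}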


@@ -499,9 +499,7 @@ impl Endpoint {
//
// The proper way to implement this is to pass the custom extension
// in spec, but we don't have a way to do that yet in the python tests.
// NEW HACK: we enable the anon custom extension for everyone! this is of course just for testing
// how will we do it for real?
custom_extensions: Some(vec!["123454321".to_string(), self.tenant_id.to_string()]),
custom_extensions: Some(vec!["embedding".into(), "anon".into()]),
};
let spec_path = self.endpoint_path().join("spec.json");
std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;


@@ -65,6 +65,10 @@ impl RemotePath {
Ok(Self(relative_path.to_path_buf()))
}
pub fn from_string(relative_path: &str) -> anyhow::Result<Self> {
Self::new(Path::new(relative_path))
}
pub fn with_base(&self, base_path: &Path) -> PathBuf {
base_path.join(&self.0)
}
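
A usage sketch for the new from_string convenience constructor, using an archive key from the example index earlier in this commit (from_string validates the string the same way RemotePath::new validates a Path):

use remote_storage::RemotePath;

fn anon_archive_path() -> anyhow::Result<RemotePath> {
    // Key copied from the ext_index.json example above.
    RemotePath::from_string("111/v14/extensions/anon.tar.zst")
}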


@@ -721,7 +721,10 @@ class NeonEnvBuilder:
self.remote_storage = LocalFsStorage(Path(self.repo_dir / "local_fs_remote_storage"))
def enable_mock_s3_remote_storage(
self, bucket_name: str, force_enable: bool = True, enable_remote_extensions: bool = False
self,
bucket_name: str,
force_enable: bool = True,
enable_remote_extensions: bool = False,
):
"""
Sets up the pageserver to use the S3 mock server, creates the bucket, if it's not present already.
@@ -763,7 +766,10 @@ class NeonEnvBuilder:
)
def enable_real_s3_remote_storage(
self, test_name: str, force_enable: bool = True, enable_remote_extensions: bool = False
self,
test_name: str,
force_enable: bool = True,
enable_remote_extensions: bool = False,
):
"""
Sets up configuration to use real s3 endpoint without mock server
@@ -810,7 +816,7 @@ class NeonEnvBuilder:
bucket_region="eu-central-1",
access_key=access_key,
secret_key=secret_key,
prefix_in_bucket="5555",
prefix_in_bucket="",
)
def cleanup_local_storage(self):


@@ -0,0 +1,10 @@
{
"embedding": {
"control_file_content": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true",
"extension_archive": "111/v14/extensions/embedding.tar.zst"
},
"anon": {
"control_file_content": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n",
"extension_archive": "111/v14/extensions/anon.tar.zst"
}
}
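
These fixture files (one per build tag and Postgres version) follow the index format documented in extension_server.rs; note how build 113's index below reuses the anon archive from build 111. The commit itself parses the index with untyped serde_json::Value; a typed equivalent purely for illustration, assuming serde's derive feature (struct names are hypothetical):

use serde::Deserialize;
use std::collections::HashMap;

#[derive(Deserialize)]
struct ExtIndexEntry {
    // Written verbatim to <sharedir>/extension/<name>.control.
    control_file_content: String,
    // Remote key of the .tar.zst archive, possibly from another build.
    extension_archive: String,
}

// The whole index is a map from extension name to its entry.
type ExtIndex = HashMap<String, ExtIndexEntry>;

fn parse_index(json: &str) -> serde_json::Result<ExtIndex> {
    serde_json::from_str(json)
}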


@@ -0,0 +1,11 @@
{
"embedding": {
"control_file_content": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true",
"extension_archive": "111/v15/extensions/embedding.tar.zst"
},
"anon": {
"control_file_content": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n",
"extension_archive": "111/v15/extensions/anon.tar.zst"
}
}


@@ -0,0 +1,10 @@
{
"embedding": {
"control_file_content": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true",
"extension_archive": "111/v14/extensions/embedding.tar.zst"
},
"anon": {
"control_file_content": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n",
"extension_archive": "112/v14/extensions/anon.tar.zst"
}
}


@@ -0,0 +1,11 @@
{
"embedding": {
"control_file_content": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true",
"extension_archive": "111/v15/extensions/embedding.tar.zst"
},
"anon": {
"control_file_content": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n",
"extension_archive": "112/v15/extensions/anon.tar.zst"
}
}


@@ -0,0 +1,10 @@
{
"embedding": {
"control_file_content": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true",
"extension_archive": "113/v14/extensions/embedding.tar.zst"
},
"anon": {
"control_file_content": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n",
"extension_archive": "111/v14/extensions/anon.tar.zst"
}
}


@@ -0,0 +1,11 @@
{
"embedding": {
"control_file_content": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true",
"extension_archive": "113/v15/extensions/embedding.tar.zst"
},
"anon": {
"control_file_content": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n",
"extension_archive": "111/v15/extensions/anon.tar.zst"
}
}


@@ -1,14 +0,0 @@
{
"enabled_extensions": {
"123454321": [
"anon"
],
"public": [
"embedding"
]
},
"control_data": {
"embedding": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true",
"anon": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n"
}
}


@@ -1,14 +0,0 @@
{
"enabled_extensions": {
"123454321": [
"anon"
],
"public": [
"embedding"
]
},
"control_data": {
"embedding": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true",
"anon": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n"
}
}


@@ -11,21 +11,16 @@ from fixtures.neon_fixtures import (
)
from fixtures.pg_version import PgVersion
# Generate mock extension files and upload them to the mock bucket.
#
# NOTE: You must have appropriate AWS credentials to run REAL_S3 test.
# It may also be necessary to set the following environment variables for MOCK_S3 test:
# export AWS_ACCESS_KEY_ID='test'
# export AWS_SECRET_ACCESS_KEY='test'
# export AWS_SECURITY_TOKEN='test'
# export AWS_SESSION_TOKEN='test'
# export AWS_DEFAULT_REGION='us-east-1'
# Test downloading remote extension.
@pytest.mark.parametrize("remote_storage_kind", available_s3_storages())
def test_remote_extensions(
neon_env_builder: NeonEnvBuilder,
remote_storage_kind: RemoteStorageKind,
pg_version: PgVersion,
):
if remote_storage_kind == RemoteStorageKind.REAL_S3:
return None
neon_env_builder.enable_remote_storage(
remote_storage_kind=remote_storage_kind,
test_name="test_remote_extensions",
@@ -36,87 +31,84 @@ def test_remote_extensions(
tenant_id, _ = env.neon_cli.create_tenant()
env.neon_cli.create_timeline("test_remote_extensions", tenant_id=tenant_id)
assert env.ext_remote_storage is not None # satisfy mypy
assert env.remote_storage_client is not None # satisfy mypy
# For MOCK_S3 we upload test files.
# For REAL_S3 we use the files already in the bucket
if remote_storage_kind == RemoteStorageKind.MOCK_S3:
log.info("Uploading test files to mock bucket")
os.chdir("test_runner/regress/data/extension_test")
for path in os.walk("."):
prefix, _, files = path
for file in files:
# the [2:] is to remove the leading "./"
full_path = os.path.join(prefix, file)[2:]
def upload_test_file(from_path, to_path):
assert env.ext_remote_storage is not None # satisfy mypy
assert env.remote_storage_client is not None # satisfy mypy
with open(
f"test_runner/regress/data/extension_test/v{pg_version}/{from_path}", "rb"
) as f:
env.remote_storage_client.upload_fileobj(
f,
env.ext_remote_storage.bucket_name,
f"ext/v{pg_version}/{to_path}",
)
with open(full_path, "rb") as f:
log.info(f"UPLOAD {full_path} to ext/{full_path}")
env.remote_storage_client.upload_fileobj(
f,
env.ext_remote_storage.bucket_name,
f"ext/{full_path}",
)
os.chdir("../../../..")
# Start a compute node and check that it can download the extensions
# and use them to CREATE EXTENSION and LOAD
endpoint = env.endpoints.create_start(
"test_remote_extensions",
tenant_id=tenant_id,
remote_ext_config=env.ext_remote_storage.to_string(),
# config_lines=["log_min_messages=debug3"],
)
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
# Check that appropriate control files were downloaded
cur.execute("SELECT * FROM pg_available_extensions")
all_extensions = [x[0] for x in cur.fetchall()]
log.info(all_extensions)
assert "anon" in all_extensions
assert "embedding" in all_extensions
# TODO: check that we can't download custom extensions for other tenant ids
upload_test_file("ext_index.json", "ext_index.json")
upload_test_file("anon.tar.gz", "extensions/anon.tar.gz")
upload_test_file("embedding.tar.gz", "extensions/embedding.tar.gz")
# check that we can download public extension
cur.execute("CREATE EXTENSION embedding")
cur.execute("SELECT extname FROM pg_extension")
assert "embedding" in [x[0] for x in cur.fetchall()]
assert env.ext_remote_storage is not None # satisfy mypy
assert env.remote_storage_client is not None # satisfy mypy
try:
# Start a compute node and check that it can download the extensions
# and use them to CREATE EXTENSION and LOAD
endpoint = env.endpoints.create_start(
"test_remote_extensions",
tenant_id=tenant_id,
remote_ext_config=env.ext_remote_storage.to_string(),
# config_lines=["log_min_messages=debug3"],
)
with closing(endpoint.connect()) as conn:
with conn.cursor() as cur:
# Check that appropriate control files were downloaded
cur.execute("SELECT * FROM pg_available_extensions")
all_extensions = [x[0] for x in cur.fetchall()]
log.info(all_extensions)
assert "anon" in all_extensions
assert "embedding" in all_extensions
# TODO: check that we can't download custom extensions for other tenant ids
# check that we can download public extension
cur.execute("CREATE EXTENSION embedding")
cur.execute("SELECT extname FROM pg_extension")
assert "embedding" in [x[0] for x in cur.fetchall()]
# check that we can download private extension
try:
cur.execute("CREATE EXTENSION anon")
except Exception as err:
log.info("error creating anon extension")
assert "pgcrypto" in str(err), "unexpected error creating anon extension"
# TODO: try to load libraries as well
finally:
# Cleaning up downloaded files is important for local tests
# or else one test could reuse the files from another test or another test run
cleanup_files = [
"lib/postgresql/anon.so",
"lib/postgresql/embedding.so",
"share/postgresql/extension/anon.control",
"share/postgresql/extension/embedding--0.1.0.sql",
"share/postgresql/extension/embedding.control",
]
cleanup_files = [f"pg_install/v{pg_version}/" + x for x in cleanup_files]
cleanup_folders = [
"extensions",
f"pg_install/v{pg_version}/share/postgresql/extension/anon",
f"pg_install/v{pg_version}/extensions",
]
for file in cleanup_files:
# check that we can download private extension
try:
os.remove(file)
log.info(f"removed file {file}")
cur.execute("CREATE EXTENSION anon")
except Exception as err:
log.info(f"error removing file {file}: {err}")
for folder in cleanup_folders:
try:
shutil.rmtree(folder)
log.info(f"removed folder {folder}")
except Exception as err:
log.info(f"error removing folder {folder}: {err}")
log.info("error creating anon extension")
assert "pgcrypto" in str(err), "unexpected error creating anon extension"
# TODO: try to load libraries as well
# Cleaning up downloaded files is important for local tests
# or else one test could reuse the files from another test or another test run
cleanup_files = [
"lib/postgresql/anon.so",
"lib/postgresql/embedding.so",
"share/postgresql/extension/anon.control",
"share/postgresql/extension/embedding--0.1.0.sql",
"share/postgresql/extension/embedding.control",
]
cleanup_files = [f"pg_install/v{pg_version}/" + x for x in cleanup_files]
cleanup_folders = [
"extensions",
f"pg_install/v{pg_version}/share/postgresql/extension/anon",
f"pg_install/v{pg_version}/extensions",
]
for file in cleanup_files:
try:
os.remove(file)
log.info(f"removed file {file}")
except Exception as err:
log.info(f"error removing file {file}: {err}")
for folder in cleanup_folders:
try:
shutil.rmtree(folder)
log.info(f"removed folder {folder}")
except Exception as err:
log.info(f"error removing folder {folder}: {err}")


@@ -60,6 +60,7 @@ url = { version = "2", features = ["serde"] }
[build-dependencies]
anyhow = { version = "1", features = ["backtrace"] }
bytes = { version = "1", features = ["serde"] }
cc = { version = "1", default-features = false, features = ["parallel"] }
either = { version = "1" }
itertools = { version = "0.10" }
libc = { version = "0.2", features = ["extra_traits"] }