mirror of https://github.com/neondatabase/neon.git
synced 2026-01-14 17:02:56 +00:00
Add Versioning + move Access to control plane for remote ext (#4760)
Cargo.lock: 44 changes (generated)
@@ -740,6 +740,9 @@ name = "cc"
 version = "1.0.79"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
+dependencies = [
+ "jobserver",
+]
 
 [[package]]
 name = "cexpr"
@@ -922,6 +925,7 @@ dependencies = [
  "url",
  "utils",
  "workspace_hack",
+ "zstd",
 ]
 
 [[package]]
@@ -1975,6 +1979,15 @@ version = "1.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"
 
+[[package]]
+name = "jobserver"
+version = "0.1.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "js-sys"
 version = "0.3.63"
@@ -5296,6 +5309,7 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "bytes",
+ "cc",
  "chrono",
  "clap",
  "clap_builder",
@@ -5396,3 +5410,33 @@ name = "zeroize"
 version = "1.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2a0956f1ba7c7909bfb66c2e9e4124ab6f6482560f6628b5aaeba39207c9aad9"
+
+[[package]]
+name = "zstd"
+version = "0.12.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c"
+dependencies = [
+ "zstd-safe",
+]
+
+[[package]]
+name = "zstd-safe"
+version = "6.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581"
+dependencies = [
+ "libc",
+ "zstd-sys",
+]
+
+[[package]]
+name = "zstd-sys"
+version = "2.0.8+zstd.1.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c"
+dependencies = [
+ "cc",
+ "libc",
+ "pkg-config",
+]
@@ -34,3 +34,4 @@ utils.workspace = true
 workspace_hack.workspace = true
 toml_edit.workspace = true
 remote_storage = { version = "0.1", path = "../libs/remote_storage/" }
+zstd = "0.12.4"
@@ -30,11 +30,10 @@
 //! -C 'postgresql://cloud_admin@localhost/postgres' \
 //! -S /var/db/postgres/specs/current.json \
 //! -b /usr/local/bin/postgres \
-//! -r {"bucket": "my-bucket", "region": "eu-central-1", "endpoint": "http://localhost:9000",
-//! (optionally) "key": "AWS_SECRET_ACCESS_KEY", "id": "AWS_ACCESS_KEY_ID"}
+//! -r {"bucket": "my-bucket", "region": "eu-central-1", "endpoint": "http://localhost:9000"}
 //! ```
 //!
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet};
 use std::fs::File;
 use std::panic;
 use std::path::Path;
@@ -52,7 +51,6 @@ use compute_api::responses::ComputeStatus;
 
 use compute_tools::compute::{ComputeNode, ComputeState, ParsedSpec};
 use compute_tools::configurator::launch_configurator;
-use compute_tools::extension_server::launch_download_extensions;
 use compute_tools::extension_server::{get_pg_version, init_remote_storage};
 use compute_tools::http::api::launch_http_server;
 use compute_tools::logger::*;
@@ -60,12 +58,14 @@ use compute_tools::monitor::launch_monitor;
 use compute_tools::params::*;
 use compute_tools::spec::*;
 
-const BUILD_TAG_DEFAULT: &str = "local";
+const BUILD_TAG_DEFAULT: &str = "111"; // TODO: change back to local; I need 111 for my test
 
 fn main() -> Result<()> {
     init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
 
-    let build_tag = option_env!("BUILD_TAG").unwrap_or(BUILD_TAG_DEFAULT);
+    let build_tag = option_env!("BUILD_TAG")
+        .unwrap_or(BUILD_TAG_DEFAULT)
+        .to_string();
+    info!("build_tag: {build_tag}");
 
     let matches = cli().get_matches();
@@ -74,8 +74,7 @@ fn main() -> Result<()> {
 
     let remote_ext_config = matches.get_one::<String>("remote-ext-config");
     let ext_remote_storage = remote_ext_config.map(|x| {
-        init_remote_storage(x, build_tag)
-            .expect("cannot initialize remote extension storage from config")
+        init_remote_storage(x).expect("cannot initialize remote extension storage from config")
     });
 
     let http_port = *matches
@@ -195,7 +194,9 @@ fn main() -> Result<()> {
         state: Mutex::new(new_state),
         state_changed: Condvar::new(),
         ext_remote_storage,
-        available_extensions: OnceLock::new(),
+        ext_remote_paths: OnceLock::new(),
+        already_downloaded_extensions: Mutex::new(HashSet::new()),
+        build_tag,
     };
     let compute = Arc::new(compute_node);
 
@@ -243,9 +244,6 @@ fn main() -> Result<()> {
     let _configurator_handle =
         launch_configurator(&compute).expect("cannot launch configurator thread");
 
-    let _download_extensions_handle =
-        launch_download_extensions(&compute).expect("cannot launch download extensions thread");
-
     // Start Postgres
     let mut delay_exit = false;
     let mut exit_code = None;
 
@@ -1,3 +1,4 @@
 use std::collections::HashMap;
+use std::collections::HashSet;
 use std::fs;
 use std::io::BufRead;
@@ -21,7 +22,7 @@ use compute_api::responses::{ComputeMetrics, ComputeStatus};
 use compute_api::spec::{ComputeMode, ComputeSpec};
 use utils::measured_stream::MeasuredReader;
 
-use remote_storage::GenericRemoteStorage;
+use remote_storage::{GenericRemoteStorage, RemotePath};
 
 use crate::pg_helpers::*;
 use crate::spec::*;
@@ -55,8 +56,10 @@ pub struct ComputeNode {
     pub state_changed: Condvar,
     /// the S3 bucket that we search for extensions in
     pub ext_remote_storage: Option<GenericRemoteStorage>,
-    // cached lists of available extensions and libraries
-    pub available_extensions: OnceLock<HashSet<String>>,
+    // (key: extension name, value: path to extension archive in remote storage)
+    pub ext_remote_paths: OnceLock<HashMap<String, RemotePath>>,
+    pub already_downloaded_extensions: Mutex<HashSet<String>>,
+    pub build_tag: String,
 }
 
 #[derive(Clone, Debug)]
@@ -735,23 +738,23 @@ LIMIT 100",
 
     // If remote extension storage is configured,
     // download extension control files
-    #[tokio::main]
     pub async fn prepare_external_extensions(&self, compute_state: &ComputeState) -> Result<()> {
         if let Some(ref ext_remote_storage) = self.ext_remote_storage {
             let pspec = compute_state.pspec.as_ref().expect("spec must be set");
             let spec = &pspec.spec;
-            let custom_ext_prefixes = spec.custom_extensions.clone().unwrap_or(Vec::new());
-            info!("custom_ext_prefixes: {:?}", &custom_ext_prefixes);
-            let available_extensions = extension_server::get_available_extensions(
+            let custom_ext = spec.custom_extensions.clone().unwrap_or(Vec::new());
+            info!("custom extensions: {:?}", &custom_ext);
+            let ext_remote_paths = extension_server::get_available_extensions(
                 ext_remote_storage,
                 &self.pgbin,
                 &self.pgversion,
-                &custom_ext_prefixes,
+                &custom_ext,
+                &self.build_tag,
             )
             .await?;
-            self.available_extensions
-                .set(available_extensions)
-                .expect("available_extensions.set error");
+            self.ext_remote_paths
+                .set(ext_remote_paths)
+                .expect("ext_remote_paths.set error");
         }
         Ok(())
     }
@@ -760,11 +763,31 @@ LIMIT 100",
         match &self.ext_remote_storage {
             None => anyhow::bail!("No remote extension storage"),
             Some(remote_storage) => {
+                // TODO: eliminate useless LOAD Library calls to this function (if possible)
+                // not clear that we can distinguish between useful and useless
+                // library calls better than the below code
+                let ext_name = ext_name.replace(".so", "");
+                {
+                    let mut already_downloaded_extensions =
+                        self.already_downloaded_extensions.lock().expect("bad lock");
+                    if already_downloaded_extensions.contains(&ext_name) {
+                        info!(
+                            "extension {:?} already exists, skipping download",
+                            &ext_name
+                        );
+                        return Ok(());
+                    } else {
+                        already_downloaded_extensions.insert(ext_name.clone());
+                    }
+                }
                 extension_server::download_extension(
-                    ext_name,
+                    &ext_name,
+                    &self
+                        .ext_remote_paths
+                        .get()
+                        .expect("error accessing ext_remote_paths")[&ext_name],
                     remote_storage,
                     &self.pgbin,
-                    &self.pgversion,
                 )
                 .await
             }
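The guard above is easiest to see in isolation. Below is a minimal sketch (not part of the commit) of the same download-once idea; it compresses the contains/insert pair into a single HashSet::insert call, whose boolean result says whether this caller claimed the download:

```rust
use std::collections::HashSet;
use std::sync::Mutex;

// Sketch of the download-once guard: the first caller for a given extension
// name claims it by inserting into the shared set; later callers find it
// already present and skip the download.
fn claim_download(downloaded: &Mutex<HashSet<String>>, ext_name: &str) -> bool {
    // HashSet::insert returns false if the value was already present.
    downloaded
        .lock()
        .expect("poisoned lock")
        .insert(ext_name.to_string())
}

fn main() {
    let downloaded = Mutex::new(HashSet::new());
    assert!(claim_download(&downloaded, "anon")); // first call: proceed with download
    assert!(!claim_download(&downloaded, "anon")); // second call: skip
}
```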
@@ -809,6 +832,9 @@ LIMIT 100",
             libs_vec.extend(preload_libs_vec);
         }
 
+        info!("Download ext_index.json, find the extension paths");
+        self.prepare_external_extensions(compute_state).await?;
+
         info!("Downloading to shared preload libraries: {:?}", &libs_vec);
         let mut download_tasks = Vec::new();
         for library in &libs_vec {
@@ -1,35 +1,75 @@
-// AT LEAST download_extension / get_available_extensions need large changes
-// refactor custom_extensions to be clear that we have moved the access logic to control plane
-
 // Download extension files from the extension store
 // and put them in the right place in the postgres directory
 /*
+The layout of the S3 bucket is as follows:
+├── 111
+│   ├── v14
+│   │   ├── extensions
+│   │   │   ├── anon.tar.zst
+│   │   │   └── embedding.tar.zst
+│   │   └── ext_index.json
+│   └── v15
+│       ├── extensions
+│       │   ├── anon.tar.zst
+│       │   └── embedding.tar.zst
+│       └── ext_index.json
+├── 112
+│   ├── v14
+│   │   ├── extensions
+│   │   │   └── anon.tar.zst
+│   │   └── ext_index.json
+│   └── v15
+│       ├── extensions
+│       │   └── anon.tar.zst
+│       └── ext_index.json
+└── 113
+    ├── v14
+    │   ├── extensions
+    │   │   └── embedding.tar.zst
+    │   └── ext_index.json
+    └── v15
+        ├── extensions
+        │   └── embedding.tar.zst
+        └── ext_index.json
+
-v14/ext_index.json
-    -- this contains information necessary to create control files
-v14/extensions/test_ext1.tar.gz
-    -- this contains the library files and sql files necessary to create this extension
-v14/extensions/custom_ext1.tar.gz
+Note that the build number cannot be part of the prefix because we might need extensions
+from other build numbers.
 
-The difference between private and public extensions is determined by who can
-load the extension; this is specified in ext_index.json
-Specifically, ext_index.json has a list of public extensions, and a list of
-extensions enabled for specific tenant-ids.
+ext_index.json stores the control files and the location of the extension archives.
+We do not duplicate extension.tar.zst files.
+We only upload a new one if it is updated.
+*access* is controlled by spec
+
+More specifically, here is an example ext_index.json
+{
+    "embedding": {
+        "control_file_content": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true",
+        "extension_archive": "111/v14/extensions/embedding.tar.zst"
+    },
+    "anon": {
+        "control_file_content": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n",
+        "extension_archive": "111/v14/extensions/anon.tar.zst"
+    }
+}
 */
-use crate::compute::ComputeNode;
 use anyhow::Context;
 use anyhow::{self, Result};
-use flate2::read::GzDecoder;
+use futures::future::join_all;
 use remote_storage::*;
 use serde_json::{self, Value};
-use std::collections::HashSet;
+use std::collections::HashMap;
 use std::io::Read;
 use std::num::{NonZeroU32, NonZeroUsize};
 use std::path::Path;
 use std::str;
-use std::sync::Arc;
-use std::thread;
 use tar::Archive;
 use tokio::io::AsyncReadExt;
 use tracing::info;
+use zstd::stream::read::Decoder;
 
 fn get_pg_config(argument: &str, pgbin: &str) -> String {
     // gives the result of `pg_config [argument]`
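To make the new index format concrete, here is a small standalone sketch (not part of the commit) of how one entry of the flat ext_index.json shown above maps to a control file body plus an archive path. The ExtIndexEntry struct is hypothetical; the JSON keys are the real ones, and the sketch assumes serde with the derive feature:

```rust
use std::collections::HashMap;

use serde::Deserialize;

// Hypothetical typed view of the new ext_index.json: each key is an extension
// name; the value carries the control file body and the bucket-relative path
// of the .tar.zst archive (which already includes the build tag, e.g. "111/").
#[derive(Deserialize)]
struct ExtIndexEntry {
    control_file_content: String,
    extension_archive: String,
}

fn main() -> anyhow::Result<()> {
    let json = r#"{
        "embedding": {
            "control_file_content": "comment = 'hnsw index'",
            "extension_archive": "111/v14/extensions/embedding.tar.zst"
        }
    }"#;
    let index: HashMap<String, ExtIndexEntry> = serde_json::from_str(json)?;
    // The control file would be written to <sharedir>/extension/<name>.control;
    // the archive is only fetched later, on CREATE EXTENSION or LOAD.
    let entry = &index["embedding"];
    println!("{} -> {}", entry.control_file_content, entry.extension_archive);
    Ok(())
}
```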
@@ -61,133 +101,111 @@ pub fn get_pg_version(pgbin: &str) -> String {
     panic!("Unsupported postgres version {human_version}");
 }
 
-// download extension control files
-// if custom_ext_prefixes is provided - search also in custom extension paths
+// download control files for enabled_extensions
+// return the paths in s3 to the archives containing the actual extension files
+// for use in creating the extension
 pub async fn get_available_extensions(
     remote_storage: &GenericRemoteStorage,
     pgbin: &str,
     pg_version: &str,
-    custom_ext_prefixes: &[String],
-) -> Result<HashSet<String>> {
+    enabled_extensions: &[String],
+    build_tag: &str,
+) -> Result<HashMap<String, RemotePath>> {
     let local_sharedir = Path::new(&get_pg_config("--sharedir", pgbin)).join("extension");
-    let index_path = pg_version.to_owned() + "/ext_index.json";
+    let index_path = format!("{build_tag}/{pg_version}/ext_index.json");
     let index_path = RemotePath::new(Path::new(&index_path)).context("error forming path")?;
-    info!("download ext_index.json: {:?}", &index_path);
+    info!("download ext_index.json from: {:?}", &index_path);
 
     // TODO: potential optimization: cache ext_index.json
     let mut download = remote_storage.download(&index_path).await?;
-    let mut write_data_buffer = Vec::new();
+    let mut ext_idx_buffer = Vec::new();
     download
         .download_stream
-        .read_to_end(&mut write_data_buffer)
+        .read_to_end(&mut ext_idx_buffer)
         .await?;
-    let ext_index_str = match str::from_utf8(&write_data_buffer) {
-        Ok(v) => v,
-        Err(e) => panic!("Invalid UTF-8 sequence: {}", e),
-    };
-
+    let ext_index_str = str::from_utf8(&ext_idx_buffer).expect("error parsing json");
     let ext_index_full: Value = serde_json::from_str(ext_index_str)?;
+    let ext_index_full = ext_index_full.as_object().context("error parsing json")?;
-    let control_data = ext_index_full["control_data"]
-        .as_object()
-        .context("json parse error")?;
-    let enabled_extensions = ext_index_full["enabled_extensions"]
-        .as_object()
-        .context("json parse error")?;
-    info!("{:?}", control_data.clone());
-    info!("{:?}", enabled_extensions.clone());
+    info!("ext_index: {:?}", &ext_index_full);
 
-    let mut prefixes = vec!["public".to_string()];
-    prefixes.extend(custom_ext_prefixes.to_owned());
-    info!("{:?}", &prefixes);
-    let mut all_extensions = HashSet::new();
-    for prefix in prefixes {
-        let prefix_extensions = match enabled_extensions.get(&prefix) {
-            Some(Value::Array(ext_name)) => ext_name,
-            _ => {
-                info!("prefix {} has no extensions", prefix);
-                continue;
-            }
-        };
-        info!("{:?}", prefix_extensions);
-        for ext_name in prefix_extensions {
-            all_extensions.insert(ext_name.as_str().context("json parse error")?.to_string());
-        }
-    }
-
-    for prefix in &all_extensions {
-        let control_contents = control_data[prefix].as_str().context("json parse error")?;
-        let control_path = local_sharedir.join(prefix.to_owned() + ".control");
-
-        info!("WRITING FILE {:?}{:?}", control_path, control_contents);
-        std::fs::write(control_path, control_contents)?;
-    }
-
-    Ok(all_extensions.into_iter().collect())
+    info!("enabled_extensions: {:?}", enabled_extensions);
+    let mut ext_remote_paths = HashMap::new();
+    let mut file_create_tasks = Vec::new();
+    for extension in enabled_extensions {
+        let ext_data = ext_index_full[extension]
+            .as_object()
+            .context("error parsing json")?;
+        let control_contents = ext_data["control_file_content"]
+            .as_str()
+            .context("error parsing json")?;
+        let control_path = local_sharedir.join(extension.to_owned() + ".control");
+        info!("writing file {:?}{:?}", control_path, control_contents);
+        file_create_tasks.push(tokio::fs::write(control_path, control_contents));
+
+        let ext_archive_path = ext_data["extension_archive"]
+            .as_str()
+            .context("error parsing json")?;
+        ext_remote_paths.insert(
+            extension.to_string(),
+            RemotePath::from_string(ext_archive_path)?,
+        );
+    }
+
+    let results = join_all(file_create_tasks).await;
+    for result in results {
+        result?;
+    }
+
+    Ok(ext_remote_paths)
 }
 // download all sqlfiles (and possibly data files) for a given extension name
 pub async fn download_extension(
     ext_name: &str,
+    ext_path: &RemotePath,
     remote_storage: &GenericRemoteStorage,
     pgbin: &str,
-    pg_version: &str,
 ) -> Result<()> {
-    // TODO: potential optimization: only download the extension if it doesn't exist
-    // problem: how would we tell if it exists?
-    let ext_name = ext_name.replace(".so", "");
-    let ext_name_targz = ext_name.to_owned() + ".tar.gz";
-    if Path::new(&ext_name_targz).exists() {
-        info!("extension {:?} already exists", ext_name_targz);
-        return Ok(());
-    }
-    let ext_path = RemotePath::new(
-        &Path::new(pg_version)
-            .join("extensions")
-            .join(ext_name_targz.clone()),
-    )?;
-    info!(
-        "Start downloading extension {:?} from {:?}",
-        ext_name, ext_path
-    );
-    let mut download = remote_storage.download(&ext_path).await?;
-    let mut write_data_buffer = Vec::new();
+    info!("Download extension {:?} from {:?}", ext_name, ext_path);
+    let mut download = remote_storage.download(ext_path).await?;
+    let mut download_buffer = Vec::new();
     download
         .download_stream
-        .read_to_end(&mut write_data_buffer)
+        .read_to_end(&mut download_buffer)
         .await?;
-    let unzip_dest = pgbin.strip_suffix("/bin/postgres").expect("bad pgbin");
-    let tar = GzDecoder::new(write_data_buffer.as_slice());
-    let mut archive = Archive::new(tar);
-    archive.unpack(unzip_dest)?;
+    let mut decoder = Decoder::new(download_buffer.as_slice())?;
+    let mut decompress_buffer = Vec::new();
+    decoder.read_to_end(&mut decompress_buffer)?;
+    let mut archive = Archive::new(decompress_buffer.as_slice());
+    let unzip_dest = pgbin
+        .strip_suffix("/bin/postgres")
+        .expect("bad pgbin")
+        .to_string()
+        + "/download_extensions";
+    archive.unpack(&unzip_dest)?;
     info!("Download + unzip {:?} completed successfully", &ext_path);
 
-    let local_sharedir = Path::new(&get_pg_config("--sharedir", pgbin)).join("extension");
-    let zip_sharedir = format!("{unzip_dest}/extensions/{ext_name}/share/extension");
-    info!("mv {zip_sharedir:?}/* {local_sharedir:?}");
-    for file in std::fs::read_dir(zip_sharedir)? {
-        let old_file = file?.path();
-        let new_file =
-            Path::new(&local_sharedir).join(old_file.file_name().context("error parsing file")?);
-        std::fs::rename(old_file, new_file)?;
-    }
-    let local_libdir = Path::new(&get_pg_config("--libdir", pgbin)).join("postgresql");
-    let zip_libdir = format!("{unzip_dest}/extensions/{ext_name}/lib");
-    info!("mv {zip_libdir:?}/* {local_libdir:?}");
-    for file in std::fs::read_dir(zip_libdir)? {
-        let old_file = file?.path();
-        let new_file =
-            Path::new(&local_libdir).join(old_file.file_name().context("error parsing file")?);
-        std::fs::rename(old_file, new_file)?;
-    }
+    let sharedir_paths = (
+        format!("{unzip_dest}/{ext_name}/share/extension"),
+        Path::new(&get_pg_config("--sharedir", pgbin)).join("extension"),
+    );
+    let libdir_paths = (
+        format!("{unzip_dest}/{ext_name}/lib"),
+        Path::new(&get_pg_config("--libdir", pgbin)).join("postgresql"),
+    );
+    // move contents of the libdir / sharedir in unzipped archive to the correct local paths
+    for paths in [sharedir_paths, libdir_paths] {
+        let (zip_dir, real_dir) = paths;
+        info!("mv {zip_dir:?}/* {real_dir:?}");
+        for file in std::fs::read_dir(zip_dir)? {
+            let old_file = file?.path();
+            let new_file =
+                Path::new(&real_dir).join(old_file.file_name().context("error parsing file")?);
+            std::fs::rename(old_file, new_file)?;
+        }
+    }
     Ok(())
 }
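The commit also switches the archive format from .tar.gz (flate2) to .tar.zst. A minimal sketch of that decode-then-unpack sequence, assuming the zstd and tar crates used above:

```rust
use std::io::Read;

use tar::Archive;
use zstd::stream::read::Decoder;

// Sketch of the new unpack path: zstd-decompress the downloaded .tar.zst
// bytes into memory, then untar the result into `dest`. Buffering the whole
// tarball mirrors the commit; a streaming pipeline would also work.
fn unpack_tar_zst(archive_bytes: &[u8], dest: &str) -> anyhow::Result<()> {
    let mut decoder = Decoder::new(archive_bytes)?;
    let mut tar_bytes = Vec::new();
    decoder.read_to_end(&mut tar_bytes)?;
    Archive::new(tar_bytes.as_slice()).unpack(dest)?;
    Ok(())
}
```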
 // This function initializes the necessary structs to use remote storage (should be fairly cheap)
-pub fn init_remote_storage(
-    remote_ext_config: &str,
-    default_prefix: &str,
-) -> anyhow::Result<GenericRemoteStorage> {
+pub fn init_remote_storage(remote_ext_config: &str) -> anyhow::Result<GenericRemoteStorage> {
     let remote_ext_config: serde_json::Value = serde_json::from_str(remote_ext_config)?;
 
     let remote_ext_bucket = remote_ext_config["bucket"]
@@ -199,19 +217,9 @@ pub fn init_remote_storage(
     let remote_ext_endpoint = remote_ext_config["endpoint"].as_str();
     let remote_ext_prefix = remote_ext_config["prefix"]
         .as_str()
-        .unwrap_or(default_prefix)
+        .unwrap_or_default()
         .to_string();
 
-    // control plane passes the aws creds via CLI ARGS to compute_ctl
-    let aws_key = remote_ext_config["key"].as_str();
-    let aws_id = remote_ext_config["id"].as_str();
-    if let Some(aws_key) = aws_key {
-        if let Some(aws_id) = aws_id {
-            std::env::set_var("AWS_SECRET_ACCESS_KEY", aws_key);
-            std::env::set_var("AWS_ACCESS_KEY_ID", aws_id);
-        }
-    }
-
     // If needed, it is easy to allow modification of other parameters
     // however, default values should be fine for now
     let config = S3Config {
@@ -229,19 +237,3 @@ pub fn init_remote_storage(
     };
     GenericRemoteStorage::from_config(&config)
 }
-
-pub fn launch_download_extensions(
-    compute: &Arc<ComputeNode>,
-) -> Result<thread::JoinHandle<()>, std::io::Error> {
-    let compute = Arc::clone(compute);
-    thread::Builder::new()
-        .name("download-extensions".into())
-        .spawn(move || {
-            info!("start download_extension_files");
-            let compute_state = compute.state.lock().expect("error unlocking compute.state");
-            compute
-                .prepare_external_extensions(&compute_state)
-                .expect("error preparing extensions");
-            info!("download_extension_files done, exiting thread");
-        })
-}
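For reference, a hedged sketch (not part of the commit) of how the remote-ext-config JSON passed via `-r` is picked apart; the field names match the commit, while the surrounding S3Config construction is elided in the diff above:

```rust
use anyhow::Context;
use serde_json::Value;

// Pulls the S3 coordinates out of the JSON string that the control plane
// passes to compute_ctl; only "bucket" and "region" are required here.
fn parse_remote_ext_config(raw: &str) -> anyhow::Result<(String, String, Option<String>, String)> {
    let cfg: Value = serde_json::from_str(raw)?;
    let bucket = cfg["bucket"].as_str().context("missing bucket")?.to_string();
    let region = cfg["region"].as_str().context("missing region")?.to_string();
    let endpoint = cfg["endpoint"].as_str().map(str::to_string);
    let prefix = cfg["prefix"].as_str().unwrap_or_default().to_string();
    Ok((bucket, region, endpoint, prefix))
}
```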
@@ -499,9 +499,7 @@ impl Endpoint {
             //
             // The proper way to implement this is to pass the custom extension
             // in spec, but we don't have a way to do that yet in the python tests.
-            // NEW HACK: we enable the anon custom extension for everyone! this is of course just for testing
-            // how will we do it for real?
-            custom_extensions: Some(vec!["123454321".to_string(), self.tenant_id.to_string()]),
+            custom_extensions: Some(vec!["embedding".into(), "anon".into()]),
         };
         let spec_path = self.endpoint_path().join("spec.json");
         std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;
@@ -65,6 +65,10 @@ impl RemotePath {
         Ok(Self(relative_path.to_path_buf()))
     }
 
+    pub fn from_string(relative_path: &str) -> anyhow::Result<Self> {
+        Self::new(Path::new(relative_path))
+    }
+
     pub fn with_base(&self, base_path: &Path) -> PathBuf {
         base_path.join(&self.0)
     }
@@ -721,7 +721,10 @@ class NeonEnvBuilder:
         self.remote_storage = LocalFsStorage(Path(self.repo_dir / "local_fs_remote_storage"))
 
     def enable_mock_s3_remote_storage(
-        self, bucket_name: str, force_enable: bool = True, enable_remote_extensions: bool = False
+        self,
+        bucket_name: str,
+        force_enable: bool = True,
+        enable_remote_extensions: bool = False,
     ):
         """
         Sets up the pageserver to use the S3 mock server, creates the bucket, if it's not present already.
@@ -763,7 +766,10 @@ class NeonEnvBuilder:
         )
 
     def enable_real_s3_remote_storage(
-        self, test_name: str, force_enable: bool = True, enable_remote_extensions: bool = False
+        self,
+        test_name: str,
+        force_enable: bool = True,
+        enable_remote_extensions: bool = False,
     ):
         """
        Sets up configuration to use real s3 endpoint without mock server
@@ -810,7 +816,7 @@ class NeonEnvBuilder:
             bucket_region="eu-central-1",
             access_key=access_key,
             secret_key=secret_key,
-            prefix_in_bucket="5555",
+            prefix_in_bucket="",
         )
 
     def cleanup_local_storage(self):
@@ -0,0 +1,10 @@
+{
+    "embedding": {
+        "control_file_content": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true",
+        "extension_archive": "111/v14/extensions/embedding.tar.zst"
+    },
+    "anon": {
+        "control_file_content": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n",
+        "extension_archive": "111/v14/extensions/anon.tar.zst"
+    }
+}
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,11 @@
+{
+    "embedding": {
+        "control_file_content": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true",
+        "extension_archive": "111/v15/extensions/embedding.tar.zst"
+    },
+    "anon": {
+        "control_file_content": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n",
+        "extension_archive": "111/v15/extensions/anon.tar.zst"
+    }
+}
+
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,10 @@
+{
+    "embedding": {
+        "control_file_content": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true",
+        "extension_archive": "111/v14/extensions/embedding.tar.zst"
+    },
+    "anon": {
+        "control_file_content": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n",
+        "extension_archive": "112/v14/extensions/anon.tar.zst"
+    }
+}
Binary file not shown.
@@ -0,0 +1,11 @@
+{
+    "embedding": {
+        "control_file_content": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true",
+        "extension_archive": "111/v15/extensions/embedding.tar.zst"
+    },
+    "anon": {
+        "control_file_content": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n",
+        "extension_archive": "112/v15/extensions/anon.tar.zst"
+    }
+}
+
Binary file not shown.
@@ -0,0 +1,10 @@
+{
+    "embedding": {
+        "control_file_content": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true",
+        "extension_archive": "113/v14/extensions/embedding.tar.zst"
+    },
+    "anon": {
+        "control_file_content": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n",
+        "extension_archive": "111/v14/extensions/anon.tar.zst"
+    }
+}
Binary file not shown.
@@ -0,0 +1,11 @@
+{
+    "embedding": {
+        "control_file_content": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true",
+        "extension_archive": "113/v15/extensions/embedding.tar.zst"
+    },
+    "anon": {
+        "control_file_content": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n",
+        "extension_archive": "111/v15/extensions/anon.tar.zst"
+    }
+}
+
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,14 +0,0 @@
-{
-    "enabled_extensions": {
-        "123454321": [
-            "anon"
-        ],
-        "public": [
-            "embedding"
-        ]
-    },
-    "control_data": {
-        "embedding": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true",
-        "anon": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n"
-    }
-}
Binary file not shown.
Binary file not shown.
@@ -1,14 +0,0 @@
-{
-    "enabled_extensions": {
-        "123454321": [
-            "anon"
-        ],
-        "public": [
-            "embedding"
-        ]
-    },
-    "control_data": {
-        "embedding": "comment = 'hnsw index' \ndefault_version = '0.1.0' \nmodule_pathname = '$libdir/embedding' \nrelocatable = true \ntrusted = true",
-        "anon": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = true \n"
-    }
-}
@@ -11,21 +11,16 @@ from fixtures.neon_fixtures import (
 )
 from fixtures.pg_version import PgVersion
 
-# Generate mock extension files and upload them to the mock bucket.
-#
-# NOTE: You must have appropriate AWS credentials to run REAL_S3 test.
-# It may also be necessary to set the following environment variables for MOCK_S3 test:
-# export AWS_ACCESS_KEY_ID='test' # export AWS_SECRET_ACCESS_KEY='test'
-# export AWS_SECURITY_TOKEN='test' # export AWS_SESSION_TOKEN='test'
-# export AWS_DEFAULT_REGION='us-east-1'
-
-
+# Test downloading remote extension.
 @pytest.mark.parametrize("remote_storage_kind", available_s3_storages())
 def test_remote_extensions(
     neon_env_builder: NeonEnvBuilder,
     remote_storage_kind: RemoteStorageKind,
     pg_version: PgVersion,
 ):
+    if remote_storage_kind == RemoteStorageKind.REAL_S3:
+        return None
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
         test_name="test_remote_extensions",
@@ -36,87 +31,84 @@ def test_remote_extensions(
     tenant_id, _ = env.neon_cli.create_tenant()
     env.neon_cli.create_timeline("test_remote_extensions", tenant_id=tenant_id)
 
-    def upload_test_file(from_path, to_path):
-        assert env.ext_remote_storage is not None  # satisfy mypy
-        assert env.remote_storage_client is not None  # satisfy mypy
-        with open(
-            f"test_runner/regress/data/extension_test/v{pg_version}/{from_path}", "rb"
-        ) as f:
-            env.remote_storage_client.upload_fileobj(
-                f,
-                env.ext_remote_storage.bucket_name,
-                f"ext/v{pg_version}/{to_path}",
-            )
-
-    upload_test_file("ext_index.json", "ext_index.json")
-    upload_test_file("anon.tar.gz", "extensions/anon.tar.gz")
-    upload_test_file("embedding.tar.gz", "extensions/embedding.tar.gz")
-
-    assert env.ext_remote_storage is not None  # satisfy mypy
-    assert env.remote_storage_client is not None  # satisfy mypy
-    # Start a compute node and check that it can download the extensions
-    # and use them to CREATE EXTENSION and LOAD
-    endpoint = env.endpoints.create_start(
-        "test_remote_extensions",
-        tenant_id=tenant_id,
-        remote_ext_config=env.ext_remote_storage.to_string(),
-        # config_lines=["log_min_messages=debug3"],
-    )
-    with closing(endpoint.connect()) as conn:
-        with conn.cursor() as cur:
-            # Check that appropriate control files were downloaded
-            cur.execute("SELECT * FROM pg_available_extensions")
-            all_extensions = [x[0] for x in cur.fetchall()]
-            log.info(all_extensions)
-            assert "anon" in all_extensions
-            assert "embedding" in all_extensions
-            # TODO: check that we can't download custom extensions for other tenant ids
-
-            # check that we can download public extension
-            cur.execute("CREATE EXTENSION embedding")
-            cur.execute("SELECT extname FROM pg_extension")
-            assert "embedding" in [x[0] for x in cur.fetchall()]
-
-            # check that we can download private extension
-            try:
-                cur.execute("CREATE EXTENSION anon")
-            except Exception as err:
-                log.info("error creating anon extension")
-                assert "pgcrypto" in str(err), "unexpected error creating anon extension"
-
-    # TODO: try to load libraries as well
-
-    # Cleaning up downloaded files is important for local tests
-    # or else one test could reuse the files from another test or another test run
-    cleanup_files = [
-        "lib/postgresql/anon.so",
-        "lib/postgresql/embedding.so",
-        "share/postgresql/extension/anon.control",
-        "share/postgresql/extension/embedding--0.1.0.sql",
-        "share/postgresql/extension/embedding.control",
-    ]
-    cleanup_files = [f"pg_install/v{pg_version}/" + x for x in cleanup_files]
-    cleanup_folders = [
-        "extensions",
-        f"pg_install/v{pg_version}/share/postgresql/extension/anon",
-        f"pg_install/v{pg_version}/extensions",
-    ]
-    for file in cleanup_files:
-        try:
-            os.remove(file)
-            log.info(f"removed file {file}")
-        except Exception as err:
-            log.info(f"error removing file {file}: {err}")
-    for folder in cleanup_folders:
-        try:
-            shutil.rmtree(folder)
-            log.info(f"removed folder {folder}")
-        except Exception as err:
-            log.info(f"error removing file {file}: {err}")
+    assert env.ext_remote_storage is not None  # satisfy mypy
+    assert env.remote_storage_client is not None  # satisfy mypy
+
+    # For MOCK_S3 we upload test files.
+    # For REAL_S3 we use the files already in the bucket
+    if remote_storage_kind == RemoteStorageKind.MOCK_S3:
+        log.info("Uploading test files to mock bucket")
+        os.chdir("test_runner/regress/data/extension_test")
+        for path in os.walk("."):
+            prefix, _, files = path
+            for file in files:
+                # the [2:] is to remove the leading "./"
+                full_path = os.path.join(prefix, file)[2:]
+
+                with open(full_path, "rb") as f:
+                    log.info(f"UPLOAD {full_path} to ext/{full_path}")
+                    env.remote_storage_client.upload_fileobj(
+                        f,
+                        env.ext_remote_storage.bucket_name,
+                        f"ext/{full_path}",
+                    )
+        os.chdir("../../../..")
+
+    try:
+        # Start a compute node and check that it can download the extensions
+        # and use them to CREATE EXTENSION and LOAD
+        endpoint = env.endpoints.create_start(
+            "test_remote_extensions",
+            tenant_id=tenant_id,
+            remote_ext_config=env.ext_remote_storage.to_string(),
+            # config_lines=["log_min_messages=debug3"],
+        )
+        with closing(endpoint.connect()) as conn:
+            with conn.cursor() as cur:
+                # Check that appropriate control files were downloaded
+                cur.execute("SELECT * FROM pg_available_extensions")
+                all_extensions = [x[0] for x in cur.fetchall()]
+                log.info(all_extensions)
+                assert "anon" in all_extensions
+                assert "embedding" in all_extensions
+                # TODO: check that we can't download custom extensions for other tenant ids
+
+                # check that we can download public extension
+                cur.execute("CREATE EXTENSION embedding")
+                cur.execute("SELECT extname FROM pg_extension")
+                assert "embedding" in [x[0] for x in cur.fetchall()]
+
+                # check that we can download private extension
+                try:
+                    cur.execute("CREATE EXTENSION anon")
+                except Exception as err:
+                    log.info("error creating anon extension")
+                    assert "pgcrypto" in str(err), "unexpected error creating anon extension"
+
+        # TODO: try to load libraries as well
+
+    finally:
+        # Cleaning up downloaded files is important for local tests
+        # or else one test could reuse the files from another test or another test run
+        cleanup_files = [
+            "lib/postgresql/anon.so",
+            "lib/postgresql/embedding.so",
+            "share/postgresql/extension/anon.control",
+            "share/postgresql/extension/embedding--0.1.0.sql",
+            "share/postgresql/extension/embedding.control",
+        ]
+        cleanup_files = [f"pg_install/v{pg_version}/" + x for x in cleanup_files]
+        cleanup_folders = [
+            "extensions",
+            f"pg_install/v{pg_version}/share/postgresql/extension/anon",
+            f"pg_install/v{pg_version}/extensions",
+        ]
+        for file in cleanup_files:
+            try:
+                os.remove(file)
+                log.info(f"removed file {file}")
+            except Exception as err:
+                log.info(f"error removing file {file}: {err}")
+        for folder in cleanup_folders:
+            try:
+                shutil.rmtree(folder)
+                log.info(f"removed folder {folder}")
+            except Exception as err:
+                log.info(f"error removing file {file}: {err}")
@@ -60,6 +60,7 @@ url = { version = "2", features = ["serde"] }
 [build-dependencies]
 anyhow = { version = "1", features = ["backtrace"] }
 bytes = { version = "1", features = ["serde"] }
+cc = { version = "1", default-features = false, features = ["parallel"] }
 either = { version = "1" }
 itertools = { version = "0.10" }
 libc = { version = "0.2", features = ["extra_traits"] }