Compare commits


2 Commits

Author           SHA1         Message                                       Date
Andrey Taranik   34ca0f4c33   compute docker image multi-platform build     2022-01-10 00:01:07 +03:00
Andrey Taranik   63923c1b4e   build flow for zenithdb/build:buster          2022-01-09 18:16:54 +03:00
5 changed files with 157 additions and 93 deletions

View File

@@ -443,25 +443,27 @@ jobs:
- checkout
- setup_remote_docker:
docker_layer_caching: true
- run:
name: Login to docker hub
command: echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
- run:
name: Setup buildx
command: docker run -it --rm --privileged tonistiigi/binfmt --install all
# Build zenithdb/compute-tools:latest image and push it to Docker hub
# TODO: this should probably also use versioned tag, not just :latest.
# XXX: but should it? We build and use it only locally now.
- run:
name: Build and push compute-tools Docker image
command: |
echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
docker build -t zenithdb/compute-tools:latest ./compute_tools/
docker push zenithdb/compute-tools:latest
command: docker buildx build --platform linux/amd64,linux/arm64 --push -t zenithdb/compute-tools:latest compute_tools
- run:
name: Init postgres submodule
command: git submodule update --init --depth 1
- run:
name: Build and push compute-node Docker image
command: |
echo $DOCKER_PWD | docker login -u $DOCKER_LOGIN --password-stdin
DOCKER_TAG=$(git log --oneline|wc -l)
docker build -t zenithdb/compute-node:latest vendor/postgres && docker push zenithdb/compute-node:latest
docker tag zenithdb/compute-node:latest zenithdb/compute-node:${DOCKER_TAG} && docker push zenithdb/compute-node:${DOCKER_TAG}
docker buildx build --platform linux/amd64,linux/arm64 --push -t zenithdb/compute-node:latest vendor/postgres
docker buildx build --platform linux/amd64,linux/arm64 --push -t zenithdb/compute-node:${DOCKER_TAG} vendor/postgres
deploy-staging:
docker:
@@ -688,10 +690,11 @@ workflows:
branches:
only:
- main
requires:
- pg_regress-tests-release
- other-tests-release
- compute-tools-test
- docker-multi-platform
# requires:
# - pg_regress-tests-release
# - other-tests-release
# - compute-tools-test
- deploy-staging:
# Context gives an ability to login
context: Docker Hub

.github/workflows/docker-builder.yml (new vendored file, 44 lines added)

@@ -0,0 +1,44 @@
## Build docker image zenithdb/build:buster for linux/amd64 and linux/arm64 platforms
name: docker-builder
on:
push:
branches:
- 'docker-multi-platform'
schedule:
# * is a special character in YAML so you have to quote this string
# build daily at 5:30am
- cron: '30 5 * * *'
jobs:
docker-builder-buster:
runs-on: ubuntu-latest
steps:
-
name: Checkout
uses: actions/checkout@v2
with:
submodules: false
-
name: Set up QEMU
uses: docker/setup-qemu-action@v1
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
-
name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}
-
name: Build and push zenithdb/build:buster
uses: docker/build-push-action@v2
with:
push: true
file: Dockerfile.build
platforms: linux/amd64,linux/arm64
cache-from: type=registry,ref=zenithdb/build:buster
tags: zenithdb/build:buster

View File

@@ -17,8 +17,7 @@ mod proxy;
mod state;
mod waiters;
#[tokio::main]
async fn main() -> anyhow::Result<()> {
fn main() -> anyhow::Result<()> {
let arg_matches = App::new("Zenith proxy/router")
.version(GIT_VERSION)
.arg(
@@ -98,10 +97,20 @@ async fn main() -> anyhow::Result<()> {
println!("Starting mgmt on {}", state.conf.mgmt_address);
let mgmt_listener = tcp_listener::bind(state.conf.mgmt_address)?;
tokio::try_join!(
proxy::thread_main(state, pageserver_listener),
mgmt::thread_main(state, mgmt_listener),
)?;
let threads = [
// Spawn a thread to listen for connections. It will spawn further threads
// for each connection.
thread::Builder::new()
.name("Listener thread".into())
.spawn(move || proxy::thread_main(state, pageserver_listener))?,
thread::Builder::new()
.name("Mgmt thread".into())
.spawn(move || mgmt::thread_main(state, mgmt_listener))?,
];
for t in threads {
t.join().unwrap()?;
}
Ok(())
}
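
For context, a minimal std-only sketch of the spawn-and-join pattern the reworked main() adopts; the placeholder closures here stand in for the real proxy and mgmt listener loops:

use std::thread;

fn main() -> Result<(), std::io::Error> {
    // Spawn named OS threads whose closures return a Result.
    let threads = [
        thread::Builder::new()
            .name("Listener thread".into())
            .spawn(|| -> Result<(), std::io::Error> { Ok(()) })?,
        thread::Builder::new()
            .name("Mgmt thread".into())
            .spawn(|| -> Result<(), std::io::Error> { Ok(()) })?,
    ];
    for t in threads {
        // join() fails only if the thread panicked; the trailing `?`
        // propagates the closure's own error value.
        t.join().unwrap()?;
    }
    Ok(())
}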

View File

@@ -16,7 +16,7 @@ use crate::{cplane_api::DatabaseInfo, ProxyState};
///
/// Listens for connections, and launches a new handler thread for each.
///
pub async fn thread_main(state: &'static ProxyState, listener: TcpListener) -> anyhow::Result<()> {
pub fn thread_main(state: &'static ProxyState, listener: TcpListener) -> anyhow::Result<()> {
loop {
let (socket, peer_addr) = listener.accept()?;
println!("accepted connection from {}", peer_addr);

View File

@@ -14,7 +14,6 @@ use zenith_utils::postgres_backend::{self, PostgresBackend, ProtoState, Stream};
use zenith_utils::pq_proto::{BeMessage as Be, FeMessage as Fe, *};
use zenith_utils::sock_split::{ReadStream, WriteStream};
#[derive(Clone)]
struct CancelClosure {
socket_addr: SocketAddr,
cancel_token: tokio_postgres::CancelToken,
@@ -36,14 +35,9 @@ lazy_static! {
static ref CANCEL_MAP: Mutex<HashMap<CancelKeyData, CancelClosure>> = Mutex::new(HashMap::new());
}
/// Create new CancelKeyData with backend_pid that doesn't necessarily
/// correspond to the backend_pid of any actual backend.
fn fabricate_cancel_key_data() -> CancelKeyData {
let mut rng = StdRng::from_entropy();
CancelKeyData {
backend_pid: rng.gen(),
cancel_key: rng.gen(),
}
thread_local! {
// Used to clean up the CANCEL_MAP. Might not be necessary if we use tokio thread pool in main loop.
static THREAD_CANCEL_KEY_DATA: Cell<Option<CancelKeyData>> = Cell::new(None);
}
///
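
A condensed, std-only sketch of the thread-local cleanup mechanism introduced above, with a plain u64 standing in for CancelKeyData and a closure standing in for the CANCEL_MAP removal (all names here are illustrative, not the crate's API):

use std::cell::Cell;

thread_local! {
    // One slot per connection-handling thread: set when the backend
    // connection registers its cancel key, read again during cleanup.
    static THREAD_KEY: Cell<Option<u64>> = Cell::new(None);
}

fn register(key: u64) {
    THREAD_KEY.with(|cell| {
        assert!(cell.replace(Some(key)).is_none(), "key was already set");
    });
}

fn cleanup(remove: impl Fn(u64)) {
    THREAD_KEY.with(|cell| {
        if let Some(key) = cell.get() {
            remove(key); // e.g. CANCEL_MAP.lock().remove(&key)
        }
    });
}

fn main() {
    register(42);
    cleanup(|key| println!("removed cancel entry for {}", key));
}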
@@ -51,7 +45,7 @@ fn fabricate_cancel_key_data() -> CancelKeyData {
///
/// Listens for connections, and launches a new handler thread for each.
///
pub async fn thread_main(
pub fn thread_main(
state: &'static ProxyState,
listener: std::net::TcpListener,
) -> anyhow::Result<()> {
@@ -60,16 +54,23 @@ pub async fn thread_main(
println!("accepted connection from {}", peer_addr);
socket.set_nodelay(true).unwrap();
tokio::task::spawn(async move {
let cancel_key_data = fabricate_cancel_key_data();
let res = tokio::task::spawn(proxy_conn_main(state, socket, cancel_key_data)).await;
CANCEL_MAP.lock().remove(&cancel_key_data);
match res {
Err(join_err) => println!("join error: {}", join_err),
Ok(Err(conn_err)) => println!("connection error: {}", conn_err),
Ok(Ok(())) => {},
}
});
// TODO Use a threadpool instead. Maybe use tokio's threadpool by
// spawning a future into its runtime. Tokio's JoinError should
// allow us to handle cleanup properly even if the future panics.
thread::Builder::new()
.name("Proxy thread".into())
.spawn(move || {
if let Err(err) = proxy_conn_main(state, socket) {
println!("error: {}", err);
}
// Clean up CANCEL_MAP.
THREAD_CANCEL_KEY_DATA.with(|cell| {
if let Some(cancel_key_data) = cell.get() {
CANCEL_MAP.lock().remove(&cancel_key_data);
};
});
})?;
}
}
@@ -80,7 +81,7 @@ struct ProxyConnection {
pgb: PostgresBackend,
}
pub async fn proxy_conn_main(state: &'static ProxyState, socket: TcpStream, cancel_key_data: CancelKeyData) -> anyhow::Result<()> {
pub fn proxy_conn_main(state: &'static ProxyState, socket: TcpStream) -> anyhow::Result<()> {
let conn = ProxyConnection {
state,
psql_session_id: hex::encode(rand::random::<[u8; 8]>()),
@@ -92,7 +93,7 @@ pub async fn proxy_conn_main(state: &'static ProxyState, socket: TcpStream, canc
)?,
};
let (client, server) = match conn.handle_client(cancel_key_data).await? {
let (client, server) = match conn.handle_client()? {
Some(x) => x,
None => return Ok(()),
};
@@ -104,41 +105,28 @@ pub async fn proxy_conn_main(state: &'static ProxyState, socket: TcpStream, canc
_ => panic!("invalid stream type"),
};
proxy(client.split(), server.split()).await
proxy(client.split(), server.split())
}
impl ProxyConnection {
/// Returns Ok(None) when connection was successfully closed.
async fn handle_client(mut self, cancel_key_data: CancelKeyData) -> anyhow::Result<Option<(Stream, TcpStream)>> {
let (username, dbname) = match self.handle_startup().await? {
Some(x) => x,
None => return Ok(None),
};
fn handle_client(mut self) -> anyhow::Result<Option<(Stream, TcpStream)>> {
let mut authenticate = || {
let (username, dbname) = match self.handle_startup()? {
Some(x) => x,
None => return Ok(None),
};
let dbinfo = {
if true || username.ends_with("@zenith") {
// Both scenarios here should end up producing database credentials
if username.ends_with("@zenith") {
self.handle_existing_user(&username, &dbname).map(Some)
} else {
self.handle_new_user().map(Some)
}
};
// let mut authenticate = || async {
// let (username, dbname) = match self.handle_startup().await? {
// Some(x) => x,
// None => return Ok(None),
// };
// // Both scenarios here should end up producing database credentials
// if true || username.ends_with("@zenith") {
// self.handle_existing_user(&username, &dbname).map(Some)
// } else {
// self.handle_new_user().map(Some)
// }
// };
let conn = match dbinfo {
Ok(Some(info)) => connect_to_db(info),
let conn = match authenticate() {
Ok(Some(db_info)) => connect_to_db(db_info),
Ok(None) => return Ok(None),
Err(e) => {
// Report the error to the client
@@ -149,8 +137,11 @@ impl ProxyConnection {
// We'll get rid of this once migration to async is complete
let (pg_version, db_stream) = {
let (pg_version, stream, cancel_closure) = conn.await?;
CANCEL_MAP.lock().insert(cancel_key_data, cancel_closure);
let runtime = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()?;
let (pg_version, stream, cancel_key_data) = runtime.block_on(conn)?;
self.pgb
.write_message(&BeMessage::BackendKeyData(cancel_key_data))?;
let stream = stream.into_std()?;
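
The same sync-over-async bridge now appears in two places in this file; a hedged, self-contained sketch of the pattern, assuming tokio built with the rt, net and time features plus the anyhow crate (fetch_version is a stand-in for the real connect future):

async fn fetch_version() -> anyhow::Result<String> {
    Ok("14.1".into())
}

fn connect_blocking() -> anyhow::Result<String> {
    // Build a small single-threaded runtime and drive one future to
    // completion on the calling, non-async thread.
    let runtime = tokio::runtime::Builder::new_current_thread()
        .enable_all()
        .build()?;
    runtime.block_on(fetch_version())
}

fn main() -> anyhow::Result<()> {
    println!("server_version = {}", connect_blocking()?);
    Ok(())
}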
@@ -170,7 +161,7 @@ impl ProxyConnection {
}
/// Returns Ok(None) when connection was successfully closed.
async fn handle_startup(&mut self) -> anyhow::Result<Option<(String, String)>> {
fn handle_startup(&mut self) -> anyhow::Result<Option<(String, String)>> {
let have_tls = self.pgb.tls_config.is_some();
let mut encrypted = false;
@@ -207,9 +198,12 @@ impl ProxyConnection {
return Ok(Some((get_param("user")?, get_param("database")?)));
}
FeStartupPacket::CancelRequest(cancel_key_data) => {
let entry = CANCEL_MAP.lock().get(&cancel_key_data).map(core::clone::Clone::clone);
if let Some(cancel_closure) = entry {
cancel_closure.try_cancel_query().await;
if let Some(cancel_closure) = CANCEL_MAP.lock().get(&cancel_key_data) {
let runtime = tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()
.unwrap();
runtime.block_on(cancel_closure.try_cancel_query());
}
return Ok(None);
}
@@ -237,21 +231,14 @@ impl ProxyConnection {
.split_last()
.ok_or_else(|| anyhow!("unexpected password message"))?;
let db_info = DatabaseInfo {
host: "localhost".into(),
port: 5432,
dbname: "postgres".into(),
user: "postgres".into(),
password: Some("postgres".into()),
};
// let cplane = CPlaneApi::new(&self.state.conf.auth_endpoint, &self.state.waiters);
// let db_info = cplane.authenticate_proxy_request(
// user,
// db,
// md5_response,
// &md5_salt,
// &self.psql_session_id,
// )?;
let cplane = CPlaneApi::new(&self.state.conf.auth_endpoint, &self.state.waiters);
let db_info = cplane.authenticate_proxy_request(
user,
db,
md5_response,
&md5_salt,
&self.psql_session_id,
)?;
self.pgb
.write_message_noflush(&Be::AuthenticationOk)?
@@ -300,7 +287,7 @@ fn hello_message(redirect_uri: &str, session_id: &str) -> String {
/// Create a TCP connection to a postgres database, authenticate with it, and receive the ReadyForQuery message
async fn connect_to_db(
db_info: DatabaseInfo,
) -> anyhow::Result<(String, tokio::net::TcpStream, CancelClosure)> {
) -> anyhow::Result<(String, tokio::net::TcpStream, CancelKeyData)> {
// Make raw connection. When connect_raw finishes we've received ReadyForQuery.
let socket_addr = db_info.socket_addr()?;
let mut socket = tokio::net::TcpStream::connect(socket_addr).await?;
@@ -308,21 +295,41 @@ async fn connect_to_db(
// NOTE We effectively ignore some ParameterStatus and NoticeResponse
// messages here. Not sure if that could break something.
let (client, conn) = config.connect_raw(&mut socket, NoTls).await?;
// Save info for potentially cancelling the query later
let mut rng = StdRng::from_entropy();
let cancel_key_data = CancelKeyData {
// HACK We'd rather get the real backend_pid but tokio_postgres doesn't
// expose it and we don't want to do another roundtrip to query
// for it. The client will be able to notice that this is not the
// actual backend_pid, but backend_pid is not used for anything
// so it doesn't matter.
backend_pid: rng.gen(),
cancel_key: rng.gen(),
};
let cancel_closure = CancelClosure {
socket_addr,
cancel_token: client.cancel_token(),
};
CANCEL_MAP.lock().insert(cancel_key_data, cancel_closure);
THREAD_CANCEL_KEY_DATA.with(|cell| {
let prev_value = cell.replace(Some(cancel_key_data));
assert!(
prev_value.is_none(),
"THREAD_CANCEL_KEY_DATA was already set"
);
});
let version = conn.parameter("server_version").unwrap();
Ok((version.into(), socket, cancel_closure))
Ok((version.into(), socket, cancel_key_data))
}
/// Concurrently proxy both directions of the client and server connections
async fn proxy(
fn proxy(
(client_read, client_write): (ReadStream, WriteStream),
(server_read, server_write): (ReadStream, WriteStream),
) -> anyhow::Result<()> {
async fn do_proxy(mut reader: impl io::Read, mut writer: WriteStream) -> io::Result<u64> {
fn do_proxy(mut reader: impl io::Read, mut writer: WriteStream) -> io::Result<u64> {
/// FlushWriter will make sure that every message is sent as soon as possible
struct FlushWriter<W>(W);
@@ -347,9 +354,10 @@ async fn proxy(
res
}
tokio::try_join!(
do_proxy(client_read, server_write),
do_proxy(server_read, client_write),
)?;
let client_to_server_jh = thread::spawn(move || do_proxy(client_read, server_write));
do_proxy(server_read, client_write)?;
client_to_server_jh.join().unwrap()?;
Ok(())
}
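
Finally, a std-only sketch of the two-direction copy the reworked proxy() performs, using io::copy in place of the buffered FlushWriter loop and TcpStream clones in place of the split read/write halves (so it omits the per-chunk flush the real code keeps):

use std::io;
use std::net::TcpStream;
use std::thread;

fn proxy_blocking(client: TcpStream, server: TcpStream) -> io::Result<()> {
    let (mut client_read, mut client_write) = (client.try_clone()?, client);
    let (mut server_read, mut server_write) = (server.try_clone()?, server);

    // One spawned thread pumps client -> server while the current thread
    // pumps server -> client; join the spawned thread before returning.
    let jh = thread::spawn(move || io::copy(&mut client_read, &mut server_write));
    io::copy(&mut server_read, &mut client_write)?;
    jh.join().expect("proxy thread panicked")?;
    Ok(())
}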