Compare commits

..

1 Commit

Author: Stas Kelvich
SHA1: f5a4a1eba5
Message: Update README.md ("I think installing the postgres client is optional. And the stackoverflow link looks a bit strange there.")
Date: 2022-07-13 22:09:28 +03:00
11 changed files with 420 additions and 546 deletions

View File

@@ -1,13 +0,0 @@
# The binaries are really slow if you compile them in 'dev' mode with the defaults.
# Enable some optimizations even in 'dev' mode, to make tests faster. The basic
# optimizations enabled by "opt-level=1" don't affect debuggability too much.
#
# See https://www.reddit.com/r/rust/comments/gvrgca/this_is_a_neat_trick_for_getting_good_runtime/
#
[profile.dev.package."*"]
# Set the default for dependencies in Development mode.
opt-level = 3
[profile.dev]
# Turn on a small amount of optimization in Development mode.
opt-level = 1

View File

@@ -11,9 +11,8 @@ defaults:
shell: bash -ex {0}
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
cancel-in-progress: true
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
RUST_BACKTRACE: 1
@@ -171,10 +170,7 @@ jobs:
for bin in $test_exe_paths; do
SRC=$bin
DST=/tmp/neon/test_bin/$(basename $bin)
# We don't need debug symbols for code coverage, so strip them out to make
# the artifact smaller.
strip "$SRC" -o "$DST"
cp "$SRC" "$DST"
echo "$DST" >> /tmp/coverage/binaries.list
done
fi
@@ -445,14 +441,14 @@ jobs:
fi
id: legacy-build-tag
- name: Build neon Docker image
- name: Build compute-tools Docker image
uses: docker/build-push-action@v2
with:
context: .
build-args: |
GIT_VERSION="${{github.sha}}"
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
GIT_VERSION="${GITHUB_SHA}"
AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}"
AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}"
pull: true
push: true
tags: neondatabase/neon:${{steps.legacy-build-tag.outputs.tag}}, neondatabase/neon:${{steps.build-tag.outputs.tag}}
@@ -512,9 +508,8 @@ jobs:
with:
context: .
build-args: |
GIT_VERSION="${{github.sha}}"
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}"
AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}"
push: false
file: Dockerfile.compute-tools
tags: neondatabase/compute-tools:local
@@ -524,9 +519,8 @@ jobs:
with:
context: .
build-args: |
GIT_VERSION="${{github.sha}}"
AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}"
AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}"
AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}"
AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}"
push: true
file: Dockerfile.compute-tools
tags: neondatabase/compute-tools:${{steps.legacy-build-tag.outputs.tag}}
@@ -564,11 +558,7 @@ jobs:
deploy:
runs-on: [ self-hosted, Linux, k8s-runner ]
# We need both storage **and** compute images for deploy, because control plane
# picks the compute version based on the storage version. If it notices a fresh
# storage it may bump the compute version. And if compute image failed to build
# it may break things badly.
needs: [ docker-image, docker-image-compute, calculate-deploy-targets ]
needs: [ docker-image, calculate-deploy-targets ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
@@ -611,9 +601,7 @@ jobs:
deploy-proxy:
runs-on: [ self-hosted, Linux, k8s-runner ]
# Compute image isn't strictly required for proxy deploy, but let's still wait for it
# to run all deploy jobs consistently.
needs: [ docker-image, docker-image-compute, calculate-deploy-targets ]
needs: [ docker-image, calculate-deploy-targets ]
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'

View File

@@ -11,9 +11,8 @@ defaults:
shell: bash -ex {0}
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
cancel-in-progress: true
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
RUST_BACKTRACE: 1

View File

@@ -13,9 +13,8 @@ on:
workflow_dispatch:
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
cancel-in-progress: true
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
test-postgres-client-libs:

Cargo.lock (generated, 774 lines changed)

File diff suppressed because it is too large.

View File

@@ -62,13 +62,6 @@ brew install protobuf etcd openssl
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
```
3. Install PostgreSQL Client
```
# from https://stackoverflow.com/questions/44654216/correct-way-to-install-psql-without-full-postgres-on-macos
brew install libpq
brew link --force libpq
```
#### Building on Linux and OSX
1. Build neon and patched postgres

View File

@@ -1,80 +0,0 @@
# Postgres user and database management
We've accumulated a bunch of problems with our approach to role and database management, namely:
1. we don't allow role and database creation from Postgres, and users are complaining about that
2. fine-grained role management is not possible from either Postgres or the console
3. web_access and @user are different roles, which creates object access problems in some cases
Right now, we store users and databases both in the console and in Postgres, and there are two main reasons for that:
* we want to be able to authenticate users in the proxy against the console without Postgres involvement. Otherwise,
malicious brute-force attempts would wake up Postgres (expensive) and could exhaust the Postgres connection pool (denial of service).
* it is handy to be able to render the console UI without waking up compute (e.g., to show the database list)
Storing the same information in two systems is a form of replication, and in the current scheme
the console is the primary and the Postgres catalog is a replica.
This RFC proposes to address problems 1 and 2 by making Postgres the source of truth for roles/databases and
only caching this info in the console. Using the replication analogy, the Postgres catalog becomes the primary and
the console becomes a replica. Problem 3 is a bit different and could be addressed by ditching the web_access
role and using, e.g., JWT auth for the @username user so that we do not introduce a new user (JWT is needed
since we don't know the user's password).
This RFC doesn't talk about giving root access to the database, which is blocked on a secure runtime setup.
## Overview
* Add a `/tenant/$tenant/branch/$branch/refresh_catalog` endpoint to the console management API which calls `/get_catalog` and updates the cached roles/databases info.
* Whenever a user edits the list of databases or users, Postgres signals `compute_ctl` to call `/<...>/refresh_catalog` in the console.
* Add a password strength check in our extension.
## Postgres behavior
The default user role (@username) should have the `CREATEROLE`, `CREATEDB`, and `BYPASSRLS` privileges. We expose the Postgres port
to the open internet, so we need to check password strength. We can use the `passwordcheck` extension or do the same
from our own extension.
Whenever a user edits the list of databases or users, Postgres sends SIGHUP to `compute_ctl`. `compute_ctl` should write its PID to a `compute_ctl.pid` file.
## Compute_ctl behavior
Upon a `SIGHUP` signal, `compute_ctl` should call `/tenant/$tenant/branch/$branch/refresh_catalog` to inform the console about changes in the database. The console will then circle back and load the data from `/get_catalog` on compute (see the next section on why this approach is used instead of a direct PUT/PATCH to the console). If `/refresh_catalog` fails, we should retry it N times.
`compute_ctl` also listens for HTTP `/get_catalog` requests and returns the list of databases and roles:
```
/get_catalog: -> {
databases: [{
name: "db1",
owner: "jack"
}],
roles: [{
name: "jack",
rolepassword: "SCRAM-SHA-256..."
}]
}
```
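
A minimal Rust sketch of how `compute_ctl` could model this payload with `serde`; the struct and field names below mirror the example response but are illustrative, not the actual implementation:
```rust
use serde::{Deserialize, Serialize};

// Hypothetical types mirroring the /get_catalog payload shown above.
#[derive(Serialize, Deserialize, Debug)]
pub struct CatalogResponse {
    pub databases: Vec<Database>,
    pub roles: Vec<Role>,
}

#[derive(Serialize, Deserialize, Debug)]
pub struct Database {
    pub name: String,
    pub owner: String,
}

#[derive(Serialize, Deserialize, Debug)]
pub struct Role {
    pub name: String,
    // Stored SCRAM-SHA-256 verifier, as in pg_authid.rolpassword.
    pub rolepassword: String,
}
```
A `/get_catalog` handler would then serialize this with `serde_json::to_vec` (or similar) and return it as the response body.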
## Console behavior
Whenever the console receives `/refresh_catalog` on the management API, it goes to compute and asks for `/get_catalog`. I suggest this approach instead of accepting a list of databases/roles directly on the console endpoint for the following reasons:
* we will need a console-originated call to compute's `/get_catalog` after historical branch creation anyway
* if an intruder gains access to some other tenant's `/tenant/$tenant/.../refresh_catalog`, they won't be able to change the roles list and will only force an unnecessary reload.
`/refresh_catalog` returns HTTP 200 OK on success.
We should have a button in the admin UI to manually force `/refresh_catalog` in case of data desync.
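To make the flow concrete, here is a minimal sketch of the compute-side notification described above: on `SIGHUP`, `compute_ctl` calls the console's `refresh_catalog` endpoint and retries on failure. This is a sketch only, assuming `tokio`, `reqwest`, `anyhow`, and `log`; the URL, retry count, and function names are placeholders, not the actual implementation.
```rust
use std::time::Duration;
use tokio::signal::unix::{signal, SignalKind};

// Hypothetical notifier: the endpoint path and retry policy are placeholders.
async fn notify_console(client: &reqwest::Client, url: &str) -> anyhow::Result<()> {
    const MAX_ATTEMPTS: u32 = 5; // the RFC's "retry it N times"
    for attempt in 1..=MAX_ATTEMPTS {
        match client.post(url).send().await {
            Ok(resp) if resp.status().is_success() => return Ok(()),
            Ok(resp) => log::warn!("refresh_catalog returned {} (attempt {attempt})", resp.status()),
            Err(err) => log::warn!("refresh_catalog request failed: {err} (attempt {attempt})"),
        }
        tokio::time::sleep(Duration::from_secs(1)).await;
    }
    anyhow::bail!("refresh_catalog did not succeed after {MAX_ATTEMPTS} attempts")
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let client = reqwest::Client::new();
    // Placeholder URL; the real console address and tenant/branch IDs come from config.
    let url = "http://console.local/tenant/T/branch/B/refresh_catalog";
    let mut hangups = signal(SignalKind::hangup())?;
    while hangups.recv().await.is_some() {
        notify_console(&client, url).await?;
    }
    Ok(())
}
```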
# Scalability
On my laptop, I can create 4,200 roles per second. That corresponds to roughly 363 million roles per day (4,200 × 86,400 ≈ 3.6 × 10^8). So `/get_catalog` can become expensive, and our roles database can snowball. While we can address the `/get_catalog` size by fetching only the latest changes (e.g., maintain an audit table and have the console drain it), it is still not nice that a single tenant can blow up a multi-tenant console database. I would instead propose to limit the number of databases and roles to some large number like 1000 and bump this limit if somebody asks for it with a legit use case.
# QA:
- Why implement `/get_catalog` instead of sending an SQL query from the console to the compute?
- So far, we do not allow remote superuser access to Postgres, and exposing only endpoints with fixed queries beneath them reduces the attack surface.

View File

@@ -122,7 +122,9 @@ where
download_index_parts(conf, storage, sync_ids)
.await
.remove(&tenant_id)
.ok_or_else(|| anyhow::anyhow!("Missing tenant index parts. This is a bug."))
.ok_or(anyhow::anyhow!(
"Missing tenant index parts. This is a bug."
))
}
/// Retrieves index data from the remote storage for a given timeline.

View File

@@ -83,9 +83,7 @@ impl ElectionLeader {
) -> Result<bool> {
let resp = self.client.leader(election_name).await?;
let kv = resp
.kv()
.ok_or_else(|| anyhow!("failed to get leader response"))?;
let kv = resp.kv().ok_or(anyhow!("failed to get leader response"))?;
let leader = kv.value_str()?;
Ok(leader == candidate_name)
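
The two Rust hunks above toggle between `.ok_or_else(|| anyhow!(...))` and `.ok_or(anyhow!(...))`. The behavioral difference is only in when the error value is built; a minimal standalone sketch (hypothetical `lookup` helper, not repo code):
```rust
use std::collections::HashMap;

// Contrast eager vs. lazy error construction on Option -> Result conversion.
fn lookup(map: &HashMap<String, u32>, key: &str) -> anyhow::Result<u32> {
    // `ok_or` evaluates its argument unconditionally, even when the value is
    // present; with `anyhow!` that means formatting the error string every time.
    let _eager = map.get(key).copied().ok_or(anyhow::anyhow!("missing key: {key}"));

    // `ok_or_else` takes a closure, so the error is only built on the `None` path.
    map.get(key)
        .copied()
        .ok_or_else(|| anyhow::anyhow!("missing key: {key}"))
}
```
Clippy's `or_fun_call` lint generally nudges toward the closure form when the argument is non-trivial.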

View File

@@ -302,8 +302,6 @@ def test_compute_restarts(neon_env_builder: NeonEnvBuilder):
class BackgroundCompute(object):
MAX_QUERY_GAP_SECONDS = 2
def __init__(self, index: int, env: NeonEnv, branch: str):
self.index = index
self.env = env
@@ -341,7 +339,7 @@ class BackgroundCompute(object):
# With less sleep, there is a very big chance of not committing
# anything or only 1 xact during test run.
await asyncio.sleep(random.uniform(0, self.MAX_QUERY_GAP_SECONDS))
await asyncio.sleep(2 * random.random())
self.running = False
@@ -358,34 +356,20 @@ async def run_concurrent_computes(env: NeonEnv,
background_tasks = [asyncio.create_task(compute.run()) for compute in computes]
await asyncio.sleep(run_seconds)
log.info("stopping all tasks but one")
for compute in computes[1:]:
compute.stopped = True
await asyncio.gather(*background_tasks[1:])
log.info("stopped all tasks but one")
# work for some time with only one compute -- it should be able to make some xacts
TIMEOUT_SECONDS = computes[0].MAX_QUERY_GAP_SECONDS + 3
initial_queries_by_0 = len(computes[0].successful_queries)
log.info(f'Waiting for another query by computes[0], '
f'it already had {initial_queries_by_0}, timeout is {TIMEOUT_SECONDS}s')
for _ in range(10 * TIMEOUT_SECONDS):
current_queries_by_0 = len(computes[0].successful_queries) - initial_queries_by_0
if current_queries_by_0 >= 1:
log.info(f'Found {current_queries_by_0} successful queries '
f'by computes[0], completing the test')
break
await asyncio.sleep(0.1)
else:
assert False, "Timed out while waiting for another query by computes[0]"
await asyncio.sleep(8)
computes[0].stopped = True
await asyncio.gather(background_tasks[0])
await asyncio.gather(*background_tasks)
result = await exec_compute_query(env, branch, 'SELECT * FROM query_log')
# we should have inserted something while single compute was running
log.info(f'Executed {len(result)} queries, {current_queries_by_0} of them '
f'by computes[0] after we started stopping the others')
assert len(result) >= 4
log.info(f'Executed {len(result)} queries')
for row in result:
log.info(f'{row[0]} {row[1]} {row[2]}')

View File

@@ -33,9 +33,7 @@ itoa = { version = "0.4", features = ["i128", "std"] }
libc = { version = "0.2", features = ["extra_traits", "std"] }
log = { version = "0.4", default-features = false, features = ["serde", "std"] }
memchr = { version = "2", features = ["std", "use_std"] }
nom = { version = "7", features = ["alloc", "std"] }
num-bigint = { version = "0.4", features = ["std"] }
num-integer = { version = "0.1", default-features = false, features = ["i128", "std"] }
num-integer = { version = "0.1", default-features = false, features = ["i128"] }
num-traits = { version = "0.2", features = ["i128", "std"] }
prost = { version = "0.10", features = ["prost-derive", "std"] }
rand = { version = "0.8", features = ["alloc", "getrandom", "libc", "rand_chacha", "rand_hc", "small_rng", "std", "std_rng"] }
@@ -43,11 +41,10 @@ regex = { version = "1", features = ["aho-corasick", "memchr", "perf", "perf-cac
regex-syntax = { version = "0.6", features = ["unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] }
scopeguard = { version = "1", features = ["use_std"] }
serde = { version = "1", features = ["alloc", "derive", "serde_derive", "std"] }
time = { version = "0.3", features = ["alloc", "formatting", "itoa", "macros", "parsing", "quickcheck", "quickcheck-dep", "std", "time-macros"] }
tokio = { version = "1", features = ["bytes", "fs", "io-std", "io-util", "libc", "macros", "memchr", "mio", "net", "num_cpus", "once_cell", "process", "rt", "rt-multi-thread", "signal-hook-registry", "socket2", "sync", "time", "tokio-macros", "winapi"] }
tokio = { version = "1", features = ["bytes", "fs", "io-std", "io-util", "libc", "macros", "memchr", "mio", "net", "num_cpus", "once_cell", "process", "rt", "rt-multi-thread", "signal-hook-registry", "socket2", "sync", "time", "tokio-macros"] }
tokio-util = { version = "0.7", features = ["codec", "io"] }
tracing = { version = "0.1", features = ["attributes", "log", "std", "tracing-attributes"] }
tracing-core = { version = "0.1", features = ["lazy_static", "std", "valuable"] }
tracing-core = { version = "0.1", features = ["lazy_static", "std"] }
[build-dependencies]
ahash = { version = "0.7", features = ["std"] }
@@ -60,7 +57,6 @@ indexmap = { version = "1", default-features = false, features = ["std"] }
libc = { version = "0.2", features = ["extra_traits", "std"] }
log = { version = "0.4", default-features = false, features = ["serde", "std"] }
memchr = { version = "2", features = ["std", "use_std"] }
nom = { version = "7", features = ["alloc", "std"] }
prost = { version = "0.10", features = ["prost-derive", "std"] }
regex = { version = "1", features = ["aho-corasick", "memchr", "perf", "perf-cache", "perf-dfa", "perf-inline", "perf-literal", "std", "unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] }
regex-syntax = { version = "0.6", features = ["unicode", "unicode-age", "unicode-bool", "unicode-case", "unicode-gencat", "unicode-perl", "unicode-script", "unicode-segment"] }