Compare commits

..

47 Commits

Author SHA1 Message Date
Jere Vaara
ab72f94599 create pg_session_jwt extension if local proxy spec has jwks 2024-10-09 18:48:47 +03:00
Arseny Sher
a181392738 safekeeper: add evicted_timelines gauge. (#9318)
showing total number of evicted timelines.
2024-10-09 14:40:30 +03:00
Alexander Bayandin
fc7397122c test_runner: fix path to tpc-h queries (#9327)
## Problem

The path to TPC-H queries was incorrectly changed in #9306.
This path is used for `test_tpch` parameterization, so all perf tests
started to fail:

```
==================================== ERRORS ====================================
__________ ERROR collecting test_runner/performance/test_perf_olap.py __________
test_runner/performance/test_perf_olap.py:205: in <module>
    @pytest.mark.parametrize("query", tpch_queuies())
test_runner/performance/test_perf_olap.py:196: in tpch_queuies
    assert queries_dir.exists(), f"TPC-H queries dir not found: {queries_dir}"
E   AssertionError: TPC-H queries dir not found: /__w/neon/neon/test_runner/performance/performance/tpc-h/queries
E   assert False
E    +  where False = <bound method Path.exists of PosixPath('/__w/neon/neon/test_runner/performance/performance/tpc-h/queries')>()
E    +    where <bound method Path.exists of PosixPath('/__w/neon/neon/test_runner/performance/performance/tpc-h/queries')> = PosixPath('/__w/neon/neon/test_runner/performance/performance/tpc-h/queries').exists
```

## Summary of changes
- Fix the path to tpc-h queries
2024-10-09 12:11:06 +01:00
Vlad Lazar
cc599e23c1 storcon: make observed state updates more granular (#9276)
## Problem

Previously, observed state updates from the reconciler may have
clobbered inline changes made to the observed state by other code paths.

## Summary of changes

Model observed state changes from reconcilers as deltas. This means that
we only update what has changed. Handling for node going off-line concurrently
during the reconcile is also added: set observed state to None in such cases to
respect the convention.

Closes https://github.com/neondatabase/neon/issues/9124
2024-10-09 11:53:29 +01:00
Folke Behrens
54d1185789 proxy: Unalias hyper1 and replace one use of hyper0 in test (#9324)
Leaves one final use of hyper0 in proxy for the health service,
which requires some coordinated effort with other services.
2024-10-09 12:44:17 +02:00
Heikki Linnakangas
8a138db8b7 tests: Reduce noise from logging renamed files (#9315)
Instead of printing the full absolute path for every file, print just
the filenames.

Before:

    2024-10-08 13:19:39.98 INFO [test_pageserver_generations.py:669] Found file /home/heikki/git-sandbox/neon/test_output/test_upgrade_generationless_local_file_paths[debug-pg16]/repo/pageserver_1/tenants/0c04a8df7691a367ad0bb1cc1373ba4d/timelines/f41022551e5f96ce8dbefb9b5d35ab45/000000067F0000000100000A8D0100000000-000000067F0000000100000AC10000000002__00000000014F16F0-v1-00000001
    2024-10-08 13:19:39.99 INFO [test_pageserver_generations.py:673] Renamed /home/heikki/git-sandbox/neon/test_output/test_upgrade_generationless_local_file_paths[debug-pg16]/repo/pageserver_1/tenants/0c04a8df7691a367ad0bb1cc1373ba4d/timelines/f41022551e5f96ce8dbefb9b5d35ab45/000000067F0000000100000A8D0100000000-000000067F0000000100000AC10000000002__00000000014F16F0-v1-00000001 -> /home/heikki/git-sandbox/neon/test_output/test_upgrade_generationless_local_file_paths[debug-pg16]/repo/pageserver_1/tenants/0c04a8df7691a367ad0bb1cc1373ba4d/timelines/f41022551e5f96ce8dbefb9b5d35ab45/000000067F0000000100000A8D0100000000-000000067F0000000100000AC10000000002__00000000014F16F0

After:

    2024-10-08 13:24:39.726 INFO [test_pageserver_generations.py:667] Renaming files in /home/heikki/git-sandbox/neon/test_output/test_upgrade_generationless_local_file_paths[debug-pg16]/repo/pageserver_1/tenants/3439538816c520adecc541cc8b1de21c/timelines/6a7be8ee707b355de48dd91b326d6ae1
    2024-10-08 13:24:39.728 INFO [test_pageserver_generations.py:673] Renamed
000000067F0000000100000A8D0100000000-000000067F0000000100000AC10000000002__00000000014F16F0-v1-00000001 -> 000000067F0000000100000A8D0100000000-000000067F0000000100000AC10000000002__00000000014F16F0
2024-10-09 10:55:56 +01:00
Erik Grinaker
211970f0e0 remote_storage: add DownloadOpts::byte_(start|end) (#9293)
`download_byte_range()` is basically a copy of `download()` with an
additional option passed to the backend SDKs. This can cause these code
paths to diverge, and prevents combining various options.

This patch adds `DownloadOpts::byte_(start|end)` and move byte range
handling into `download()`.
2024-10-09 10:29:06 +01:00
Heikki Linnakangas
f87f5a383e tests: Remove redundant log lines when starting an endpoint (#9316)
The "Starting postgres endpoint <name>" message is not needed, because
the neon_cli.py prints the neon_local command line used to start the
endpoint. That contains the same information. The "Postgres startup took
XX seconds" message is not very useful because no one pays attention to
those in the python test logs when things are going smoothly, and if you
do wonder about the startup speed, the same information and more can be
found in the compute log.

Before:

    2024-10-07 22:32:27.794 INFO [neon_fixtures.py:3492] Starting postgres endpoint ep-1
    2024-10-07 22:32:27.794 INFO [neon_cli.py:73] Running command "/tmp/neon/bin/neon_local endpoint start --safekeepers 1 ep-1"
    2024-10-07 22:32:27.901 INFO [neon_fixtures.py:3690] Postgres startup took 0.11398935317993164 seconds

After:

    2024-10-07 22:32:27.794 INFO [neon_cli.py:73] Running command "/tmp/neon/bin/neon_local endpoint start --safekeepers 1 ep-1"
2024-10-09 09:58:50 +01:00
Arpad Müller
e8ae37652b Add timeline offload mechanism (#8907)
Implements an initial mechanism for offloading of archived timelines.

Offloading is implemented as specified in the RFC.

For now, there is no persistence, so a restart of the pageserver will
retrigger downloads until the timeline is offloaded again.

We trigger offloading in the compaction loop because we need the signal
for whether compaction is done and everything has been uploaded or not.

Part of #8088
2024-10-09 01:33:39 +02:00
Tristan Partin
5bd8e2363a Enable all pyupgrade checks in ruff
This will help to keep us from using deprecated Python features going
forward.

Signed-off-by: Tristan Partin <tristan@neon.tech>
2024-10-08 14:32:26 -05:00
Vlad Lazar
618680c299 storcon: apply all node status changes before handling transitions (#9281)
## Problem

When a node goes offline, we trigger reconciles to migrate shards away
from it. If multiple nodes go offline at the same time, we handled them in
sequence. Hence, we might migrate shards from the first offline node to the second
offline node and increase the unavailability period.

## Summary of changes

Refactor heartbeat delta handling to:
1. Update in memory state for all nodes first
2. Handle availability transitions one by one (we have full picture for each node after (1))

Closes https://github.com/neondatabase/neon/issues/9126
2024-10-08 17:55:25 +01:00
Alexander Bayandin
baf27ba6a3 Fix compiler warnings on macOS (#9319)
## Problem

On macOS:
```
/Users/runner/work/neon/neon//pgxn/neon/file_cache.c:623:19: error: variable 'has_remaining_pages' is used uninitialized whenever 'for' loop exits because its condition is false [-Werror,-Wsometimes-uninitialized]
```

## Summary of changes
- Initialise `has_remaining_pages` with `false`
2024-10-08 17:34:35 +01:00
Tristan Partin
16417d919d Remove get_self_dir()
It didn't serve much value, and was only used twice.
Path(__file__).parent is a pretty easy invocation to use.

Signed-off-by: Tristan Partin <tristan@neon.tech>
2024-10-08 08:57:11 -05:00
Heikki Linnakangas
18b97150b2 Remove non-existent entries from .dockerignore (#9209) 2024-10-08 14:55:24 +03:00
Heikki Linnakangas
17c59ed786 Don't override CFLAGS when building neon extension
If you override CFLAGS, you also override any flags that PostgreSQL
configure script had picked. That includes many options that enable
extra compiler warnings, like '-Wall', '-Wmissing-prototypes', and so
forth. The override was added in commit 171385ac14, but the intention
of that was to be *more* strict, by enabling '-Werror', not less
strict. The proper way of setting '-Werror', as documented in the docs
and mentioned in PR #2405, is to set COPT='-Werror', but leave CFLAGS
alone.

All the compiler warnings with the standard PostgreSQL flags have now
been fixed, so we can do this without adding noise.

Part of the cleanup issue #9217.
2024-10-07 23:49:33 +03:00
Heikki Linnakangas
d7b960c9b5 Silence compiler warning about using variable uninitialized
It's not a bug, the variable is initialized when it's used, but the
compiler isn't smart enough to see that through all the conditions.

Part of the cleanup issue #9217.
2024-10-07 23:49:31 +03:00
Heikki Linnakangas
2ff6d2b6b5 Silence compiler warning about variable only used in assertions
Part of the cleanup issue #9217.
2024-10-07 23:49:29 +03:00
Heikki Linnakangas
30f7fbc88d Add pg_attribute_printf to WalProposerLibLog, per gcc's suggestion
/pgxn/neon/walproposer_compat.c:192:9: warning: function ‘WalProposerLibLog’ might be a candidate for ‘gnu_printf’ format attribute [-Wsuggest-attribute=format]
      192 |         vsnprintf(buf, sizeof(buf), fmt, args);
          |         ^~~~~~~~~
2024-10-07 23:49:27 +03:00
Heikki Linnakangas
09f2000f91 Silence warnings about shadowed local variables
Part of the cleanup issue #9217.
2024-10-07 23:49:24 +03:00
Heikki Linnakangas
e553ca9e4f Silence warnings about mixed declarations and code
The warning:

    warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]

It's PostgreSQL project style to stick to the old C90 style.
(Alternatively, we could disable it for our extension.)

Part of the cleanup issue #9217.
2024-10-07 23:49:22 +03:00
Heikki Linnakangas
0a80dbce83 neon_write() function is not used on v17
ifdef it out on v17, to silence compiler warning.

Part of the cleanup issue #9217.
2024-10-07 23:49:20 +03:00
Heikki Linnakangas
e763256448 Fix warnings about missing function prototypes
Prototypes for neon_writev(), neon_readv(), and neon_regisersync()
were missing. But instead of adding the missing prototypes, mark all
the smgr functions 'static'.

Part of the cleanup issue #9217.
2024-10-07 23:49:18 +03:00
Heikki Linnakangas
129d4480bb Move "/* fallthrough */" comments so that GCC recognizes them
This silences warnings about implicit fallthroughs.

Part of the cleanup issue #9217.
2024-10-07 23:49:16 +03:00
Heikki Linnakangas
776df963ba Fix function prototypes
Silences these compiler warnings:

    /pgxn/neon_walredo/walredoproc.c:452:1: warning: ‘CreateFakeSharedMemoryAndSemaphores’ was used with no prototype before its definition [-Wmissing-prototypes]
      452 | CreateFakeSharedMemoryAndSemaphores()
          | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    /pgxn/neon/walproposer_pg.c:541:1: warning: no previous prototype for ‘GetWalpropShmemState’ [-Wmissing-prototypes]
      541 | GetWalpropShmemState()
          | ^~~~~~~~~~~~~~~~~~~~

Part of the cleanup issue #9217.
2024-10-07 23:49:13 +03:00
Heikki Linnakangas
11dc5feb36 Remove unused static function
In v16 merge, we copied much of heap RMGR, to distinguish vanilla
Postgres heap records from records generated with neon patches, with
the additional CID fields. This function is only used by the
HEAP_TRUNCATE records, however, which we didn't need to copy.

Part of the cleanup issue #9217.
2024-10-07 23:49:11 +03:00
Heikki Linnakangas
dbbe57a837 Remove unused local vars and a prototype for non-existent function
Per compiler warnings. Part of the cleanup issue #9217.
2024-10-07 23:49:09 +03:00
Em Sharnoff
cc29def544 vm-monitor: Ignore LFC in postgres cgroup memory threshold (#8668)
In short: Currently we reserve 75% of memory to the LFC, meaning that
we scale up to keep postgres using less than 25% of the compute's
memory.

This means that for certain memory-heavy workloads, we end up scaling
much higher than is actually needed — in the worst case, up to 4x,
although in practice it tends not to be quite so bad.

Part of neondatabase/autoscaling#1030.
2024-10-07 21:25:34 +01:00
Arpad Müller
912d47ec02 storage_broker: update hyper and tonic again (#9299)
Update hyper and tonic again in the storage broker, this time with a fix
for the issue that made us revert the update last time.

The first commit is a revert of #9268, the second a fix for the issue.

fixes #9231.
2024-10-07 21:12:13 +02:00
Tristan Partin
6eba29c732 Improve logging on changes in a compute's status
I'm trying to debug a situation with the LR benchmark publisher not
being in the correct state. This should aid in debugging, while just
being generally useful.

PR: https://github.com/neondatabase/neon/pull/9265
Signed-off-by: Tristan Partin <tristan@neon.tech>
2024-10-07 13:19:48 -04:00
Heikki Linnakangas
99d4c1877b Replace BUFFERTAGS_EQUAL compatibility macro with new-style function (#9294)
In PostgreSQL v16, BUFFERTAGS_EQUAL was replaced with a static inline
macro, BufferTagsEqual. Let's use the new name going forward, and have
backwards-compatibility glue to allow using the new name on v14 and v15,
rather than the other way round. This also makes BufferTagsEquals
consistent with InitBufferTag, for which we were already using the new
name.
2024-10-07 19:49:27 +03:00
Jere Vaara
2272dc8a48 feat(compute_tools): Create JWKS Postgres roles without attributes (#9031)
Requires https://github.com/neondatabase/neon/pull/9086 first to have
`local_proxy_config`. This logic can still be reviewed implementation
wise.

Create JWT Auth functionality related roles without attributes and
`neon_superuser` group.

Read the JWT related roles from `local_proxy_config` `JWKS` settings and
handle them differently than other console created roles.
2024-10-07 19:37:32 +03:00
Heikki Linnakangas
323bd018cd Make sure BufferTag padding bytes are cleared in hash keys (#9292)
The prefetch-queue hash table uses a BufferTag struct as the hash key,
and it's hashed using hash_bytes(). It's important that all the padding
bytes in the key are cleared, because hash_bytes() will include them.

I was getting compiler warnings like this on v14 and v15, when compiling
with -Warray-bounds:

    In function ‘prfh_lookup_hash_internal’,
inlined from ‘prfh_lookup’ at
pg_install/v14/include/postgresql/server/lib/simplehash.h:821:9,
inlined from ‘neon_read_at_lsnv’ at pgxn/neon/pagestore_smgr.c:2789:11,
inlined from ‘neon_read_at_lsn’ at pgxn/neon/pagestore_smgr.c:2904:2:
pg_install/v14/include/postgresql/server/storage/relfilenode.h:90:43:
warning: array subscript ‘PrefetchRequest[0]’ is partly outside array
bounds of ‘BufferTag[1]’ {aka ‘struct buftag[1]’} [-Warray-bounds]
       89 |         ((node1).relNode == (node2).relNode && \
          |         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       90 |          (node1).dbNode == (node2).dbNode && \
          |          ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~
       91 |          (node1).spcNode == (node2).spcNode)
          |          ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
pg_install/v14/include/postgresql/server/storage/buf_internals.h:116:9:
note: in expansion of macro ‘RelFileNodeEquals’
      116 |         RelFileNodeEquals((a).rnode, (b).rnode) && \
          |         ^~~~~~~~~~~~~~~~~
pgxn/neon/neon_pgversioncompat.h:25:31: note: in expansion of macro
‘BUFFERTAGS_EQUAL’
       25 | #define BufferTagsEqual(a, b) BUFFERTAGS_EQUAL(*(a), *(b))
          |                               ^~~~~~~~~~~~~~~~
pgxn/neon/pagestore_smgr.c:220:34: note: in expansion of macro
‘BufferTagsEqual’
220 | #define SH_EQUAL(tb, a, b) (BufferTagsEqual(&(a)->buftag,
&(b)->buftag))
          |                                  ^~~~~~~~~~~~~~~
pg_install/v14/include/postgresql/server/lib/simplehash.h:280:77: note:
in expansion of macro ‘SH_EQUAL’
280 | #define SH_COMPARE_KEYS(tb, ahash, akey, b) (ahash ==
SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey))
| ^~~~~~~~
pg_install/v14/include/postgresql/server/lib/simplehash.h:799:21: note:
in expansion of macro ‘SH_COMPARE_KEYS’
      799 |                 if (SH_COMPARE_KEYS(tb, hash, key, entry))
          |                     ^~~~~~~~~~~~~~~
    pgxn/neon/pagestore_smgr.c: In function ‘neon_read_at_lsn’:
    pgxn/neon/pagestore_smgr.c:2742:25: note: object ‘buftag’ of size 20
     2742 |         BufferTag       buftag = {0};
          |                         ^~~~~~

This commit silences those warnings, although it's not clear to me why
the compiler complained like that in the first place. I found the issue
with padding bytes while looking into those warnings, but that was
coincidental, I don't think the padding bytes explain the warnings as
such.

In v16, the BUFFERTAGS_EQUAL macro was replaced with a static inline
function, and that also silences the compiler warning. Not clear to me
why.
2024-10-07 18:04:04 +03:00
Folke Behrens
ad267d849f proxy: Move module base files into module directory (#9297) 2024-10-07 16:25:34 +02:00
Conrad Ludgate
8cd7b5bf54 proxy: rename console -> control_plane, rename web -> console_redirect (#9266)
rename console -> control_plane
rename web -> console_redirect

I think these names are a little more representative.
2024-10-07 14:09:54 +01:00
Konstantin Knizhnik
47c3c9a413 Fix update of statistic for LFC/prefetch (#9272)
## Problem

See #9199

## Summary of changes

Fix update of hits/misses for LFC and prefetch introduced in
78938d1b59

## Checklist before requesting a review

- [ ] I have performed a self-review of my code.
- [ ] If it is a core feature, I have added thorough tests.
- [ ] Do we need to implement analytics? if so did you add the relevant
metrics to the dashboard?
- [ ] If this PR requires public announcement, mark it with
/release-notes label and add several sentences in this section.

## Checklist before merging

- [ ] Do not forget to reformat commit message to not include the above
checklist

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
2024-10-07 12:21:16 +03:00
Arseny Sher
eae4470bb6 safekeeper: remove local WAL files ignoring peer_horizon_lsn. (#8900)
If peer safekeeper needs garbage collected segment it will be fetched
now from s3 using on-demand WAL download. Reduces danger of running out of disk space when safekeeper fails.
2024-10-04 19:07:39 +03:00
Ivan Efremov
2d248aea6f proxy: exclude triple logging of connect compute errors (#9277)
Fixes (#9020)
 - Use the compute::COULD_NOT_CONNECT for connection error message;
 - Eliminate logging for one connection attempt;
 - Typo fix.
2024-10-04 18:21:39 +03:00
Conrad Ludgate
6c05f89f7d proxy: add local-proxy to compute image (#8823)
1. Adds local-proxy to compute image and vm spec
2. Updates local-proxy config processing, writing PID to a file eagerly
3. Updates compute-ctl to understand local proxy compute spec and to
send SIGHUP to local-proxy over that pid.

closes https://github.com/neondatabase/cloud/issues/16867
2024-10-04 14:52:01 +00:00
Arseny Sher
db53f98725 neon walsender_hooks: take basebackup LSN directly. (#9263)
NeonWALReader needs to know LSN before which WAL is not available
locally, that is, basebackup LSN. Previously it was taken from
WalpropShmemState, but that's racy, as walproposer sets its there only
after successful election. Get it directly with GetRedoStartLsn.

Should fix flakiness of
test_ondemand_wal_download_in_replication_slot_funcs etc.

ref #9201
2024-10-04 14:56:15 +01:00
Erik Grinaker
04a6222418 remote_storage: add head_object integration test (#9274) 2024-10-04 12:40:41 +01:00
Vlad Lazar
dcf7af5a16 storcon: do timeline creation on all attached location (#9237)
## Problem

Creation of a timelines during a reconciliation can lead to
unavailability if the user attempts to
start a compute before the storage controller has notified cplane of the
cut-over.

## Summary of changes

Create timelines on all currently attached locations. For the latest
location, we still look
at the database (this is as previously). With this change we also look
into the observed state
to find *other* attached locations.

Related https://github.com/neondatabase/neon/issues/9144
2024-10-04 11:56:43 +01:00
Erik Grinaker
37158d0424 pageserver: use conditional GET for secondary tenant heatmaps (#9236)
## Problem

Secondary tenant heatmaps were always downloaded, even when they hadn't
changed. This can be avoided by using a conditional GET request passing
the `ETag` of the previous heatmap.

## Summary of changes

The `ETag` was already plumbed down into the heatmap downloader, and
just needed further plumbing into the remote storage backends.

* Add a `DownloadOpts` struct and pass it to
`RemoteStorage::download()`.
* Add an optional `DownloadOpts::etag` field, which uses a conditional
GET and returns `DownloadError::Unmodified` on match.
2024-10-04 12:29:48 +02:00
Erik Grinaker
60fb840e1f Cargo.toml: enable sso for aws-config (#9261)
## Problem

The S3 tests couldn't use SSO authentication for local tests against S3.

## Summary of changes

Enable the `sso` feature of `aws-config`. Also run `cargo hakari
generate` which made some updates to `workspace_hack`.
2024-10-04 11:27:06 +01:00
Heikki Linnakangas
52232dd85c tests: Add a comment explaining the rules of NeonLocalCli wrappers (#9195) 2024-10-03 22:03:29 +03:00
Heikki Linnakangas
8ef0c38b23 tests: Rename NeonLocalCli functions to match the 'neon_local' commands (#9195)
This makes it more clear that the functions in NeonLocalCli are just
typed wrappers around the corresponding 'neon_local' commands.
2024-10-03 22:03:27 +03:00
Heikki Linnakangas
56bb1ac458 tests: Move NeonCli and friends to separate file (#9195)
In the passing, rename it to NeonLocalCli, to reflect that the binary
is called 'neon_local'.

Add wrapper for the 'timeline_import' command, eliminating the last
raw call to the raw_cli() function from tests, except for a few in
test_neon_cli.py which are about testing the 'neon_local' itself. All
the other calls are now made through the strongly-typed wrapper
functions
2024-10-03 22:03:25 +03:00
Heikki Linnakangas
19db9e9aad tests: Replace direct calls to neon_cli with wrappers in NeonEnv (#9195)
Add wrappers for a few commands that didn't have them before. Move the
logic to generate tenant and timeline IDs from NeonCli to the callers,
so that NeonCli is more purely just a type-safe wrapper around
'neon_local'.
2024-10-03 22:03:22 +03:00
311 changed files with 4812 additions and 3104 deletions

View File

@@ -5,9 +5,7 @@
!Cargo.toml !Cargo.toml
!Makefile !Makefile
!rust-toolchain.toml !rust-toolchain.toml
!scripts/combine_control_files.py
!scripts/ninstall.sh !scripts/ninstall.sh
!vm-cgconfig.conf
!docker-compose/run-tests.sh !docker-compose/run-tests.sh
# Directories # Directories
@@ -17,15 +15,12 @@
!compute_tools/ !compute_tools/
!control_plane/ !control_plane/
!libs/ !libs/
!neon_local/
!pageserver/ !pageserver/
!patches/
!pgxn/ !pgxn/
!proxy/ !proxy/
!storage_scrubber/ !storage_scrubber/
!safekeeper/ !safekeeper/
!storage_broker/ !storage_broker/
!storage_controller/ !storage_controller/
!trace/
!vendor/postgres-*/ !vendor/postgres-*/
!workspace_hack/ !workspace_hack/

View File

@@ -33,7 +33,7 @@ jobs:
github-event-name: ${{ github.event_name }} github-event-name: ${{ github.event_name }}
cancel-previous-e2e-tests: cancel-previous-e2e-tests:
needs: [ check-permissions, promote-images, tag ] needs: [ check-permissions ]
if: github.event_name == 'pull_request' if: github.event_name == 'pull_request'
runs-on: ubuntu-22.04 runs-on: ubuntu-22.04
@@ -518,7 +518,7 @@ jobs:
trigger-e2e-tests: trigger-e2e-tests:
if: ${{ !github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') || github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' }} if: ${{ !github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') || github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' }}
needs: [ check-permissions, promote-images, tag, cancel-previous-e2e-tests ] needs: [ check-permissions, promote-images, tag ]
uses: ./.github/workflows/trigger-e2e-tests.yml uses: ./.github/workflows/trigger-e2e-tests.yml
secrets: inherit secrets: inherit

272
Cargo.lock generated
View File

@@ -666,34 +666,6 @@ dependencies = [
"tracing", "tracing",
] ]
[[package]]
name = "axum"
version = "0.6.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b829e4e32b91e643de6eafe82b1d90675f5874230191a4ffbc1b336dec4d6bf"
dependencies = [
"async-trait",
"axum-core 0.3.4",
"bitflags 1.3.2",
"bytes",
"futures-util",
"http 0.2.9",
"http-body 0.4.5",
"hyper 0.14.30",
"itoa",
"matchit 0.7.0",
"memchr",
"mime",
"percent-encoding",
"pin-project-lite",
"rustversion",
"serde",
"sync_wrapper 0.1.2",
"tower",
"tower-layer",
"tower-service",
]
[[package]] [[package]]
name = "axum" name = "axum"
version = "0.7.5" version = "0.7.5"
@@ -701,7 +673,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf" checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf"
dependencies = [ dependencies = [
"async-trait", "async-trait",
"axum-core 0.4.5", "axum-core",
"base64 0.21.1", "base64 0.21.1",
"bytes", "bytes",
"futures-util", "futures-util",
@@ -731,23 +703,6 @@ dependencies = [
"tracing", "tracing",
] ]
[[package]]
name = "axum-core"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "759fa577a247914fd3f7f76d62972792636412fbfd634cd452f6a385a74d2d2c"
dependencies = [
"async-trait",
"bytes",
"futures-util",
"http 0.2.9",
"http-body 0.4.5",
"mime",
"rustversion",
"tower-layer",
"tower-service",
]
[[package]] [[package]]
name = "axum-core" name = "axum-core"
version = "0.4.5" version = "0.4.5"
@@ -971,7 +926,7 @@ dependencies = [
"clang-sys", "clang-sys",
"itertools 0.12.1", "itertools 0.12.1",
"log", "log",
"prettyplease 0.2.17", "prettyplease",
"proc-macro2", "proc-macro2",
"quote", "quote",
"regex", "regex",
@@ -1265,6 +1220,7 @@ version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bytes", "bytes",
"camino",
"cfg-if", "cfg-if",
"chrono", "chrono",
"clap", "clap",
@@ -2453,15 +2409,6 @@ dependencies = [
"digest", "digest",
] ]
[[package]]
name = "home"
version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5"
dependencies = [
"windows-sys 0.52.0",
]
[[package]] [[package]]
name = "hostname" name = "hostname"
version = "0.4.0" version = "0.4.0"
@@ -2656,14 +2603,15 @@ dependencies = [
[[package]] [[package]]
name = "hyper-timeout" name = "hyper-timeout"
version = "0.4.1" version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1" checksum = "3203a961e5c83b6f5498933e78b6b263e208c197b63e9c6c53cc82ffd3f63793"
dependencies = [ dependencies = [
"hyper 0.14.30", "hyper 1.4.1",
"hyper-util",
"pin-project-lite", "pin-project-lite",
"tokio", "tokio",
"tokio-io-timeout", "tower-service",
] ]
[[package]] [[package]]
@@ -3469,7 +3417,7 @@ dependencies = [
"opentelemetry-http", "opentelemetry-http",
"opentelemetry-proto", "opentelemetry-proto",
"opentelemetry_sdk", "opentelemetry_sdk",
"prost 0.13.3", "prost",
"reqwest 0.12.4", "reqwest 0.12.4",
"thiserror", "thiserror",
] ]
@@ -3482,8 +3430,8 @@ checksum = "30ee9f20bff9c984511a02f082dc8ede839e4a9bf15cc2487c8d6fea5ad850d9"
dependencies = [ dependencies = [
"opentelemetry", "opentelemetry",
"opentelemetry_sdk", "opentelemetry_sdk",
"prost 0.13.3", "prost",
"tonic 0.12.3", "tonic",
] ]
[[package]] [[package]]
@@ -4177,16 +4125,6 @@ dependencies = [
"tokio", "tokio",
] ]
[[package]]
name = "prettyplease"
version = "0.1.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c8646e95016a7a6c4adea95bafa8a16baab64b583356217f2c85db4a39d9a86"
dependencies = [
"proc-macro2",
"syn 1.0.109",
]
[[package]] [[package]]
name = "prettyplease" name = "prettyplease"
version = "0.2.17" version = "0.2.17"
@@ -4257,16 +4195,6 @@ dependencies = [
"thiserror", "thiserror",
] ]
[[package]]
name = "prost"
version = "0.11.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd"
dependencies = [
"bytes",
"prost-derive 0.11.9",
]
[[package]] [[package]]
name = "prost" name = "prost"
version = "0.13.3" version = "0.13.3"
@@ -4274,42 +4202,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f"
dependencies = [ dependencies = [
"bytes", "bytes",
"prost-derive 0.13.3", "prost-derive",
] ]
[[package]] [[package]]
name = "prost-build" name = "prost-build"
version = "0.11.9" version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "119533552c9a7ffacc21e099c24a0ac8bb19c2a2a3f363de84cd9b844feab270" checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15"
dependencies = [ dependencies = [
"bytes", "bytes",
"heck 0.4.1", "heck 0.5.0",
"itertools 0.10.5", "itertools 0.12.1",
"lazy_static",
"log", "log",
"multimap", "multimap",
"once_cell",
"petgraph", "petgraph",
"prettyplease 0.1.25", "prettyplease",
"prost 0.11.9", "prost",
"prost-types", "prost-types",
"regex", "regex",
"syn 1.0.109", "syn 2.0.52",
"tempfile", "tempfile",
"which",
]
[[package]]
name = "prost-derive"
version = "0.11.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4"
dependencies = [
"anyhow",
"itertools 0.10.5",
"proc-macro2",
"quote",
"syn 1.0.109",
] ]
[[package]] [[package]]
@@ -4327,11 +4241,11 @@ dependencies = [
[[package]] [[package]]
name = "prost-types" name = "prost-types"
version = "0.11.9" version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "213622a1460818959ac1181aaeb2dc9c7f63df720db7d788b3e24eacd1983e13" checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670"
dependencies = [ dependencies = [
"prost 0.11.9", "prost",
] ]
[[package]] [[package]]
@@ -5093,6 +5007,21 @@ dependencies = [
"zeroize", "zeroize",
] ]
[[package]]
name = "rustls"
version = "0.23.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebbbdb961df0ad3f2652da8f3fdc4b36122f568f968f45ad3316f26c025c677b"
dependencies = [
"log",
"once_cell",
"ring",
"rustls-pki-types",
"rustls-webpki 0.102.2",
"subtle",
"zeroize",
]
[[package]] [[package]]
name = "rustls-native-certs" name = "rustls-native-certs"
version = "0.6.2" version = "0.6.2"
@@ -5118,6 +5047,19 @@ dependencies = [
"security-framework", "security-framework",
] ]
[[package]]
name = "rustls-native-certs"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcaf18a4f2be7326cd874a5fa579fae794320a0f388d365dca7e480e55f83f8a"
dependencies = [
"openssl-probe",
"rustls-pemfile 2.1.1",
"rustls-pki-types",
"schannel",
"security-framework",
]
[[package]] [[package]]
name = "rustls-pemfile" name = "rustls-pemfile"
version = "1.0.2" version = "1.0.2"
@@ -5193,6 +5135,7 @@ dependencies = [
"fail", "fail",
"futures", "futures",
"hex", "hex",
"http 1.1.0",
"humantime", "humantime",
"hyper 0.14.30", "hyper 0.14.30",
"metrics", "metrics",
@@ -5749,19 +5692,22 @@ version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"async-stream", "async-stream",
"bytes",
"clap", "clap",
"const_format", "const_format",
"futures", "futures",
"futures-core", "futures-core",
"futures-util", "futures-util",
"http-body-util",
"humantime", "humantime",
"hyper 0.14.30", "hyper 1.4.1",
"hyper-util",
"metrics", "metrics",
"once_cell", "once_cell",
"parking_lot 0.12.1", "parking_lot 0.12.1",
"prost 0.11.9", "prost",
"tokio", "tokio",
"tonic 0.9.2", "tonic",
"tonic-build", "tonic-build",
"tracing", "tracing",
"utils", "utils",
@@ -6305,6 +6251,17 @@ dependencies = [
"tokio", "tokio",
] ]
[[package]]
name = "tokio-rustls"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4"
dependencies = [
"rustls 0.23.7",
"rustls-pki-types",
"tokio",
]
[[package]] [[package]]
name = "tokio-stream" name = "tokio-stream"
version = "0.1.16" version = "0.1.16"
@@ -6396,29 +6353,30 @@ dependencies = [
[[package]] [[package]]
name = "tonic" name = "tonic"
version = "0.9.2" version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3082666a3a6433f7f511c7192923fa1fe07c69332d3c6a2e6bb040b569199d5a" checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
dependencies = [ dependencies = [
"async-stream", "async-stream",
"async-trait", "async-trait",
"axum 0.6.20", "axum",
"base64 0.21.1", "base64 0.22.1",
"bytes", "bytes",
"futures-core", "h2 0.4.4",
"futures-util", "http 1.1.0",
"h2 0.3.26", "http-body 1.0.0",
"http 0.2.9", "http-body-util",
"http-body 0.4.5", "hyper 1.4.1",
"hyper 0.14.30",
"hyper-timeout", "hyper-timeout",
"hyper-util",
"percent-encoding", "percent-encoding",
"pin-project", "pin-project",
"prost 0.11.9", "prost",
"rustls-native-certs 0.6.2", "rustls-native-certs 0.8.0",
"rustls-pemfile 1.0.2", "rustls-pemfile 2.1.1",
"socket2",
"tokio", "tokio",
"tokio-rustls 0.24.0", "tokio-rustls 0.26.0",
"tokio-stream", "tokio-stream",
"tower", "tower",
"tower-layer", "tower-layer",
@@ -6427,37 +6385,17 @@ dependencies = [
] ]
[[package]] [[package]]
name = "tonic" name = "tonic-build"
version = "0.12.3" version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11"
dependencies = [ dependencies = [
"async-trait", "prettyplease",
"base64 0.22.1",
"bytes",
"http 1.1.0",
"http-body 1.0.0",
"http-body-util",
"percent-encoding",
"pin-project",
"prost 0.13.3",
"tokio-stream",
"tower-layer",
"tower-service",
"tracing",
]
[[package]]
name = "tonic-build"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6fdaae4c2c638bb70fe42803a26fbd6fc6ac8c72f5c59f67ecc2a2dcabf4b07"
dependencies = [
"prettyplease 0.1.25",
"proc-macro2", "proc-macro2",
"prost-build", "prost-build",
"prost-types",
"quote", "quote",
"syn 1.0.109", "syn 2.0.52",
] ]
[[package]] [[package]]
@@ -6863,7 +6801,7 @@ name = "vm_monitor"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"axum 0.7.5", "axum",
"cgroups-rs", "cgroups-rs",
"clap", "clap",
"futures", "futures",
@@ -7094,18 +7032,6 @@ dependencies = [
"rustls-pki-types", "rustls-pki-types",
] ]
[[package]]
name = "which"
version = "4.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7"
dependencies = [
"either",
"home",
"once_cell",
"rustix",
]
[[package]] [[package]]
name = "whoami" name = "whoami"
version = "1.5.1" version = "1.5.1"
@@ -7334,15 +7260,10 @@ version = "0.1.0"
dependencies = [ dependencies = [
"ahash", "ahash",
"anyhow", "anyhow",
"aws-config", "axum",
"aws-runtime", "axum-core",
"aws-sigv4",
"aws-smithy-async",
"aws-smithy-http",
"aws-smithy-types",
"base64 0.21.1", "base64 0.21.1",
"base64ct", "base64ct",
"bitflags 2.4.1",
"bytes", "bytes",
"camino", "camino",
"cc", "cc",
@@ -7370,7 +7291,6 @@ dependencies = [
"hyper 1.4.1", "hyper 1.4.1",
"hyper-util", "hyper-util",
"indexmap 1.9.3", "indexmap 1.9.3",
"itertools 0.10.5",
"itertools 0.12.1", "itertools 0.12.1",
"lazy_static", "lazy_static",
"libc", "libc",
@@ -7382,15 +7302,15 @@ dependencies = [
"num-traits", "num-traits",
"once_cell", "once_cell",
"parquet", "parquet",
"prettyplease",
"proc-macro2", "proc-macro2",
"prost 0.11.9", "prost",
"quote", "quote",
"rand 0.8.5", "rand 0.8.5",
"regex", "regex",
"regex-automata 0.4.3", "regex-automata 0.4.3",
"regex-syntax 0.8.2", "regex-syntax 0.8.2",
"reqwest 0.12.4", "reqwest 0.12.4",
"rustls 0.21.11",
"scopeguard", "scopeguard",
"serde", "serde",
"serde_json", "serde_json",
@@ -7406,14 +7326,14 @@ dependencies = [
"time", "time",
"time-macros", "time-macros",
"tokio", "tokio",
"tokio-rustls 0.24.0", "tokio-stream",
"tokio-util", "tokio-util",
"toml_edit", "toml_edit",
"tonic",
"tower", "tower",
"tracing", "tracing",
"tracing-core", "tracing-core",
"url", "url",
"uuid",
"zeroize", "zeroize",
"zstd", "zstd",
"zstd-safe", "zstd-safe",

View File

@@ -53,7 +53,7 @@ azure_storage_blobs = { version = "0.19", default-features = false, features = [
flate2 = "1.0.26" flate2 = "1.0.26"
async-stream = "0.3" async-stream = "0.3"
async-trait = "0.1" async-trait = "0.1"
aws-config = { version = "1.5", default-features = false, features=["rustls"] } aws-config = { version = "1.5", default-features = false, features=["rustls", "sso"] }
aws-sdk-s3 = "1.52" aws-sdk-s3 = "1.52"
aws-sdk-iam = "1.46.0" aws-sdk-iam = "1.46.0"
aws-smithy-async = { version = "1.2.1", default-features = false, features=["rt-tokio"] } aws-smithy-async = { version = "1.2.1", default-features = false, features=["rt-tokio"] }
@@ -130,7 +130,7 @@ pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
pin-project-lite = "0.2" pin-project-lite = "0.2"
procfs = "0.16" procfs = "0.16"
prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
prost = "0.11" prost = "0.13"
rand = "0.8" rand = "0.8"
redis = { version = "0.25.2", features = ["tokio-rustls-comp", "keep-alive"] } redis = { version = "0.25.2", features = ["tokio-rustls-comp", "keep-alive"] }
regex = "1.10.2" regex = "1.10.2"
@@ -178,7 +178,7 @@ tokio-tar = "0.3"
tokio-util = { version = "0.7.10", features = ["io", "rt"] } tokio-util = { version = "0.7.10", features = ["io", "rt"] }
toml = "0.8" toml = "0.8"
toml_edit = "0.22" toml_edit = "0.22"
tonic = {version = "0.9", features = ["tls", "tls-roots"]} tonic = {version = "0.12.3", features = ["tls", "tls-roots"]}
tower-service = "0.3.2" tower-service = "0.3.2"
tracing = "0.1" tracing = "0.1"
tracing-error = "0.2" tracing-error = "0.2"
@@ -246,7 +246,7 @@ criterion = "0.5.1"
rcgen = "0.12" rcgen = "0.12"
rstest = "0.18" rstest = "0.18"
camino-tempfile = "1.0.2" camino-tempfile = "1.0.2"
tonic-build = "0.9" tonic-build = "0.12"
[patch.crates-io] [patch.crates-io]

View File

@@ -168,27 +168,27 @@ postgres-check-%: postgres-%
neon-pg-ext-%: postgres-% neon-pg-ext-%: postgres-%
+@echo "Compiling neon $*" +@echo "Compiling neon $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-$* mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \ $(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \ -C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install -f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install
+@echo "Compiling neon_walredo $*" +@echo "Compiling neon_walredo $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \ $(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \ -C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile install -f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile install
+@echo "Compiling neon_rmgr $*" +@echo "Compiling neon_rmgr $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$* mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \ $(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$* \ -C $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_rmgr/Makefile install -f $(ROOT_PROJECT_DIR)/pgxn/neon_rmgr/Makefile install
+@echo "Compiling neon_test_utils $*" +@echo "Compiling neon_test_utils $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \ $(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \ -C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile install -f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile install
+@echo "Compiling neon_utils $*" +@echo "Compiling neon_utils $*"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-utils-$*
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \ $(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \ -C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile install -f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile install
@@ -220,7 +220,7 @@ neon-pg-clean-ext-%:
walproposer-lib: neon-pg-ext-v17 walproposer-lib: neon-pg-ext-v17
+@echo "Compiling walproposer-lib" +@echo "Compiling walproposer-lib"
mkdir -p $(POSTGRES_INSTALL_DIR)/build/walproposer-lib mkdir -p $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \ $(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \
-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \ -C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile walproposer-lib -f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile walproposer-lib
cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
@@ -333,7 +333,7 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
# Indent pxgn/neon. # Indent pxgn/neon.
.PHONY: neon-pgindent .PHONY: neon-pgindent
neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17 neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \ $(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \
FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \ FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \
INDENT=$(POSTGRES_INSTALL_DIR)/build/v17/src/tools/pg_bsd_indent/pg_bsd_indent \ INDENT=$(POSTGRES_INSTALL_DIR)/build/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \ PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \

View File

@@ -1075,6 +1075,20 @@ RUN set -e \
&& make -j $(nproc) dist_man_MANS= \ && make -j $(nproc) dist_man_MANS= \
&& make install dist_man_MANS= && make install dist_man_MANS=
#########################################################################################
#
# Compile the Neon-specific `local_proxy` binary
#
#########################################################################################
FROM $REPOSITORY/$IMAGE:$TAG AS local_proxy
ARG BUILD_TAG
ENV BUILD_TAG=$BUILD_TAG
USER nonroot
# Copy entire project to get Cargo.* files with proper dependencies for the whole project
COPY --chown=nonroot . .
RUN mold -run cargo build --locked --profile release-line-debug-size-lto --bin local_proxy
######################################################################################### #########################################################################################
# #
# Layers "postgres-exporter" and "sql-exporter" # Layers "postgres-exporter" and "sql-exporter"
@@ -1213,6 +1227,10 @@ COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-deb
COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer COPY --from=pgbouncer /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer
COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini
# local_proxy and its config
COPY --from=local_proxy --chown=postgres /home/nonroot/target/release-line-debug-size-lto/local_proxy /usr/local/bin/local_proxy
RUN mkdir -p /etc/local_proxy && chown postgres:postgres /etc/local_proxy
# Metrics exporter binaries and configuration files # Metrics exporter binaries and configuration files
COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter
COPY --from=sql-exporter /bin/sql_exporter /bin/sql_exporter COPY --from=sql-exporter /bin/sql_exporter /bin/sql_exporter

View File

@@ -19,6 +19,10 @@ commands:
user: postgres user: postgres
sysvInitAction: respawn sysvInitAction: respawn
shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini' shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini'
- name: local_proxy
user: postgres
sysvInitAction: respawn
shell: '/usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
- name: postgres-exporter - name: postgres-exporter
user: nobody user: nobody
sysvInitAction: respawn sysvInitAction: respawn

View File

@@ -11,7 +11,7 @@ testing = []
[dependencies] [dependencies]
anyhow.workspace = true anyhow.workspace = true
# camino.workspace = true camino.workspace = true
chrono.workspace = true chrono.workspace = true
cfg-if.workspace = true cfg-if.workspace = true
clap.workspace = true clap.workspace = true

View File

@@ -402,8 +402,7 @@ fn start_postgres(
) -> Result<(Option<PostgresHandle>, StartPostgresResult)> { ) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
// We got all we need, update the state. // We got all we need, update the state.
let mut state = compute.state.lock().unwrap(); let mut state = compute.state.lock().unwrap();
state.status = ComputeStatus::Init; state.set_status(ComputeStatus::Init, &compute.state_changed);
compute.state_changed.notify_all();
info!( info!(
"running compute with features: {:?}", "running compute with features: {:?}",

View File

@@ -34,6 +34,7 @@ use nix::sys::signal::{kill, Signal};
use remote_storage::{DownloadError, RemotePath}; use remote_storage::{DownloadError, RemotePath};
use crate::checker::create_availability_check_data; use crate::checker::create_availability_check_data;
use crate::local_proxy;
use crate::logger::inlinify; use crate::logger::inlinify;
use crate::pg_helpers::*; use crate::pg_helpers::*;
use crate::spec::*; use crate::spec::*;
@@ -108,6 +109,18 @@ impl ComputeState {
metrics: ComputeMetrics::default(), metrics: ComputeMetrics::default(),
} }
} }
pub fn set_status(&mut self, status: ComputeStatus, state_changed: &Condvar) {
let prev = self.status;
info!("Changing compute status from {} to {}", prev, status);
self.status = status;
state_changed.notify_all();
}
pub fn set_failed_status(&mut self, err: anyhow::Error, state_changed: &Condvar) {
self.error = Some(format!("{err:?}"));
self.set_status(ComputeStatus::Failed, state_changed);
}
} }
impl Default for ComputeState { impl Default for ComputeState {
@@ -302,15 +315,12 @@ impl ComputeNode {
pub fn set_status(&self, status: ComputeStatus) { pub fn set_status(&self, status: ComputeStatus) {
let mut state = self.state.lock().unwrap(); let mut state = self.state.lock().unwrap();
state.status = status; state.set_status(status, &self.state_changed);
self.state_changed.notify_all();
} }
pub fn set_failed_status(&self, err: anyhow::Error) { pub fn set_failed_status(&self, err: anyhow::Error) {
let mut state = self.state.lock().unwrap(); let mut state = self.state.lock().unwrap();
state.error = Some(format!("{err:?}")); state.set_failed_status(err, &self.state_changed);
state.status = ComputeStatus::Failed;
self.state_changed.notify_all();
} }
pub fn get_status(&self) -> ComputeStatus { pub fn get_status(&self) -> ComputeStatus {
@@ -880,12 +890,19 @@ impl ComputeNode {
.context("apply_config handle_grants")?; .context("apply_config handle_grants")?;
handle_extensions(spec, &mut client).context("apply_config handle_extensions")?; handle_extensions(spec, &mut client).context("apply_config handle_extensions")?;
handle_extension_neon(&mut client).context("apply_config handle_extension_neon")?; handle_extension_neon(&mut client).context("apply_config handle_extension_neon")?;
handle_jwt_extension(spec, &mut client, connstr.as_str())
.context("apply_config handle_jwt_extension")?;
create_availability_check_data(&mut client) create_availability_check_data(&mut client)
.context("apply_config create_availability_check_data")?; .context("apply_config create_availability_check_data")?;
// 'Close' connection // 'Close' connection
drop(client); drop(client);
if let Some(ref local_proxy) = spec.local_proxy_config {
info!("configuring local_proxy");
local_proxy::configure(local_proxy).context("apply_config local_proxy")?;
}
// Run migrations separately to not hold up cold starts // Run migrations separately to not hold up cold starts
thread::spawn(move || { thread::spawn(move || {
let mut connstr = connstr.clone(); let mut connstr = connstr.clone();
@@ -936,6 +953,19 @@ impl ComputeNode {
}); });
} }
if let Some(ref local_proxy) = spec.local_proxy_config {
info!("configuring local_proxy");
// Spawn a thread to do the configuration,
// so that we don't block the main thread that starts Postgres.
let local_proxy = local_proxy.clone();
let _handle = Some(thread::spawn(move || {
if let Err(err) = local_proxy::configure(&local_proxy) {
error!("error while configuring local_proxy: {err:?}");
}
}));
}
// Write new config // Write new config
let pgdata_path = Path::new(&self.pgdata); let pgdata_path = Path::new(&self.pgdata);
let postgresql_conf_path = pgdata_path.join("postgresql.conf"); let postgresql_conf_path = pgdata_path.join("postgresql.conf");
@@ -964,6 +994,7 @@ impl ComputeNode {
)?; )?;
handle_extensions(&spec, &mut client)?; handle_extensions(&spec, &mut client)?;
handle_extension_neon(&mut client)?; handle_extension_neon(&mut client)?;
handle_jwt_extension(&spec, &mut client, self.connstr.as_str())?;
// We can skip handle_migrations here because a new migration can only appear // We can skip handle_migrations here because a new migration can only appear
// if we have a new version of the compute_ctl binary, which can only happen // if we have a new version of the compute_ctl binary, which can only happen
// if compute got restarted, in which case we'll end up inside of apply_config // if compute got restarted, in which case we'll end up inside of apply_config
@@ -1023,6 +1054,19 @@ impl ComputeNode {
}); });
} }
if let Some(local_proxy) = &pspec.spec.local_proxy_config {
info!("configuring local_proxy");
// Spawn a thread to do the configuration,
// so that we don't block the main thread that starts Postgres.
let local_proxy = local_proxy.clone();
let _handle = thread::spawn(move || {
if let Err(err) = local_proxy::configure(&local_proxy) {
error!("error while configuring local_proxy: {err:?}");
}
});
}
info!( info!(
"start_compute spec.remote_extensions {:?}", "start_compute spec.remote_extensions {:?}",
pspec.spec.remote_extensions pspec.spec.remote_extensions

View File

@@ -24,8 +24,7 @@ fn configurator_main_loop(compute: &Arc<ComputeNode>) {
// Re-check the status after waking up // Re-check the status after waking up
if state.status == ComputeStatus::ConfigurationPending { if state.status == ComputeStatus::ConfigurationPending {
info!("got configuration request"); info!("got configuration request");
state.status = ComputeStatus::Configuration; state.set_status(ComputeStatus::Configuration, &compute.state_changed);
compute.state_changed.notify_all();
drop(state); drop(state);
let mut new_status = ComputeStatus::Failed; let mut new_status = ComputeStatus::Failed;

View File

@@ -264,72 +264,67 @@ async fn handle_configure_request(
let body_bytes = hyper::body::to_bytes(req.into_body()).await.unwrap(); let body_bytes = hyper::body::to_bytes(req.into_body()).await.unwrap();
let spec_raw = String::from_utf8(body_bytes.to_vec()).unwrap(); let spec_raw = String::from_utf8(body_bytes.to_vec()).unwrap();
match serde_json::from_str::<ConfigurationRequest>(&spec_raw) { if let Ok(request) = serde_json::from_str::<ConfigurationRequest>(&spec_raw) {
Ok(request) => { let spec = request.spec;
let spec = request.spec;
let parsed_spec = match ParsedSpec::try_from(spec) { let parsed_spec = match ParsedSpec::try_from(spec) {
Ok(ps) => ps, Ok(ps) => ps,
Err(msg) => return Err((msg, StatusCode::BAD_REQUEST)), Err(msg) => return Err((msg, StatusCode::BAD_REQUEST)),
}; };
// XXX: wrap state update under lock in code blocks. Otherwise, // XXX: wrap state update under lock in code blocks. Otherwise,
// we will try to `Send` `mut state` into the spawned thread // we will try to `Send` `mut state` into the spawned thread
// bellow, which will cause error: // bellow, which will cause error:
// ``` // ```
// error: future cannot be sent between threads safely // error: future cannot be sent between threads safely
// ``` // ```
{ {
let mut state = compute.state.lock().unwrap(); let mut state = compute.state.lock().unwrap();
if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running { if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
let msg = format!( let msg = format!(
"invalid compute status for configuration request: {:?}", "invalid compute status for configuration request: {:?}",
state.status.clone() state.status.clone()
); );
return Err((msg, StatusCode::PRECONDITION_FAILED)); return Err((msg, StatusCode::PRECONDITION_FAILED));
}
state.pspec = Some(parsed_spec);
state.set_status(ComputeStatus::ConfigurationPending, &compute.state_changed);
drop(state);
info!("set new spec and notified waiters");
}
// Spawn a blocking thread to wait for compute to become Running.
// This is needed to do not block the main pool of workers and
// be able to serve other requests while some particular request
// is waiting for compute to finish configuration.
let c = compute.clone();
task::spawn_blocking(move || {
let mut state = c.state.lock().unwrap();
while state.status != ComputeStatus::Running {
state = c.state_changed.wait(state).unwrap();
info!(
"waiting for compute to become Running, current status: {:?}",
state.status
);
if state.status == ComputeStatus::Failed {
let err = state.error.as_ref().map_or("unknown error", |x| x);
let msg = format!("compute configuration failed: {:?}", err);
return Err((msg, StatusCode::INTERNAL_SERVER_ERROR));
} }
state.pspec = Some(parsed_spec);
state.status = ComputeStatus::ConfigurationPending;
compute.state_changed.notify_all();
drop(state);
info!("set new spec and notified waiters");
} }
// Spawn a blocking thread to wait for compute to become Running. Ok(())
// This is needed to do not block the main pool of workers and })
// be able to serve other requests while some particular request .await
// is waiting for compute to finish configuration. .unwrap()?;
let c = compute.clone();
task::spawn_blocking(move || {
let mut state = c.state.lock().unwrap();
while state.status != ComputeStatus::Running {
state = c.state_changed.wait(state).unwrap();
info!(
"waiting for compute to become Running, current status: {:?}",
state.status
);
if state.status == ComputeStatus::Failed { // Return current compute state if everything went well.
let err = state.error.as_ref().map_or("unknown error", |x| x); let state = compute.state.lock().unwrap().clone();
let msg = format!("compute configuration failed: {:?}", err); let status_response = status_response_from_state(&state);
return Err((msg, StatusCode::INTERNAL_SERVER_ERROR)); Ok(serde_json::to_string(&status_response).unwrap())
} } else {
} Err(("invalid spec".to_string(), StatusCode::BAD_REQUEST))
Ok(())
})
.await
.unwrap()?;
// Return current compute state if everything went well.
let state = compute.state.lock().unwrap().clone();
let status_response = status_response_from_state(&state);
Ok(serde_json::to_string(&status_response).unwrap())
}
Err(err) => {
error!("could not parse spec: {spec_raw}");
Err((format!("invalid spec: {err:?}"), StatusCode::BAD_REQUEST))
}
} }
} }
@@ -366,15 +361,15 @@ async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (Str
} }
if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running { if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
let msg = format!( let msg = format!(
"invalid compute status for termination request: {:?}", "invalid compute status for termination request: {}",
state.status.clone() state.status
); );
return Err((msg, StatusCode::PRECONDITION_FAILED)); return Err((msg, StatusCode::PRECONDITION_FAILED));
} }
state.status = ComputeStatus::TerminationPending; state.set_status(ComputeStatus::TerminationPending, &compute.state_changed);
compute.state_changed.notify_all();
drop(state); drop(state);
} }
forward_termination_signal(); forward_termination_signal();
info!("sent signal and notified waiters"); info!("sent signal and notified waiters");
@@ -388,7 +383,8 @@ async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (Str
while state.status != ComputeStatus::Terminated { while state.status != ComputeStatus::Terminated {
state = c.state_changed.wait(state).unwrap(); state = c.state_changed.wait(state).unwrap();
info!( info!(
"waiting for compute to become Terminated, current status: {:?}", "waiting for compute to become {}, current status: {:?}",
ComputeStatus::Terminated,
state.status state.status
); );
} }

View File

@@ -15,7 +15,7 @@ pub mod catalog;
pub mod compute; pub mod compute;
pub mod disk_quota; pub mod disk_quota;
pub mod extension_server; pub mod extension_server;
// pub mod local_proxy; pub mod local_proxy;
pub mod lsn_lease; pub mod lsn_lease;
mod migration; mod migration;
pub mod monitor; pub mod monitor;

View File

@@ -1,3 +1,4 @@
use std::collections::HashSet;
use std::fs::File; use std::fs::File;
use std::path::Path; use std::path::Path;
use std::str::FromStr; use std::str::FromStr;
@@ -189,6 +190,15 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
let mut xact = client.transaction()?; let mut xact = client.transaction()?;
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?; let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
let mut jwks_roles = HashSet::new();
if let Some(local_proxy) = &spec.local_proxy_config {
for jwks_setting in local_proxy.jwks.iter().flatten() {
for role_name in &jwks_setting.role_names {
jwks_roles.insert(role_name.clone());
}
}
}
// Print a list of existing Postgres roles (only in debug mode) // Print a list of existing Postgres roles (only in debug mode)
if span_enabled!(Level::INFO) { if span_enabled!(Level::INFO) {
let mut vec = Vec::new(); let mut vec = Vec::new();
@@ -308,6 +318,9 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
"CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser", "CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser",
name.pg_quote() name.pg_quote()
); );
if jwks_roles.contains(name.as_str()) {
query = format!("CREATE ROLE {}", name.pg_quote());
}
info!("running role create query: '{}'", &query); info!("running role create query: '{}'", &query);
query.push_str(&role.to_pg_options()); query.push_str(&role.to_pg_options());
xact.execute(query.as_str(), &[])?; xact.execute(query.as_str(), &[])?;
@@ -718,7 +731,48 @@ pub fn handle_extensions(spec: &ComputeSpec, client: &mut Client) -> Result<()>
client.simple_query(query)?; client.simple_query(query)?;
} }
} }
Ok(())
}
/// Create pg_session_jwt in all databases if configured
#[instrument(skip_all)]
pub fn handle_jwt_extension(spec: &ComputeSpec, client: &mut Client, connstr: &str) -> Result<()> {
if let Some(local_proxy) = &spec.local_proxy_config {
if let Some(jwks_list) = &local_proxy.jwks {
if !jwks_list.is_empty() {
info!("enabling pg_session_jwt extension");
let existing_dbs = get_existing_dbs(client)?;
for db in &spec.cluster.databases {
match existing_dbs.get(&db.name) {
Some(pg_db) => {
if pg_db.restrict_conn || pg_db.invalid {
info!(
"skipping extension for db {} (invalid: {}, connections not allowed: {})",
db.name, pg_db.invalid, pg_db.restrict_conn
);
continue;
}
}
None => {
bail!(
"database {} doesn't exist in Postgres after handle_databases()",
db.name
);
}
}
let mut conf = Config::from_str(connstr)?;
conf.dbname(&db.name);
let mut db_client = conf.connect(NoTls)?;
let query = "CREATE EXTENSION IF NOT EXISTS pg_session_jwt";
info!("creating pg_session_jwt extension with query: {}", query);
db_client.simple_query(query)?;
}
}
}
}
Ok(()) Ok(())
} }

View File

@@ -1,5 +1,7 @@
//! Structs representing the JSON formats used in the compute_ctl's HTTP API. //! Structs representing the JSON formats used in the compute_ctl's HTTP API.
use std::fmt::Display;
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize, Serializer}; use serde::{Deserialize, Serialize, Serializer};
@@ -58,6 +60,21 @@ pub enum ComputeStatus {
Terminated, Terminated,
} }
impl Display for ComputeStatus {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ComputeStatus::Empty => f.write_str("empty"),
ComputeStatus::ConfigurationPending => f.write_str("configuration-pending"),
ComputeStatus::Init => f.write_str("init"),
ComputeStatus::Running => f.write_str("running"),
ComputeStatus::Configuration => f.write_str("configuration"),
ComputeStatus::Failed => f.write_str("failed"),
ComputeStatus::TerminationPending => f.write_str("termination-pending"),
ComputeStatus::Terminated => f.write_str("terminated"),
}
}
}
fn rfc3339_serialize<S>(x: &Option<DateTime<Utc>>, s: S) -> Result<S::Ok, S::Error> fn rfc3339_serialize<S>(x: &Option<DateTime<Utc>>, s: S) -> Result<S::Ok, S::Error>
where where
S: Serializer, S: Serializer,

View File

@@ -109,7 +109,6 @@ pub struct ComputeSpec {
/// Local Proxy configuration used for JWT authentication /// Local Proxy configuration used for JWT authentication
#[serde(default)] #[serde(default)]
#[serde(skip_serializing_if = "Option::is_none")]
pub local_proxy_config: Option<LocalProxySpec>, pub local_proxy_config: Option<LocalProxySpec>,
} }
@@ -283,11 +282,13 @@ pub struct GenericOption {
/// declare a `trait` on it. /// declare a `trait` on it.
pub type GenericOptions = Option<Vec<GenericOption>>; pub type GenericOptions = Option<Vec<GenericOption>>;
/// Configured the local-proxy application with the relevant JWKS and roles it should /// Configured the local_proxy application with the relevant JWKS and roles it should
/// use for authorizing connect requests using JWT. /// use for authorizing connect requests using JWT.
#[derive(Clone, Debug, Deserialize, Serialize)] #[derive(Clone, Debug, Deserialize, Serialize)]
pub struct LocalProxySpec { pub struct LocalProxySpec {
pub jwks: Vec<JwksSettings>, #[serde(default)]
#[serde(skip_serializing_if = "Option::is_none")]
pub jwks: Option<Vec<JwksSettings>>,
} }
#[derive(Clone, Debug, Deserialize, Serialize)] #[derive(Clone, Debug, Deserialize, Serialize)]

View File

@@ -14,7 +14,7 @@ use std::time::SystemTime;
use super::REMOTE_STORAGE_PREFIX_SEPARATOR; use super::REMOTE_STORAGE_PREFIX_SEPARATOR;
use anyhow::Result; use anyhow::Result;
use azure_core::request_options::{MaxResults, Metadata, Range}; use azure_core::request_options::{IfMatchCondition, MaxResults, Metadata, Range};
use azure_core::{Continuable, RetryOptions}; use azure_core::{Continuable, RetryOptions};
use azure_identity::DefaultAzureCredential; use azure_identity::DefaultAzureCredential;
use azure_storage::StorageCredentials; use azure_storage::StorageCredentials;
@@ -33,10 +33,10 @@ use tracing::debug;
use utils::backoff; use utils::backoff;
use crate::metrics::{start_measuring_requests, AttemptOutcome, RequestKind}; use crate::metrics::{start_measuring_requests, AttemptOutcome, RequestKind};
use crate::ListingObject;
use crate::{ use crate::{
config::AzureConfig, error::Cancelled, ConcurrencyLimiter, Download, DownloadError, Listing, config::AzureConfig, error::Cancelled, ConcurrencyLimiter, Download, DownloadError,
ListingMode, RemotePath, RemoteStorage, StorageMetadata, TimeTravelError, TimeoutOrCancel, DownloadOpts, Listing, ListingMode, ListingObject, RemotePath, RemoteStorage, StorageMetadata,
TimeTravelError, TimeoutOrCancel,
}; };
pub struct AzureBlobStorage { pub struct AzureBlobStorage {
@@ -259,6 +259,7 @@ fn to_download_error(error: azure_core::Error) -> DownloadError {
if let Some(http_err) = error.as_http_error() { if let Some(http_err) = error.as_http_error() {
match http_err.status() { match http_err.status() {
StatusCode::NotFound => DownloadError::NotFound, StatusCode::NotFound => DownloadError::NotFound,
StatusCode::NotModified => DownloadError::Unmodified,
StatusCode::BadRequest => DownloadError::BadInput(anyhow::Error::new(error)), StatusCode::BadRequest => DownloadError::BadInput(anyhow::Error::new(error)),
_ => DownloadError::Other(anyhow::Error::new(error)), _ => DownloadError::Other(anyhow::Error::new(error)),
} }
@@ -484,32 +485,23 @@ impl RemoteStorage for AzureBlobStorage {
async fn download( async fn download(
&self, &self,
from: &RemotePath, from: &RemotePath,
cancel: &CancellationToken, opts: &DownloadOpts,
) -> Result<Download, DownloadError> {
let blob_client = self.client.blob_client(self.relative_path_to_name(from));
let builder = blob_client.get();
self.download_for_builder(builder, cancel).await
}
async fn download_byte_range(
&self,
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken, cancel: &CancellationToken,
) -> Result<Download, DownloadError> { ) -> Result<Download, DownloadError> {
let blob_client = self.client.blob_client(self.relative_path_to_name(from)); let blob_client = self.client.blob_client(self.relative_path_to_name(from));
let mut builder = blob_client.get(); let mut builder = blob_client.get();
let range: Range = if let Some(end_exclusive) = end_exclusive { if let Some(ref etag) = opts.etag {
(start_inclusive..end_exclusive).into() builder = builder.if_match(IfMatchCondition::NotMatch(etag.to_string()))
} else { }
(start_inclusive..).into()
}; if let Some((start, end)) = opts.byte_range() {
builder = builder.range(range); builder = builder.range(match end {
Some(end) => Range::Range(start..end),
None => Range::RangeFrom(start..),
});
}
self.download_for_builder(builder, cancel).await self.download_for_builder(builder, cancel).await
} }

View File

@@ -5,6 +5,8 @@ pub enum DownloadError {
BadInput(anyhow::Error), BadInput(anyhow::Error),
/// The file was not found in the remote storage. /// The file was not found in the remote storage.
NotFound, NotFound,
/// The caller provided an ETag, and the file was not modified.
Unmodified,
/// A cancellation token aborted the download, typically during /// A cancellation token aborted the download, typically during
/// tenant detach or process shutdown. /// tenant detach or process shutdown.
Cancelled, Cancelled,
@@ -24,6 +26,7 @@ impl std::fmt::Display for DownloadError {
write!(f, "Failed to download a remote file due to user input: {e}") write!(f, "Failed to download a remote file due to user input: {e}")
} }
DownloadError::NotFound => write!(f, "No file found for the remote object id given"), DownloadError::NotFound => write!(f, "No file found for the remote object id given"),
DownloadError::Unmodified => write!(f, "File was not modified"),
DownloadError::Cancelled => write!(f, "Cancelled, shutting down"), DownloadError::Cancelled => write!(f, "Cancelled, shutting down"),
DownloadError::Timeout => write!(f, "timeout"), DownloadError::Timeout => write!(f, "timeout"),
DownloadError::Other(e) => write!(f, "Failed to download a remote file: {e:?}"), DownloadError::Other(e) => write!(f, "Failed to download a remote file: {e:?}"),
@@ -38,7 +41,7 @@ impl DownloadError {
pub fn is_permanent(&self) -> bool { pub fn is_permanent(&self) -> bool {
use DownloadError::*; use DownloadError::*;
match self { match self {
BadInput(_) | NotFound | Cancelled => true, BadInput(_) | NotFound | Unmodified | Cancelled => true,
Timeout | Other(_) => false, Timeout | Other(_) => false,
} }
} }

View File

@@ -19,7 +19,8 @@ mod simulate_failures;
mod support; mod support;
use std::{ use std::{
collections::HashMap, fmt::Debug, num::NonZeroU32, pin::Pin, sync::Arc, time::SystemTime, collections::HashMap, fmt::Debug, num::NonZeroU32, ops::Bound, pin::Pin, sync::Arc,
time::SystemTime,
}; };
use anyhow::Context; use anyhow::Context;
@@ -161,6 +162,63 @@ pub struct Listing {
pub keys: Vec<ListingObject>, pub keys: Vec<ListingObject>,
} }
/// Options for downloads. The default value is a plain GET.
pub struct DownloadOpts {
/// If given, returns [`DownloadError::Unmodified`] if the object still has
/// the same ETag (using If-None-Match).
pub etag: Option<Etag>,
/// The start of the byte range to download, or unbounded.
pub byte_start: Bound<u64>,
/// The end of the byte range to download, or unbounded. Must be after the
/// start bound.
pub byte_end: Bound<u64>,
}
impl Default for DownloadOpts {
fn default() -> Self {
Self {
etag: Default::default(),
byte_start: Bound::Unbounded,
byte_end: Bound::Unbounded,
}
}
}
impl DownloadOpts {
/// Returns the byte range with inclusive start and exclusive end, or None
/// if unbounded.
pub fn byte_range(&self) -> Option<(u64, Option<u64>)> {
if self.byte_start == Bound::Unbounded && self.byte_end == Bound::Unbounded {
return None;
}
let start = match self.byte_start {
Bound::Excluded(i) => i + 1,
Bound::Included(i) => i,
Bound::Unbounded => 0,
};
let end = match self.byte_end {
Bound::Excluded(i) => Some(i),
Bound::Included(i) => Some(i + 1),
Bound::Unbounded => None,
};
if let Some(end) = end {
assert!(start < end, "range end {end} at or before start {start}");
}
Some((start, end))
}
/// Returns the byte range as an RFC 2616 Range header value with inclusive
/// bounds, or None if unbounded.
pub fn byte_range_header(&self) -> Option<String> {
self.byte_range()
.map(|(start, end)| (start, end.map(|end| end - 1))) // make end inclusive
.map(|(start, end)| match end {
Some(end) => format!("bytes={start}-{end}"),
None => format!("bytes={start}-"),
})
}
}
/// Storage (potentially remote) API to manage its state. /// Storage (potentially remote) API to manage its state.
/// This storage tries to be unaware of any layered repository context, /// This storage tries to be unaware of any layered repository context,
/// providing basic CRUD operations for storage files. /// providing basic CRUD operations for storage files.
@@ -245,21 +303,7 @@ pub trait RemoteStorage: Send + Sync + 'static {
async fn download( async fn download(
&self, &self,
from: &RemotePath, from: &RemotePath,
cancel: &CancellationToken, opts: &DownloadOpts,
) -> Result<Download, DownloadError>;
/// Streams a given byte range of the remote storage entry contents.
///
/// The returned download stream will obey initial timeout and cancellation signal by erroring
/// on whichever happens first. Only one of the reasons will fail the stream, which is usually
/// enough for `tokio::io::copy_buf` usage. If needed the error can be filtered out.
///
/// Returns the metadata, if any was stored with the file previously.
async fn download_byte_range(
&self,
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken, cancel: &CancellationToken,
) -> Result<Download, DownloadError>; ) -> Result<Download, DownloadError>;
@@ -401,43 +445,18 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
} }
} }
/// See [`RemoteStorage::download`]
pub async fn download( pub async fn download(
&self, &self,
from: &RemotePath, from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken, cancel: &CancellationToken,
) -> Result<Download, DownloadError> { ) -> Result<Download, DownloadError> {
match self { match self {
Self::LocalFs(s) => s.download(from, cancel).await, Self::LocalFs(s) => s.download(from, opts, cancel).await,
Self::AwsS3(s) => s.download(from, cancel).await, Self::AwsS3(s) => s.download(from, opts, cancel).await,
Self::AzureBlob(s) => s.download(from, cancel).await, Self::AzureBlob(s) => s.download(from, opts, cancel).await,
Self::Unreliable(s) => s.download(from, cancel).await, Self::Unreliable(s) => s.download(from, opts, cancel).await,
}
}
pub async fn download_byte_range(
&self,
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
match self {
Self::LocalFs(s) => {
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
.await
}
Self::AwsS3(s) => {
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
.await
}
Self::AzureBlob(s) => {
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
.await
}
Self::Unreliable(s) => {
s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
.await
}
} }
} }
@@ -562,20 +581,6 @@ impl GenericRemoteStorage {
}) })
} }
/// Downloads the storage object into the `to_path` provided.
/// `byte_range` could be specified to dowload only a part of the file, if needed.
pub async fn download_storage_object(
&self,
byte_range: Option<(u64, Option<u64>)>,
from: &RemotePath,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
match byte_range {
Some((start, end)) => self.download_byte_range(from, start, end, cancel).await,
None => self.download(from, cancel).await,
}
}
/// The name of the bucket/container/etc. /// The name of the bucket/container/etc.
pub fn bucket_name(&self) -> Option<&str> { pub fn bucket_name(&self) -> Option<&str> {
match self { match self {
@@ -649,6 +654,76 @@ impl ConcurrencyLimiter {
mod tests { mod tests {
use super::*; use super::*;
/// DownloadOpts::byte_range() should generate (inclusive, exclusive) ranges
/// with optional end bound, or None when unbounded.
#[test]
fn download_opts_byte_range() {
// Consider using test_case or a similar table-driven test framework.
let cases = [
// (byte_start, byte_end, expected)
(Bound::Unbounded, Bound::Unbounded, None),
(Bound::Unbounded, Bound::Included(7), Some((0, Some(8)))),
(Bound::Unbounded, Bound::Excluded(7), Some((0, Some(7)))),
(Bound::Included(3), Bound::Unbounded, Some((3, None))),
(Bound::Included(3), Bound::Included(7), Some((3, Some(8)))),
(Bound::Included(3), Bound::Excluded(7), Some((3, Some(7)))),
(Bound::Excluded(3), Bound::Unbounded, Some((4, None))),
(Bound::Excluded(3), Bound::Included(7), Some((4, Some(8)))),
(Bound::Excluded(3), Bound::Excluded(7), Some((4, Some(7)))),
// 1-sized ranges are fine, 0 aren't and will panic (separate test).
(Bound::Included(3), Bound::Included(3), Some((3, Some(4)))),
(Bound::Included(3), Bound::Excluded(4), Some((3, Some(4)))),
];
for (byte_start, byte_end, expect) in cases {
let opts = DownloadOpts {
byte_start,
byte_end,
..Default::default()
};
let result = opts.byte_range();
assert_eq!(
result, expect,
"byte_start={byte_start:?} byte_end={byte_end:?}"
);
// Check generated HTTP header, which uses an inclusive range.
let expect_header = expect.map(|(start, end)| match end {
Some(end) => format!("bytes={start}-{}", end - 1), // inclusive end
None => format!("bytes={start}-"),
});
assert_eq!(
opts.byte_range_header(),
expect_header,
"byte_start={byte_start:?} byte_end={byte_end:?}"
);
}
}
/// DownloadOpts::byte_range() zero-sized byte range should panic.
#[test]
#[should_panic]
fn download_opts_byte_range_zero() {
DownloadOpts {
byte_start: Bound::Included(3),
byte_end: Bound::Excluded(3),
..Default::default()
}
.byte_range();
}
/// DownloadOpts::byte_range() negative byte range should panic.
#[test]
#[should_panic]
fn download_opts_byte_range_negative() {
DownloadOpts {
byte_start: Bound::Included(3),
byte_end: Bound::Included(2),
..Default::default()
}
.byte_range();
}
#[test] #[test]
fn test_object_name() { fn test_object_name() {
let k = RemotePath::new(Utf8Path::new("a/b/c")).unwrap(); let k = RemotePath::new(Utf8Path::new("a/b/c")).unwrap();

View File

@@ -23,8 +23,8 @@ use tokio_util::{io::ReaderStream, sync::CancellationToken};
use utils::crashsafe::path_with_suffix_extension; use utils::crashsafe::path_with_suffix_extension;
use crate::{ use crate::{
Download, DownloadError, Listing, ListingMode, ListingObject, RemotePath, TimeTravelError, Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject, RemotePath,
TimeoutOrCancel, REMOTE_STORAGE_PREFIX_SEPARATOR, TimeTravelError, TimeoutOrCancel, REMOTE_STORAGE_PREFIX_SEPARATOR,
}; };
use super::{RemoteStorage, StorageMetadata}; use super::{RemoteStorage, StorageMetadata};
@@ -494,61 +494,19 @@ impl RemoteStorage for LocalFs {
async fn download( async fn download(
&self, &self,
from: &RemotePath, from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken, cancel: &CancellationToken,
) -> Result<Download, DownloadError> { ) -> Result<Download, DownloadError> {
let target_path = from.with_base(&self.storage_root); let target_path = from.with_base(&self.storage_root);
let file_metadata = file_metadata(&target_path).await?; let file_metadata = file_metadata(&target_path).await?;
let source = ReaderStream::new(
fs::OpenOptions::new()
.read(true)
.open(&target_path)
.await
.with_context(|| {
format!("Failed to open source file {target_path:?} to use in the download")
})
.map_err(DownloadError::Other)?,
);
let metadata = self
.read_storage_metadata(&target_path)
.await
.map_err(DownloadError::Other)?;
let cancel_or_timeout = crate::support::cancel_or_timeout(self.timeout, cancel.clone());
let source = crate::support::DownloadStream::new(cancel_or_timeout, source);
let etag = mock_etag(&file_metadata); let etag = mock_etag(&file_metadata);
Ok(Download {
metadata,
last_modified: file_metadata
.modified()
.map_err(|e| DownloadError::Other(anyhow::anyhow!(e).context("Reading mtime")))?,
etag,
download_stream: Box::pin(source),
})
}
async fn download_byte_range( if opts.etag.as_ref() == Some(&etag) {
&self, return Err(DownloadError::Unmodified);
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
if let Some(end_exclusive) = end_exclusive {
if end_exclusive <= start_inclusive {
return Err(DownloadError::Other(anyhow::anyhow!("Invalid range, start ({start_inclusive}) is not less than end_exclusive ({end_exclusive:?})")));
};
if start_inclusive == end_exclusive.saturating_sub(1) {
return Err(DownloadError::Other(anyhow::anyhow!("Invalid range, start ({start_inclusive}) and end_exclusive ({end_exclusive:?}) difference is zero bytes")));
}
} }
let target_path = from.with_base(&self.storage_root); let mut file = fs::OpenOptions::new()
let file_metadata = file_metadata(&target_path).await?;
let mut source = tokio::fs::OpenOptions::new()
.read(true) .read(true)
.open(&target_path) .open(&target_path)
.await .await
@@ -557,31 +515,29 @@ impl RemoteStorage for LocalFs {
}) })
.map_err(DownloadError::Other)?; .map_err(DownloadError::Other)?;
let len = source let mut take = file_metadata.len();
.metadata() if let Some((start, end)) = opts.byte_range() {
.await if start > 0 {
.context("query file length") file.seek(io::SeekFrom::Start(start))
.map_err(DownloadError::Other)? .await
.len(); .context("Failed to seek to the range start in a local storage file")
.map_err(DownloadError::Other)?;
}
if let Some(end) = end {
take = end - start;
}
}
source let source = ReaderStream::new(file.take(take));
.seek(io::SeekFrom::Start(start_inclusive))
.await
.context("Failed to seek to the range start in a local storage file")
.map_err(DownloadError::Other)?;
let metadata = self let metadata = self
.read_storage_metadata(&target_path) .read_storage_metadata(&target_path)
.await .await
.map_err(DownloadError::Other)?; .map_err(DownloadError::Other)?;
let source = source.take(end_exclusive.unwrap_or(len) - start_inclusive);
let source = ReaderStream::new(source);
let cancel_or_timeout = crate::support::cancel_or_timeout(self.timeout, cancel.clone()); let cancel_or_timeout = crate::support::cancel_or_timeout(self.timeout, cancel.clone());
let source = crate::support::DownloadStream::new(cancel_or_timeout, source); let source = crate::support::DownloadStream::new(cancel_or_timeout, source);
let etag = mock_etag(&file_metadata);
Ok(Download { Ok(Download {
metadata, metadata,
last_modified: file_metadata last_modified: file_metadata
@@ -683,7 +639,7 @@ mod fs_tests {
use super::*; use super::*;
use camino_tempfile::tempdir; use camino_tempfile::tempdir;
use std::{collections::HashMap, io::Write}; use std::{collections::HashMap, io::Write, ops::Bound};
async fn read_and_check_metadata( async fn read_and_check_metadata(
storage: &LocalFs, storage: &LocalFs,
@@ -692,7 +648,7 @@ mod fs_tests {
) -> anyhow::Result<String> { ) -> anyhow::Result<String> {
let cancel = CancellationToken::new(); let cancel = CancellationToken::new();
let download = storage let download = storage
.download(remote_storage_path, &cancel) .download(remote_storage_path, &DownloadOpts::default(), &cancel)
.await .await
.map_err(|e| anyhow::anyhow!("Download failed: {e}"))?; .map_err(|e| anyhow::anyhow!("Download failed: {e}"))?;
ensure!( ensure!(
@@ -773,8 +729,8 @@ mod fs_tests {
"We should upload and download the same contents" "We should upload and download the same contents"
); );
let non_existing_path = "somewhere/else"; let non_existing_path = RemotePath::new(Utf8Path::new("somewhere/else"))?;
match storage.download(&RemotePath::new(Utf8Path::new(non_existing_path))?, &cancel).await { match storage.download(&non_existing_path, &DownloadOpts::default(), &cancel).await {
Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys
other => panic!("Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"), other => panic!("Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"),
} }
@@ -799,10 +755,12 @@ mod fs_tests {
let (first_part_local, second_part_local) = uploaded_bytes.split_at(3); let (first_part_local, second_part_local) = uploaded_bytes.split_at(3);
let first_part_download = storage let first_part_download = storage
.download_byte_range( .download(
&upload_target, &upload_target,
0, &DownloadOpts {
Some(first_part_local.len() as u64), byte_end: Bound::Excluded(first_part_local.len() as u64),
..Default::default()
},
&cancel, &cancel,
) )
.await?; .await?;
@@ -818,10 +776,15 @@ mod fs_tests {
); );
let second_part_download = storage let second_part_download = storage
.download_byte_range( .download(
&upload_target, &upload_target,
first_part_local.len() as u64, &DownloadOpts {
Some((first_part_local.len() + second_part_local.len()) as u64), byte_start: Bound::Included(first_part_local.len() as u64),
byte_end: Bound::Excluded(
(first_part_local.len() + second_part_local.len()) as u64,
),
..Default::default()
},
&cancel, &cancel,
) )
.await?; .await?;
@@ -837,7 +800,14 @@ mod fs_tests {
); );
let suffix_bytes = storage let suffix_bytes = storage
.download_byte_range(&upload_target, 13, None, &cancel) .download(
&upload_target,
&DownloadOpts {
byte_start: Bound::Included(13),
..Default::default()
},
&cancel,
)
.await? .await?
.download_stream; .download_stream;
let suffix_bytes = aggregate(suffix_bytes).await?; let suffix_bytes = aggregate(suffix_bytes).await?;
@@ -845,7 +815,7 @@ mod fs_tests {
assert_eq!(upload_name, suffix); assert_eq!(upload_name, suffix);
let all_bytes = storage let all_bytes = storage
.download_byte_range(&upload_target, 0, None, &cancel) .download(&upload_target, &DownloadOpts::default(), &cancel)
.await? .await?
.download_stream; .download_stream;
let all_bytes = aggregate(all_bytes).await?; let all_bytes = aggregate(all_bytes).await?;
@@ -856,48 +826,26 @@ mod fs_tests {
} }
#[tokio::test] #[tokio::test]
async fn download_file_range_negative() -> anyhow::Result<()> { #[should_panic(expected = "at or before start")]
let (storage, cancel) = create_storage()?; async fn download_file_range_negative() {
let (storage, cancel) = create_storage().unwrap();
let upload_name = "upload_1"; let upload_name = "upload_1";
let upload_target = upload_dummy_file(&storage, upload_name, None, &cancel).await?; let upload_target = upload_dummy_file(&storage, upload_name, None, &cancel)
.await
.unwrap();
let start = 1_000_000_000; storage
let end = start + 1; .download(
match storage
.download_byte_range(
&upload_target, &upload_target,
start, &DownloadOpts {
Some(end), // exclusive end byte_start: Bound::Included(10),
byte_end: Bound::Excluded(10),
..Default::default()
},
&cancel, &cancel,
) )
.await .await
{ .unwrap();
Ok(_) => panic!("Should not allow downloading wrong ranges"),
Err(e) => {
let error_string = e.to_string();
assert!(error_string.contains("zero bytes"));
assert!(error_string.contains(&start.to_string()));
assert!(error_string.contains(&end.to_string()));
}
}
let start = 10000;
let end = 234;
assert!(start > end, "Should test an incorrect range");
match storage
.download_byte_range(&upload_target, start, Some(end), &cancel)
.await
{
Ok(_) => panic!("Should not allow downloading wrong ranges"),
Err(e) => {
let error_string = e.to_string();
assert!(error_string.contains("Invalid range"));
assert!(error_string.contains(&start.to_string()));
assert!(error_string.contains(&end.to_string()));
}
}
Ok(())
} }
#[tokio::test] #[tokio::test]
@@ -940,10 +888,12 @@ mod fs_tests {
let (first_part_local, _) = uploaded_bytes.split_at(3); let (first_part_local, _) = uploaded_bytes.split_at(3);
let partial_download_with_metadata = storage let partial_download_with_metadata = storage
.download_byte_range( .download(
&upload_target, &upload_target,
0, &DownloadOpts {
Some(first_part_local.len() as u64), byte_end: Bound::Excluded(first_part_local.len() as u64),
..Default::default()
},
&cancel, &cancel,
) )
.await?; .await?;
@@ -1101,7 +1051,13 @@ mod fs_tests {
storage.upload(body, len, &path, None, &cancel).await?; storage.upload(body, len, &path, None, &cancel).await?;
} }
let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?; let read = aggregate(
storage
.download(&path, &DownloadOpts::default(), &cancel)
.await?
.download_stream,
)
.await?;
assert_eq!(body, read); assert_eq!(body, read);
let shorter = Bytes::from_static(b"shorter body"); let shorter = Bytes::from_static(b"shorter body");
@@ -1112,7 +1068,13 @@ mod fs_tests {
storage.upload(body, len, &path, None, &cancel).await?; storage.upload(body, len, &path, None, &cancel).await?;
} }
let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?; let read = aggregate(
storage
.download(&path, &DownloadOpts::default(), &cancel)
.await?
.download_stream,
)
.await?;
assert_eq!(shorter, read); assert_eq!(shorter, read);
Ok(()) Ok(())
} }
@@ -1145,7 +1107,13 @@ mod fs_tests {
storage.upload(body, len, &path, None, &cancel).await?; storage.upload(body, len, &path, None, &cancel).await?;
} }
let read = aggregate(storage.download(&path, &cancel).await?.download_stream).await?; let read = aggregate(
storage
.download(&path, &DownloadOpts::default(), &cancel)
.await?
.download_stream,
)
.await?;
assert_eq!(body, read); assert_eq!(body, read);
Ok(()) Ok(())

View File

@@ -28,6 +28,7 @@ use aws_sdk_s3::{
Client, Client,
}; };
use aws_smithy_async::rt::sleep::TokioSleep; use aws_smithy_async::rt::sleep::TokioSleep;
use http_types::StatusCode;
use aws_smithy_types::{body::SdkBody, DateTime}; use aws_smithy_types::{body::SdkBody, DateTime};
use aws_smithy_types::{byte_stream::ByteStream, date_time::ConversionError}; use aws_smithy_types::{byte_stream::ByteStream, date_time::ConversionError};
@@ -44,8 +45,8 @@ use crate::{
error::Cancelled, error::Cancelled,
metrics::{start_counting_cancelled_wait, start_measuring_requests}, metrics::{start_counting_cancelled_wait, start_measuring_requests},
support::PermitCarrying, support::PermitCarrying,
ConcurrencyLimiter, Download, DownloadError, Listing, ListingMode, ListingObject, RemotePath, ConcurrencyLimiter, Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject,
RemoteStorage, TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE, RemotePath, RemoteStorage, TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE,
REMOTE_STORAGE_PREFIX_SEPARATOR, REMOTE_STORAGE_PREFIX_SEPARATOR,
}; };
@@ -67,6 +68,7 @@ pub struct S3Bucket {
struct GetObjectRequest { struct GetObjectRequest {
bucket: String, bucket: String,
key: String, key: String,
etag: Option<String>,
range: Option<String>, range: Option<String>,
} }
impl S3Bucket { impl S3Bucket {
@@ -248,13 +250,18 @@ impl S3Bucket {
let started_at = start_measuring_requests(kind); let started_at = start_measuring_requests(kind);
let get_object = self let mut builder = self
.client .client
.get_object() .get_object()
.bucket(request.bucket) .bucket(request.bucket)
.key(request.key) .key(request.key)
.set_range(request.range) .set_range(request.range);
.send();
if let Some(etag) = request.etag {
builder = builder.if_none_match(etag);
}
let get_object = builder.send();
let get_object = tokio::select! { let get_object = tokio::select! {
res = get_object => res, res = get_object => res,
@@ -277,6 +284,20 @@ impl S3Bucket {
); );
return Err(DownloadError::NotFound); return Err(DownloadError::NotFound);
} }
Err(SdkError::ServiceError(e))
// aws_smithy_runtime_api::http::response::StatusCode isn't
// re-exported by any aws crates, so just check the numeric
// status against http_types::StatusCode instead of pulling it.
if e.raw().status().as_u16() == StatusCode::NotModified =>
{
// Count an unmodified file as a success.
crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(
kind,
AttemptOutcome::Ok,
started_at,
);
return Err(DownloadError::Unmodified);
}
Err(e) => { Err(e) => {
crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed( crate::metrics::BUCKET_METRICS.req_seconds.observe_elapsed(
kind, kind,
@@ -773,6 +794,7 @@ impl RemoteStorage for S3Bucket {
async fn download( async fn download(
&self, &self,
from: &RemotePath, from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken, cancel: &CancellationToken,
) -> Result<Download, DownloadError> { ) -> Result<Download, DownloadError> {
// if prefix is not none then download file `prefix/from` // if prefix is not none then download file `prefix/from`
@@ -781,33 +803,8 @@ impl RemoteStorage for S3Bucket {
GetObjectRequest { GetObjectRequest {
bucket: self.bucket_name.clone(), bucket: self.bucket_name.clone(),
key: self.relative_path_to_s3_object(from), key: self.relative_path_to_s3_object(from),
range: None, etag: opts.etag.as_ref().map(|e| e.to_string()),
}, range: opts.byte_range_header(),
cancel,
)
.await
}
async fn download_byte_range(
&self,
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
// S3 accepts ranges as https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
// and needs both ends to be exclusive
let end_inclusive = end_exclusive.map(|end| end.saturating_sub(1));
let range = Some(match end_inclusive {
Some(end_inclusive) => format!("bytes={start_inclusive}-{end_inclusive}"),
None => format!("bytes={start_inclusive}-"),
});
self.download_object(
GetObjectRequest {
bucket: self.bucket_name.clone(),
key: self.relative_path_to_s3_object(from),
range,
}, },
cancel, cancel,
) )

View File

@@ -12,8 +12,8 @@ use std::{collections::hash_map::Entry, sync::Arc};
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use crate::{ use crate::{
Download, DownloadError, GenericRemoteStorage, Listing, ListingMode, RemotePath, RemoteStorage, Download, DownloadError, DownloadOpts, GenericRemoteStorage, Listing, ListingMode, RemotePath,
StorageMetadata, TimeTravelError, RemoteStorage, StorageMetadata, TimeTravelError,
}; };
pub struct UnreliableWrapper { pub struct UnreliableWrapper {
@@ -167,28 +167,14 @@ impl RemoteStorage for UnreliableWrapper {
async fn download( async fn download(
&self, &self,
from: &RemotePath, from: &RemotePath,
opts: &DownloadOpts,
cancel: &CancellationToken, cancel: &CancellationToken,
) -> Result<Download, DownloadError> { ) -> Result<Download, DownloadError> {
// Note: We treat any byte range as an "attempt" of the same operation.
// We don't pay attention to the ranges. That's good enough for now.
self.attempt(RemoteOp::Download(from.clone())) self.attempt(RemoteOp::Download(from.clone()))
.map_err(DownloadError::Other)?; .map_err(DownloadError::Other)?;
self.inner.download(from, cancel).await self.inner.download(from, opts, cancel).await
}
async fn download_byte_range(
&self,
from: &RemotePath,
start_inclusive: u64,
end_exclusive: Option<u64>,
cancel: &CancellationToken,
) -> Result<Download, DownloadError> {
// Note: We treat any download_byte_range as an "attempt" of the same
// operation. We don't pay attention to the ranges. That's good enough
// for now.
self.attempt(RemoteOp::Download(from.clone()))
.map_err(DownloadError::Other)?;
self.inner
.download_byte_range(from, start_inclusive, end_exclusive, cancel)
.await
} }
async fn delete(&self, path: &RemotePath, cancel: &CancellationToken) -> anyhow::Result<()> { async fn delete(&self, path: &RemotePath, cancel: &CancellationToken) -> anyhow::Result<()> {

View File

@@ -1,8 +1,8 @@
use anyhow::Context; use anyhow::Context;
use camino::Utf8Path; use camino::Utf8Path;
use futures::StreamExt; use futures::StreamExt;
use remote_storage::ListingMode; use remote_storage::{DownloadError, DownloadOpts, ListingMode, ListingObject, RemotePath};
use remote_storage::RemotePath; use std::ops::Bound;
use std::sync::Arc; use std::sync::Arc;
use std::{collections::HashSet, num::NonZeroU32}; use std::{collections::HashSet, num::NonZeroU32};
use test_context::test_context; use test_context::test_context;
@@ -284,14 +284,25 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
ctx.client.upload(data, len, &path, None, &cancel).await?; ctx.client.upload(data, len, &path, None, &cancel).await?;
// Normal download request // Normal download request
let dl = ctx.client.download(&path, &cancel).await?; let dl = ctx
.client
.download(&path, &DownloadOpts::default(), &cancel)
.await?;
let buf = download_to_vec(dl).await?; let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig); assert_eq!(&buf, &orig);
// Full range (end specified) // Full range (end specified)
let dl = ctx let dl = ctx
.client .client
.download_byte_range(&path, 0, Some(len as u64), &cancel) .download(
&path,
&DownloadOpts {
byte_start: Bound::Included(0),
byte_end: Bound::Excluded(len as u64),
..Default::default()
},
&cancel,
)
.await?; .await?;
let buf = download_to_vec(dl).await?; let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig); assert_eq!(&buf, &orig);
@@ -299,7 +310,15 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
// partial range (end specified) // partial range (end specified)
let dl = ctx let dl = ctx
.client .client
.download_byte_range(&path, 4, Some(10), &cancel) .download(
&path,
&DownloadOpts {
byte_start: Bound::Included(4),
byte_end: Bound::Excluded(10),
..Default::default()
},
&cancel,
)
.await?; .await?;
let buf = download_to_vec(dl).await?; let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig[4..10]); assert_eq!(&buf, &orig[4..10]);
@@ -307,7 +326,15 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
// partial range (end beyond real end) // partial range (end beyond real end)
let dl = ctx let dl = ctx
.client .client
.download_byte_range(&path, 8, Some(len as u64 * 100), &cancel) .download(
&path,
&DownloadOpts {
byte_start: Bound::Included(8),
byte_end: Bound::Excluded(len as u64 * 100),
..Default::default()
},
&cancel,
)
.await?; .await?;
let buf = download_to_vec(dl).await?; let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig[8..]); assert_eq!(&buf, &orig[8..]);
@@ -315,7 +342,14 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
// Partial range (end unspecified) // Partial range (end unspecified)
let dl = ctx let dl = ctx
.client .client
.download_byte_range(&path, 4, None, &cancel) .download(
&path,
&DownloadOpts {
byte_start: Bound::Included(4),
..Default::default()
},
&cancel,
)
.await?; .await?;
let buf = download_to_vec(dl).await?; let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig[4..]); assert_eq!(&buf, &orig[4..]);
@@ -323,7 +357,14 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
// Full range (end unspecified) // Full range (end unspecified)
let dl = ctx let dl = ctx
.client .client
.download_byte_range(&path, 0, None, &cancel) .download(
&path,
&DownloadOpts {
byte_start: Bound::Included(0),
..Default::default()
},
&cancel,
)
.await?; .await?;
let buf = download_to_vec(dl).await?; let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig); assert_eq!(&buf, &orig);
@@ -337,6 +378,54 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
Ok(()) Ok(())
} }
/// Tests that conditional downloads work properly, by returning
/// DownloadError::Unmodified when the object ETag matches the given ETag.
#[test_context(MaybeEnabledStorage)]
#[tokio::test]
async fn download_conditional(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
let MaybeEnabledStorage::Enabled(ctx) = ctx else {
return Ok(());
};
let cancel = CancellationToken::new();
// Create a file.
let path = RemotePath::new(Utf8Path::new(format!("{}/file", ctx.base_prefix).as_str()))?;
let data = bytes::Bytes::from_static("foo".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;
// Download it to obtain its etag.
let mut opts = DownloadOpts::default();
let download = ctx.client.download(&path, &opts, &cancel).await?;
// Download with the etag yields DownloadError::Unmodified.
opts.etag = Some(download.etag);
let result = ctx.client.download(&path, &opts, &cancel).await;
assert!(
matches!(result, Err(DownloadError::Unmodified)),
"expected DownloadError::Unmodified, got {result:?}"
);
// Replace the file contents.
let data = bytes::Bytes::from_static("bar".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;
// A download with the old etag should yield the new file.
let download = ctx.client.download(&path, &opts, &cancel).await?;
assert_ne!(download.etag, opts.etag.unwrap(), "ETag did not change");
// A download with the new etag should yield Unmodified again.
opts.etag = Some(download.etag);
let result = ctx.client.download(&path, &opts, &cancel).await;
assert!(
matches!(result, Err(DownloadError::Unmodified)),
"expected DownloadError::Unmodified, got {result:?}"
);
Ok(())
}
#[test_context(MaybeEnabledStorage)] #[test_context(MaybeEnabledStorage)]
#[tokio::test] #[tokio::test]
async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> { async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
@@ -364,7 +453,10 @@ async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
// Normal download request // Normal download request
ctx.client.copy_object(&path, &path_dest, &cancel).await?; ctx.client.copy_object(&path, &path_dest, &cancel).await?;
let dl = ctx.client.download(&path_dest, &cancel).await?; let dl = ctx
.client
.download(&path_dest, &DownloadOpts::default(), &cancel)
.await?;
let buf = download_to_vec(dl).await?; let buf = download_to_vec(dl).await?;
assert_eq!(&buf, &orig); assert_eq!(&buf, &orig);
@@ -376,3 +468,56 @@ async fn copy_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
Ok(()) Ok(())
} }
/// Tests that head_object works properly.
#[test_context(MaybeEnabledStorage)]
#[tokio::test]
async fn head_object(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
let MaybeEnabledStorage::Enabled(ctx) = ctx else {
return Ok(());
};
let cancel = CancellationToken::new();
let path = RemotePath::new(Utf8Path::new(format!("{}/file", ctx.base_prefix).as_str()))?;
// Errors on missing file.
let result = ctx.client.head_object(&path, &cancel).await;
assert!(
matches!(result, Err(DownloadError::NotFound)),
"expected NotFound, got {result:?}"
);
// Create the file.
let data = bytes::Bytes::from_static("foo".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;
// Fetch the head metadata.
let object = ctx.client.head_object(&path, &cancel).await?;
assert_eq!(
object,
ListingObject {
key: path.clone(),
last_modified: object.last_modified, // ignore
size: 3
}
);
// Wait for a couple of seconds, and then update the file to check the last
// modified timestamp.
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
let data = bytes::Bytes::from_static("bar".as_bytes());
let (stream, len) = wrap_stream(data);
ctx.client.upload(stream, len, &path, None, &cancel).await?;
let new = ctx.client.head_object(&path, &cancel).await?;
assert!(
!new.last_modified
.duration_since(object.last_modified)?
.is_zero(),
"last_modified did not advance"
);
Ok(())
}

View File

@@ -12,8 +12,8 @@ use anyhow::Context;
use camino::Utf8Path; use camino::Utf8Path;
use futures_util::StreamExt; use futures_util::StreamExt;
use remote_storage::{ use remote_storage::{
DownloadError, GenericRemoteStorage, ListingMode, RemotePath, RemoteStorageConfig, DownloadError, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath,
RemoteStorageKind, S3Config, RemoteStorageConfig, RemoteStorageKind, S3Config,
}; };
use test_context::test_context; use test_context::test_context;
use test_context::AsyncTestContext; use test_context::AsyncTestContext;
@@ -121,7 +121,8 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
// A little check to ensure that our clock is not too far off from the S3 clock // A little check to ensure that our clock is not too far off from the S3 clock
{ {
let dl = retry(|| ctx.client.download(&path2, &cancel)).await?; let opts = DownloadOpts::default();
let dl = retry(|| ctx.client.download(&path2, &opts, &cancel)).await?;
let last_modified = dl.last_modified; let last_modified = dl.last_modified;
let half_wt = WAIT_TIME.mul_f32(0.5); let half_wt = WAIT_TIME.mul_f32(0.5);
let t0_hwt = t0 + half_wt; let t0_hwt = t0 + half_wt;
@@ -159,7 +160,12 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
let t2_files_recovered = list_files(&ctx.client, &cancel).await?; let t2_files_recovered = list_files(&ctx.client, &cancel).await?;
println!("after recovery to t2: {t2_files_recovered:?}"); println!("after recovery to t2: {t2_files_recovered:?}");
assert_eq!(t2_files, t2_files_recovered); assert_eq!(t2_files, t2_files_recovered);
let path2_recovered_t2 = download_to_vec(ctx.client.download(&path2, &cancel).await?).await?; let path2_recovered_t2 = download_to_vec(
ctx.client
.download(&path2, &DownloadOpts::default(), &cancel)
.await?,
)
.await?;
assert_eq!(path2_recovered_t2, new_data.as_bytes()); assert_eq!(path2_recovered_t2, new_data.as_bytes());
// after recovery to t1: path1 is back, path2 has the old content // after recovery to t1: path1 is back, path2 has the old content
@@ -170,7 +176,12 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow:
let t1_files_recovered = list_files(&ctx.client, &cancel).await?; let t1_files_recovered = list_files(&ctx.client, &cancel).await?;
println!("after recovery to t1: {t1_files_recovered:?}"); println!("after recovery to t1: {t1_files_recovered:?}");
assert_eq!(t1_files, t1_files_recovered); assert_eq!(t1_files, t1_files_recovered);
let path2_recovered_t1 = download_to_vec(ctx.client.download(&path2, &cancel).await?).await?; let path2_recovered_t1 = download_to_vec(
ctx.client
.download(&path2, &DownloadOpts::default(), &cancel)
.await?,
)
.await?;
assert_eq!(path2_recovered_t1, old_data.as_bytes()); assert_eq!(path2_recovered_t1, old_data.as_bytes());
// after recovery to t0: everything is gone except for path1 // after recovery to t0: everything is gone except for path1
@@ -416,7 +427,7 @@ async fn download_is_timeouted(ctx: &mut MaybeEnabledStorage) {
let started_at = std::time::Instant::now(); let started_at = std::time::Instant::now();
let mut stream = ctx let mut stream = ctx
.client .client
.download(&path, &cancel) .download(&path, &DownloadOpts::default(), &cancel)
.await .await
.expect("download succeeds") .expect("download succeeds")
.download_stream; .download_stream;
@@ -491,7 +502,7 @@ async fn download_is_cancelled(ctx: &mut MaybeEnabledStorage) {
{ {
let stream = ctx let stream = ctx
.client .client
.download(&path, &cancel) .download(&path, &DownloadOpts::default(), &cancel)
.await .await
.expect("download succeeds") .expect("download succeeds")
.download_stream; .download_stream;

View File

@@ -79,8 +79,7 @@ pub struct Config {
/// memory. /// memory.
/// ///
/// The default value of `0.15` means that we *guarantee* sending upscale requests if the /// The default value of `0.15` means that we *guarantee* sending upscale requests if the
/// cgroup is using more than 85% of total memory (even if we're *not* separately reserving /// cgroup is using more than 85% of total memory.
/// memory for the file cache).
cgroup_min_overhead_fraction: f64, cgroup_min_overhead_fraction: f64,
cgroup_downscale_threshold_buffer_bytes: u64, cgroup_downscale_threshold_buffer_bytes: u64,
@@ -97,24 +96,12 @@ impl Default for Config {
} }
impl Config { impl Config {
fn cgroup_threshold(&self, total_mem: u64, file_cache_disk_size: u64) -> u64 { fn cgroup_threshold(&self, total_mem: u64) -> u64 {
// If the file cache is in tmpfs, then it will count towards shmem usage of the cgroup, // We want our threshold to be met gracefully instead of letting postgres get OOM-killed
// and thus be non-reclaimable, so we should allow for additional memory usage. // (or if there's room, spilling to swap).
//
// If the file cache sits on disk, our desired stable system state is for it to be fully
// page cached (its contents should only be paged to/from disk in situations where we can't
// upscale fast enough). Page-cached memory is reclaimable, so we need to lower the
// threshold for non-reclaimable memory so we scale up *before* the kernel starts paging
// out the file cache.
let memory_remaining_for_cgroup = total_mem.saturating_sub(file_cache_disk_size);
// Even if we're not separately making room for the file cache (if it's in tmpfs), we still
// want our threshold to be met gracefully instead of letting postgres get OOM-killed.
// So we guarantee that there's at least `cgroup_min_overhead_fraction` of total memory // So we guarantee that there's at least `cgroup_min_overhead_fraction` of total memory
// remaining above the threshold. // remaining above the threshold.
let max_threshold = (total_mem as f64 * (1.0 - self.cgroup_min_overhead_fraction)) as u64; (total_mem as f64 * (1.0 - self.cgroup_min_overhead_fraction)) as u64
memory_remaining_for_cgroup.min(max_threshold)
} }
} }
@@ -149,11 +136,6 @@ impl Runner {
let mem = get_total_system_memory(); let mem = get_total_system_memory();
let mut file_cache_disk_size = 0;
// We need to process file cache initialization before cgroup initialization, so that the memory
// allocated to the file cache is appropriately taken into account when we decide the cgroup's
// memory limits.
if let Some(connstr) = &args.pgconnstr { if let Some(connstr) = &args.pgconnstr {
info!("initializing file cache"); info!("initializing file cache");
let config = FileCacheConfig::default(); let config = FileCacheConfig::default();
@@ -184,7 +166,6 @@ impl Runner {
info!("file cache size actually got set to {actual_size}") info!("file cache size actually got set to {actual_size}")
} }
file_cache_disk_size = actual_size;
state.filecache = Some(file_cache); state.filecache = Some(file_cache);
} }
@@ -207,7 +188,7 @@ impl Runner {
cgroup.watch(hist_tx).await cgroup.watch(hist_tx).await
}); });
let threshold = state.config.cgroup_threshold(mem, file_cache_disk_size); let threshold = state.config.cgroup_threshold(mem);
info!(threshold, "set initial cgroup threshold",); info!(threshold, "set initial cgroup threshold",);
state.cgroup = Some(CgroupState { state.cgroup = Some(CgroupState {
@@ -259,9 +240,7 @@ impl Runner {
return Ok((false, status.to_owned())); return Ok((false, status.to_owned()));
} }
let new_threshold = self let new_threshold = self.config.cgroup_threshold(usable_system_memory);
.config
.cgroup_threshold(usable_system_memory, expected_file_cache_size);
let current = last_history.avg_non_reclaimable; let current = last_history.avg_non_reclaimable;
@@ -282,13 +261,11 @@ impl Runner {
// The downscaling has been approved. Downscale the file cache, then the cgroup. // The downscaling has been approved. Downscale the file cache, then the cgroup.
let mut status = vec![]; let mut status = vec![];
let mut file_cache_disk_size = 0;
if let Some(file_cache) = &mut self.filecache { if let Some(file_cache) = &mut self.filecache {
let actual_usage = file_cache let actual_usage = file_cache
.set_file_cache_size(expected_file_cache_size) .set_file_cache_size(expected_file_cache_size)
.await .await
.context("failed to set file cache size")?; .context("failed to set file cache size")?;
file_cache_disk_size = actual_usage;
let message = format!( let message = format!(
"set file cache size to {} MiB", "set file cache size to {} MiB",
bytes_to_mebibytes(actual_usage), bytes_to_mebibytes(actual_usage),
@@ -298,9 +275,7 @@ impl Runner {
} }
if let Some(cgroup) = &mut self.cgroup { if let Some(cgroup) = &mut self.cgroup {
let new_threshold = self let new_threshold = self.config.cgroup_threshold(usable_system_memory);
.config
.cgroup_threshold(usable_system_memory, file_cache_disk_size);
let message = format!( let message = format!(
"set cgroup memory threshold from {} MiB to {} MiB, of new total {} MiB", "set cgroup memory threshold from {} MiB to {} MiB, of new total {} MiB",
@@ -329,7 +304,6 @@ impl Runner {
let new_mem = resources.mem; let new_mem = resources.mem;
let usable_system_memory = new_mem.saturating_sub(self.config.sys_buffer_bytes); let usable_system_memory = new_mem.saturating_sub(self.config.sys_buffer_bytes);
let mut file_cache_disk_size = 0;
if let Some(file_cache) = &mut self.filecache { if let Some(file_cache) = &mut self.filecache {
let expected_usage = file_cache.config.calculate_cache_size(usable_system_memory); let expected_usage = file_cache.config.calculate_cache_size(usable_system_memory);
info!( info!(
@@ -342,7 +316,6 @@ impl Runner {
.set_file_cache_size(expected_usage) .set_file_cache_size(expected_usage)
.await .await
.context("failed to set file cache size")?; .context("failed to set file cache size")?;
file_cache_disk_size = actual_usage;
if actual_usage != expected_usage { if actual_usage != expected_usage {
warn!( warn!(
@@ -354,9 +327,7 @@ impl Runner {
} }
if let Some(cgroup) = &mut self.cgroup { if let Some(cgroup) = &mut self.cgroup {
let new_threshold = self let new_threshold = self.config.cgroup_threshold(usable_system_memory);
.config
.cgroup_threshold(usable_system_memory, file_cache_disk_size);
info!( info!(
"set cgroup memory threshold from {} MiB to {} MiB of new total {} MiB", "set cgroup memory threshold from {} MiB to {} MiB of new total {} MiB",

View File

@@ -703,6 +703,8 @@ async fn timeline_archival_config_handler(
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?; let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);
let request_data: TimelineArchivalConfigRequest = json_request(&mut request).await?; let request_data: TimelineArchivalConfigRequest = json_request(&mut request).await?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?; check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let state = get_state(&request); let state = get_state(&request);
@@ -713,7 +715,7 @@ async fn timeline_archival_config_handler(
.get_attached_tenant_shard(tenant_shard_id)?; .get_attached_tenant_shard(tenant_shard_id)?;
tenant tenant
.apply_timeline_archival_config(timeline_id, request_data.state) .apply_timeline_archival_config(timeline_id, request_data.state, ctx)
.await?; .await?;
Ok::<_, ApiError>(()) Ok::<_, ApiError>(())
} }

View File

@@ -38,6 +38,7 @@ use std::future::Future;
use std::sync::Weak; use std::sync::Weak;
use std::time::SystemTime; use std::time::SystemTime;
use storage_broker::BrokerClientChannel; use storage_broker::BrokerClientChannel;
use timeline::offload::offload_timeline;
use tokio::io::BufReader; use tokio::io::BufReader;
use tokio::sync::watch; use tokio::sync::watch;
use tokio::task::JoinSet; use tokio::task::JoinSet;
@@ -287,9 +288,13 @@ pub struct Tenant {
/// During timeline creation, we first insert the TimelineId to the /// During timeline creation, we first insert the TimelineId to the
/// creating map, then `timelines`, then remove it from the creating map. /// creating map, then `timelines`, then remove it from the creating map.
/// **Lock order**: if acquring both, acquire`timelines` before `timelines_creating` /// **Lock order**: if acquiring both, acquire`timelines` before `timelines_creating`
timelines_creating: std::sync::Mutex<HashSet<TimelineId>>, timelines_creating: std::sync::Mutex<HashSet<TimelineId>>,
/// Possibly offloaded and archived timelines
/// **Lock order**: if acquiring both, acquire`timelines` before `timelines_offloaded`
timelines_offloaded: Mutex<HashMap<TimelineId, Arc<OffloadedTimeline>>>,
// This mutex prevents creation of new timelines during GC. // This mutex prevents creation of new timelines during GC.
// Adding yet another mutex (in addition to `timelines`) is needed because holding // Adding yet another mutex (in addition to `timelines`) is needed because holding
// `timelines` mutex during all GC iteration // `timelines` mutex during all GC iteration
@@ -484,6 +489,65 @@ impl WalRedoManager {
} }
} }
pub struct OffloadedTimeline {
pub tenant_shard_id: TenantShardId,
pub timeline_id: TimelineId,
pub ancestor_timeline_id: Option<TimelineId>,
// TODO: once we persist offloaded state, make this lazily constructed
pub remote_client: Arc<RemoteTimelineClient>,
/// Prevent two tasks from deleting the timeline at the same time. If held, the
/// timeline is being deleted. If 'true', the timeline has already been deleted.
pub delete_progress: Arc<tokio::sync::Mutex<DeleteTimelineFlow>>,
}
impl OffloadedTimeline {
fn from_timeline(timeline: &Timeline) -> Self {
Self {
tenant_shard_id: timeline.tenant_shard_id,
timeline_id: timeline.timeline_id,
ancestor_timeline_id: timeline.get_ancestor_timeline_id(),
remote_client: timeline.remote_client.clone(),
delete_progress: timeline.delete_progress.clone(),
}
}
}
#[derive(Clone)]
pub enum TimelineOrOffloaded {
Timeline(Arc<Timeline>),
Offloaded(Arc<OffloadedTimeline>),
}
impl TimelineOrOffloaded {
pub fn tenant_shard_id(&self) -> TenantShardId {
match self {
TimelineOrOffloaded::Timeline(timeline) => timeline.tenant_shard_id,
TimelineOrOffloaded::Offloaded(offloaded) => offloaded.tenant_shard_id,
}
}
pub fn timeline_id(&self) -> TimelineId {
match self {
TimelineOrOffloaded::Timeline(timeline) => timeline.timeline_id,
TimelineOrOffloaded::Offloaded(offloaded) => offloaded.timeline_id,
}
}
pub fn delete_progress(&self) -> &Arc<tokio::sync::Mutex<DeleteTimelineFlow>> {
match self {
TimelineOrOffloaded::Timeline(timeline) => &timeline.delete_progress,
TimelineOrOffloaded::Offloaded(offloaded) => &offloaded.delete_progress,
}
}
pub fn remote_client(&self) -> &Arc<RemoteTimelineClient> {
match self {
TimelineOrOffloaded::Timeline(timeline) => &timeline.remote_client,
TimelineOrOffloaded::Offloaded(offloaded) => &offloaded.remote_client,
}
}
}
#[derive(Debug, thiserror::Error, PartialEq, Eq)] #[derive(Debug, thiserror::Error, PartialEq, Eq)]
pub enum GetTimelineError { pub enum GetTimelineError {
#[error("Timeline is shutting down")] #[error("Timeline is shutting down")]
@@ -1406,52 +1470,192 @@ impl Tenant {
} }
} }
pub(crate) async fn apply_timeline_archival_config( fn check_to_be_archived_has_no_unarchived_children(
&self,
timeline_id: TimelineId, timeline_id: TimelineId,
state: TimelineArchivalState, timelines: &std::sync::MutexGuard<'_, HashMap<TimelineId, Arc<Timeline>>>,
) -> Result<(), TimelineArchivalError> {
let children: Vec<TimelineId> = timelines
.iter()
.filter_map(|(id, entry)| {
if entry.get_ancestor_timeline_id() != Some(timeline_id) {
return None;
}
if entry.is_archived() == Some(true) {
return None;
}
Some(*id)
})
.collect();
if !children.is_empty() {
return Err(TimelineArchivalError::HasUnarchivedChildren(children));
}
Ok(())
}
fn check_ancestor_of_to_be_unarchived_is_not_archived(
ancestor_timeline_id: TimelineId,
timelines: &std::sync::MutexGuard<'_, HashMap<TimelineId, Arc<Timeline>>>,
offloaded_timelines: &std::sync::MutexGuard<
'_,
HashMap<TimelineId, Arc<OffloadedTimeline>>,
>,
) -> Result<(), TimelineArchivalError> {
let has_archived_parent =
if let Some(ancestor_timeline) = timelines.get(&ancestor_timeline_id) {
ancestor_timeline.is_archived() == Some(true)
} else if offloaded_timelines.contains_key(&ancestor_timeline_id) {
true
} else {
error!("ancestor timeline {ancestor_timeline_id} not found");
if cfg!(debug_assertions) {
panic!("ancestor timeline {ancestor_timeline_id} not found");
}
return Err(TimelineArchivalError::NotFound);
};
if has_archived_parent {
return Err(TimelineArchivalError::HasArchivedParent(
ancestor_timeline_id,
));
}
Ok(())
}
fn check_to_be_unarchived_timeline_has_no_archived_parent(
timeline: &Arc<Timeline>,
) -> Result<(), TimelineArchivalError> {
if let Some(ancestor_timeline) = timeline.ancestor_timeline() {
if ancestor_timeline.is_archived() == Some(true) {
return Err(TimelineArchivalError::HasArchivedParent(
ancestor_timeline.timeline_id,
));
}
}
Ok(())
}
/// Loads the specified (offloaded) timeline from S3 and attaches it as a loaded timeline
async fn unoffload_timeline(
self: &Arc<Self>,
timeline_id: TimelineId,
ctx: RequestContext,
) -> Result<Arc<Timeline>, TimelineArchivalError> {
let cancel = self.cancel.clone();
let timeline_preload = self
.load_timeline_metadata(timeline_id, self.remote_storage.clone(), cancel)
.await;
let index_part = match timeline_preload.index_part {
Ok(index_part) => {
debug!("remote index part exists for timeline {timeline_id}");
index_part
}
Err(DownloadError::NotFound) => {
error!(%timeline_id, "index_part not found on remote");
return Err(TimelineArchivalError::NotFound);
}
Err(e) => {
// Some (possibly ephemeral) error happened during index_part download.
warn!(%timeline_id, "Failed to load index_part from remote storage, failed creation? ({e})");
return Err(TimelineArchivalError::Other(
anyhow::Error::new(e).context("downloading index_part from remote storage"),
));
}
};
let index_part = match index_part {
MaybeDeletedIndexPart::IndexPart(index_part) => index_part,
MaybeDeletedIndexPart::Deleted(_index_part) => {
info!("timeline is deleted according to index_part.json");
return Err(TimelineArchivalError::NotFound);
}
};
let remote_metadata = index_part.metadata.clone();
let timeline_resources = self.build_timeline_resources(timeline_id);
self.load_remote_timeline(
timeline_id,
index_part,
remote_metadata,
timeline_resources,
&ctx,
)
.await
.with_context(|| {
format!(
"failed to load remote timeline {} for tenant {}",
timeline_id, self.tenant_shard_id
)
})?;
let timelines = self.timelines.lock().unwrap();
if let Some(timeline) = timelines.get(&timeline_id) {
let mut offloaded_timelines = self.timelines_offloaded.lock().unwrap();
if offloaded_timelines.remove(&timeline_id).is_none() {
warn!("timeline already removed from offloaded timelines");
}
Ok(Arc::clone(timeline))
} else {
warn!("timeline not available directly after attach");
Err(TimelineArchivalError::Other(anyhow::anyhow!(
"timeline not available directly after attach"
)))
}
}
pub(crate) async fn apply_timeline_archival_config(
self: &Arc<Self>,
timeline_id: TimelineId,
new_state: TimelineArchivalState,
ctx: RequestContext,
) -> Result<(), TimelineArchivalError> { ) -> Result<(), TimelineArchivalError> {
info!("setting timeline archival config"); info!("setting timeline archival config");
let timeline = { // First part: figure out what is needed to do, and do validation
let timeline_or_unarchive_offloaded = 'outer: {
let timelines = self.timelines.lock().unwrap(); let timelines = self.timelines.lock().unwrap();
let Some(timeline) = timelines.get(&timeline_id) else { let Some(timeline) = timelines.get(&timeline_id) else {
return Err(TimelineArchivalError::NotFound); let offloaded_timelines = self.timelines_offloaded.lock().unwrap();
let Some(offloaded) = offloaded_timelines.get(&timeline_id) else {
return Err(TimelineArchivalError::NotFound);
};
if new_state == TimelineArchivalState::Archived {
// It's offloaded already, so nothing to do
return Ok(());
}
if let Some(ancestor_timeline_id) = offloaded.ancestor_timeline_id {
Self::check_ancestor_of_to_be_unarchived_is_not_archived(
ancestor_timeline_id,
&timelines,
&offloaded_timelines,
)?;
}
break 'outer None;
}; };
if state == TimelineArchivalState::Unarchived { // Do some validation. We release the timelines lock below, so there is potential
if let Some(ancestor_timeline) = timeline.ancestor_timeline() { // for race conditions: these checks are more present to prevent misunderstandings of
if ancestor_timeline.is_archived() == Some(true) { // the API's capabilities, instead of serving as the sole way to defend their invariants.
return Err(TimelineArchivalError::HasArchivedParent( match new_state {
ancestor_timeline.timeline_id, TimelineArchivalState::Unarchived => {
)); Self::check_to_be_unarchived_timeline_has_no_archived_parent(timeline)?
} }
TimelineArchivalState::Archived => {
Self::check_to_be_archived_has_no_unarchived_children(timeline_id, &timelines)?
} }
} }
Some(Arc::clone(timeline))
// Ensure that there are no non-archived child timelines
let children: Vec<TimelineId> = timelines
.iter()
.filter_map(|(id, entry)| {
if entry.get_ancestor_timeline_id() != Some(timeline_id) {
return None;
}
if entry.is_archived() == Some(true) {
return None;
}
Some(*id)
})
.collect();
if !children.is_empty() && state == TimelineArchivalState::Archived {
return Err(TimelineArchivalError::HasUnarchivedChildren(children));
}
Arc::clone(timeline)
}; };
// Second part: unarchive timeline (if needed)
let timeline = if let Some(timeline) = timeline_or_unarchive_offloaded {
timeline
} else {
// Turn offloaded timeline into a non-offloaded one
self.unoffload_timeline(timeline_id, ctx).await?
};
// Third part: upload new timeline archival state and block until it is present in S3
let upload_needed = timeline let upload_needed = timeline
.remote_client .remote_client
.schedule_index_upload_for_timeline_archival_state(state)?; .schedule_index_upload_for_timeline_archival_state(new_state)?;
if upload_needed { if upload_needed {
info!("Uploading new state"); info!("Uploading new state");
@@ -1884,7 +2088,7 @@ impl Tenant {
/// ///
/// Returns whether we have pending compaction task. /// Returns whether we have pending compaction task.
async fn compaction_iteration( async fn compaction_iteration(
&self, self: &Arc<Self>,
cancel: &CancellationToken, cancel: &CancellationToken,
ctx: &RequestContext, ctx: &RequestContext,
) -> Result<bool, timeline::CompactionError> { ) -> Result<bool, timeline::CompactionError> {
@@ -1905,21 +2109,28 @@ impl Tenant {
// while holding the lock. Then drop the lock and actually perform the // while holding the lock. Then drop the lock and actually perform the
// compactions. We don't want to block everything else while the // compactions. We don't want to block everything else while the
// compaction runs. // compaction runs.
let timelines_to_compact = { let timelines_to_compact_or_offload;
{
let timelines = self.timelines.lock().unwrap(); let timelines = self.timelines.lock().unwrap();
let timelines_to_compact = timelines timelines_to_compact_or_offload = timelines
.iter() .iter()
.filter_map(|(timeline_id, timeline)| { .filter_map(|(timeline_id, timeline)| {
if timeline.is_active() { let (is_active, can_offload) = (timeline.is_active(), timeline.can_offload());
Some((*timeline_id, timeline.clone())) let has_no_unoffloaded_children = {
} else { !timelines
.iter()
.any(|(_id, tl)| tl.get_ancestor_timeline_id() == Some(*timeline_id))
};
let can_offload = can_offload && has_no_unoffloaded_children;
if (is_active, can_offload) == (false, false) {
None None
} else {
Some((*timeline_id, timeline.clone(), (is_active, can_offload)))
} }
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
drop(timelines); drop(timelines);
timelines_to_compact }
};
// Before doing any I/O work, check our circuit breaker // Before doing any I/O work, check our circuit breaker
if self.compaction_circuit_breaker.lock().unwrap().is_broken() { if self.compaction_circuit_breaker.lock().unwrap().is_broken() {
@@ -1929,20 +2140,34 @@ impl Tenant {
let mut has_pending_task = false; let mut has_pending_task = false;
for (timeline_id, timeline) in &timelines_to_compact { for (timeline_id, timeline, (can_compact, can_offload)) in &timelines_to_compact_or_offload
has_pending_task |= timeline {
.compact(cancel, EnumSet::empty(), ctx) let pending_task_left = if *can_compact {
.instrument(info_span!("compact_timeline", %timeline_id)) Some(
.await timeline
.inspect_err(|e| match e { .compact(cancel, EnumSet::empty(), ctx)
timeline::CompactionError::ShuttingDown => (), .instrument(info_span!("compact_timeline", %timeline_id))
timeline::CompactionError::Other(e) => { .await
self.compaction_circuit_breaker .inspect_err(|e| match e {
.lock() timeline::CompactionError::ShuttingDown => (),
.unwrap() timeline::CompactionError::Other(e) => {
.fail(&CIRCUIT_BREAKERS_BROKEN, e); self.compaction_circuit_breaker
} .lock()
})?; .unwrap()
.fail(&CIRCUIT_BREAKERS_BROKEN, e);
}
})?,
)
} else {
None
};
has_pending_task |= pending_task_left.unwrap_or(false);
if pending_task_left == Some(false) && *can_offload {
offload_timeline(self, timeline)
.instrument(info_span!("offload_timeline", %timeline_id))
.await
.map_err(timeline::CompactionError::Other)?;
}
} }
self.compaction_circuit_breaker self.compaction_circuit_breaker
@@ -2852,6 +3077,7 @@ impl Tenant {
constructed_at: Instant::now(), constructed_at: Instant::now(),
timelines: Mutex::new(HashMap::new()), timelines: Mutex::new(HashMap::new()),
timelines_creating: Mutex::new(HashSet::new()), timelines_creating: Mutex::new(HashSet::new()),
timelines_offloaded: Mutex::new(HashMap::new()),
gc_cs: tokio::sync::Mutex::new(()), gc_cs: tokio::sync::Mutex::new(()),
walredo_mgr, walredo_mgr,
remote_storage, remote_storage,

View File

@@ -141,14 +141,14 @@ impl GcBlock {
Ok(()) Ok(())
} }
pub(crate) fn before_delete(&self, timeline: &super::Timeline) { pub(crate) fn before_delete(&self, timeline_id: &super::TimelineId) {
let unblocked = { let unblocked = {
let mut g = self.reasons.lock().unwrap(); let mut g = self.reasons.lock().unwrap();
if g.is_empty() { if g.is_empty() {
return; return;
} }
g.remove(&timeline.timeline_id); g.remove(timeline_id);
BlockingReasons::clean_and_summarize(g).is_none() BlockingReasons::clean_and_summarize(g).is_none()
}; };

View File

@@ -27,7 +27,7 @@ use crate::tenant::Generation;
use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt; use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
use crate::virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile}; use crate::virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile};
use crate::TEMP_FILE_SUFFIX; use crate::TEMP_FILE_SUFFIX;
use remote_storage::{DownloadError, GenericRemoteStorage, ListingMode, RemotePath}; use remote_storage::{DownloadError, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath};
use utils::crashsafe::path_with_suffix_extension; use utils::crashsafe::path_with_suffix_extension;
use utils::id::{TenantId, TimelineId}; use utils::id::{TenantId, TimelineId};
use utils::pausable_failpoint; use utils::pausable_failpoint;
@@ -153,7 +153,9 @@ async fn download_object<'a>(
.with_context(|| format!("create a destination file for layer '{dst_path}'")) .with_context(|| format!("create a destination file for layer '{dst_path}'"))
.map_err(DownloadError::Other)?; .map_err(DownloadError::Other)?;
let download = storage.download(src_path, cancel).await?; let download = storage
.download(src_path, &DownloadOpts::default(), cancel)
.await?;
pausable_failpoint!("before-downloading-layer-stream-pausable"); pausable_failpoint!("before-downloading-layer-stream-pausable");
@@ -204,7 +206,9 @@ async fn download_object<'a>(
.with_context(|| format!("create a destination file for layer '{dst_path}'")) .with_context(|| format!("create a destination file for layer '{dst_path}'"))
.map_err(DownloadError::Other)?; .map_err(DownloadError::Other)?;
let mut download = storage.download(src_path, cancel).await?; let mut download = storage
.download(src_path, &DownloadOpts::default(), cancel)
.await?;
pausable_failpoint!("before-downloading-layer-stream-pausable"); pausable_failpoint!("before-downloading-layer-stream-pausable");
@@ -344,7 +348,9 @@ async fn do_download_index_part(
let index_part_bytes = download_retry_forever( let index_part_bytes = download_retry_forever(
|| async { || async {
let download = storage.download(&remote_path, cancel).await?; let download = storage
.download(&remote_path, &DownloadOpts::default(), cancel)
.await?;
let mut bytes = Vec::new(); let mut bytes = Vec::new();
@@ -526,10 +532,15 @@ pub(crate) async fn download_initdb_tar_zst(
.with_context(|| format!("tempfile creation {temp_path}")) .with_context(|| format!("tempfile creation {temp_path}"))
.map_err(DownloadError::Other)?; .map_err(DownloadError::Other)?;
let download = match storage.download(&remote_path, cancel).await { let download = match storage
.download(&remote_path, &DownloadOpts::default(), cancel)
.await
{
Ok(dl) => dl, Ok(dl) => dl,
Err(DownloadError::NotFound) => { Err(DownloadError::NotFound) => {
storage.download(&remote_preserved_path, cancel).await? storage
.download(&remote_preserved_path, &DownloadOpts::default(), cancel)
.await?
} }
Err(other) => Err(other)?, Err(other) => Err(other)?,
}; };

View File

@@ -49,7 +49,7 @@ use futures::Future;
use metrics::UIntGauge; use metrics::UIntGauge;
use pageserver_api::models::SecondaryProgress; use pageserver_api::models::SecondaryProgress;
use pageserver_api::shard::TenantShardId; use pageserver_api::shard::TenantShardId;
use remote_storage::{DownloadError, Etag, GenericRemoteStorage}; use remote_storage::{DownloadError, DownloadOpts, Etag, GenericRemoteStorage};
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use tracing::{info_span, instrument, warn, Instrument}; use tracing::{info_span, instrument, warn, Instrument};
@@ -944,36 +944,35 @@ impl<'a> TenantDownloader<'a> {
) -> Result<HeatMapDownload, UpdateError> { ) -> Result<HeatMapDownload, UpdateError> {
debug_assert_current_span_has_tenant_id(); debug_assert_current_span_has_tenant_id();
let tenant_shard_id = self.secondary_state.get_tenant_shard_id(); let tenant_shard_id = self.secondary_state.get_tenant_shard_id();
// TODO: pull up etag check into the request, to do a conditional GET rather than
// issuing a GET and then maybe ignoring the response body
// (https://github.com/neondatabase/neon/issues/6199)
tracing::debug!("Downloading heatmap for secondary tenant",); tracing::debug!("Downloading heatmap for secondary tenant",);
let heatmap_path = remote_heatmap_path(tenant_shard_id); let heatmap_path = remote_heatmap_path(tenant_shard_id);
let cancel = &self.secondary_state.cancel; let cancel = &self.secondary_state.cancel;
let opts = DownloadOpts {
etag: prev_etag.cloned(),
..Default::default()
};
backoff::retry( backoff::retry(
|| async { || async {
let download = self let download = match self
.remote_storage .remote_storage
.download(&heatmap_path, cancel) .download(&heatmap_path, &opts, cancel)
.await .await
.map_err(UpdateError::from)?; {
Ok(download) => download,
Err(DownloadError::Unmodified) => return Ok(HeatMapDownload::Unmodified),
Err(err) => return Err(err.into()),
};
SECONDARY_MODE.download_heatmap.inc(); let mut heatmap_bytes = Vec::new();
let mut body = tokio_util::io::StreamReader::new(download.download_stream);
if Some(&download.etag) == prev_etag { let _size = tokio::io::copy_buf(&mut body, &mut heatmap_bytes).await?;
Ok(HeatMapDownload::Unmodified) Ok(HeatMapDownload::Modified(HeatMapModified {
} else { etag: download.etag,
let mut heatmap_bytes = Vec::new(); last_modified: download.last_modified,
let mut body = tokio_util::io::StreamReader::new(download.download_stream); bytes: heatmap_bytes,
let _size = tokio::io::copy_buf(&mut body, &mut heatmap_bytes).await?; }))
Ok(HeatMapDownload::Modified(HeatMapModified {
etag: download.etag,
last_modified: download.last_modified,
bytes: heatmap_bytes,
}))
}
}, },
|e| matches!(e, UpdateError::NoData | UpdateError::Cancelled), |e| matches!(e, UpdateError::NoData | UpdateError::Cancelled),
FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_DOWNLOAD_WARN_THRESHOLD,
@@ -984,6 +983,7 @@ impl<'a> TenantDownloader<'a> {
.await .await
.ok_or_else(|| UpdateError::Cancelled) .ok_or_else(|| UpdateError::Cancelled)
.and_then(|x| x) .and_then(|x| x)
.inspect(|_| SECONDARY_MODE.download_heatmap.inc())
} }
/// Download heatmap layers that are not present on local disk, or update their /// Download heatmap layers that are not present on local disk, or update their

View File

@@ -7,6 +7,7 @@ pub(crate) mod handle;
mod init; mod init;
pub mod layer_manager; pub mod layer_manager;
pub(crate) mod logical_size; pub(crate) mod logical_size;
pub mod offload;
pub mod span; pub mod span;
pub mod uninit; pub mod uninit;
mod walreceiver; mod walreceiver;
@@ -1556,6 +1557,17 @@ impl Timeline {
} }
} }
/// Checks if the internal state of the timeline is consistent with it being able to be offloaded.
/// This is neccessary but not sufficient for offloading of the timeline as it might have
/// child timelines that are not offloaded yet.
pub(crate) fn can_offload(&self) -> bool {
if self.remote_client.is_archived() != Some(true) {
return false;
}
true
}
/// Outermost timeline compaction operation; downloads needed layers. Returns whether we have pending /// Outermost timeline compaction operation; downloads needed layers. Returns whether we have pending
/// compaction tasks. /// compaction tasks.
pub(crate) async fn compact( pub(crate) async fn compact(
@@ -1818,7 +1830,6 @@ impl Timeline {
self.current_state() == TimelineState::Active self.current_state() == TimelineState::Active
} }
#[allow(unused)]
pub(crate) fn is_archived(&self) -> Option<bool> { pub(crate) fn is_archived(&self) -> Option<bool> {
self.remote_client.is_archived() self.remote_client.is_archived()
} }

View File

@@ -15,7 +15,7 @@ use crate::{
tenant::{ tenant::{
metadata::TimelineMetadata, metadata::TimelineMetadata,
remote_timeline_client::{PersistIndexPartWithDeletedFlagError, RemoteTimelineClient}, remote_timeline_client::{PersistIndexPartWithDeletedFlagError, RemoteTimelineClient},
CreateTimelineCause, DeleteTimelineError, Tenant, CreateTimelineCause, DeleteTimelineError, Tenant, TimelineOrOffloaded,
}, },
}; };
@@ -24,12 +24,14 @@ use super::{Timeline, TimelineResources};
/// Mark timeline as deleted in S3 so we won't pick it up next time /// Mark timeline as deleted in S3 so we won't pick it up next time
/// during attach or pageserver restart. /// during attach or pageserver restart.
/// See comment in persist_index_part_with_deleted_flag. /// See comment in persist_index_part_with_deleted_flag.
async fn set_deleted_in_remote_index(timeline: &Timeline) -> Result<(), DeleteTimelineError> { async fn set_deleted_in_remote_index(
match timeline timeline: &TimelineOrOffloaded,
.remote_client ) -> Result<(), DeleteTimelineError> {
let res = timeline
.remote_client()
.persist_index_part_with_deleted_flag() .persist_index_part_with_deleted_flag()
.await .await;
{ match res {
// If we (now, or already) marked it successfully as deleted, we can proceed // If we (now, or already) marked it successfully as deleted, we can proceed
Ok(()) | Err(PersistIndexPartWithDeletedFlagError::AlreadyDeleted(_)) => (), Ok(()) | Err(PersistIndexPartWithDeletedFlagError::AlreadyDeleted(_)) => (),
// Bail out otherwise // Bail out otherwise
@@ -127,9 +129,9 @@ pub(super) async fn delete_local_timeline_directory(
} }
/// Removes remote layers and an index file after them. /// Removes remote layers and an index file after them.
async fn delete_remote_layers_and_index(timeline: &Timeline) -> anyhow::Result<()> { async fn delete_remote_layers_and_index(timeline: &TimelineOrOffloaded) -> anyhow::Result<()> {
timeline timeline
.remote_client .remote_client()
.delete_all() .delete_all()
.await .await
.context("delete_all") .context("delete_all")
@@ -137,27 +139,41 @@ async fn delete_remote_layers_and_index(timeline: &Timeline) -> anyhow::Result<(
/// It is important that this gets called when DeletionGuard is being held. /// It is important that this gets called when DeletionGuard is being held.
/// For more context see comments in [`DeleteTimelineFlow::prepare`] /// For more context see comments in [`DeleteTimelineFlow::prepare`]
async fn remove_timeline_from_tenant( async fn remove_maybe_offloaded_timeline_from_tenant(
tenant: &Tenant, tenant: &Tenant,
timeline: &Timeline, timeline: &TimelineOrOffloaded,
_: &DeletionGuard, // using it as a witness _: &DeletionGuard, // using it as a witness
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
// Remove the timeline from the map. // Remove the timeline from the map.
// This observes the locking order between timelines and timelines_offloaded
let mut timelines = tenant.timelines.lock().unwrap(); let mut timelines = tenant.timelines.lock().unwrap();
let mut timelines_offloaded = tenant.timelines_offloaded.lock().unwrap();
let offloaded_children_exist = timelines_offloaded
.iter()
.any(|(_, entry)| entry.ancestor_timeline_id == Some(timeline.timeline_id()));
let children_exist = timelines let children_exist = timelines
.iter() .iter()
.any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline.timeline_id)); .any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline.timeline_id()));
// XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`. // XXX this can happen because of race conditions with branch creation.
// We already deleted the layer files, so it's probably best to panic. // We already deleted the remote layer files, so it's probably best to panic.
// (Ideally, above remove_dir_all is atomic so we don't see this timeline after a restart) if children_exist || offloaded_children_exist {
if children_exist {
panic!("Timeline grew children while we removed layer files"); panic!("Timeline grew children while we removed layer files");
} }
timelines match timeline {
.remove(&timeline.timeline_id) TimelineOrOffloaded::Timeline(timeline) => {
.expect("timeline that we were deleting was concurrently removed from 'timelines' map"); timelines.remove(&timeline.timeline_id).expect(
"timeline that we were deleting was concurrently removed from 'timelines' map",
);
}
TimelineOrOffloaded::Offloaded(timeline) => {
timelines_offloaded
.remove(&timeline.timeline_id)
.expect("timeline that we were deleting was concurrently removed from 'timelines_offloaded' map");
}
}
drop(timelines_offloaded);
drop(timelines); drop(timelines);
Ok(()) Ok(())
@@ -207,9 +223,11 @@ impl DeleteTimelineFlow {
guard.mark_in_progress()?; guard.mark_in_progress()?;
// Now that the Timeline is in Stopping state, request all the related tasks to shut down. // Now that the Timeline is in Stopping state, request all the related tasks to shut down.
timeline.shutdown(super::ShutdownMode::Hard).await; if let TimelineOrOffloaded::Timeline(timeline) = &timeline {
timeline.shutdown(super::ShutdownMode::Hard).await;
}
tenant.gc_block.before_delete(&timeline); tenant.gc_block.before_delete(&timeline.timeline_id());
fail::fail_point!("timeline-delete-before-index-deleted-at", |_| { fail::fail_point!("timeline-delete-before-index-deleted-at", |_| {
Err(anyhow::anyhow!( Err(anyhow::anyhow!(
@@ -285,15 +303,16 @@ impl DeleteTimelineFlow {
guard.mark_in_progress()?; guard.mark_in_progress()?;
let timeline = TimelineOrOffloaded::Timeline(timeline);
Self::schedule_background(guard, tenant.conf, tenant, timeline); Self::schedule_background(guard, tenant.conf, tenant, timeline);
Ok(()) Ok(())
} }
fn prepare( pub(super) fn prepare(
tenant: &Tenant, tenant: &Tenant,
timeline_id: TimelineId, timeline_id: TimelineId,
) -> Result<(Arc<Timeline>, DeletionGuard), DeleteTimelineError> { ) -> Result<(TimelineOrOffloaded, DeletionGuard), DeleteTimelineError> {
// Note the interaction between this guard and deletion guard. // Note the interaction between this guard and deletion guard.
// Here we attempt to lock deletion guard when we're holding a lock on timelines. // Here we attempt to lock deletion guard when we're holding a lock on timelines.
// This is important because when you take into account `remove_timeline_from_tenant` // This is important because when you take into account `remove_timeline_from_tenant`
@@ -307,8 +326,14 @@ impl DeleteTimelineFlow {
let timelines = tenant.timelines.lock().unwrap(); let timelines = tenant.timelines.lock().unwrap();
let timeline = match timelines.get(&timeline_id) { let timeline = match timelines.get(&timeline_id) {
Some(t) => t, Some(t) => TimelineOrOffloaded::Timeline(Arc::clone(t)),
None => return Err(DeleteTimelineError::NotFound), None => {
let offloaded_timelines = tenant.timelines_offloaded.lock().unwrap();
match offloaded_timelines.get(&timeline_id) {
Some(t) => TimelineOrOffloaded::Offloaded(Arc::clone(t)),
None => return Err(DeleteTimelineError::NotFound),
}
}
}; };
// Ensure that there are no child timelines **attached to that pageserver**, // Ensure that there are no child timelines **attached to that pageserver**,
@@ -334,30 +359,32 @@ impl DeleteTimelineFlow {
// to remove the timeline from it. // to remove the timeline from it.
// Always if you have two locks that are taken in different order this can result in a deadlock. // Always if you have two locks that are taken in different order this can result in a deadlock.
let delete_progress = Arc::clone(&timeline.delete_progress); let delete_progress = Arc::clone(timeline.delete_progress());
let delete_lock_guard = match delete_progress.try_lock_owned() { let delete_lock_guard = match delete_progress.try_lock_owned() {
Ok(guard) => DeletionGuard(guard), Ok(guard) => DeletionGuard(guard),
Err(_) => { Err(_) => {
// Unfortunately if lock fails arc is consumed. // Unfortunately if lock fails arc is consumed.
return Err(DeleteTimelineError::AlreadyInProgress(Arc::clone( return Err(DeleteTimelineError::AlreadyInProgress(Arc::clone(
&timeline.delete_progress, timeline.delete_progress(),
))); )));
} }
}; };
timeline.set_state(TimelineState::Stopping); if let TimelineOrOffloaded::Timeline(timeline) = &timeline {
timeline.set_state(TimelineState::Stopping);
}
Ok((Arc::clone(timeline), delete_lock_guard)) Ok((timeline, delete_lock_guard))
} }
fn schedule_background( fn schedule_background(
guard: DeletionGuard, guard: DeletionGuard,
conf: &'static PageServerConf, conf: &'static PageServerConf,
tenant: Arc<Tenant>, tenant: Arc<Tenant>,
timeline: Arc<Timeline>, timeline: TimelineOrOffloaded,
) { ) {
let tenant_shard_id = timeline.tenant_shard_id; let tenant_shard_id = timeline.tenant_shard_id();
let timeline_id = timeline.timeline_id; let timeline_id = timeline.timeline_id();
task_mgr::spawn( task_mgr::spawn(
task_mgr::BACKGROUND_RUNTIME.handle(), task_mgr::BACKGROUND_RUNTIME.handle(),
@@ -368,7 +395,9 @@ impl DeleteTimelineFlow {
async move { async move {
if let Err(err) = Self::background(guard, conf, &tenant, &timeline).await { if let Err(err) = Self::background(guard, conf, &tenant, &timeline).await {
error!("Error: {err:#}"); error!("Error: {err:#}");
timeline.set_broken(format!("{err:#}")) if let TimelineOrOffloaded::Timeline(timeline) = timeline {
timeline.set_broken(format!("{err:#}"))
}
}; };
Ok(()) Ok(())
} }
@@ -380,15 +409,19 @@ impl DeleteTimelineFlow {
mut guard: DeletionGuard, mut guard: DeletionGuard,
conf: &PageServerConf, conf: &PageServerConf,
tenant: &Tenant, tenant: &Tenant,
timeline: &Timeline, timeline: &TimelineOrOffloaded,
) -> Result<(), DeleteTimelineError> { ) -> Result<(), DeleteTimelineError> {
delete_local_timeline_directory(conf, tenant.tenant_shard_id, timeline).await?; // Offloaded timelines have no local state
// TODO: once we persist offloaded information, delete the timeline from there, too
if let TimelineOrOffloaded::Timeline(timeline) = timeline {
delete_local_timeline_directory(conf, tenant.tenant_shard_id, timeline).await?;
}
delete_remote_layers_and_index(timeline).await?; delete_remote_layers_and_index(timeline).await?;
pausable_failpoint!("in_progress_delete"); pausable_failpoint!("in_progress_delete");
remove_timeline_from_tenant(tenant, timeline, &guard).await?; remove_maybe_offloaded_timeline_from_tenant(tenant, timeline, &guard).await?;
*guard = Self::Finished; *guard = Self::Finished;
@@ -400,7 +433,7 @@ impl DeleteTimelineFlow {
} }
} }
struct DeletionGuard(OwnedMutexGuard<DeleteTimelineFlow>); pub(super) struct DeletionGuard(OwnedMutexGuard<DeleteTimelineFlow>);
impl Deref for DeletionGuard { impl Deref for DeletionGuard {
type Target = DeleteTimelineFlow; type Target = DeleteTimelineFlow;

View File

@@ -0,0 +1,69 @@
use std::sync::Arc;
use crate::tenant::{OffloadedTimeline, Tenant, TimelineOrOffloaded};
use super::{
delete::{delete_local_timeline_directory, DeleteTimelineFlow, DeletionGuard},
Timeline,
};
pub(crate) async fn offload_timeline(
tenant: &Tenant,
timeline: &Arc<Timeline>,
) -> anyhow::Result<()> {
tracing::info!("offloading archived timeline");
let (timeline, guard) = DeleteTimelineFlow::prepare(tenant, timeline.timeline_id)?;
let TimelineOrOffloaded::Timeline(timeline) = timeline else {
tracing::error!("timeline already offloaded, but given timeline object");
return Ok(());
};
// TODO extend guard mechanism above with method
// to make deletions possible while offloading is in progress
// TODO mark timeline as offloaded in S3
let conf = &tenant.conf;
delete_local_timeline_directory(conf, tenant.tenant_shard_id, &timeline).await?;
remove_timeline_from_tenant(tenant, &timeline, &guard).await?;
{
let mut offloaded_timelines = tenant.timelines_offloaded.lock().unwrap();
offloaded_timelines.insert(
timeline.timeline_id,
Arc::new(OffloadedTimeline::from_timeline(&timeline)),
);
}
Ok(())
}
/// It is important that this gets called when DeletionGuard is being held.
/// For more context see comments in [`DeleteTimelineFlow::prepare`]
async fn remove_timeline_from_tenant(
tenant: &Tenant,
timeline: &Timeline,
_: &DeletionGuard, // using it as a witness
) -> anyhow::Result<()> {
// Remove the timeline from the map.
let mut timelines = tenant.timelines.lock().unwrap();
let children_exist = timelines
.iter()
.any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline.timeline_id));
// XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`.
// We already deleted the layer files, so it's probably best to panic.
// (Ideally, above remove_dir_all is atomic so we don't see this timeline after a restart)
if children_exist {
panic!("Timeline grew children while we removed layer files");
}
timelines
.remove(&timeline.timeline_id)
.expect("timeline that we were deleting was concurrently removed from 'timelines' map");
drop(timelines);
Ok(())
}

View File

@@ -146,6 +146,8 @@ ConstructDeltaMessage()
if (RootTable.role_table) if (RootTable.role_table)
{ {
JsonbValue roles; JsonbValue roles;
HASH_SEQ_STATUS status;
RoleEntry *entry;
roles.type = jbvString; roles.type = jbvString;
roles.val.string.val = "roles"; roles.val.string.val = "roles";
@@ -153,9 +155,6 @@ ConstructDeltaMessage()
pushJsonbValue(&state, WJB_KEY, &roles); pushJsonbValue(&state, WJB_KEY, &roles);
pushJsonbValue(&state, WJB_BEGIN_ARRAY, NULL); pushJsonbValue(&state, WJB_BEGIN_ARRAY, NULL);
HASH_SEQ_STATUS status;
RoleEntry *entry;
hash_seq_init(&status, RootTable.role_table); hash_seq_init(&status, RootTable.role_table);
while ((entry = hash_seq_search(&status)) != NULL) while ((entry = hash_seq_search(&status)) != NULL)
{ {
@@ -190,10 +189,12 @@ ConstructDeltaMessage()
} }
pushJsonbValue(&state, WJB_END_ARRAY, NULL); pushJsonbValue(&state, WJB_END_ARRAY, NULL);
} }
JsonbValue *result = pushJsonbValue(&state, WJB_END_OBJECT, NULL); {
Jsonb *jsonb = JsonbValueToJsonb(result); JsonbValue *result = pushJsonbValue(&state, WJB_END_OBJECT, NULL);
Jsonb *jsonb = JsonbValueToJsonb(result);
return JsonbToCString(NULL, &jsonb->root, 0 /* estimated_len */ ); return JsonbToCString(NULL, &jsonb->root, 0 /* estimated_len */ );
}
} }
#define ERROR_SIZE 1024 #define ERROR_SIZE 1024
@@ -272,32 +273,28 @@ SendDeltasToControlPlane()
curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, ErrorWriteCallback); curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, ErrorWriteCallback);
} }
char *message = ConstructDeltaMessage();
ErrorString str;
str.size = 0;
curl_easy_setopt(handle, CURLOPT_POSTFIELDS, message);
curl_easy_setopt(handle, CURLOPT_WRITEDATA, &str);
const int num_retries = 5;
CURLcode curl_status;
for (int i = 0; i < num_retries; i++)
{
if ((curl_status = curl_easy_perform(handle)) == 0)
break;
elog(LOG, "Curl request failed on attempt %d: %s", i, CurlErrorBuf);
pg_usleep(1000 * 1000);
}
if (curl_status != CURLE_OK)
{
elog(ERROR, "Failed to perform curl request: %s", CurlErrorBuf);
}
else
{ {
char *message = ConstructDeltaMessage();
ErrorString str;
const int num_retries = 5;
CURLcode curl_status;
long response_code; long response_code;
str.size = 0;
curl_easy_setopt(handle, CURLOPT_POSTFIELDS, message);
curl_easy_setopt(handle, CURLOPT_WRITEDATA, &str);
for (int i = 0; i < num_retries; i++)
{
if ((curl_status = curl_easy_perform(handle)) == 0)
break;
elog(LOG, "Curl request failed on attempt %d: %s", i, CurlErrorBuf);
pg_usleep(1000 * 1000);
}
if (curl_status != CURLE_OK)
elog(ERROR, "Failed to perform curl request: %s", CurlErrorBuf);
if (curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &response_code) != CURLE_UNKNOWN_OPTION) if (curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &response_code) != CURLE_UNKNOWN_OPTION)
{ {
if (response_code != 200) if (response_code != 200)
@@ -376,10 +373,11 @@ MergeTable()
if (old_table->db_table) if (old_table->db_table)
{ {
InitDbTableIfNeeded();
DbEntry *entry; DbEntry *entry;
HASH_SEQ_STATUS status; HASH_SEQ_STATUS status;
InitDbTableIfNeeded();
hash_seq_init(&status, old_table->db_table); hash_seq_init(&status, old_table->db_table);
while ((entry = hash_seq_search(&status)) != NULL) while ((entry = hash_seq_search(&status)) != NULL)
{ {
@@ -421,10 +419,11 @@ MergeTable()
if (old_table->role_table) if (old_table->role_table)
{ {
InitRoleTableIfNeeded();
RoleEntry *entry; RoleEntry *entry;
HASH_SEQ_STATUS status; HASH_SEQ_STATUS status;
InitRoleTableIfNeeded();
hash_seq_init(&status, old_table->role_table); hash_seq_init(&status, old_table->role_table);
while ((entry = hash_seq_search(&status)) != NULL) while ((entry = hash_seq_search(&status)) != NULL)
{ {
@@ -515,9 +514,12 @@ RoleIsNeonSuperuser(const char *role_name)
static void static void
HandleCreateDb(CreatedbStmt *stmt) HandleCreateDb(CreatedbStmt *stmt)
{ {
InitDbTableIfNeeded();
DefElem *downer = NULL; DefElem *downer = NULL;
ListCell *option; ListCell *option;
bool found = false;
DbEntry *entry;
InitDbTableIfNeeded();
foreach(option, stmt->options) foreach(option, stmt->options)
{ {
@@ -526,13 +528,11 @@ HandleCreateDb(CreatedbStmt *stmt)
if (strcmp(defel->defname, "owner") == 0) if (strcmp(defel->defname, "owner") == 0)
downer = defel; downer = defel;
} }
bool found = false;
DbEntry *entry = hash_search(
CurrentDdlTable->db_table,
stmt->dbname,
HASH_ENTER,
&found);
entry = hash_search(CurrentDdlTable->db_table,
stmt->dbname,
HASH_ENTER,
&found);
if (!found) if (!found)
memset(entry->old_name, 0, sizeof(entry->old_name)); memset(entry->old_name, 0, sizeof(entry->old_name));
@@ -554,21 +554,24 @@ HandleCreateDb(CreatedbStmt *stmt)
static void static void
HandleAlterOwner(AlterOwnerStmt *stmt) HandleAlterOwner(AlterOwnerStmt *stmt)
{ {
const char *name;
bool found = false;
DbEntry *entry;
const char *new_owner;
if (stmt->objectType != OBJECT_DATABASE) if (stmt->objectType != OBJECT_DATABASE)
return; return;
InitDbTableIfNeeded(); InitDbTableIfNeeded();
const char *name = strVal(stmt->object);
bool found = false;
DbEntry *entry = hash_search(
CurrentDdlTable->db_table,
name,
HASH_ENTER,
&found);
name = strVal(stmt->object);
entry = hash_search(CurrentDdlTable->db_table,
name,
HASH_ENTER,
&found);
if (!found) if (!found)
memset(entry->old_name, 0, sizeof(entry->old_name)); memset(entry->old_name, 0, sizeof(entry->old_name));
const char *new_owner = get_rolespec_name(stmt->newowner);
new_owner = get_rolespec_name(stmt->newowner);
if (RoleIsNeonSuperuser(new_owner)) if (RoleIsNeonSuperuser(new_owner))
elog(ERROR, "can't alter owner to neon_superuser"); elog(ERROR, "can't alter owner to neon_superuser");
entry->owner = get_role_oid(new_owner, false); entry->owner = get_role_oid(new_owner, false);
@@ -578,21 +581,23 @@ HandleAlterOwner(AlterOwnerStmt *stmt)
static void static void
HandleDbRename(RenameStmt *stmt) HandleDbRename(RenameStmt *stmt)
{ {
bool found = false;
DbEntry *entry;
DbEntry *entry_for_new_name;
Assert(stmt->renameType == OBJECT_DATABASE); Assert(stmt->renameType == OBJECT_DATABASE);
InitDbTableIfNeeded(); InitDbTableIfNeeded();
bool found = false; entry = hash_search(CurrentDdlTable->db_table,
DbEntry *entry = hash_search( stmt->subname,
CurrentDdlTable->db_table, HASH_FIND,
stmt->subname, &found);
HASH_FIND,
&found);
DbEntry *entry_for_new_name = hash_search(
CurrentDdlTable->db_table,
stmt->newname,
HASH_ENTER,
NULL);
entry_for_new_name = hash_search(CurrentDdlTable->db_table,
stmt->newname,
HASH_ENTER,
NULL);
entry_for_new_name->type = Op_Set; entry_for_new_name->type = Op_Set;
if (found) if (found)
{ {
if (entry->old_name[0] != '\0') if (entry->old_name[0] != '\0')
@@ -600,8 +605,7 @@ HandleDbRename(RenameStmt *stmt)
else else
strlcpy(entry_for_new_name->old_name, entry->name, NAMEDATALEN); strlcpy(entry_for_new_name->old_name, entry->name, NAMEDATALEN);
entry_for_new_name->owner = entry->owner; entry_for_new_name->owner = entry->owner;
hash_search( hash_search(CurrentDdlTable->db_table,
CurrentDdlTable->db_table,
stmt->subname, stmt->subname,
HASH_REMOVE, HASH_REMOVE,
NULL); NULL);
@@ -616,14 +620,15 @@ HandleDbRename(RenameStmt *stmt)
static void static void
HandleDropDb(DropdbStmt *stmt) HandleDropDb(DropdbStmt *stmt)
{ {
InitDbTableIfNeeded();
bool found = false; bool found = false;
DbEntry *entry = hash_search( DbEntry *entry;
CurrentDdlTable->db_table,
stmt->dbname,
HASH_ENTER,
&found);
InitDbTableIfNeeded();
entry = hash_search(CurrentDdlTable->db_table,
stmt->dbname,
HASH_ENTER,
&found);
entry->type = Op_Delete; entry->type = Op_Delete;
entry->owner = InvalidOid; entry->owner = InvalidOid;
if (!found) if (!found)
@@ -633,16 +638,14 @@ HandleDropDb(DropdbStmt *stmt)
static void static void
HandleCreateRole(CreateRoleStmt *stmt) HandleCreateRole(CreateRoleStmt *stmt)
{ {
InitRoleTableIfNeeded();
bool found = false; bool found = false;
RoleEntry *entry = hash_search( RoleEntry *entry;
CurrentDdlTable->role_table, DefElem *dpass;
stmt->role,
HASH_ENTER,
&found);
DefElem *dpass = NULL;
ListCell *option; ListCell *option;
InitRoleTableIfNeeded();
dpass = NULL;
foreach(option, stmt->options) foreach(option, stmt->options)
{ {
DefElem *defel = lfirst(option); DefElem *defel = lfirst(option);
@@ -650,6 +653,11 @@ HandleCreateRole(CreateRoleStmt *stmt)
if (strcmp(defel->defname, "password") == 0) if (strcmp(defel->defname, "password") == 0)
dpass = defel; dpass = defel;
} }
entry = hash_search(CurrentDdlTable->role_table,
stmt->role,
HASH_ENTER,
&found);
if (!found) if (!found)
memset(entry->old_name, 0, sizeof(entry->old_name)); memset(entry->old_name, 0, sizeof(entry->old_name));
if (dpass && dpass->arg) if (dpass && dpass->arg)
@@ -662,14 +670,18 @@ HandleCreateRole(CreateRoleStmt *stmt)
static void static void
HandleAlterRole(AlterRoleStmt *stmt) HandleAlterRole(AlterRoleStmt *stmt)
{ {
InitRoleTableIfNeeded();
DefElem *dpass = NULL;
ListCell *option;
const char *role_name = stmt->role->rolename; const char *role_name = stmt->role->rolename;
DefElem *dpass;
ListCell *option;
bool found = false;
RoleEntry *entry;
InitRoleTableIfNeeded();
if (RoleIsNeonSuperuser(role_name) && !superuser()) if (RoleIsNeonSuperuser(role_name) && !superuser())
elog(ERROR, "can't ALTER neon_superuser"); elog(ERROR, "can't ALTER neon_superuser");
dpass = NULL;
foreach(option, stmt->options) foreach(option, stmt->options)
{ {
DefElem *defel = lfirst(option); DefElem *defel = lfirst(option);
@@ -680,13 +692,11 @@ HandleAlterRole(AlterRoleStmt *stmt)
/* We only care about updates to the password */ /* We only care about updates to the password */
if (!dpass) if (!dpass)
return; return;
bool found = false;
RoleEntry *entry = hash_search(
CurrentDdlTable->role_table,
role_name,
HASH_ENTER,
&found);
entry = hash_search(CurrentDdlTable->role_table,
role_name,
HASH_ENTER,
&found);
if (!found) if (!found)
memset(entry->old_name, 0, sizeof(entry->old_name)); memset(entry->old_name, 0, sizeof(entry->old_name));
if (dpass->arg) if (dpass->arg)
@@ -699,20 +709,22 @@ HandleAlterRole(AlterRoleStmt *stmt)
static void static void
HandleRoleRename(RenameStmt *stmt) HandleRoleRename(RenameStmt *stmt)
{ {
InitRoleTableIfNeeded();
Assert(stmt->renameType == OBJECT_ROLE);
bool found = false; bool found = false;
RoleEntry *entry = hash_search( RoleEntry *entry;
CurrentDdlTable->role_table, RoleEntry *entry_for_new_name;
stmt->subname,
HASH_FIND,
&found);
RoleEntry *entry_for_new_name = hash_search( Assert(stmt->renameType == OBJECT_ROLE);
CurrentDdlTable->role_table, InitRoleTableIfNeeded();
stmt->newname,
HASH_ENTER, entry = hash_search(CurrentDdlTable->role_table,
NULL); stmt->subname,
HASH_FIND,
&found);
entry_for_new_name = hash_search(CurrentDdlTable->role_table,
stmt->newname,
HASH_ENTER,
NULL);
entry_for_new_name->type = Op_Set; entry_for_new_name->type = Op_Set;
if (found) if (found)
@@ -738,9 +750,10 @@ HandleRoleRename(RenameStmt *stmt)
static void static void
HandleDropRole(DropRoleStmt *stmt) HandleDropRole(DropRoleStmt *stmt)
{ {
InitRoleTableIfNeeded();
ListCell *item; ListCell *item;
InitRoleTableIfNeeded();
foreach(item, stmt->roles) foreach(item, stmt->roles)
{ {
RoleSpec *spec = lfirst(item); RoleSpec *spec = lfirst(item);

View File

@@ -170,12 +170,14 @@ lfc_disable(char const *op)
if (lfc_desc > 0) if (lfc_desc > 0)
{ {
int rc;
/* /*
* If the reason of error is ENOSPC, then truncation of file may * If the reason of error is ENOSPC, then truncation of file may
* help to reclaim some space * help to reclaim some space
*/ */
pgstat_report_wait_start(WAIT_EVENT_NEON_LFC_TRUNCATE); pgstat_report_wait_start(WAIT_EVENT_NEON_LFC_TRUNCATE);
int rc = ftruncate(lfc_desc, 0); rc = ftruncate(lfc_desc, 0);
pgstat_report_wait_end(); pgstat_report_wait_end();
if (rc < 0) if (rc < 0)
@@ -616,7 +618,7 @@ lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
*/ */
if (entry->bitmap[chunk_offs >> 5] == 0) if (entry->bitmap[chunk_offs >> 5] == 0)
{ {
bool has_remaining_pages; bool has_remaining_pages = false;
for (int i = 0; i < CHUNK_BITMAP_SIZE; i++) for (int i = 0; i < CHUNK_BITMAP_SIZE; i++)
{ {
@@ -666,7 +668,6 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
BufferTag tag; BufferTag tag;
FileCacheEntry *entry; FileCacheEntry *entry;
ssize_t rc; ssize_t rc;
bool result = true;
uint32 hash; uint32 hash;
uint64 generation; uint64 generation;
uint32 entry_offset; uint32 entry_offset;
@@ -925,10 +926,10 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
/* We can reuse a hole that was left behind when the LFC was shrunk previously */ /* We can reuse a hole that was left behind when the LFC was shrunk previously */
FileCacheEntry *hole = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->holes)); FileCacheEntry *hole = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->holes));
uint32 offset = hole->offset; uint32 offset = hole->offset;
bool found; bool hole_found;
hash_search_with_hash_value(lfc_hash, &hole->key, hole->hash, HASH_REMOVE, &found); hash_search_with_hash_value(lfc_hash, &hole->key, hole->hash, HASH_REMOVE, &hole_found);
CriticalAssert(found); CriticalAssert(hole_found);
lfc_ctl->used += 1; lfc_ctl->used += 1;
entry->offset = offset; /* reuse the hole */ entry->offset = offset; /* reuse the hole */
@@ -1004,7 +1005,7 @@ neon_get_lfc_stats(PG_FUNCTION_ARGS)
Datum result; Datum result;
HeapTuple tuple; HeapTuple tuple;
char const *key; char const *key;
uint64 value; uint64 value = 0;
Datum values[NUM_NEON_GET_STATS_COLS]; Datum values[NUM_NEON_GET_STATS_COLS];
bool nulls[NUM_NEON_GET_STATS_COLS]; bool nulls[NUM_NEON_GET_STATS_COLS];

View File

@@ -116,8 +116,6 @@ addSHLL(HyperLogLogState *cState, uint32 hash)
{ {
uint8 count; uint8 count;
uint32 index; uint32 index;
size_t i;
size_t j;
TimestampTz now = GetCurrentTimestamp(); TimestampTz now = GetCurrentTimestamp();
/* Use the first "k" (registerWidth) bits as a zero based index */ /* Use the first "k" (registerWidth) bits as a zero based index */

View File

@@ -89,7 +89,6 @@ typedef struct
#if PG_VERSION_NUM >= 150000 #if PG_VERSION_NUM >= 150000
static shmem_request_hook_type prev_shmem_request_hook = NULL; static shmem_request_hook_type prev_shmem_request_hook = NULL;
static void walproposer_shmem_request(void);
#endif #endif
static shmem_startup_hook_type prev_shmem_startup_hook; static shmem_startup_hook_type prev_shmem_startup_hook;
static PagestoreShmemState *pagestore_shared; static PagestoreShmemState *pagestore_shared;
@@ -441,8 +440,8 @@ pageserver_connect(shardno_t shard_no, int elevel)
return false; return false;
} }
shard->state = PS_Connecting_Startup; shard->state = PS_Connecting_Startup;
/* fallthrough */
} }
/* FALLTHROUGH */
case PS_Connecting_Startup: case PS_Connecting_Startup:
{ {
char *pagestream_query; char *pagestream_query;
@@ -453,8 +452,6 @@ pageserver_connect(shardno_t shard_no, int elevel)
do do
{ {
WaitEvent event;
switch (poll_result) switch (poll_result)
{ {
default: /* unknown/unused states are handled as a failed connection */ default: /* unknown/unused states are handled as a failed connection */
@@ -585,8 +582,8 @@ pageserver_connect(shardno_t shard_no, int elevel)
} }
shard->state = PS_Connecting_PageStream; shard->state = PS_Connecting_PageStream;
/* fallthrough */
} }
/* FALLTHROUGH */
case PS_Connecting_PageStream: case PS_Connecting_PageStream:
{ {
neon_shard_log(shard_no, DEBUG5, "Connection state: Connecting_PageStream"); neon_shard_log(shard_no, DEBUG5, "Connection state: Connecting_PageStream");
@@ -631,8 +628,8 @@ pageserver_connect(shardno_t shard_no, int elevel)
} }
shard->state = PS_Connected; shard->state = PS_Connected;
/* fallthrough */
} }
/* FALLTHROUGH */
case PS_Connected: case PS_Connected:
/* /*
* We successfully connected. Future connections to this PageServer * We successfully connected. Future connections to this PageServer

View File

@@ -94,7 +94,6 @@ neon_perf_counters_to_metrics(neon_per_backend_counters *counters)
metric_t *metrics = palloc((NUM_METRICS + 1) * sizeof(metric_t)); metric_t *metrics = palloc((NUM_METRICS + 1) * sizeof(metric_t));
uint64 bucket_accum; uint64 bucket_accum;
int i = 0; int i = 0;
Datum getpage_wait_str;
metrics[i].name = "getpage_wait_seconds_count"; metrics[i].name = "getpage_wait_seconds_count";
metrics[i].is_bucket = false; metrics[i].is_bucket = false;
@@ -224,7 +223,6 @@ neon_get_perf_counters(PG_FUNCTION_ARGS)
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
Datum values[3]; Datum values[3];
bool nulls[3]; bool nulls[3];
Datum getpage_wait_str;
neon_per_backend_counters totals = {0}; neon_per_backend_counters totals = {0};
metric_t *metrics; metric_t *metrics;

View File

@@ -7,6 +7,7 @@
#define NEON_PGVERSIONCOMPAT_H #define NEON_PGVERSIONCOMPAT_H
#include "fmgr.h" #include "fmgr.h"
#include "storage/buf_internals.h"
#if PG_MAJORVERSION_NUM < 17 #if PG_MAJORVERSION_NUM < 17
#define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != InvalidBackendId) #define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != InvalidBackendId)
@@ -20,11 +21,24 @@
NInfoGetRelNumber(a) == NInfoGetRelNumber(b) \ NInfoGetRelNumber(a) == NInfoGetRelNumber(b) \
) )
/* buftag population & RelFileNode/RelFileLocator rework */ /* These macros were turned into static inline functions in v16 */
#if PG_MAJORVERSION_NUM < 16 #if PG_MAJORVERSION_NUM < 16
static inline bool
BufferTagsEqual(const BufferTag *tag1, const BufferTag *tag2)
{
return BUFFERTAGS_EQUAL(*tag1, *tag2);
}
#define InitBufferTag(tag, rfn, fn, bn) INIT_BUFFERTAG(*tag, *rfn, fn, bn) static inline void
InitBufferTag(BufferTag *tag, const RelFileNode *rnode,
ForkNumber forkNum, BlockNumber blockNum)
{
INIT_BUFFERTAG(*tag, *rnode, forkNum, blockNum);
}
#endif
/* RelFileNode -> RelFileLocator rework */
#if PG_MAJORVERSION_NUM < 16
#define USE_RELFILENODE #define USE_RELFILENODE
#define RELFILEINFO_HDR "storage/relfilenode.h" #define RELFILEINFO_HDR "storage/relfilenode.h"
@@ -73,8 +87,6 @@
#define USE_RELFILELOCATOR #define USE_RELFILELOCATOR
#define BUFFERTAGS_EQUAL(a, b) BufferTagsEqual(&(a), &(b))
#define RELFILEINFO_HDR "storage/relfilelocator.h" #define RELFILEINFO_HDR "storage/relfilelocator.h"
#define NRelFileInfo RelFileLocator #define NRelFileInfo RelFileLocator

View File

@@ -213,32 +213,6 @@ extern const f_smgr *smgr_neon(ProcNumber backend, NRelFileInfo rinfo);
extern void smgr_init_neon(void); extern void smgr_init_neon(void);
extern void readahead_buffer_resize(int newsize, void *extra); extern void readahead_buffer_resize(int newsize, void *extra);
/* Neon storage manager functionality */
extern void neon_init(void);
extern void neon_open(SMgrRelation reln);
extern void neon_close(SMgrRelation reln, ForkNumber forknum);
extern void neon_create(SMgrRelation reln, ForkNumber forknum, bool isRedo);
extern bool neon_exists(SMgrRelation reln, ForkNumber forknum);
extern void neon_unlink(NRelFileInfoBackend rnode, ForkNumber forknum, bool isRedo);
#if PG_MAJORVERSION_NUM < 16
extern void neon_extend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
#else
extern void neon_extend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, const void *buffer, bool skipFsync);
extern void neon_zeroextend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, int nbuffers, bool skipFsync);
#endif
#if PG_MAJORVERSION_NUM >=17
extern bool neon_prefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, int nblocks);
#else
extern bool neon_prefetch(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum);
#endif
/* /*
* LSN values associated with each request to the pageserver * LSN values associated with each request to the pageserver
*/ */
@@ -278,13 +252,7 @@ extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum,
extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum, BlockNumber blkno, extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum, BlockNumber blkno,
neon_request_lsns request_lsns, void *buffer); neon_request_lsns request_lsns, void *buffer);
#endif #endif
extern void neon_writeback(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, BlockNumber nblocks);
extern BlockNumber neon_nblocks(SMgrRelation reln, ForkNumber forknum);
extern int64 neon_dbsize(Oid dbNode); extern int64 neon_dbsize(Oid dbNode);
extern void neon_truncate(SMgrRelation reln, ForkNumber forknum,
BlockNumber nblocks);
extern void neon_immedsync(SMgrRelation reln, ForkNumber forknum);
/* utils for neon relsize cache */ /* utils for neon relsize cache */
extern void relsize_hash_init(void); extern void relsize_hash_init(void);

View File

@@ -118,6 +118,8 @@ static UnloggedBuildPhase unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;
static bool neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id); static bool neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id);
static bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id) = NULL; static bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id) = NULL;
static BlockNumber neon_nblocks(SMgrRelation reln, ForkNumber forknum);
/* /*
* Prefetch implementation: * Prefetch implementation:
* *
@@ -215,7 +217,7 @@ typedef struct PrfHashEntry
sizeof(BufferTag) \ sizeof(BufferTag) \
) )
#define SH_EQUAL(tb, a, b) (BUFFERTAGS_EQUAL((a)->buftag, (b)->buftag)) #define SH_EQUAL(tb, a, b) (BufferTagsEqual(&(a)->buftag, &(b)->buftag))
#define SH_SCOPE static inline #define SH_SCOPE static inline
#define SH_DEFINE #define SH_DEFINE
#define SH_DECLARE #define SH_DECLARE
@@ -736,7 +738,7 @@ static void
prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns) prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns)
{ {
bool found; bool found;
uint64 mySlotNo = slot->my_ring_index; uint64 mySlotNo PG_USED_FOR_ASSERTS_ONLY = slot->my_ring_index;
NeonGetPageRequest request = { NeonGetPageRequest request = {
.req.tag = T_NeonGetPageRequest, .req.tag = T_NeonGetPageRequest,
@@ -803,15 +805,19 @@ prefetch_register_bufferv(BufferTag tag, neon_request_lsns *frlsns,
bool is_prefetch) bool is_prefetch)
{ {
uint64 min_ring_index; uint64 min_ring_index;
PrefetchRequest req; PrefetchRequest hashkey;
#if USE_ASSERT_CHECKING #if USE_ASSERT_CHECKING
bool any_hits = false; bool any_hits = false;
#endif #endif
/* We will never read further ahead than our buffer can store. */ /* We will never read further ahead than our buffer can store. */
nblocks = Max(1, Min(nblocks, readahead_buffer_size)); nblocks = Max(1, Min(nblocks, readahead_buffer_size));
/* use an intermediate PrefetchRequest struct to ensure correct alignment */ /*
req.buftag = tag; * Use an intermediate PrefetchRequest struct as the hash key to ensure
* correct alignment and that the padding bytes are cleared.
*/
memset(&hashkey.buftag, 0, sizeof(BufferTag));
hashkey.buftag = tag;
Retry: Retry:
min_ring_index = UINT64_MAX; min_ring_index = UINT64_MAX;
@@ -837,8 +843,8 @@ Retry:
slot = NULL; slot = NULL;
entry = NULL; entry = NULL;
req.buftag.blockNum = tag.blockNum + i; hashkey.buftag.blockNum = tag.blockNum + i;
entry = prfh_lookup(MyPState->prf_hash, (PrefetchRequest *) &req); entry = prfh_lookup(MyPState->prf_hash, &hashkey);
if (entry != NULL) if (entry != NULL)
{ {
@@ -849,7 +855,7 @@ Retry:
Assert(slot->status != PRFS_UNUSED); Assert(slot->status != PRFS_UNUSED);
Assert(MyPState->ring_last <= ring_index && Assert(MyPState->ring_last <= ring_index &&
ring_index < MyPState->ring_unused); ring_index < MyPState->ring_unused);
Assert(BUFFERTAGS_EQUAL(slot->buftag, req.buftag)); Assert(BufferTagsEqual(&slot->buftag, &hashkey.buftag));
/* /*
* If the caller specified a request LSN to use, only accept * If the caller specified a request LSN to use, only accept
@@ -886,12 +892,19 @@ Retry:
{ {
min_ring_index = Min(min_ring_index, ring_index); min_ring_index = Min(min_ring_index, ring_index);
/* The buffered request is good enough, return that index */ /* The buffered request is good enough, return that index */
pgBufferUsage.prefetch.duplicates++; if (is_prefetch)
pgBufferUsage.prefetch.duplicates++;
else
pgBufferUsage.prefetch.hits++;
continue; continue;
} }
} }
} }
else if (!is_prefetch)
{
pgBufferUsage.prefetch.misses += 1;
MyNeonCounters->getpage_prefetch_misses_total++;
}
/* /*
* We can only leave the block above by finding that there's * We can only leave the block above by finding that there's
* no entry that can satisfy this request, either because there * no entry that can satisfy this request, either because there
@@ -974,7 +987,7 @@ Retry:
* We must update the slot data before insertion, because the hash * We must update the slot data before insertion, because the hash
* function reads the buffer tag from the slot. * function reads the buffer tag from the slot.
*/ */
slot->buftag = req.buftag; slot->buftag = hashkey.buftag;
slot->shard_no = get_shard_number(&tag); slot->shard_no = get_shard_number(&tag);
slot->my_ring_index = ring_index; slot->my_ring_index = ring_index;
@@ -1452,7 +1465,6 @@ log_newpages_copy(NRelFileInfo * rinfo, ForkNumber forkNum, BlockNumber blkno,
BlockNumber blknos[XLR_MAX_BLOCK_ID]; BlockNumber blknos[XLR_MAX_BLOCK_ID];
Page pageptrs[XLR_MAX_BLOCK_ID]; Page pageptrs[XLR_MAX_BLOCK_ID];
int nregistered = 0; int nregistered = 0;
XLogRecPtr result = 0;
for (int i = 0; i < nblocks; i++) for (int i = 0; i < nblocks; i++)
{ {
@@ -1765,7 +1777,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
/* /*
* neon_init() -- Initialize private state * neon_init() -- Initialize private state
*/ */
void static void
neon_init(void) neon_init(void)
{ {
Size prfs_size; Size prfs_size;
@@ -2155,7 +2167,7 @@ neon_prefetch_response_usable(neon_request_lsns *request_lsns,
/* /*
* neon_exists() -- Does the physical file exist? * neon_exists() -- Does the physical file exist?
*/ */
bool static bool
neon_exists(SMgrRelation reln, ForkNumber forkNum) neon_exists(SMgrRelation reln, ForkNumber forkNum)
{ {
bool exists; bool exists;
@@ -2261,7 +2273,7 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
* *
* If isRedo is true, it's okay for the relation to exist already. * If isRedo is true, it's okay for the relation to exist already.
*/ */
void static void
neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo) neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
{ {
switch (reln->smgr_relpersistence) switch (reln->smgr_relpersistence)
@@ -2337,7 +2349,7 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
* Note: any failure should be reported as WARNING not ERROR, because * Note: any failure should be reported as WARNING not ERROR, because
* we are usually not in a transaction anymore when this is called. * we are usually not in a transaction anymore when this is called.
*/ */
void static void
neon_unlink(NRelFileInfoBackend rinfo, ForkNumber forkNum, bool isRedo) neon_unlink(NRelFileInfoBackend rinfo, ForkNumber forkNum, bool isRedo)
{ {
/* /*
@@ -2361,7 +2373,7 @@ neon_unlink(NRelFileInfoBackend rinfo, ForkNumber forkNum, bool isRedo)
* EOF). Note that we assume writing a block beyond current EOF * EOF). Note that we assume writing a block beyond current EOF
* causes intervening file space to become filled with zeroes. * causes intervening file space to become filled with zeroes.
*/ */
void static void
#if PG_MAJORVERSION_NUM < 16 #if PG_MAJORVERSION_NUM < 16
neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
char *buffer, bool skipFsync) char *buffer, bool skipFsync)
@@ -2453,7 +2465,7 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
} }
#if PG_MAJORVERSION_NUM >= 16 #if PG_MAJORVERSION_NUM >= 16
void static void
neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum, neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
int nblocks, bool skipFsync) int nblocks, bool skipFsync)
{ {
@@ -2549,7 +2561,7 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
/* /*
* neon_open() -- Initialize newly-opened relation. * neon_open() -- Initialize newly-opened relation.
*/ */
void static void
neon_open(SMgrRelation reln) neon_open(SMgrRelation reln)
{ {
/* /*
@@ -2567,7 +2579,7 @@ neon_open(SMgrRelation reln)
/* /*
* neon_close() -- Close the specified relation, if it isn't closed already. * neon_close() -- Close the specified relation, if it isn't closed already.
*/ */
void static void
neon_close(SMgrRelation reln, ForkNumber forknum) neon_close(SMgrRelation reln, ForkNumber forknum)
{ {
/* /*
@@ -2582,13 +2594,12 @@ neon_close(SMgrRelation reln, ForkNumber forknum)
/* /*
* neon_prefetch() -- Initiate asynchronous read of the specified block of a relation * neon_prefetch() -- Initiate asynchronous read of the specified block of a relation
*/ */
bool static bool
neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
int nblocks) int nblocks)
{ {
uint64 ring_index PG_USED_FOR_ASSERTS_ONLY; uint64 ring_index PG_USED_FOR_ASSERTS_ONLY;
BufferTag tag; BufferTag tag;
bool io_initiated = false;
switch (reln->smgr_relpersistence) switch (reln->smgr_relpersistence)
{ {
@@ -2612,7 +2623,6 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
while (nblocks > 0) while (nblocks > 0)
{ {
int iterblocks = Min(nblocks, PG_IOV_MAX); int iterblocks = Min(nblocks, PG_IOV_MAX);
int seqlen = 0;
bits8 lfc_present[PG_IOV_MAX / 8]; bits8 lfc_present[PG_IOV_MAX / 8];
memset(lfc_present, 0, sizeof(lfc_present)); memset(lfc_present, 0, sizeof(lfc_present));
@@ -2624,8 +2634,6 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
continue; continue;
} }
io_initiated = true;
tag.blockNum = blocknum; tag.blockNum = blocknum;
for (int i = 0; i < PG_IOV_MAX / 8; i++) for (int i = 0; i < PG_IOV_MAX / 8; i++)
@@ -2648,7 +2656,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
/* /*
* neon_prefetch() -- Initiate asynchronous read of the specified block of a relation * neon_prefetch() -- Initiate asynchronous read of the specified block of a relation
*/ */
bool static bool
neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
{ {
uint64 ring_index PG_USED_FOR_ASSERTS_ONLY; uint64 ring_index PG_USED_FOR_ASSERTS_ONLY;
@@ -2692,7 +2700,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
* This accepts a range of blocks because flushing several pages at once is * This accepts a range of blocks because flushing several pages at once is
* considerably more efficient than doing so individually. * considerably more efficient than doing so individually.
*/ */
void static void
neon_writeback(SMgrRelation reln, ForkNumber forknum, neon_writeback(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, BlockNumber nblocks) BlockNumber blocknum, BlockNumber nblocks)
{ {
@@ -2742,14 +2750,19 @@ neon_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber base_block
uint64 ring_index; uint64 ring_index;
PrfHashEntry *entry; PrfHashEntry *entry;
PrefetchRequest *slot; PrefetchRequest *slot;
BufferTag buftag = {0}; PrefetchRequest hashkey;
Assert(PointerIsValid(request_lsns)); Assert(PointerIsValid(request_lsns));
Assert(nblocks >= 1); Assert(nblocks >= 1);
CopyNRelFileInfoToBufTag(buftag, rinfo); /*
buftag.forkNum = forkNum; * Use an intermediate PrefetchRequest struct as the hash key to ensure
buftag.blockNum = base_blockno; * correct alignment and that the padding bytes are cleared.
*/
memset(&hashkey.buftag, 0, sizeof(BufferTag));
CopyNRelFileInfoToBufTag(hashkey.buftag, rinfo);
hashkey.buftag.forkNum = forkNum;
hashkey.buftag.blockNum = base_blockno;
/* /*
* The redo process does not lock pages that it needs to replay but are * The redo process does not lock pages that it needs to replay but are
@@ -2767,7 +2780,7 @@ neon_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber base_block
* weren't for the behaviour of the LwLsn cache that uses the highest * weren't for the behaviour of the LwLsn cache that uses the highest
* value of the LwLsn cache when the entry is not found. * value of the LwLsn cache when the entry is not found.
*/ */
prefetch_register_bufferv(buftag, request_lsns, nblocks, mask, false); prefetch_register_bufferv(hashkey.buftag, request_lsns, nblocks, mask, false);
for (int i = 0; i < nblocks; i++) for (int i = 0; i < nblocks; i++)
{ {
@@ -2788,8 +2801,8 @@ neon_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber base_block
* Try to find prefetched page in the list of received pages. * Try to find prefetched page in the list of received pages.
*/ */
Retry: Retry:
buftag.blockNum = blockno; hashkey.buftag.blockNum = blockno;
entry = prfh_lookup(MyPState->prf_hash, (PrefetchRequest *) &buftag); entry = prfh_lookup(MyPState->prf_hash, &hashkey);
if (entry != NULL) if (entry != NULL)
{ {
@@ -2797,7 +2810,6 @@ Retry:
if (neon_prefetch_response_usable(reqlsns, slot)) if (neon_prefetch_response_usable(reqlsns, slot))
{ {
ring_index = slot->my_ring_index; ring_index = slot->my_ring_index;
pgBufferUsage.prefetch.hits += 1;
} }
else else
{ {
@@ -2827,10 +2839,7 @@ Retry:
{ {
if (entry == NULL) if (entry == NULL)
{ {
pgBufferUsage.prefetch.misses += 1; ring_index = prefetch_register_bufferv(hashkey.buftag, reqlsns, 1, NULL, false);
MyNeonCounters->getpage_prefetch_misses_total++;
ring_index = prefetch_register_bufferv(buftag, reqlsns, 1, NULL, false);
Assert(ring_index != UINT64_MAX); Assert(ring_index != UINT64_MAX);
slot = GetPrfSlot(ring_index); slot = GetPrfSlot(ring_index);
} }
@@ -2855,8 +2864,8 @@ Retry:
} while (!prefetch_wait_for(ring_index)); } while (!prefetch_wait_for(ring_index));
Assert(slot->status == PRFS_RECEIVED); Assert(slot->status == PRFS_RECEIVED);
Assert(memcmp(&buftag, &slot->buftag, sizeof(BufferTag)) == 0); Assert(memcmp(&hashkey.buftag, &slot->buftag, sizeof(BufferTag)) == 0);
Assert(buftag.blockNum == base_blockno + i); Assert(hashkey.buftag.blockNum == base_blockno + i);
resp = slot->response; resp = slot->response;
@@ -2912,10 +2921,10 @@ neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
* neon_read() -- Read the specified block from a relation. * neon_read() -- Read the specified block from a relation.
*/ */
#if PG_MAJORVERSION_NUM < 16 #if PG_MAJORVERSION_NUM < 16
void static void
neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, char *buffer) neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, char *buffer)
#else #else
void static void
neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer) neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer)
#endif #endif
{ {
@@ -3024,7 +3033,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
#endif /* PG_MAJORVERSION_NUM <= 16 */ #endif /* PG_MAJORVERSION_NUM <= 16 */
#if PG_MAJORVERSION_NUM >= 17 #if PG_MAJORVERSION_NUM >= 17
void static void
neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
void **buffers, BlockNumber nblocks) void **buffers, BlockNumber nblocks)
{ {
@@ -3059,6 +3068,9 @@ neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
lfc_result = lfc_readv_select(InfoFromSMgrRel(reln), forknum, blocknum, buffers, lfc_result = lfc_readv_select(InfoFromSMgrRel(reln), forknum, blocknum, buffers,
nblocks, read); nblocks, read);
if (lfc_result > 0)
MyNeonCounters->file_cache_hits_total += lfc_result;
/* Read all blocks from LFC, so we're done */ /* Read all blocks from LFC, so we're done */
if (lfc_result == nblocks) if (lfc_result == nblocks)
return; return;
@@ -3185,6 +3197,7 @@ hexdump_page(char *page)
} }
#endif #endif
#if PG_MAJORVERSION_NUM < 17
/* /*
* neon_write() -- Write the supplied block at the appropriate location. * neon_write() -- Write the supplied block at the appropriate location.
* *
@@ -3192,7 +3205,7 @@ hexdump_page(char *page)
* relation (ie, those before the current EOF). To extend a relation, * relation (ie, those before the current EOF). To extend a relation,
* use mdextend(). * use mdextend().
*/ */
void static void
#if PG_MAJORVERSION_NUM < 16 #if PG_MAJORVERSION_NUM < 16
neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync) neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
#else #else
@@ -3258,11 +3271,12 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const vo
#endif #endif
#endif #endif
} }
#endif
#if PG_MAJORVERSION_NUM >= 17 #if PG_MAJORVERSION_NUM >= 17
void static void
neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
const void **buffers, BlockNumber nblocks, bool skipFsync) const void **buffers, BlockNumber nblocks, bool skipFsync)
{ {
@@ -3312,7 +3326,7 @@ neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
/* /*
* neon_nblocks() -- Get the number of blocks stored in a relation. * neon_nblocks() -- Get the number of blocks stored in a relation.
*/ */
BlockNumber static BlockNumber
neon_nblocks(SMgrRelation reln, ForkNumber forknum) neon_nblocks(SMgrRelation reln, ForkNumber forknum)
{ {
NeonResponse *resp; NeonResponse *resp;
@@ -3449,7 +3463,7 @@ neon_dbsize(Oid dbNode)
/* /*
* neon_truncate() -- Truncate relation to specified number of blocks. * neon_truncate() -- Truncate relation to specified number of blocks.
*/ */
void static void
neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks) neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
{ {
XLogRecPtr lsn; XLogRecPtr lsn;
@@ -3518,7 +3532,7 @@ neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
* crash before the next checkpoint syncs the newly-inactive segment, that * crash before the next checkpoint syncs the newly-inactive segment, that
* segment may survive recovery, reintroducing unwanted data into the table. * segment may survive recovery, reintroducing unwanted data into the table.
*/ */
void static void
neon_immedsync(SMgrRelation reln, ForkNumber forknum) neon_immedsync(SMgrRelation reln, ForkNumber forknum)
{ {
switch (reln->smgr_relpersistence) switch (reln->smgr_relpersistence)
@@ -3548,8 +3562,8 @@ neon_immedsync(SMgrRelation reln, ForkNumber forknum)
} }
#if PG_MAJORVERSION_NUM >= 17 #if PG_MAJORVERSION_NUM >= 17
void static void
neon_regisersync(SMgrRelation reln, ForkNumber forknum) neon_registersync(SMgrRelation reln, ForkNumber forknum)
{ {
switch (reln->smgr_relpersistence) switch (reln->smgr_relpersistence)
{ {
@@ -3733,6 +3747,8 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf
SlruKind kind; SlruKind kind;
int n_blocks; int n_blocks;
shardno_t shard_no = 0; /* All SLRUs are at shard 0 */ shardno_t shard_no = 0; /* All SLRUs are at shard 0 */
NeonResponse *resp;
NeonGetSlruSegmentRequest request;
/* /*
* Compute a request LSN to use, similar to neon_get_request_lsns() but the * Compute a request LSN to use, similar to neon_get_request_lsns() but the
@@ -3771,8 +3787,7 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf
else else
return -1; return -1;
NeonResponse *resp; request = (NeonGetSlruSegmentRequest) {
NeonGetSlruSegmentRequest request = {
.req.tag = T_NeonGetSlruSegmentRequest, .req.tag = T_NeonGetSlruSegmentRequest,
.req.lsn = request_lsn, .req.lsn = request_lsn,
.req.not_modified_since = not_modified_since, .req.not_modified_since = not_modified_since,
@@ -3879,7 +3894,7 @@ static const struct f_smgr neon_smgr =
.smgr_truncate = neon_truncate, .smgr_truncate = neon_truncate,
.smgr_immedsync = neon_immedsync, .smgr_immedsync = neon_immedsync,
#if PG_MAJORVERSION_NUM >= 17 #if PG_MAJORVERSION_NUM >= 17
.smgr_registersync = neon_regisersync, .smgr_registersync = neon_registersync,
#endif #endif
.smgr_start_unlogged_build = neon_start_unlogged_build, .smgr_start_unlogged_build = neon_start_unlogged_build,
.smgr_finish_unlogged_build_phase_1 = neon_finish_unlogged_build_phase_1, .smgr_finish_unlogged_build_phase_1 = neon_finish_unlogged_build_phase_1,

View File

@@ -252,8 +252,6 @@ WalProposerPoll(WalProposer *wp)
/* timeout expired: poll state */ /* timeout expired: poll state */
if (rc == 0 || TimeToReconnect(wp, now) <= 0) if (rc == 0 || TimeToReconnect(wp, now) <= 0)
{ {
TimestampTz now;
/* /*
* If no WAL was generated during timeout (and we have already * If no WAL was generated during timeout (and we have already
* collected the quorum), then send empty keepalive message * collected the quorum), then send empty keepalive message
@@ -269,8 +267,7 @@ WalProposerPoll(WalProposer *wp)
now = wp->api.get_current_timestamp(wp); now = wp->api.get_current_timestamp(wp);
for (int i = 0; i < wp->n_safekeepers; i++) for (int i = 0; i < wp->n_safekeepers; i++)
{ {
Safekeeper *sk = &wp->safekeeper[i]; sk = &wp->safekeeper[i];
if (TimestampDifferenceExceeds(sk->latestMsgReceivedAt, now, if (TimestampDifferenceExceeds(sk->latestMsgReceivedAt, now,
wp->config->safekeeper_connection_timeout)) wp->config->safekeeper_connection_timeout))
{ {
@@ -1080,7 +1077,7 @@ SendProposerElected(Safekeeper *sk)
ProposerElected msg; ProposerElected msg;
TermHistory *th; TermHistory *th;
term_t lastCommonTerm; term_t lastCommonTerm;
int i; int idx;
/* Now that we are ready to send it's a good moment to create WAL reader */ /* Now that we are ready to send it's a good moment to create WAL reader */
wp->api.wal_reader_allocate(sk); wp->api.wal_reader_allocate(sk);
@@ -1099,15 +1096,15 @@ SendProposerElected(Safekeeper *sk)
/* We must start somewhere. */ /* We must start somewhere. */
Assert(wp->propTermHistory.n_entries >= 1); Assert(wp->propTermHistory.n_entries >= 1);
for (i = 0; i < Min(wp->propTermHistory.n_entries, th->n_entries); i++) for (idx = 0; idx < Min(wp->propTermHistory.n_entries, th->n_entries); idx++)
{ {
if (wp->propTermHistory.entries[i].term != th->entries[i].term) if (wp->propTermHistory.entries[idx].term != th->entries[idx].term)
break; break;
/* term must begin everywhere at the same point */ /* term must begin everywhere at the same point */
Assert(wp->propTermHistory.entries[i].lsn == th->entries[i].lsn); Assert(wp->propTermHistory.entries[idx].lsn == th->entries[idx].lsn);
} }
i--; /* step back to the last common term */ idx--; /* step back to the last common term */
if (i < 0) if (idx < 0)
{ {
/* safekeeper is empty or no common point, start from the beginning */ /* safekeeper is empty or no common point, start from the beginning */
sk->startStreamingAt = wp->propTermHistory.entries[0].lsn; sk->startStreamingAt = wp->propTermHistory.entries[0].lsn;
@@ -1128,14 +1125,14 @@ SendProposerElected(Safekeeper *sk)
* proposer, LSN it is currently writing, but then we just pick * proposer, LSN it is currently writing, but then we just pick
* safekeeper pos as it obviously can't be higher. * safekeeper pos as it obviously can't be higher.
*/ */
if (wp->propTermHistory.entries[i].term == wp->propTerm) if (wp->propTermHistory.entries[idx].term == wp->propTerm)
{ {
sk->startStreamingAt = sk->voteResponse.flushLsn; sk->startStreamingAt = sk->voteResponse.flushLsn;
} }
else else
{ {
XLogRecPtr propEndLsn = wp->propTermHistory.entries[i + 1].lsn; XLogRecPtr propEndLsn = wp->propTermHistory.entries[idx + 1].lsn;
XLogRecPtr skEndLsn = (i + 1 < th->n_entries ? th->entries[i + 1].lsn : sk->voteResponse.flushLsn); XLogRecPtr skEndLsn = (idx + 1 < th->n_entries ? th->entries[idx + 1].lsn : sk->voteResponse.flushLsn);
sk->startStreamingAt = Min(propEndLsn, skEndLsn); sk->startStreamingAt = Min(propEndLsn, skEndLsn);
} }
@@ -1149,7 +1146,7 @@ SendProposerElected(Safekeeper *sk)
msg.termHistory = &wp->propTermHistory; msg.termHistory = &wp->propTermHistory;
msg.timelineStartLsn = wp->timelineStartLsn; msg.timelineStartLsn = wp->timelineStartLsn;
lastCommonTerm = i >= 0 ? wp->propTermHistory.entries[i].term : 0; lastCommonTerm = idx >= 0 ? wp->propTermHistory.entries[idx].term : 0;
wp_log(LOG, wp_log(LOG,
"sending elected msg to node " UINT64_FORMAT " term=" UINT64_FORMAT ", startStreamingAt=%X/%X (lastCommonTerm=" UINT64_FORMAT "), termHistory.n_entries=%u to %s:%s, timelineStartLsn=%X/%X", "sending elected msg to node " UINT64_FORMAT " term=" UINT64_FORMAT ", startStreamingAt=%X/%X (lastCommonTerm=" UINT64_FORMAT "), termHistory.n_entries=%u to %s:%s, timelineStartLsn=%X/%X",
sk->greetResponse.nodeId, msg.term, LSN_FORMAT_ARGS(msg.startStreamingAt), lastCommonTerm, msg.termHistory->n_entries, sk->host, sk->port, LSN_FORMAT_ARGS(msg.timelineStartLsn)); sk->greetResponse.nodeId, msg.term, LSN_FORMAT_ARGS(msg.startStreamingAt), lastCommonTerm, msg.termHistory->n_entries, sk->host, sk->port, LSN_FORMAT_ARGS(msg.timelineStartLsn));
@@ -1641,7 +1638,7 @@ UpdateDonorShmem(WalProposer *wp)
* Process AppendResponse message from safekeeper. * Process AppendResponse message from safekeeper.
*/ */
static void static void
HandleSafekeeperResponse(WalProposer *wp, Safekeeper *sk) HandleSafekeeperResponse(WalProposer *wp, Safekeeper *fromsk)
{ {
XLogRecPtr candidateTruncateLsn; XLogRecPtr candidateTruncateLsn;
XLogRecPtr newCommitLsn; XLogRecPtr newCommitLsn;
@@ -1660,7 +1657,7 @@ HandleSafekeeperResponse(WalProposer *wp, Safekeeper *sk)
* and WAL is committed by the quorum. BroadcastAppendRequest() should be * and WAL is committed by the quorum. BroadcastAppendRequest() should be
* called to notify safekeepers about the new commitLsn. * called to notify safekeepers about the new commitLsn.
*/ */
wp->api.process_safekeeper_feedback(wp, sk); wp->api.process_safekeeper_feedback(wp, fromsk);
/* /*
* Try to advance truncateLsn -- the last record flushed to all * Try to advance truncateLsn -- the last record flushed to all

View File

@@ -725,7 +725,7 @@ extern void WalProposerBroadcast(WalProposer *wp, XLogRecPtr startpos, XLogRecPt
extern void WalProposerPoll(WalProposer *wp); extern void WalProposerPoll(WalProposer *wp);
extern void WalProposerFree(WalProposer *wp); extern void WalProposerFree(WalProposer *wp);
extern WalproposerShmemState *GetWalpropShmemState(); extern WalproposerShmemState *GetWalpropShmemState(void);
/* /*
* WaitEventSet API doesn't allow to remove socket, so walproposer_pg uses it to * WaitEventSet API doesn't allow to remove socket, so walproposer_pg uses it to
@@ -745,7 +745,7 @@ extern TimeLineID walprop_pg_get_timeline_id(void);
* catch logging. * catch logging.
*/ */
#ifdef WALPROPOSER_LIB #ifdef WALPROPOSER_LIB
extern void WalProposerLibLog(WalProposer *wp, int elevel, char *fmt,...); extern void WalProposerLibLog(WalProposer *wp, int elevel, char *fmt,...) pg_attribute_printf(3, 4);
#define wp_log(elevel, fmt, ...) WalProposerLibLog(wp, elevel, fmt, ## __VA_ARGS__) #define wp_log(elevel, fmt, ...) WalProposerLibLog(wp, elevel, fmt, ## __VA_ARGS__)
#else #else
#define wp_log(elevel, fmt, ...) elog(elevel, WP_LOG_PREFIX fmt, ## __VA_ARGS__) #define wp_log(elevel, fmt, ...) elog(elevel, WP_LOG_PREFIX fmt, ## __VA_ARGS__)

View File

@@ -286,6 +286,9 @@ safekeepers_cmp(char *old, char *new)
static void static void
assign_neon_safekeepers(const char *newval, void *extra) assign_neon_safekeepers(const char *newval, void *extra)
{ {
char *newval_copy;
char *oldval;
if (!am_walproposer) if (!am_walproposer)
return; return;
@@ -295,8 +298,8 @@ assign_neon_safekeepers(const char *newval, void *extra)
} }
/* Copy values because we will modify them in split_safekeepers_list() */ /* Copy values because we will modify them in split_safekeepers_list() */
char *newval_copy = pstrdup(newval); newval_copy = pstrdup(newval);
char *oldval = pstrdup(wal_acceptors_list); oldval = pstrdup(wal_acceptors_list);
/* /*
* TODO: restarting through FATAL is stupid and introduces 1s delay before * TODO: restarting through FATAL is stupid and introduces 1s delay before
@@ -538,7 +541,7 @@ nwp_shmem_startup_hook(void)
} }
WalproposerShmemState * WalproposerShmemState *
GetWalpropShmemState() GetWalpropShmemState(void)
{ {
Assert(walprop_shared != NULL); Assert(walprop_shared != NULL);
return walprop_shared; return walprop_shared;

View File

@@ -191,13 +191,14 @@ NeonOnDemandXLogReaderRoutines(XLogReaderRoutine *xlr)
if (!wal_reader) if (!wal_reader)
{ {
XLogRecPtr epochStartLsn = pg_atomic_read_u64(&GetWalpropShmemState()->propEpochStartLsn); XLogRecPtr basebackupLsn = GetRedoStartLsn();
if (epochStartLsn == 0) /* should never happen */
if (basebackupLsn == 0)
{ {
elog(ERROR, "Unable to start walsender when propEpochStartLsn is 0!"); elog(ERROR, "unable to start walsender when basebackupLsn is 0");
} }
wal_reader = NeonWALReaderAllocate(wal_segment_size, epochStartLsn, "[walsender] "); wal_reader = NeonWALReaderAllocate(wal_segment_size, basebackupLsn, "[walsender] ");
} }
xlr->page_read = NeonWALPageRead; xlr->page_read = NeonWALPageRead;
xlr->segment_open = NeonWALReadSegmentOpen; xlr->segment_open = NeonWALReadSegmentOpen;

View File

@@ -44,27 +44,6 @@ infobits_desc(StringInfo buf, uint8 infobits, const char *keyname)
appendStringInfoString(buf, "]"); appendStringInfoString(buf, "]");
} }
static void
truncate_flags_desc(StringInfo buf, uint8 flags)
{
appendStringInfoString(buf, "flags: [");
if (flags & XLH_TRUNCATE_CASCADE)
appendStringInfoString(buf, "CASCADE, ");
if (flags & XLH_TRUNCATE_RESTART_SEQS)
appendStringInfoString(buf, "RESTART_SEQS, ");
if (buf->data[buf->len - 1] == ' ')
{
/* Truncate-away final unneeded ", " */
Assert(buf->data[buf->len - 2] == ',');
buf->len -= 2;
buf->data[buf->len] = '\0';
}
appendStringInfoString(buf, "]");
}
void void
neon_rm_desc(StringInfo buf, XLogReaderState *record) neon_rm_desc(StringInfo buf, XLogReaderState *record)
{ {

View File

@@ -136,7 +136,7 @@ static bool redo_block_filter(XLogReaderState *record, uint8 block_id);
static void GetPage(StringInfo input_message); static void GetPage(StringInfo input_message);
static void Ping(StringInfo input_message); static void Ping(StringInfo input_message);
static ssize_t buffered_read(void *buf, size_t count); static ssize_t buffered_read(void *buf, size_t count);
static void CreateFakeSharedMemoryAndSemaphores(); static void CreateFakeSharedMemoryAndSemaphores(void);
static BufferTag target_redo_tag; static BufferTag target_redo_tag;
@@ -170,6 +170,40 @@ close_range_syscall(unsigned int start_fd, unsigned int count, unsigned int flag
return syscall(__NR_close_range, start_fd, count, flags); return syscall(__NR_close_range, start_fd, count, flags);
} }
static PgSeccompRule allowed_syscalls[] =
{
/* Hard requirements */
PG_SCMP_ALLOW(exit_group),
PG_SCMP_ALLOW(pselect6),
PG_SCMP_ALLOW(read),
PG_SCMP_ALLOW(select),
PG_SCMP_ALLOW(write),
/* Memory allocation */
PG_SCMP_ALLOW(brk),
#ifndef MALLOC_NO_MMAP
/* TODO: musl doesn't have mallopt */
PG_SCMP_ALLOW(mmap),
PG_SCMP_ALLOW(munmap),
#endif
/*
* getpid() is called on assertion failure, in ExceptionalCondition.
* It's not really needed, but seems pointless to hide it either. The
* system call unlikely to expose a kernel vulnerability, and the PID
* is stored in MyProcPid anyway.
*/
PG_SCMP_ALLOW(getpid),
/* Enable those for a proper shutdown. */
#if 0
PG_SCMP_ALLOW(munmap),
PG_SCMP_ALLOW(shmctl),
PG_SCMP_ALLOW(shmdt),
PG_SCMP_ALLOW(unlink), /* shm_unlink */
#endif
};
static void static void
enter_seccomp_mode(void) enter_seccomp_mode(void)
{ {
@@ -183,44 +217,12 @@ enter_seccomp_mode(void)
(errcode(ERRCODE_SYSTEM_ERROR), (errcode(ERRCODE_SYSTEM_ERROR),
errmsg("seccomp: could not close files >= fd 3"))); errmsg("seccomp: could not close files >= fd 3")));
PgSeccompRule syscalls[] =
{
/* Hard requirements */
PG_SCMP_ALLOW(exit_group),
PG_SCMP_ALLOW(pselect6),
PG_SCMP_ALLOW(read),
PG_SCMP_ALLOW(select),
PG_SCMP_ALLOW(write),
/* Memory allocation */
PG_SCMP_ALLOW(brk),
#ifndef MALLOC_NO_MMAP
/* TODO: musl doesn't have mallopt */
PG_SCMP_ALLOW(mmap),
PG_SCMP_ALLOW(munmap),
#endif
/*
* getpid() is called on assertion failure, in ExceptionalCondition.
* It's not really needed, but seems pointless to hide it either. The
* system call unlikely to expose a kernel vulnerability, and the PID
* is stored in MyProcPid anyway.
*/
PG_SCMP_ALLOW(getpid),
/* Enable those for a proper shutdown.
PG_SCMP_ALLOW(munmap),
PG_SCMP_ALLOW(shmctl),
PG_SCMP_ALLOW(shmdt),
PG_SCMP_ALLOW(unlink), // shm_unlink
*/
};
#ifdef MALLOC_NO_MMAP #ifdef MALLOC_NO_MMAP
/* Ask glibc not to use mmap() */ /* Ask glibc not to use mmap() */
mallopt(M_MMAP_MAX, 0); mallopt(M_MMAP_MAX, 0);
#endif #endif
seccomp_load_rules(syscalls, lengthof(syscalls)); seccomp_load_rules(allowed_syscalls, lengthof(allowed_syscalls));
} }
#endif /* HAVE_LIBSECCOMP */ #endif /* HAVE_LIBSECCOMP */
@@ -449,7 +451,7 @@ WalRedoMain(int argc, char *argv[])
* half-initialized postgres. * half-initialized postgres.
*/ */
static void static void
CreateFakeSharedMemoryAndSemaphores() CreateFakeSharedMemoryAndSemaphores(void)
{ {
PGShmemHeader *shim = NULL; PGShmemHeader *shim = NULL;
PGShmemHeader *hdr; PGShmemHeader *hdr;
@@ -992,7 +994,7 @@ redo_block_filter(XLogReaderState *record, uint8 block_id)
* If this block isn't one we are currently restoring, then return 'true' * If this block isn't one we are currently restoring, then return 'true'
* so that this gets ignored * so that this gets ignored
*/ */
return !BUFFERTAGS_EQUAL(target_tag, target_redo_tag); return !BufferTagsEqual(&target_tag, &target_redo_tag);
} }
/* /*

View File

@@ -1,11 +1,12 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from __future__ import annotations
import argparse import argparse
import enum import enum
import os import os
import subprocess import subprocess
import sys import sys
from typing import List
@enum.unique @enum.unique
@@ -55,12 +56,12 @@ def mypy() -> str:
return "poetry run mypy" return "poetry run mypy"
def get_commit_files() -> List[str]: def get_commit_files() -> list[str]:
files = subprocess.check_output("git diff --cached --name-only --diff-filter=ACM".split()) files = subprocess.check_output("git diff --cached --name-only --diff-filter=ACM".split())
return files.decode().splitlines() return files.decode().splitlines()
def check(name: str, suffix: str, cmd: str, changed_files: List[str], no_color: bool = False): def check(name: str, suffix: str, cmd: str, changed_files: list[str], no_color: bool = False):
print(f"Checking: {name} ", end="") print(f"Checking: {name} ", end="")
applicable_files = list(filter(lambda fname: fname.strip().endswith(suffix), changed_files)) applicable_files = list(filter(lambda fname: fname.strip().endswith(suffix), changed_files))
if not applicable_files: if not applicable_files:

View File

@@ -39,7 +39,7 @@ http.workspace = true
humantime.workspace = true humantime.workspace = true
humantime-serde.workspace = true humantime-serde.workspace = true
hyper0.workspace = true hyper0.workspace = true
hyper1 = { package = "hyper", version = "1.2", features = ["server"] } hyper = { workspace = true, features = ["server", "http1", "http2"] }
hyper-util = { version = "0.1", features = ["server", "http1", "http2", "tokio"] } hyper-util = { version = "0.1", features = ["server", "http1", "http2", "tokio"] }
http-body-util = { version = "0.1" } http-body-util = { version = "0.1" }
indexmap.workspace = true indexmap.workspace = true

View File

@@ -3,8 +3,8 @@ use crate::{
auth::{self, backend::ComputeCredentialKeys, AuthFlow}, auth::{self, backend::ComputeCredentialKeys, AuthFlow},
compute, compute,
config::AuthenticationConfig, config::AuthenticationConfig,
console::AuthSecret,
context::RequestMonitoring, context::RequestMonitoring,
control_plane::AuthSecret,
sasl, sasl,
stream::{PqStream, Stream}, stream::{PqStream, Stream},
}; };

View File

@@ -1,8 +1,8 @@
use crate::{ use crate::{
auth, compute, auth, compute,
config::AuthenticationConfig, config::AuthenticationConfig,
console::{self, provider::NodeInfo},
context::RequestMonitoring, context::RequestMonitoring,
control_plane::{self, provider::NodeInfo},
error::{ReportableError, UserFacingError}, error::{ReportableError, UserFacingError},
stream::PqStream, stream::PqStream,
waiters, waiters,
@@ -70,7 +70,7 @@ pub(super) async fn authenticate(
let (psql_session_id, waiter) = loop { let (psql_session_id, waiter) = loop {
let psql_session_id = new_psql_session_id(); let psql_session_id = new_psql_session_id();
match console::mgmt::get_waiter(&psql_session_id) { match control_plane::mgmt::get_waiter(&psql_session_id) {
Ok(waiter) => break (psql_session_id, waiter), Ok(waiter) => break (psql_session_id, waiter),
Err(_e) => continue, Err(_e) => continue,
} }

View File

@@ -2,8 +2,8 @@ use super::{ComputeCredentials, ComputeUserInfo, ComputeUserInfoNoEndpoint};
use crate::{ use crate::{
auth::{self, AuthFlow}, auth::{self, AuthFlow},
config::AuthenticationConfig, config::AuthenticationConfig,
console::AuthSecret,
context::RequestMonitoring, context::RequestMonitoring,
control_plane::AuthSecret,
intern::EndpointIdInt, intern::EndpointIdInt,
sasl, sasl,
stream::{self, Stream}, stream::{self, Stream},

View File

@@ -571,7 +571,7 @@ mod tests {
use bytes::Bytes; use bytes::Bytes;
use http::Response; use http::Response;
use http_body_util::Full; use http_body_util::Full;
use hyper1::service::service_fn; use hyper::service::service_fn;
use hyper_util::rt::TokioIo; use hyper_util::rt::TokioIo;
use rand::rngs::OsRng; use rand::rngs::OsRng;
use rsa::pkcs8::DecodePrivateKey; use rsa::pkcs8::DecodePrivateKey;
@@ -736,7 +736,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL
}); });
let listener = TcpListener::bind("0.0.0.0:0").await.unwrap(); let listener = TcpListener::bind("0.0.0.0:0").await.unwrap();
let server = hyper1::server::conn::http1::Builder::new(); let server = hyper::server::conn::http1::Builder::new();
let addr = listener.local_addr().unwrap(); let addr = listener.local_addr().unwrap();
tokio::spawn(async move { tokio::spawn(async move {
loop { loop {

View File

@@ -5,11 +5,11 @@ use arc_swap::ArcSwapOption;
use crate::{ use crate::{
compute::ConnCfg, compute::ConnCfg,
console::{ context::RequestMonitoring,
control_plane::{
messages::{ColdStartInfo, EndpointJwksResponse, MetricsAuxInfo}, messages::{ColdStartInfo, EndpointJwksResponse, MetricsAuxInfo},
NodeInfo, NodeInfo,
}, },
context::RequestMonitoring,
intern::{BranchIdTag, EndpointIdTag, InternId, ProjectIdTag}, intern::{BranchIdTag, EndpointIdTag, InternId, ProjectIdTag},
EndpointId, EndpointId,
}; };

View File

@@ -1,27 +1,27 @@
mod classic; mod classic;
mod console_redirect;
mod hacks; mod hacks;
pub mod jwt; pub mod jwt;
pub mod local; pub mod local;
mod web;
use std::net::IpAddr; use std::net::IpAddr;
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
pub(crate) use console_redirect::WebAuthError;
use ipnet::{Ipv4Net, Ipv6Net}; use ipnet::{Ipv4Net, Ipv6Net};
use local::LocalBackend; use local::LocalBackend;
use tokio::io::{AsyncRead, AsyncWrite}; use tokio::io::{AsyncRead, AsyncWrite};
use tokio_postgres::config::AuthKeys; use tokio_postgres::config::AuthKeys;
use tracing::{info, warn}; use tracing::{info, warn};
pub(crate) use web::WebAuthError;
use crate::auth::credentials::check_peer_addr_is_in_list; use crate::auth::credentials::check_peer_addr_is_in_list;
use crate::auth::{validate_password_and_exchange, AuthError}; use crate::auth::{validate_password_and_exchange, AuthError};
use crate::cache::Cached; use crate::cache::Cached;
use crate::console::errors::GetAuthInfoError;
use crate::console::provider::{CachedRoleSecret, ConsoleBackend};
use crate::console::{AuthSecret, NodeInfo};
use crate::context::RequestMonitoring; use crate::context::RequestMonitoring;
use crate::control_plane::errors::GetAuthInfoError;
use crate::control_plane::provider::{CachedRoleSecret, ControlPlaneBackend};
use crate::control_plane::{AuthSecret, NodeInfo};
use crate::intern::EndpointIdInt; use crate::intern::EndpointIdInt;
use crate::metrics::Metrics; use crate::metrics::Metrics;
use crate::proxy::connect_compute::ComputeConnectBackend; use crate::proxy::connect_compute::ComputeConnectBackend;
@@ -31,7 +31,7 @@ use crate::stream::Stream;
use crate::{ use crate::{
auth::{self, ComputeUserInfoMaybeEndpoint}, auth::{self, ComputeUserInfoMaybeEndpoint},
config::AuthenticationConfig, config::AuthenticationConfig,
console::{ control_plane::{
self, self,
provider::{CachedAllowedIps, CachedNodeInfo}, provider::{CachedAllowedIps, CachedNodeInfo},
Api, Api,
@@ -67,19 +67,19 @@ impl<T> std::ops::Deref for MaybeOwned<'_, T> {
/// backends which require them for the authentication process. /// backends which require them for the authentication process.
pub enum Backend<'a, T, D> { pub enum Backend<'a, T, D> {
/// Cloud API (V2). /// Cloud API (V2).
Console(MaybeOwned<'a, ConsoleBackend>, T), ControlPlane(MaybeOwned<'a, ControlPlaneBackend>, T),
/// Authentication via a web browser. /// Authentication via a web browser.
Web(MaybeOwned<'a, url::ApiUrl>, D), ConsoleRedirect(MaybeOwned<'a, url::ApiUrl>, D),
/// Local proxy uses configured auth credentials and does not wake compute /// Local proxy uses configured auth credentials and does not wake compute
Local(MaybeOwned<'a, LocalBackend>), Local(MaybeOwned<'a, LocalBackend>),
} }
#[cfg(test)] #[cfg(test)]
pub(crate) trait TestBackend: Send + Sync + 'static { pub(crate) trait TestBackend: Send + Sync + 'static {
fn wake_compute(&self) -> Result<CachedNodeInfo, console::errors::WakeComputeError>; fn wake_compute(&self) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError>;
fn get_allowed_ips_and_secret( fn get_allowed_ips_and_secret(
&self, &self,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), console::errors::GetAuthInfoError>; ) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), control_plane::errors::GetAuthInfoError>;
fn dyn_clone(&self) -> Box<dyn TestBackend>; fn dyn_clone(&self) -> Box<dyn TestBackend>;
} }
@@ -93,18 +93,23 @@ impl Clone for Box<dyn TestBackend> {
impl std::fmt::Display for Backend<'_, (), ()> { impl std::fmt::Display for Backend<'_, (), ()> {
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self { match self {
Self::Console(api, ()) => match &**api { Self::ControlPlane(api, ()) => match &**api {
ConsoleBackend::Console(endpoint) => { ControlPlaneBackend::Management(endpoint) => fmt
fmt.debug_tuple("Console").field(&endpoint.url()).finish() .debug_tuple("ControlPlane::Management")
} .field(&endpoint.url())
.finish(),
#[cfg(any(test, feature = "testing"))] #[cfg(any(test, feature = "testing"))]
ConsoleBackend::Postgres(endpoint) => { ControlPlaneBackend::PostgresMock(endpoint) => fmt
fmt.debug_tuple("Postgres").field(&endpoint.url()).finish() .debug_tuple("ControlPlane::PostgresMock")
} .field(&endpoint.url())
.finish(),
#[cfg(test)] #[cfg(test)]
ConsoleBackend::Test(_) => fmt.debug_tuple("Test").finish(), ControlPlaneBackend::Test(_) => fmt.debug_tuple("ControlPlane::Test").finish(),
}, },
Self::Web(url, ()) => fmt.debug_tuple("Web").field(&url.as_str()).finish(), Self::ConsoleRedirect(url, ()) => fmt
.debug_tuple("ConsoleRedirect")
.field(&url.as_str())
.finish(),
Self::Local(_) => fmt.debug_tuple("Local").finish(), Self::Local(_) => fmt.debug_tuple("Local").finish(),
} }
} }
@@ -115,8 +120,8 @@ impl<T, D> Backend<'_, T, D> {
/// This helps us pass structured config to async tasks. /// This helps us pass structured config to async tasks.
pub(crate) fn as_ref(&self) -> Backend<'_, &T, &D> { pub(crate) fn as_ref(&self) -> Backend<'_, &T, &D> {
match self { match self {
Self::Console(c, x) => Backend::Console(MaybeOwned::Borrowed(c), x), Self::ControlPlane(c, x) => Backend::ControlPlane(MaybeOwned::Borrowed(c), x),
Self::Web(c, x) => Backend::Web(MaybeOwned::Borrowed(c), x), Self::ConsoleRedirect(c, x) => Backend::ConsoleRedirect(MaybeOwned::Borrowed(c), x),
Self::Local(l) => Backend::Local(MaybeOwned::Borrowed(l)), Self::Local(l) => Backend::Local(MaybeOwned::Borrowed(l)),
} }
} }
@@ -128,8 +133,8 @@ impl<'a, T, D> Backend<'a, T, D> {
/// a function to a contained value. /// a function to a contained value.
pub(crate) fn map<R>(self, f: impl FnOnce(T) -> R) -> Backend<'a, R, D> { pub(crate) fn map<R>(self, f: impl FnOnce(T) -> R) -> Backend<'a, R, D> {
match self { match self {
Self::Console(c, x) => Backend::Console(c, f(x)), Self::ControlPlane(c, x) => Backend::ControlPlane(c, f(x)),
Self::Web(c, x) => Backend::Web(c, x), Self::ConsoleRedirect(c, x) => Backend::ConsoleRedirect(c, x),
Self::Local(l) => Backend::Local(l), Self::Local(l) => Backend::Local(l),
} }
} }
@@ -139,8 +144,8 @@ impl<'a, T, D, E> Backend<'a, Result<T, E>, D> {
/// This is most useful for error handling. /// This is most useful for error handling.
pub(crate) fn transpose(self) -> Result<Backend<'a, T, D>, E> { pub(crate) fn transpose(self) -> Result<Backend<'a, T, D>, E> {
match self { match self {
Self::Console(c, x) => x.map(|x| Backend::Console(c, x)), Self::ControlPlane(c, x) => x.map(|x| Backend::ControlPlane(c, x)),
Self::Web(c, x) => Ok(Backend::Web(c, x)), Self::ConsoleRedirect(c, x) => Ok(Backend::ConsoleRedirect(c, x)),
Self::Local(l) => Ok(Backend::Local(l)), Self::Local(l) => Ok(Backend::Local(l)),
} }
} }
@@ -290,7 +295,7 @@ impl AuthenticationConfig {
/// All authentication flows will emit an AuthenticationOk message if successful. /// All authentication flows will emit an AuthenticationOk message if successful.
async fn auth_quirks( async fn auth_quirks(
ctx: &RequestMonitoring, ctx: &RequestMonitoring,
api: &impl console::Api, api: &impl control_plane::Api,
user_info: ComputeUserInfoMaybeEndpoint, user_info: ComputeUserInfoMaybeEndpoint,
client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>, client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
allow_cleartext: bool, allow_cleartext: bool,
@@ -412,8 +417,8 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint, &()> {
/// Get username from the credentials. /// Get username from the credentials.
pub(crate) fn get_user(&self) -> &str { pub(crate) fn get_user(&self) -> &str {
match self { match self {
Self::Console(_, user_info) => &user_info.user, Self::ControlPlane(_, user_info) => &user_info.user,
Self::Web(_, ()) => "web", Self::ConsoleRedirect(_, ()) => "web",
Self::Local(_) => "local", Self::Local(_) => "local",
} }
} }
@@ -429,7 +434,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint, &()> {
endpoint_rate_limiter: Arc<EndpointRateLimiter>, endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> auth::Result<Backend<'a, ComputeCredentials, NodeInfo>> { ) -> auth::Result<Backend<'a, ComputeCredentials, NodeInfo>> {
let res = match self { let res = match self {
Self::Console(api, user_info) => { Self::ControlPlane(api, user_info) => {
info!( info!(
user = &*user_info.user, user = &*user_info.user,
project = user_info.endpoint(), project = user_info.endpoint(),
@@ -446,15 +451,15 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint, &()> {
endpoint_rate_limiter, endpoint_rate_limiter,
) )
.await?; .await?;
Backend::Console(api, credentials) Backend::ControlPlane(api, credentials)
} }
// NOTE: this auth backend doesn't use client credentials. // NOTE: this auth backend doesn't use client credentials.
Self::Web(url, ()) => { Self::ConsoleRedirect(url, ()) => {
info!("performing web authentication"); info!("performing web authentication");
let info = web::authenticate(ctx, config, &url, client).await?; let info = console_redirect::authenticate(ctx, config, &url, client).await?;
Backend::Web(url, info) Backend::ConsoleRedirect(url, info)
} }
Self::Local(_) => { Self::Local(_) => {
return Err(auth::AuthError::bad_auth_method("invalid for local proxy")) return Err(auth::AuthError::bad_auth_method("invalid for local proxy"))
@@ -472,8 +477,8 @@ impl Backend<'_, ComputeUserInfo, &()> {
ctx: &RequestMonitoring, ctx: &RequestMonitoring,
) -> Result<CachedRoleSecret, GetAuthInfoError> { ) -> Result<CachedRoleSecret, GetAuthInfoError> {
match self { match self {
Self::Console(api, user_info) => api.get_role_secret(ctx, user_info).await, Self::ControlPlane(api, user_info) => api.get_role_secret(ctx, user_info).await,
Self::Web(_, ()) => Ok(Cached::new_uncached(None)), Self::ConsoleRedirect(_, ()) => Ok(Cached::new_uncached(None)),
Self::Local(_) => Ok(Cached::new_uncached(None)), Self::Local(_) => Ok(Cached::new_uncached(None)),
} }
} }
@@ -483,8 +488,10 @@ impl Backend<'_, ComputeUserInfo, &()> {
ctx: &RequestMonitoring, ctx: &RequestMonitoring,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> { ) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> {
match self { match self {
Self::Console(api, user_info) => api.get_allowed_ips_and_secret(ctx, user_info).await, Self::ControlPlane(api, user_info) => {
Self::Web(_, ()) => Ok((Cached::new_uncached(Arc::new(vec![])), None)), api.get_allowed_ips_and_secret(ctx, user_info).await
}
Self::ConsoleRedirect(_, ()) => Ok((Cached::new_uncached(Arc::new(vec![])), None)),
Self::Local(_) => Ok((Cached::new_uncached(Arc::new(vec![])), None)), Self::Local(_) => Ok((Cached::new_uncached(Arc::new(vec![])), None)),
} }
} }
@@ -495,18 +502,18 @@ impl ComputeConnectBackend for Backend<'_, ComputeCredentials, NodeInfo> {
async fn wake_compute( async fn wake_compute(
&self, &self,
ctx: &RequestMonitoring, ctx: &RequestMonitoring,
) -> Result<CachedNodeInfo, console::errors::WakeComputeError> { ) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> {
match self { match self {
Self::Console(api, creds) => api.wake_compute(ctx, &creds.info).await, Self::ControlPlane(api, creds) => api.wake_compute(ctx, &creds.info).await,
Self::Web(_, info) => Ok(Cached::new_uncached(info.clone())), Self::ConsoleRedirect(_, info) => Ok(Cached::new_uncached(info.clone())),
Self::Local(local) => Ok(Cached::new_uncached(local.node_info.clone())), Self::Local(local) => Ok(Cached::new_uncached(local.node_info.clone())),
} }
} }
fn get_keys(&self) -> &ComputeCredentialKeys { fn get_keys(&self) -> &ComputeCredentialKeys {
match self { match self {
Self::Console(_, creds) => &creds.keys, Self::ControlPlane(_, creds) => &creds.keys,
Self::Web(_, _) => &ComputeCredentialKeys::None, Self::ConsoleRedirect(_, _) => &ComputeCredentialKeys::None,
Self::Local(_) => &ComputeCredentialKeys::None, Self::Local(_) => &ComputeCredentialKeys::None,
} }
} }
@@ -517,10 +524,10 @@ impl ComputeConnectBackend for Backend<'_, ComputeCredentials, &()> {
async fn wake_compute( async fn wake_compute(
&self, &self,
ctx: &RequestMonitoring, ctx: &RequestMonitoring,
) -> Result<CachedNodeInfo, console::errors::WakeComputeError> { ) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> {
match self { match self {
Self::Console(api, creds) => api.wake_compute(ctx, &creds.info).await, Self::ControlPlane(api, creds) => api.wake_compute(ctx, &creds.info).await,
Self::Web(_, ()) => { Self::ConsoleRedirect(_, ()) => {
unreachable!("web auth flow doesn't support waking the compute") unreachable!("web auth flow doesn't support waking the compute")
} }
Self::Local(local) => Ok(Cached::new_uncached(local.node_info.clone())), Self::Local(local) => Ok(Cached::new_uncached(local.node_info.clone())),
@@ -529,8 +536,8 @@ impl ComputeConnectBackend for Backend<'_, ComputeCredentials, &()> {
fn get_keys(&self) -> &ComputeCredentialKeys { fn get_keys(&self) -> &ComputeCredentialKeys {
match self { match self {
Self::Console(_, creds) => &creds.keys, Self::ControlPlane(_, creds) => &creds.keys,
Self::Web(_, ()) => &ComputeCredentialKeys::None, Self::ConsoleRedirect(_, ()) => &ComputeCredentialKeys::None,
Self::Local(_) => &ComputeCredentialKeys::None, Self::Local(_) => &ComputeCredentialKeys::None,
} }
} }
@@ -553,12 +560,12 @@ mod tests {
use crate::{ use crate::{
auth::{backend::MaskedIp, ComputeUserInfoMaybeEndpoint, IpPattern}, auth::{backend::MaskedIp, ComputeUserInfoMaybeEndpoint, IpPattern},
config::AuthenticationConfig, config::AuthenticationConfig,
console::{ context::RequestMonitoring,
control_plane::{
self, self,
provider::{self, CachedAllowedIps, CachedRoleSecret}, provider::{self, CachedAllowedIps, CachedRoleSecret},
CachedNodeInfo, CachedNodeInfo,
}, },
context::RequestMonitoring,
proxy::NeonOptions, proxy::NeonOptions,
rate_limiter::{EndpointRateLimiter, RateBucketInfo}, rate_limiter::{EndpointRateLimiter, RateBucketInfo},
scram::{threadpool::ThreadPool, ServerSecret}, scram::{threadpool::ThreadPool, ServerSecret},
@@ -572,12 +579,12 @@ mod tests {
secret: AuthSecret, secret: AuthSecret,
} }
impl console::Api for Auth { impl control_plane::Api for Auth {
async fn get_role_secret( async fn get_role_secret(
&self, &self,
_ctx: &RequestMonitoring, _ctx: &RequestMonitoring,
_user_info: &super::ComputeUserInfo, _user_info: &super::ComputeUserInfo,
) -> Result<CachedRoleSecret, console::errors::GetAuthInfoError> { ) -> Result<CachedRoleSecret, control_plane::errors::GetAuthInfoError> {
Ok(CachedRoleSecret::new_uncached(Some(self.secret.clone()))) Ok(CachedRoleSecret::new_uncached(Some(self.secret.clone())))
} }
@@ -585,8 +592,10 @@ mod tests {
&self, &self,
_ctx: &RequestMonitoring, _ctx: &RequestMonitoring,
_user_info: &super::ComputeUserInfo, _user_info: &super::ComputeUserInfo,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), console::errors::GetAuthInfoError> ) -> Result<
{ (CachedAllowedIps, Option<CachedRoleSecret>),
control_plane::errors::GetAuthInfoError,
> {
Ok(( Ok((
CachedAllowedIps::new_uncached(Arc::new(self.ips.clone())), CachedAllowedIps::new_uncached(Arc::new(self.ips.clone())),
Some(CachedRoleSecret::new_uncached(Some(self.secret.clone()))), Some(CachedRoleSecret::new_uncached(Some(self.secret.clone()))),
@@ -605,7 +614,7 @@ mod tests {
&self, &self,
_ctx: &RequestMonitoring, _ctx: &RequestMonitoring,
_user_info: &super::ComputeUserInfo, _user_info: &super::ComputeUserInfo,
) -> Result<CachedNodeInfo, console::errors::WakeComputeError> { ) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> {
unimplemented!() unimplemented!()
} }
} }

View File

@@ -3,8 +3,8 @@
use super::{backend::ComputeCredentialKeys, AuthErrorImpl, PasswordHackPayload}; use super::{backend::ComputeCredentialKeys, AuthErrorImpl, PasswordHackPayload};
use crate::{ use crate::{
config::TlsServerEndPoint, config::TlsServerEndPoint,
console::AuthSecret,
context::RequestMonitoring, context::RequestMonitoring,
control_plane::AuthSecret,
intern::EndpointIdInt, intern::EndpointIdInt,
sasl, sasl,
scram::{self, threadpool::ThreadPool}, scram::{self, threadpool::ThreadPool},

View File

@@ -18,7 +18,7 @@ pub(crate) use flow::*;
use tokio::time::error::Elapsed; use tokio::time::error::Elapsed;
use crate::{ use crate::{
console, control_plane,
error::{ReportableError, UserFacingError}, error::{ReportableError, UserFacingError},
}; };
use std::{io, net::IpAddr}; use std::{io, net::IpAddr};
@@ -34,7 +34,7 @@ pub(crate) enum AuthErrorImpl {
Web(#[from] backend::WebAuthError), Web(#[from] backend::WebAuthError),
#[error(transparent)] #[error(transparent)]
GetAuthInfo(#[from] console::errors::GetAuthInfoError), GetAuthInfo(#[from] control_plane::errors::GetAuthInfoError),
/// SASL protocol errors (includes [SCRAM](crate::scram)). /// SASL protocol errors (includes [SCRAM](crate::scram)).
#[error(transparent)] #[error(transparent)]

View File

@@ -12,7 +12,7 @@ use proxy::{
}, },
cancellation::CancellationHandlerMain, cancellation::CancellationHandlerMain,
config::{self, AuthenticationConfig, HttpConfig, ProxyConfig, RetryConfig}, config::{self, AuthenticationConfig, HttpConfig, ProxyConfig, RetryConfig},
console::{ control_plane::{
locks::ApiLocks, locks::ApiLocks,
messages::{EndpointJwksResponse, JwksSettings}, messages::{EndpointJwksResponse, JwksSettings},
}, },
@@ -77,10 +77,10 @@ struct LocalProxyCliArgs {
#[clap(long, default_value = "127.0.0.1:5432")] #[clap(long, default_value = "127.0.0.1:5432")]
compute: SocketAddr, compute: SocketAddr,
/// Path of the local proxy config file /// Path of the local proxy config file
#[clap(long, default_value = "./localproxy.json")] #[clap(long, default_value = "./local_proxy.json")]
config_path: Utf8PathBuf, config_path: Utf8PathBuf,
/// Path of the local proxy PID file /// Path of the local proxy PID file
#[clap(long, default_value = "./localproxy.pid")] #[clap(long, default_value = "./local_proxy.pid")]
pid_path: Utf8PathBuf, pid_path: Utf8PathBuf,
} }
@@ -305,7 +305,7 @@ async fn refresh_config_inner(path: &Utf8Path) -> anyhow::Result<()> {
let mut jwks_set = vec![]; let mut jwks_set = vec![];
for jwks in data.jwks { for jwks in data.jwks.into_iter().flatten() {
let mut jwks_url = url::Url::from_str(&jwks.jwks_url).context("parsing JWKS url")?; let mut jwks_url = url::Url::from_str(&jwks.jwks_url).context("parsing JWKS url")?;
ensure!( ensure!(

View File

@@ -19,8 +19,8 @@ use proxy::config::CacheOptions;
use proxy::config::HttpConfig; use proxy::config::HttpConfig;
use proxy::config::ProjectInfoCacheOptions; use proxy::config::ProjectInfoCacheOptions;
use proxy::config::ProxyProtocolV2; use proxy::config::ProxyProtocolV2;
use proxy::console;
use proxy::context::parquet::ParquetUploadArgs; use proxy::context::parquet::ParquetUploadArgs;
use proxy::control_plane;
use proxy::http; use proxy::http;
use proxy::http::health_server::AppMetrics; use proxy::http::health_server::AppMetrics;
use proxy::metrics::Metrics; use proxy::metrics::Metrics;
@@ -495,7 +495,7 @@ async fn main() -> anyhow::Result<()> {
proxy: proxy::metrics::Metrics::get(), proxy: proxy::metrics::Metrics::get(),
}, },
)); ));
maintenance_tasks.spawn(console::mgmt::task_main(mgmt_listener)); maintenance_tasks.spawn(control_plane::mgmt::task_main(mgmt_listener));
if let Some(metrics_config) = &config.metric_collection { if let Some(metrics_config) = &config.metric_collection {
// TODO: Add gc regardles of the metric collection being enabled. // TODO: Add gc regardles of the metric collection being enabled.
@@ -506,8 +506,8 @@ async fn main() -> anyhow::Result<()> {
)); ));
} }
if let auth::Backend::Console(api, _) = &config.auth_backend { if let auth::Backend::ControlPlane(api, _) = &config.auth_backend {
if let proxy::console::provider::ConsoleBackend::Console(api) = &**api { if let proxy::control_plane::provider::ControlPlaneBackend::Management(api) = &**api {
match (redis_notifications_client, regional_redis_client.clone()) { match (redis_notifications_client, regional_redis_client.clone()) {
(None, None) => {} (None, None) => {}
(client1, client2) => { (client1, client2) => {
@@ -623,7 +623,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
"Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}" "Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}"
); );
info!("Using EndpointCacheConfig with options={endpoint_cache_config:?}"); info!("Using EndpointCacheConfig with options={endpoint_cache_config:?}");
let caches = Box::leak(Box::new(console::caches::ApiCaches::new( let caches = Box::leak(Box::new(control_plane::caches::ApiCaches::new(
wake_compute_cache_config, wake_compute_cache_config,
project_info_cache_config, project_info_cache_config,
endpoint_cache_config, endpoint_cache_config,
@@ -636,7 +636,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
timeout, timeout,
} = args.wake_compute_lock.parse()?; } = args.wake_compute_lock.parse()?;
info!(?limiter, shards, ?epoch, "Using NodeLocks (wake_compute)"); info!(?limiter, shards, ?epoch, "Using NodeLocks (wake_compute)");
let locks = Box::leak(Box::new(console::locks::ApiLocks::new( let locks = Box::leak(Box::new(control_plane::locks::ApiLocks::new(
"wake_compute_lock", "wake_compute_lock",
limiter, limiter,
shards, shards,
@@ -653,27 +653,27 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
RateBucketInfo::validate(&mut wake_compute_rps_limit)?; RateBucketInfo::validate(&mut wake_compute_rps_limit)?;
let wake_compute_endpoint_rate_limiter = let wake_compute_endpoint_rate_limiter =
Arc::new(WakeComputeRateLimiter::new(wake_compute_rps_limit)); Arc::new(WakeComputeRateLimiter::new(wake_compute_rps_limit));
let api = console::provider::neon::Api::new( let api = control_plane::provider::neon::Api::new(
endpoint, endpoint,
caches, caches,
locks, locks,
wake_compute_endpoint_rate_limiter, wake_compute_endpoint_rate_limiter,
); );
let api = console::provider::ConsoleBackend::Console(api); let api = control_plane::provider::ControlPlaneBackend::Management(api);
auth::Backend::Console(MaybeOwned::Owned(api), ()) auth::Backend::ControlPlane(MaybeOwned::Owned(api), ())
} }
AuthBackendType::Web => { AuthBackendType::Web => {
let url = args.uri.parse()?; let url = args.uri.parse()?;
auth::Backend::Web(MaybeOwned::Owned(url), ()) auth::Backend::ConsoleRedirect(MaybeOwned::Owned(url), ())
} }
#[cfg(feature = "testing")] #[cfg(feature = "testing")]
AuthBackendType::Postgres => { AuthBackendType::Postgres => {
let url = args.auth_endpoint.parse()?; let url = args.auth_endpoint.parse()?;
let api = console::provider::mock::Api::new(url, !args.is_private_access_proxy); let api = control_plane::provider::mock::Api::new(url, !args.is_private_access_proxy);
let api = console::provider::ConsoleBackend::Postgres(api); let api = control_plane::provider::ControlPlaneBackend::PostgresMock(api);
auth::Backend::Console(MaybeOwned::Owned(api), ()) auth::Backend::ControlPlane(MaybeOwned::Owned(api), ())
} }
}; };
@@ -689,7 +689,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
?epoch, ?epoch,
"Using NodeLocks (connect_compute)" "Using NodeLocks (connect_compute)"
); );
let connect_compute_locks = console::locks::ApiLocks::new( let connect_compute_locks = control_plane::locks::ApiLocks::new(
"connect_compute_lock", "connect_compute_lock",
limiter, limiter,
shards, shards,

View File

@@ -16,7 +16,7 @@ use tracing::{debug, info};
use crate::{ use crate::{
auth::IpPattern, auth::IpPattern,
config::ProjectInfoCacheOptions, config::ProjectInfoCacheOptions,
console::AuthSecret, control_plane::AuthSecret,
intern::{EndpointIdInt, ProjectIdInt, RoleNameInt}, intern::{EndpointIdInt, ProjectIdInt, RoleNameInt},
EndpointId, RoleName, EndpointId, RoleName,
}; };

View File

@@ -1,8 +1,8 @@
use crate::{ use crate::{
auth::parse_endpoint_param, auth::parse_endpoint_param,
cancellation::CancelClosure, cancellation::CancelClosure,
console::{errors::WakeComputeError, messages::MetricsAuxInfo, provider::ApiLockError},
context::RequestMonitoring, context::RequestMonitoring,
control_plane::{errors::WakeComputeError, messages::MetricsAuxInfo, provider::ApiLockError},
error::{ReportableError, UserFacingError}, error::{ReportableError, UserFacingError},
metrics::{Metrics, NumDbConnectionsGuard}, metrics::{Metrics, NumDbConnectionsGuard},
proxy::neon_option, proxy::neon_option,
@@ -20,7 +20,7 @@ use tokio_postgres::tls::MakeTlsConnect;
use tokio_postgres_rustls::MakeRustlsConnect; use tokio_postgres_rustls::MakeRustlsConnect;
use tracing::{error, info, warn}; use tracing::{error, info, warn};
const COULD_NOT_CONNECT: &str = "Couldn't connect to compute node"; pub const COULD_NOT_CONNECT: &str = "Couldn't connect to compute node";
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub(crate) enum ConnectionError { pub(crate) enum ConnectionError {

View File

@@ -3,7 +3,7 @@ use crate::{
self, self,
backend::{jwt::JwkCache, AuthRateLimiter}, backend::{jwt::JwkCache, AuthRateLimiter},
}, },
console::locks::ApiLocks, control_plane::locks::ApiLocks,
rate_limiter::{RateBucketInfo, RateLimitAlgorithm, RateLimiterConfig}, rate_limiter::{RateBucketInfo, RateLimitAlgorithm, RateLimiterConfig},
scram::threadpool::ThreadPool, scram::threadpool::ThreadPool,
serverless::{cancel_set::CancelSet, GlobalConnPoolOptions}, serverless::{cancel_set::CancelSet, GlobalConnPoolOptions},
@@ -372,7 +372,7 @@ pub struct EndpointCacheConfig {
} }
impl EndpointCacheConfig { impl EndpointCacheConfig {
/// Default options for [`crate::console::provider::NodeInfoCache`]. /// Default options for [`crate::control_plane::provider::NodeInfoCache`].
/// Notice that by default the limiter is empty, which means that cache is disabled. /// Notice that by default the limiter is empty, which means that cache is disabled.
pub const CACHE_DEFAULT_OPTIONS: &'static str = pub const CACHE_DEFAULT_OPTIONS: &'static str =
"initial_batch_size=1000,default_batch_size=10,xread_timeout=5m,stream_name=controlPlane,disable_cache=true,limiter_info=1000@1s,retry_interval=1s"; "initial_batch_size=1000,default_batch_size=10,xread_timeout=5m,stream_name=controlPlane,disable_cache=true,limiter_info=1000@1s,retry_interval=1s";
@@ -447,7 +447,7 @@ pub struct CacheOptions {
} }
impl CacheOptions { impl CacheOptions {
/// Default options for [`crate::console::provider::NodeInfoCache`]. /// Default options for [`crate::control_plane::provider::NodeInfoCache`].
pub const CACHE_DEFAULT_OPTIONS: &'static str = "size=4000,ttl=4m"; pub const CACHE_DEFAULT_OPTIONS: &'static str = "size=4000,ttl=4m";
/// Parse cache options passed via cmdline. /// Parse cache options passed via cmdline.
@@ -503,7 +503,7 @@ pub struct ProjectInfoCacheOptions {
} }
impl ProjectInfoCacheOptions { impl ProjectInfoCacheOptions {
/// Default options for [`crate::console::provider::NodeInfoCache`]. /// Default options for [`crate::control_plane::provider::NodeInfoCache`].
pub const CACHE_DEFAULT_OPTIONS: &'static str = pub const CACHE_DEFAULT_OPTIONS: &'static str =
"size=10000,ttl=4m,max_roles=10,gc_interval=60m"; "size=10000,ttl=4m,max_roles=10,gc_interval=60m";
@@ -622,9 +622,9 @@ pub struct ConcurrencyLockOptions {
} }
impl ConcurrencyLockOptions { impl ConcurrencyLockOptions {
/// Default options for [`crate::console::provider::ApiLocks`]. /// Default options for [`crate::control_plane::provider::ApiLocks`].
pub const DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK: &'static str = "permits=0"; pub const DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK: &'static str = "permits=0";
/// Default options for [`crate::console::provider::ApiLocks`]. /// Default options for [`crate::control_plane::provider::ApiLocks`].
pub const DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK: &'static str = pub const DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK: &'static str =
"shards=64,permits=100,epoch=10m,timeout=10ms"; "shards=64,permits=100,epoch=10m,timeout=10ms";

View File

@@ -11,7 +11,7 @@ use try_lock::TryLock;
use uuid::Uuid; use uuid::Uuid;
use crate::{ use crate::{
console::messages::{ColdStartInfo, MetricsAuxInfo}, control_plane::messages::{ColdStartInfo, MetricsAuxInfo},
error::ErrorKind, error::ErrorKind,
intern::{BranchIdInt, ProjectIdInt}, intern::{BranchIdInt, ProjectIdInt},
metrics::{ConnectOutcome, InvalidEndpointsGroup, LatencyTimer, Metrics, Protocol, Waiting}, metrics::{ConnectOutcome, InvalidEndpointsGroup, LatencyTimer, Metrics, Protocol, Waiting},

View File

@@ -10,14 +10,14 @@ use crate::proxy::retry::CouldRetry;
/// Generic error response with human-readable description. /// Generic error response with human-readable description.
/// Note that we can't always present it to user as is. /// Note that we can't always present it to user as is.
#[derive(Debug, Deserialize, Clone)] #[derive(Debug, Deserialize, Clone)]
pub(crate) struct ConsoleError { pub(crate) struct ControlPlaneError {
pub(crate) error: Box<str>, pub(crate) error: Box<str>,
#[serde(skip)] #[serde(skip)]
pub(crate) http_status_code: http::StatusCode, pub(crate) http_status_code: http::StatusCode,
pub(crate) status: Option<Status>, pub(crate) status: Option<Status>,
} }
impl ConsoleError { impl ControlPlaneError {
pub(crate) fn get_reason(&self) -> Reason { pub(crate) fn get_reason(&self) -> Reason {
self.status self.status
.as_ref() .as_ref()
@@ -51,7 +51,7 @@ impl ConsoleError {
} }
} }
impl Display for ConsoleError { impl Display for ControlPlaneError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let msg: &str = self let msg: &str = self
.status .status
@@ -62,7 +62,7 @@ impl Display for ConsoleError {
} }
} }
impl CouldRetry for ConsoleError { impl CouldRetry for ControlPlaneError {
fn could_retry(&self) -> bool { fn could_retry(&self) -> bool {
// If the error message does not have a status, // If the error message does not have a status,
// the error is unknown and probably should not retry automatically // the error is unknown and probably should not retry automatically

View File

@@ -1,5 +1,5 @@
use crate::{ use crate::{
console::messages::{DatabaseInfo, KickSession}, control_plane::messages::{DatabaseInfo, KickSession},
waiters::{self, Waiter, Waiters}, waiters::{self, Waiter, Waiters},
}; };
use anyhow::Context; use anyhow::Context;

View File

@@ -10,7 +10,7 @@ use crate::{
use crate::{auth::backend::ComputeUserInfo, compute, error::io_error, scram, url::ApiUrl}; use crate::{auth::backend::ComputeUserInfo, compute, error::io_error, scram, url::ApiUrl};
use crate::{auth::IpPattern, cache::Cached}; use crate::{auth::IpPattern, cache::Cached};
use crate::{ use crate::{
console::{ control_plane::{
messages::MetricsAuxInfo, messages::MetricsAuxInfo,
provider::{CachedAllowedIps, CachedRoleSecret}, provider::{CachedAllowedIps, CachedRoleSecret},
}, },
@@ -166,7 +166,7 @@ impl Api {
endpoint_id: (&EndpointId::from("endpoint")).into(), endpoint_id: (&EndpointId::from("endpoint")).into(),
project_id: (&ProjectId::from("project")).into(), project_id: (&ProjectId::from("project")).into(),
branch_id: (&BranchId::from("branch")).into(), branch_id: (&BranchId::from("branch")).into(),
cold_start_info: crate::console::messages::ColdStartInfo::Warm, cold_start_info: crate::control_plane::messages::ColdStartInfo::Warm,
}, },
allow_self_signed_compute: false, allow_self_signed_compute: false,
}; };

View File

@@ -2,7 +2,7 @@
pub mod mock; pub mod mock;
pub mod neon; pub mod neon;
use super::messages::{ConsoleError, MetricsAuxInfo}; use super::messages::{ControlPlaneError, MetricsAuxInfo};
use crate::{ use crate::{
auth::{ auth::{
backend::{ backend::{
@@ -28,7 +28,7 @@ use tracing::info;
pub(crate) mod errors { pub(crate) mod errors {
use crate::{ use crate::{
console::messages::{self, ConsoleError, Reason}, control_plane::messages::{self, ControlPlaneError, Reason},
error::{io_error, ErrorKind, ReportableError, UserFacingError}, error::{io_error, ErrorKind, ReportableError, UserFacingError},
proxy::retry::CouldRetry, proxy::retry::CouldRetry,
}; };
@@ -44,7 +44,7 @@ pub(crate) mod errors {
pub(crate) enum ApiError { pub(crate) enum ApiError {
/// Error returned by the console itself. /// Error returned by the console itself.
#[error("{REQUEST_FAILED} with {0}")] #[error("{REQUEST_FAILED} with {0}")]
Console(ConsoleError), ControlPlane(ControlPlaneError),
/// Various IO errors like broken pipe or malformed payload. /// Various IO errors like broken pipe or malformed payload.
#[error("{REQUEST_FAILED}: {0}")] #[error("{REQUEST_FAILED}: {0}")]
@@ -55,7 +55,7 @@ pub(crate) mod errors {
/// Returns HTTP status code if it's the reason for failure. /// Returns HTTP status code if it's the reason for failure.
pub(crate) fn get_reason(&self) -> messages::Reason { pub(crate) fn get_reason(&self) -> messages::Reason {
match self { match self {
ApiError::Console(e) => e.get_reason(), ApiError::ControlPlane(e) => e.get_reason(),
ApiError::Transport(_) => messages::Reason::Unknown, ApiError::Transport(_) => messages::Reason::Unknown,
} }
} }
@@ -65,7 +65,7 @@ pub(crate) mod errors {
fn to_string_client(&self) -> String { fn to_string_client(&self) -> String {
match self { match self {
// To minimize risks, only select errors are forwarded to users. // To minimize risks, only select errors are forwarded to users.
ApiError::Console(c) => c.get_user_facing_message(), ApiError::ControlPlane(c) => c.get_user_facing_message(),
ApiError::Transport(_) => REQUEST_FAILED.to_owned(), ApiError::Transport(_) => REQUEST_FAILED.to_owned(),
} }
} }
@@ -74,7 +74,7 @@ pub(crate) mod errors {
impl ReportableError for ApiError { impl ReportableError for ApiError {
fn get_error_kind(&self) -> crate::error::ErrorKind { fn get_error_kind(&self) -> crate::error::ErrorKind {
match self { match self {
ApiError::Console(e) => match e.get_reason() { ApiError::ControlPlane(e) => match e.get_reason() {
Reason::RoleProtected => ErrorKind::User, Reason::RoleProtected => ErrorKind::User,
Reason::ResourceNotFound => ErrorKind::User, Reason::ResourceNotFound => ErrorKind::User,
Reason::ProjectNotFound => ErrorKind::User, Reason::ProjectNotFound => ErrorKind::User,
@@ -91,12 +91,12 @@ pub(crate) mod errors {
Reason::LockAlreadyTaken => ErrorKind::ControlPlane, Reason::LockAlreadyTaken => ErrorKind::ControlPlane,
Reason::RunningOperations => ErrorKind::ControlPlane, Reason::RunningOperations => ErrorKind::ControlPlane,
Reason::Unknown => match &e { Reason::Unknown => match &e {
ConsoleError { ControlPlaneError {
http_status_code: http_status_code:
http::StatusCode::NOT_FOUND | http::StatusCode::NOT_ACCEPTABLE, http::StatusCode::NOT_FOUND | http::StatusCode::NOT_ACCEPTABLE,
.. ..
} => crate::error::ErrorKind::User, } => crate::error::ErrorKind::User,
ConsoleError { ControlPlaneError {
http_status_code: http::StatusCode::UNPROCESSABLE_ENTITY, http_status_code: http::StatusCode::UNPROCESSABLE_ENTITY,
error, error,
.. ..
@@ -105,7 +105,7 @@ pub(crate) mod errors {
{ {
crate::error::ErrorKind::User crate::error::ErrorKind::User
} }
ConsoleError { ControlPlaneError {
http_status_code: http::StatusCode::LOCKED, http_status_code: http::StatusCode::LOCKED,
error, error,
.. ..
@@ -114,11 +114,11 @@ pub(crate) mod errors {
{ {
crate::error::ErrorKind::User crate::error::ErrorKind::User
} }
ConsoleError { ControlPlaneError {
http_status_code: http::StatusCode::TOO_MANY_REQUESTS, http_status_code: http::StatusCode::TOO_MANY_REQUESTS,
.. ..
} => crate::error::ErrorKind::ServiceRateLimit, } => crate::error::ErrorKind::ServiceRateLimit,
ConsoleError { .. } => crate::error::ErrorKind::ControlPlane, ControlPlaneError { .. } => crate::error::ErrorKind::ControlPlane,
}, },
}, },
ApiError::Transport(_) => crate::error::ErrorKind::ControlPlane, ApiError::Transport(_) => crate::error::ErrorKind::ControlPlane,
@@ -131,7 +131,7 @@ pub(crate) mod errors {
match self { match self {
// retry some transport errors // retry some transport errors
Self::Transport(io) => io.could_retry(), Self::Transport(io) => io.could_retry(),
Self::Console(e) => e.could_retry(), Self::ControlPlane(e) => e.could_retry(),
} }
} }
} }
@@ -314,7 +314,8 @@ impl NodeInfo {
} }
} }
pub(crate) type NodeInfoCache = TimedLru<EndpointCacheKey, Result<NodeInfo, Box<ConsoleError>>>; pub(crate) type NodeInfoCache =
TimedLru<EndpointCacheKey, Result<NodeInfo, Box<ControlPlaneError>>>;
pub(crate) type CachedNodeInfo = Cached<&'static NodeInfoCache, NodeInfo>; pub(crate) type CachedNodeInfo = Cached<&'static NodeInfoCache, NodeInfo>;
pub(crate) type CachedRoleSecret = Cached<&'static ProjectInfoCacheImpl, Option<AuthSecret>>; pub(crate) type CachedRoleSecret = Cached<&'static ProjectInfoCacheImpl, Option<AuthSecret>>;
pub(crate) type CachedAllowedIps = Cached<&'static ProjectInfoCacheImpl, Arc<Vec<IpPattern>>>; pub(crate) type CachedAllowedIps = Cached<&'static ProjectInfoCacheImpl, Arc<Vec<IpPattern>>>;
@@ -353,28 +354,28 @@ pub(crate) trait Api {
#[non_exhaustive] #[non_exhaustive]
#[derive(Clone)] #[derive(Clone)]
pub enum ConsoleBackend { pub enum ControlPlaneBackend {
/// Current Cloud API (V2). /// Current Management API (V2).
Console(neon::Api), Management(neon::Api),
/// Local mock of Cloud API (V2). /// Local mock control plane.
#[cfg(any(test, feature = "testing"))] #[cfg(any(test, feature = "testing"))]
Postgres(mock::Api), PostgresMock(mock::Api),
/// Internal testing /// Internal testing
#[cfg(test)] #[cfg(test)]
#[allow(private_interfaces)] #[allow(private_interfaces)]
Test(Box<dyn crate::auth::backend::TestBackend>), Test(Box<dyn crate::auth::backend::TestBackend>),
} }
impl Api for ConsoleBackend { impl Api for ControlPlaneBackend {
async fn get_role_secret( async fn get_role_secret(
&self, &self,
ctx: &RequestMonitoring, ctx: &RequestMonitoring,
user_info: &ComputeUserInfo, user_info: &ComputeUserInfo,
) -> Result<CachedRoleSecret, errors::GetAuthInfoError> { ) -> Result<CachedRoleSecret, errors::GetAuthInfoError> {
match self { match self {
Self::Console(api) => api.get_role_secret(ctx, user_info).await, Self::Management(api) => api.get_role_secret(ctx, user_info).await,
#[cfg(any(test, feature = "testing"))] #[cfg(any(test, feature = "testing"))]
Self::Postgres(api) => api.get_role_secret(ctx, user_info).await, Self::PostgresMock(api) => api.get_role_secret(ctx, user_info).await,
#[cfg(test)] #[cfg(test)]
Self::Test(_) => { Self::Test(_) => {
unreachable!("this function should never be called in the test backend") unreachable!("this function should never be called in the test backend")
@@ -388,9 +389,9 @@ impl Api for ConsoleBackend {
user_info: &ComputeUserInfo, user_info: &ComputeUserInfo,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), errors::GetAuthInfoError> { ) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), errors::GetAuthInfoError> {
match self { match self {
Self::Console(api) => api.get_allowed_ips_and_secret(ctx, user_info).await, Self::Management(api) => api.get_allowed_ips_and_secret(ctx, user_info).await,
#[cfg(any(test, feature = "testing"))] #[cfg(any(test, feature = "testing"))]
Self::Postgres(api) => api.get_allowed_ips_and_secret(ctx, user_info).await, Self::PostgresMock(api) => api.get_allowed_ips_and_secret(ctx, user_info).await,
#[cfg(test)] #[cfg(test)]
Self::Test(api) => api.get_allowed_ips_and_secret(), Self::Test(api) => api.get_allowed_ips_and_secret(),
} }
@@ -402,9 +403,9 @@ impl Api for ConsoleBackend {
endpoint: EndpointId, endpoint: EndpointId,
) -> anyhow::Result<Vec<AuthRule>> { ) -> anyhow::Result<Vec<AuthRule>> {
match self { match self {
Self::Console(api) => api.get_endpoint_jwks(ctx, endpoint).await, Self::Management(api) => api.get_endpoint_jwks(ctx, endpoint).await,
#[cfg(any(test, feature = "testing"))] #[cfg(any(test, feature = "testing"))]
Self::Postgres(api) => api.get_endpoint_jwks(ctx, endpoint).await, Self::PostgresMock(api) => api.get_endpoint_jwks(ctx, endpoint).await,
#[cfg(test)] #[cfg(test)]
Self::Test(_api) => Ok(vec![]), Self::Test(_api) => Ok(vec![]),
} }
@@ -416,16 +417,16 @@ impl Api for ConsoleBackend {
user_info: &ComputeUserInfo, user_info: &ComputeUserInfo,
) -> Result<CachedNodeInfo, errors::WakeComputeError> { ) -> Result<CachedNodeInfo, errors::WakeComputeError> {
match self { match self {
Self::Console(api) => api.wake_compute(ctx, user_info).await, Self::Management(api) => api.wake_compute(ctx, user_info).await,
#[cfg(any(test, feature = "testing"))] #[cfg(any(test, feature = "testing"))]
Self::Postgres(api) => api.wake_compute(ctx, user_info).await, Self::PostgresMock(api) => api.wake_compute(ctx, user_info).await,
#[cfg(test)] #[cfg(test)]
Self::Test(api) => api.wake_compute(), Self::Test(api) => api.wake_compute(),
} }
} }
} }
/// Various caches for [`console`](super). /// Various caches for [`control_plane`](super).
pub struct ApiCaches { pub struct ApiCaches {
/// Cache for the `wake_compute` API method. /// Cache for the `wake_compute` API method.
pub(crate) node_info: NodeInfoCache, pub(crate) node_info: NodeInfoCache,
@@ -454,7 +455,7 @@ impl ApiCaches {
} }
} }
/// Various caches for [`console`](super). /// Various caches for [`control_plane`](super).
pub struct ApiLocks<K> { pub struct ApiLocks<K> {
name: &'static str, name: &'static str,
node_locks: DashMap<K, Arc<DynamicLimiter>>, node_locks: DashMap<K, Arc<DynamicLimiter>>,
@@ -577,7 +578,7 @@ impl WakeComputePermit {
} }
} }
impl FetchAuthRules for ConsoleBackend { impl FetchAuthRules for ControlPlaneBackend {
async fn fetch_auth_rules( async fn fetch_auth_rules(
&self, &self,
ctx: &RequestMonitoring, ctx: &RequestMonitoring,

View File

@@ -1,7 +1,7 @@
//! Production console backend. //! Production console backend.
use super::{ use super::{
super::messages::{ConsoleError, GetRoleSecret, WakeCompute}, super::messages::{ControlPlaneError, GetRoleSecret, WakeCompute},
errors::{ApiError, GetAuthInfoError, WakeComputeError}, errors::{ApiError, GetAuthInfoError, WakeComputeError},
ApiCaches, ApiLocks, AuthInfo, AuthSecret, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret, ApiCaches, ApiLocks, AuthInfo, AuthSecret, CachedAllowedIps, CachedNodeInfo, CachedRoleSecret,
NodeInfo, NodeInfo,
@@ -9,7 +9,7 @@ use super::{
use crate::{ use crate::{
auth::backend::{jwt::AuthRule, ComputeUserInfo}, auth::backend::{jwt::AuthRule, ComputeUserInfo},
compute, compute,
console::messages::{ColdStartInfo, EndpointJwksResponse, Reason}, control_plane::messages::{ColdStartInfo, EndpointJwksResponse, Reason},
http, http,
metrics::{CacheOutcome, Metrics}, metrics::{CacheOutcome, Metrics},
rate_limiter::WakeComputeRateLimiter, rate_limiter::WakeComputeRateLimiter,
@@ -348,7 +348,7 @@ impl super::Api for Api {
let (cached, info) = cached.take_value(); let (cached, info) = cached.take_value();
let info = info.map_err(|c| { let info = info.map_err(|c| {
info!(key = &*key, "found cached wake_compute error"); info!(key = &*key, "found cached wake_compute error");
WakeComputeError::ApiError(ApiError::Console(*c)) WakeComputeError::ApiError(ApiError::ControlPlane(*c))
})?; })?;
debug!(key = &*key, "found cached compute node info"); debug!(key = &*key, "found cached compute node info");
@@ -395,9 +395,9 @@ impl super::Api for Api {
Ok(cached.map(|()| node)) Ok(cached.map(|()| node))
} }
Err(err) => match err { Err(err) => match err {
WakeComputeError::ApiError(ApiError::Console(err)) => { WakeComputeError::ApiError(ApiError::ControlPlane(err)) => {
let Some(status) = &err.status else { let Some(status) = &err.status else {
return Err(WakeComputeError::ApiError(ApiError::Console(err))); return Err(WakeComputeError::ApiError(ApiError::ControlPlane(err)));
}; };
let reason = status let reason = status
@@ -407,7 +407,7 @@ impl super::Api for Api {
// if we can retry this error, do not cache it. // if we can retry this error, do not cache it.
if reason.can_retry() { if reason.can_retry() {
return Err(WakeComputeError::ApiError(ApiError::Console(err))); return Err(WakeComputeError::ApiError(ApiError::ControlPlane(err)));
} }
// at this point, we should only have quota errors. // at this point, we should only have quota errors.
@@ -422,7 +422,7 @@ impl super::Api for Api {
Duration::from_secs(30), Duration::from_secs(30),
); );
Err(WakeComputeError::ApiError(ApiError::Console(err))) Err(WakeComputeError::ApiError(ApiError::ControlPlane(err)))
} }
err => return Err(err), err => return Err(err),
}, },
@@ -448,7 +448,7 @@ async fn parse_body<T: for<'a> serde::Deserialize<'a>>(
// as the fact that the request itself has failed. // as the fact that the request itself has failed.
let mut body = serde_json::from_slice(&s).unwrap_or_else(|e| { let mut body = serde_json::from_slice(&s).unwrap_or_else(|e| {
warn!("failed to parse error body: {e}"); warn!("failed to parse error body: {e}");
ConsoleError { ControlPlaneError {
error: "reason unclear (malformed error message)".into(), error: "reason unclear (malformed error message)".into(),
http_status_code: status, http_status_code: status,
status: None, status: None,
@@ -457,7 +457,7 @@ async fn parse_body<T: for<'a> serde::Deserialize<'a>>(
body.http_status_code = status; body.http_status_code = status;
error!("console responded with an error ({status}): {body:?}"); error!("console responded with an error ({status}): {body:?}");
Err(ApiError::Console(body)) Err(ApiError::ControlPlane(body))
} }
fn parse_host_port(input: &str) -> Option<(&str, u16)> { fn parse_host_port(input: &str) -> Option<(&str, u16)> {

View File

@@ -1,5 +1,5 @@
use anyhow::{anyhow, bail}; use anyhow::{anyhow, bail};
use hyper::{header::CONTENT_TYPE, Body, Request, Response, StatusCode}; use hyper0::{header::CONTENT_TYPE, Body, Request, Response, StatusCode};
use measured::{text::BufferedTextEncoder, MetricGroup}; use measured::{text::BufferedTextEncoder, MetricGroup};
use metrics::NeonMetrics; use metrics::NeonMetrics;
use std::{ use std::{
@@ -21,7 +21,7 @@ async fn status_handler(_: Request<Body>) -> Result<Response<Body>, ApiError> {
json_response(StatusCode::OK, "") json_response(StatusCode::OK, "")
} }
fn make_router(metrics: AppMetrics) -> RouterBuilder<hyper::Body, ApiError> { fn make_router(metrics: AppMetrics) -> RouterBuilder<hyper0::Body, ApiError> {
let state = Arc::new(Mutex::new(PrometheusHandler { let state = Arc::new(Mutex::new(PrometheusHandler {
encoder: BufferedTextEncoder::new(), encoder: BufferedTextEncoder::new(),
metrics, metrics,
@@ -45,7 +45,7 @@ pub async fn task_main(
let service = || RouterService::new(make_router(metrics).build()?); let service = || RouterService::new(make_router(metrics).build()?);
hyper::Server::from_tcp(http_listener)? hyper0::Server::from_tcp(http_listener)?
.serve(service().map_err(|e| anyhow!(e))?) .serve(service().map_err(|e| anyhow!(e))?)
.await?; .await?;

View File

@@ -9,7 +9,7 @@ use std::time::Duration;
use anyhow::bail; use anyhow::bail;
use bytes::Bytes; use bytes::Bytes;
use http_body_util::BodyExt; use http_body_util::BodyExt;
use hyper1::body::Body; use hyper::body::Body;
use serde::de::DeserializeOwned; use serde::de::DeserializeOwned;
pub(crate) use reqwest::{Request, Response}; pub(crate) use reqwest::{Request, Response};

View File

@@ -90,15 +90,13 @@ use tokio::task::JoinError;
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use tracing::warn; use tracing::warn;
extern crate hyper0 as hyper;
pub mod auth; pub mod auth;
pub mod cache; pub mod cache;
pub mod cancellation; pub mod cancellation;
pub mod compute; pub mod compute;
pub mod config; pub mod config;
pub mod console;
pub mod context; pub mod context;
pub mod control_plane;
pub mod error; pub mod error;
pub mod http; pub mod http;
pub mod intern; pub mod intern;

View File

@@ -11,7 +11,7 @@ use metrics::{CounterPairAssoc, CounterPairVec, HyperLogLog, HyperLogLogVec};
use tokio::time::{self, Instant}; use tokio::time::{self, Instant};
use crate::console::messages::ColdStartInfo; use crate::control_plane::messages::ColdStartInfo;
#[derive(MetricGroup)] #[derive(MetricGroup)]
#[metric(new(thread_pool: Arc<ThreadPoolMetrics>))] #[metric(new(thread_pool: Arc<ThreadPoolMetrics>))]

View File

@@ -1,9 +1,10 @@
use crate::{ use crate::{
auth::backend::ComputeCredentialKeys, auth::backend::ComputeCredentialKeys,
compute::COULD_NOT_CONNECT,
compute::{self, PostgresConnection}, compute::{self, PostgresConnection},
config::RetryConfig, config::RetryConfig,
console::{self, errors::WakeComputeError, locks::ApiLocks, CachedNodeInfo, NodeInfo},
context::RequestMonitoring, context::RequestMonitoring,
control_plane::{self, errors::WakeComputeError, locks::ApiLocks, CachedNodeInfo, NodeInfo},
error::ReportableError, error::ReportableError,
metrics::{ConnectOutcome, ConnectionFailureKind, Metrics, RetriesMetricGroup, RetryType}, metrics::{ConnectOutcome, ConnectionFailureKind, Metrics, RetriesMetricGroup, RetryType},
proxy::{ proxy::{
@@ -15,7 +16,7 @@ use crate::{
use async_trait::async_trait; use async_trait::async_trait;
use pq_proto::StartupMessageParams; use pq_proto::StartupMessageParams;
use tokio::time; use tokio::time;
use tracing::{error, info, warn}; use tracing::{debug, info, warn};
use super::retry::ShouldRetryWakeCompute; use super::retry::ShouldRetryWakeCompute;
@@ -25,7 +26,7 @@ const CONNECT_TIMEOUT: time::Duration = time::Duration::from_secs(2);
/// (e.g. the compute node's address might've changed at the wrong time). /// (e.g. the compute node's address might've changed at the wrong time).
/// Invalidate the cache entry (if any) to prevent subsequent errors. /// Invalidate the cache entry (if any) to prevent subsequent errors.
#[tracing::instrument(name = "invalidate_cache", skip_all)] #[tracing::instrument(name = "invalidate_cache", skip_all)]
pub(crate) fn invalidate_cache(node_info: console::CachedNodeInfo) -> NodeInfo { pub(crate) fn invalidate_cache(node_info: control_plane::CachedNodeInfo) -> NodeInfo {
let is_cached = node_info.cached(); let is_cached = node_info.cached();
if is_cached { if is_cached {
warn!("invalidating stalled compute node info cache entry"); warn!("invalidating stalled compute node info cache entry");
@@ -48,7 +49,7 @@ pub(crate) trait ConnectMechanism {
async fn connect_once( async fn connect_once(
&self, &self,
ctx: &RequestMonitoring, ctx: &RequestMonitoring,
node_info: &console::CachedNodeInfo, node_info: &control_plane::CachedNodeInfo,
timeout: time::Duration, timeout: time::Duration,
) -> Result<Self::Connection, Self::ConnectError>; ) -> Result<Self::Connection, Self::ConnectError>;
@@ -60,7 +61,7 @@ pub(crate) trait ComputeConnectBackend {
async fn wake_compute( async fn wake_compute(
&self, &self,
ctx: &RequestMonitoring, ctx: &RequestMonitoring,
) -> Result<CachedNodeInfo, console::errors::WakeComputeError>; ) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError>;
fn get_keys(&self) -> &ComputeCredentialKeys; fn get_keys(&self) -> &ComputeCredentialKeys;
} }
@@ -83,7 +84,7 @@ impl ConnectMechanism for TcpMechanism<'_> {
async fn connect_once( async fn connect_once(
&self, &self,
ctx: &RequestMonitoring, ctx: &RequestMonitoring,
node_info: &console::CachedNodeInfo, node_info: &control_plane::CachedNodeInfo,
timeout: time::Duration, timeout: time::Duration,
) -> Result<PostgresConnection, Self::Error> { ) -> Result<PostgresConnection, Self::Error> {
let host = node_info.config.get_host()?; let host = node_info.config.get_host()?;
@@ -116,7 +117,6 @@ where
node_info.set_keys(user_info.get_keys()); node_info.set_keys(user_info.get_keys());
node_info.allow_self_signed_compute = allow_self_signed_compute; node_info.allow_self_signed_compute = allow_self_signed_compute;
// let mut node_info = credentials.get_node_info(ctx, user_info).await?;
mechanism.update_connect_config(&mut node_info.config); mechanism.update_connect_config(&mut node_info.config);
let retry_type = RetryType::ConnectToCompute; let retry_type = RetryType::ConnectToCompute;
@@ -139,10 +139,10 @@ where
Err(e) => e, Err(e) => e,
}; };
error!(error = ?err, "could not connect to compute node"); debug!(error = ?err, COULD_NOT_CONNECT);
let node_info = if !node_info.cached() || !err.should_retry_wake_compute() { let node_info = if !node_info.cached() || !err.should_retry_wake_compute() {
// If we just recieved this from cplane and dodn't get it from cache, we shouldn't retry. // If we just recieved this from cplane and didn't get it from cache, we shouldn't retry.
// Do not need to retrieve a new node_info, just return the old one. // Do not need to retrieve a new node_info, just return the old one.
if should_retry(&err, num_retries, connect_to_compute_retry_config) { if should_retry(&err, num_retries, connect_to_compute_retry_config) {
Metrics::get().proxy.retries_metric.observe( Metrics::get().proxy.retries_metric.observe(
@@ -191,7 +191,7 @@ where
} }
Err(e) => { Err(e) => {
if !should_retry(&e, num_retries, connect_to_compute_retry_config) { if !should_retry(&e, num_retries, connect_to_compute_retry_config) {
error!(error = ?e, num_retries, retriable = false, "couldn't connect to compute node"); // Don't log an error here, caller will print the error
Metrics::get().proxy.retries_metric.observe( Metrics::get().proxy.retries_metric.observe(
RetriesMetricGroup { RetriesMetricGroup {
outcome: ConnectOutcome::Failed, outcome: ConnectOutcome::Failed,
@@ -202,7 +202,7 @@ where
return Err(e.into()); return Err(e.into());
} }
warn!(error = ?e, num_retries, retriable = true, "couldn't connect to compute node"); warn!(error = ?e, num_retries, retriable = true, COULD_NOT_CONNECT);
} }
}; };

View File

@@ -1,7 +1,7 @@
use crate::{ use crate::{
cancellation, cancellation,
compute::PostgresConnection, compute::PostgresConnection,
console::messages::MetricsAuxInfo, control_plane::messages::MetricsAuxInfo,
metrics::{Direction, Metrics, NumClientConnectionsGuard, NumConnectionRequestsGuard}, metrics::{Direction, Metrics, NumClientConnectionsGuard, NumConnectionRequestsGuard},
stream::Stream, stream::Stream,
usage_metrics::{Ids, MetricCounterRecorder, USAGE_METRICS}, usage_metrics::{Ids, MetricCounterRecorder, USAGE_METRICS},

View File

@@ -11,9 +11,11 @@ use crate::auth::backend::{
ComputeCredentialKeys, ComputeCredentials, ComputeUserInfo, MaybeOwned, TestBackend, ComputeCredentialKeys, ComputeCredentials, ComputeUserInfo, MaybeOwned, TestBackend,
}; };
use crate::config::{CertResolver, RetryConfig}; use crate::config::{CertResolver, RetryConfig};
use crate::console::messages::{ConsoleError, Details, MetricsAuxInfo, Status}; use crate::control_plane::messages::{ControlPlaneError, Details, MetricsAuxInfo, Status};
use crate::console::provider::{CachedAllowedIps, CachedRoleSecret, ConsoleBackend, NodeInfoCache}; use crate::control_plane::provider::{
use crate::console::{self, CachedNodeInfo, NodeInfo}; CachedAllowedIps, CachedRoleSecret, ControlPlaneBackend, NodeInfoCache,
};
use crate::control_plane::{self, CachedNodeInfo, NodeInfo};
use crate::error::ErrorKind; use crate::error::ErrorKind;
use crate::{sasl, scram, BranchId, EndpointId, ProjectId}; use crate::{sasl, scram, BranchId, EndpointId, ProjectId};
use anyhow::{bail, Context}; use anyhow::{bail, Context};
@@ -459,7 +461,7 @@ impl ConnectMechanism for TestConnectMechanism {
async fn connect_once( async fn connect_once(
&self, &self,
_ctx: &RequestMonitoring, _ctx: &RequestMonitoring,
_node_info: &console::CachedNodeInfo, _node_info: &control_plane::CachedNodeInfo,
_timeout: std::time::Duration, _timeout: std::time::Duration,
) -> Result<Self::Connection, Self::ConnectError> { ) -> Result<Self::Connection, Self::ConnectError> {
let mut counter = self.counter.lock().unwrap(); let mut counter = self.counter.lock().unwrap();
@@ -483,23 +485,23 @@ impl ConnectMechanism for TestConnectMechanism {
} }
impl TestBackend for TestConnectMechanism { impl TestBackend for TestConnectMechanism {
fn wake_compute(&self) -> Result<CachedNodeInfo, console::errors::WakeComputeError> { fn wake_compute(&self) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> {
let mut counter = self.counter.lock().unwrap(); let mut counter = self.counter.lock().unwrap();
let action = self.sequence[*counter]; let action = self.sequence[*counter];
*counter += 1; *counter += 1;
match action { match action {
ConnectAction::Wake => Ok(helper_create_cached_node_info(self.cache)), ConnectAction::Wake => Ok(helper_create_cached_node_info(self.cache)),
ConnectAction::WakeFail => { ConnectAction::WakeFail => {
let err = console::errors::ApiError::Console(ConsoleError { let err = control_plane::errors::ApiError::ControlPlane(ControlPlaneError {
http_status_code: StatusCode::BAD_REQUEST, http_status_code: StatusCode::BAD_REQUEST,
error: "TEST".into(), error: "TEST".into(),
status: None, status: None,
}); });
assert!(!err.could_retry()); assert!(!err.could_retry());
Err(console::errors::WakeComputeError::ApiError(err)) Err(control_plane::errors::WakeComputeError::ApiError(err))
} }
ConnectAction::WakeRetry => { ConnectAction::WakeRetry => {
let err = console::errors::ApiError::Console(ConsoleError { let err = control_plane::errors::ApiError::ControlPlane(ControlPlaneError {
http_status_code: StatusCode::BAD_REQUEST, http_status_code: StatusCode::BAD_REQUEST,
error: "TEST".into(), error: "TEST".into(),
status: Some(Status { status: Some(Status {
@@ -507,13 +509,15 @@ impl TestBackend for TestConnectMechanism {
message: "error".into(), message: "error".into(),
details: Details { details: Details {
error_info: None, error_info: None,
retry_info: Some(console::messages::RetryInfo { retry_delay_ms: 1 }), retry_info: Some(control_plane::messages::RetryInfo {
retry_delay_ms: 1,
}),
user_facing_message: None, user_facing_message: None,
}, },
}), }),
}); });
assert!(err.could_retry()); assert!(err.could_retry());
Err(console::errors::WakeComputeError::ApiError(err)) Err(control_plane::errors::WakeComputeError::ApiError(err))
} }
x => panic!("expecting action {x:?}, wake_compute is called instead"), x => panic!("expecting action {x:?}, wake_compute is called instead"),
} }
@@ -521,7 +525,7 @@ impl TestBackend for TestConnectMechanism {
fn get_allowed_ips_and_secret( fn get_allowed_ips_and_secret(
&self, &self,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), console::errors::GetAuthInfoError> ) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), control_plane::errors::GetAuthInfoError>
{ {
unimplemented!("not used in tests") unimplemented!("not used in tests")
} }
@@ -538,7 +542,7 @@ fn helper_create_cached_node_info(cache: &'static NodeInfoCache) -> CachedNodeIn
endpoint_id: (&EndpointId::from("endpoint")).into(), endpoint_id: (&EndpointId::from("endpoint")).into(),
project_id: (&ProjectId::from("project")).into(), project_id: (&ProjectId::from("project")).into(),
branch_id: (&BranchId::from("branch")).into(), branch_id: (&BranchId::from("branch")).into(),
cold_start_info: crate::console::messages::ColdStartInfo::Warm, cold_start_info: crate::control_plane::messages::ColdStartInfo::Warm,
}, },
allow_self_signed_compute: false, allow_self_signed_compute: false,
}; };
@@ -549,8 +553,8 @@ fn helper_create_cached_node_info(cache: &'static NodeInfoCache) -> CachedNodeIn
fn helper_create_connect_info( fn helper_create_connect_info(
mechanism: &TestConnectMechanism, mechanism: &TestConnectMechanism,
) -> auth::Backend<'static, ComputeCredentials, &()> { ) -> auth::Backend<'static, ComputeCredentials, &()> {
let user_info = auth::Backend::Console( let user_info = auth::Backend::ControlPlane(
MaybeOwned::Owned(ConsoleBackend::Test(Box::new(mechanism.clone()))), MaybeOwned::Owned(ControlPlaneBackend::Test(Box::new(mechanism.clone()))),
ComputeCredentials { ComputeCredentials {
info: ComputeUserInfo { info: ComputeUserInfo {
endpoint: "endpoint".into(), endpoint: "endpoint".into(),

View File

@@ -1,13 +1,13 @@
use crate::config::RetryConfig; use crate::config::RetryConfig;
use crate::console::messages::{ConsoleError, Reason};
use crate::console::{errors::WakeComputeError, provider::CachedNodeInfo};
use crate::context::RequestMonitoring; use crate::context::RequestMonitoring;
use crate::control_plane::messages::{ControlPlaneError, Reason};
use crate::control_plane::{errors::WakeComputeError, provider::CachedNodeInfo};
use crate::metrics::{ use crate::metrics::{
ConnectOutcome, ConnectionFailuresBreakdownGroup, Metrics, RetriesMetricGroup, RetryType, ConnectOutcome, ConnectionFailuresBreakdownGroup, Metrics, RetriesMetricGroup, RetryType,
WakeupFailureKind, WakeupFailureKind,
}; };
use crate::proxy::retry::{retry_after, should_retry}; use crate::proxy::retry::{retry_after, should_retry};
use hyper1::StatusCode; use hyper::StatusCode;
use tracing::{error, info, warn}; use tracing::{error, info, warn};
use super::connect_compute::ComputeConnectBackend; use super::connect_compute::ComputeConnectBackend;
@@ -59,11 +59,11 @@ pub(crate) async fn wake_compute<B: ComputeConnectBackend>(
} }
fn report_error(e: &WakeComputeError, retry: bool) { fn report_error(e: &WakeComputeError, retry: bool) {
use crate::console::errors::ApiError; use crate::control_plane::errors::ApiError;
let kind = match e { let kind = match e {
WakeComputeError::BadComputeAddress(_) => WakeupFailureKind::BadComputeAddress, WakeComputeError::BadComputeAddress(_) => WakeupFailureKind::BadComputeAddress,
WakeComputeError::ApiError(ApiError::Transport(_)) => WakeupFailureKind::ApiTransportError, WakeComputeError::ApiError(ApiError::Transport(_)) => WakeupFailureKind::ApiTransportError,
WakeComputeError::ApiError(ApiError::Console(e)) => match e.get_reason() { WakeComputeError::ApiError(ApiError::ControlPlane(e)) => match e.get_reason() {
Reason::RoleProtected => WakeupFailureKind::ApiConsoleBadRequest, Reason::RoleProtected => WakeupFailureKind::ApiConsoleBadRequest,
Reason::ResourceNotFound => WakeupFailureKind::ApiConsoleBadRequest, Reason::ResourceNotFound => WakeupFailureKind::ApiConsoleBadRequest,
Reason::ProjectNotFound => WakeupFailureKind::ApiConsoleBadRequest, Reason::ProjectNotFound => WakeupFailureKind::ApiConsoleBadRequest,
@@ -80,7 +80,7 @@ fn report_error(e: &WakeComputeError, retry: bool) {
Reason::LockAlreadyTaken => WakeupFailureKind::ApiConsoleLocked, Reason::LockAlreadyTaken => WakeupFailureKind::ApiConsoleLocked,
Reason::RunningOperations => WakeupFailureKind::ApiConsoleLocked, Reason::RunningOperations => WakeupFailureKind::ApiConsoleLocked,
Reason::Unknown => match e { Reason::Unknown => match e {
ConsoleError { ControlPlaneError {
http_status_code: StatusCode::LOCKED, http_status_code: StatusCode::LOCKED,
ref error, ref error,
.. ..
@@ -89,27 +89,27 @@ fn report_error(e: &WakeComputeError, retry: bool) {
{ {
WakeupFailureKind::QuotaExceeded WakeupFailureKind::QuotaExceeded
} }
ConsoleError { ControlPlaneError {
http_status_code: StatusCode::UNPROCESSABLE_ENTITY, http_status_code: StatusCode::UNPROCESSABLE_ENTITY,
ref error, ref error,
.. ..
} if error.contains("compute time quota of non-primary branches is exceeded") => { } if error.contains("compute time quota of non-primary branches is exceeded") => {
WakeupFailureKind::QuotaExceeded WakeupFailureKind::QuotaExceeded
} }
ConsoleError { ControlPlaneError {
http_status_code: StatusCode::LOCKED, http_status_code: StatusCode::LOCKED,
.. ..
} => WakeupFailureKind::ApiConsoleLocked, } => WakeupFailureKind::ApiConsoleLocked,
ConsoleError { ControlPlaneError {
http_status_code: StatusCode::BAD_REQUEST, http_status_code: StatusCode::BAD_REQUEST,
.. ..
} => WakeupFailureKind::ApiConsoleBadRequest, } => WakeupFailureKind::ApiConsoleBadRequest,
ConsoleError { ControlPlaneError {
http_status_code, .. http_status_code, ..
} if http_status_code.is_server_error() => { } if http_status_code.is_server_error() => {
WakeupFailureKind::ApiConsoleOtherServerError WakeupFailureKind::ApiConsoleOtherServerError
} }
ConsoleError { .. } => WakeupFailureKind::ApiConsoleOtherError, ControlPlaneError { .. } => WakeupFailureKind::ApiConsoleOtherError,
}, },
}, },
WakeComputeError::TooManyConnections => WakeupFailureKind::ApiConsoleLocked, WakeComputeError::TooManyConnections => WakeupFailureKind::ApiConsoleLocked,

View File

@@ -12,13 +12,13 @@ use crate::{
}, },
compute, compute,
config::{AuthenticationConfig, ProxyConfig}, config::{AuthenticationConfig, ProxyConfig},
console::{ context::RequestMonitoring,
control_plane::{
errors::{GetAuthInfoError, WakeComputeError}, errors::{GetAuthInfoError, WakeComputeError},
locks::ApiLocks, locks::ApiLocks,
provider::ApiLockError, provider::ApiLockError,
CachedNodeInfo, CachedNodeInfo,
}, },
context::RequestMonitoring,
error::{ErrorKind, ReportableError, UserFacingError}, error::{ErrorKind, ReportableError, UserFacingError},
intern::EndpointIdInt, intern::EndpointIdInt,
proxy::{ proxy::{
@@ -114,7 +114,7 @@ impl PoolingBackend {
jwt: String, jwt: String,
) -> Result<(), AuthError> { ) -> Result<(), AuthError> {
match &self.config.auth_backend { match &self.config.auth_backend {
crate::auth::Backend::Console(console, ()) => { crate::auth::Backend::ControlPlane(console, ()) => {
config config
.jwks_cache .jwks_cache
.check_jwt( .check_jwt(
@@ -129,7 +129,7 @@ impl PoolingBackend {
Ok(()) Ok(())
} }
crate::auth::Backend::Web(_, ()) => Err(AuthError::auth_failed( crate::auth::Backend::ConsoleRedirect(_, ()) => Err(AuthError::auth_failed(
"JWT login over web auth proxy is not supported", "JWT login over web auth proxy is not supported",
)), )),
crate::auth::Backend::Local(_) => { crate::auth::Backend::Local(_) => {
@@ -257,7 +257,7 @@ pub(crate) enum LocalProxyConnError {
#[error("error with connection to local-proxy")] #[error("error with connection to local-proxy")]
Io(#[source] std::io::Error), Io(#[source] std::io::Error),
#[error("could not establish h2 connection")] #[error("could not establish h2 connection")]
H2(#[from] hyper1::Error), H2(#[from] hyper::Error),
} }
impl ReportableError for HttpConnError { impl ReportableError for HttpConnError {
@@ -481,7 +481,7 @@ async fn connect_http2(
}; };
}; };
let (client, connection) = hyper1::client::conn::http2::Builder::new(TokioExecutor::new()) let (client, connection) = hyper::client::conn::http2::Builder::new(TokioExecutor::new())
.timer(TokioTimer::new()) .timer(TokioTimer::new())
.keep_alive_interval(Duration::from_secs(20)) .keep_alive_interval(Duration::from_secs(20))
.keep_alive_while_idle(true) .keep_alive_while_idle(true)

View File

@@ -17,7 +17,7 @@ use tokio_postgres::tls::NoTlsStream;
use tokio_postgres::{AsyncMessage, ReadyForQueryStatus, Socket}; use tokio_postgres::{AsyncMessage, ReadyForQueryStatus, Socket};
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use crate::console::messages::{ColdStartInfo, MetricsAuxInfo}; use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo};
use crate::metrics::{HttpEndpointPoolsGuard, Metrics}; use crate::metrics::{HttpEndpointPoolsGuard, Metrics};
use crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS}; use crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS};
use crate::{ use crate::{
@@ -760,7 +760,7 @@ mod tests {
endpoint_id: (&EndpointId::from("endpoint")).into(), endpoint_id: (&EndpointId::from("endpoint")).into(),
project_id: (&ProjectId::from("project")).into(), project_id: (&ProjectId::from("project")).into(),
branch_id: (&BranchId::from("branch")).into(), branch_id: (&BranchId::from("branch")).into(),
cold_start_info: crate::console::messages::ColdStartInfo::Warm, cold_start_info: crate::control_plane::messages::ColdStartInfo::Warm,
}, },
conn_id: uuid::Uuid::new_v4(), conn_id: uuid::Uuid::new_v4(),
} }

View File

@@ -1,5 +1,5 @@
use dashmap::DashMap; use dashmap::DashMap;
use hyper1::client::conn::http2; use hyper::client::conn::http2;
use hyper_util::rt::{TokioExecutor, TokioIo}; use hyper_util::rt::{TokioExecutor, TokioIo};
use parking_lot::RwLock; use parking_lot::RwLock;
use rand::Rng; use rand::Rng;
@@ -8,7 +8,7 @@ use std::sync::atomic::{self, AtomicUsize};
use std::{sync::Arc, sync::Weak}; use std::{sync::Arc, sync::Weak};
use tokio::net::TcpStream; use tokio::net::TcpStream;
use crate::console::messages::{ColdStartInfo, MetricsAuxInfo}; use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo};
use crate::metrics::{HttpEndpointPoolsGuard, Metrics}; use crate::metrics::{HttpEndpointPoolsGuard, Metrics};
use crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS}; use crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS};
use crate::{context::RequestMonitoring, EndpointCacheKey}; use crate::{context::RequestMonitoring, EndpointCacheKey};
@@ -18,9 +18,9 @@ use tracing::{info, info_span, Instrument};
use super::conn_pool::ConnInfo; use super::conn_pool::ConnInfo;
pub(crate) type Send = http2::SendRequest<hyper1::body::Incoming>; pub(crate) type Send = http2::SendRequest<hyper::body::Incoming>;
pub(crate) type Connect = pub(crate) type Connect =
http2::Connection<TokioIo<TcpStream>, hyper1::body::Incoming, TokioExecutor>; http2::Connection<TokioIo<TcpStream>, hyper::body::Incoming, TokioExecutor>;
#[derive(Clone)] #[derive(Clone)]
struct ConnPoolEntry { struct ConnPoolEntry {

View File

@@ -11,7 +11,7 @@ use serde::Serialize;
use utils::http::error::ApiError; use utils::http::error::ApiError;
/// Like [`ApiError::into_response`] /// Like [`ApiError::into_response`]
pub(crate) fn api_error_into_response(this: ApiError) -> Response<BoxBody<Bytes, hyper1::Error>> { pub(crate) fn api_error_into_response(this: ApiError) -> Response<BoxBody<Bytes, hyper::Error>> {
match this { match this {
ApiError::BadRequest(err) => HttpErrorBody::response_from_msg_and_status( ApiError::BadRequest(err) => HttpErrorBody::response_from_msg_and_status(
format!("{err:#?}"), // use debug printing so that we give the cause format!("{err:#?}"), // use debug printing so that we give the cause
@@ -67,12 +67,12 @@ impl HttpErrorBody {
fn response_from_msg_and_status( fn response_from_msg_and_status(
msg: String, msg: String,
status: StatusCode, status: StatusCode,
) -> Response<BoxBody<Bytes, hyper1::Error>> { ) -> Response<BoxBody<Bytes, hyper::Error>> {
HttpErrorBody { msg }.to_response(status) HttpErrorBody { msg }.to_response(status)
} }
/// Same as [`utils::http::error::HttpErrorBody::to_response`] /// Same as [`utils::http::error::HttpErrorBody::to_response`]
fn to_response(&self, status: StatusCode) -> Response<BoxBody<Bytes, hyper1::Error>> { fn to_response(&self, status: StatusCode) -> Response<BoxBody<Bytes, hyper::Error>> {
Response::builder() Response::builder()
.status(status) .status(status)
.header(http::header::CONTENT_TYPE, "application/json") .header(http::header::CONTENT_TYPE, "application/json")
@@ -90,7 +90,7 @@ impl HttpErrorBody {
pub(crate) fn json_response<T: Serialize>( pub(crate) fn json_response<T: Serialize>(
status: StatusCode, status: StatusCode,
data: T, data: T,
) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, ApiError> { ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, ApiError> {
let json = serde_json::to_string(&data) let json = serde_json::to_string(&data)
.context("Failed to serialize JSON response") .context("Failed to serialize JSON response")
.map_err(ApiError::InternalServerError)?; .map_err(ApiError::InternalServerError)?;

View File

@@ -22,7 +22,7 @@ use futures::TryFutureExt;
use http::{Method, Response, StatusCode}; use http::{Method, Response, StatusCode};
use http_body_util::combinators::BoxBody; use http_body_util::combinators::BoxBody;
use http_body_util::{BodyExt, Empty}; use http_body_util::{BodyExt, Empty};
use hyper1::body::Incoming; use hyper::body::Incoming;
use hyper_util::rt::TokioExecutor; use hyper_util::rt::TokioExecutor;
use hyper_util::server::conn::auto::Builder; use hyper_util::server::conn::auto::Builder;
use rand::rngs::StdRng; use rand::rngs::StdRng;
@@ -302,7 +302,7 @@ async fn connection_handler(
let server = Builder::new(TokioExecutor::new()); let server = Builder::new(TokioExecutor::new());
let conn = server.serve_connection_with_upgrades( let conn = server.serve_connection_with_upgrades(
hyper_util::rt::TokioIo::new(conn), hyper_util::rt::TokioIo::new(conn),
hyper1::service::service_fn(move |req: hyper1::Request<Incoming>| { hyper::service::service_fn(move |req: hyper::Request<Incoming>| {
// First HTTP request shares the same session ID // First HTTP request shares the same session ID
let session_id = session_id.take().unwrap_or_else(uuid::Uuid::new_v4); let session_id = session_id.take().unwrap_or_else(uuid::Uuid::new_v4);
@@ -355,7 +355,7 @@ async fn connection_handler(
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
async fn request_handler( async fn request_handler(
mut request: hyper1::Request<Incoming>, mut request: hyper::Request<Incoming>,
config: &'static ProxyConfig, config: &'static ProxyConfig,
backend: Arc<PoolingBackend>, backend: Arc<PoolingBackend>,
ws_connections: TaskTracker, ws_connections: TaskTracker,
@@ -365,7 +365,7 @@ async fn request_handler(
// used to cancel in-flight HTTP requests. not used to cancel websockets // used to cancel in-flight HTTP requests. not used to cancel websockets
http_cancellation_token: CancellationToken, http_cancellation_token: CancellationToken,
endpoint_rate_limiter: Arc<EndpointRateLimiter>, endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, ApiError> { ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, ApiError> {
let host = request let host = request
.headers() .headers()
.get("host") .get("host")

View File

@@ -12,14 +12,14 @@ use http::Method;
use http_body_util::combinators::BoxBody; use http_body_util::combinators::BoxBody;
use http_body_util::BodyExt; use http_body_util::BodyExt;
use http_body_util::Full; use http_body_util::Full;
use hyper1::body::Body; use hyper::body::Body;
use hyper1::body::Incoming; use hyper::body::Incoming;
use hyper1::header; use hyper::header;
use hyper1::http::HeaderName; use hyper::http::HeaderName;
use hyper1::http::HeaderValue; use hyper::http::HeaderValue;
use hyper1::Response; use hyper::Response;
use hyper1::StatusCode; use hyper::StatusCode;
use hyper1::{HeaderMap, Request}; use hyper::{HeaderMap, Request};
use pq_proto::StartupMessageParamsBuilder; use pq_proto::StartupMessageParamsBuilder;
use serde::Serialize; use serde::Serialize;
use serde_json::Value; use serde_json::Value;
@@ -272,7 +272,7 @@ pub(crate) async fn handle(
request: Request<Incoming>, request: Request<Incoming>,
backend: Arc<PoolingBackend>, backend: Arc<PoolingBackend>,
cancel: CancellationToken, cancel: CancellationToken,
) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, ApiError> { ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, ApiError> {
let result = handle_inner(cancel, config, &ctx, request, backend).await; let result = handle_inner(cancel, config, &ctx, request, backend).await;
let mut response = match result { let mut response = match result {
@@ -435,7 +435,7 @@ impl UserFacingError for SqlOverHttpError {
#[derive(Debug, thiserror::Error)] #[derive(Debug, thiserror::Error)]
pub(crate) enum ReadPayloadError { pub(crate) enum ReadPayloadError {
#[error("could not read the HTTP request body: {0}")] #[error("could not read the HTTP request body: {0}")]
Read(#[from] hyper1::Error), Read(#[from] hyper::Error),
#[error("could not parse the HTTP request body: {0}")] #[error("could not parse the HTTP request body: {0}")]
Parse(#[from] serde_json::Error), Parse(#[from] serde_json::Error),
} }
@@ -476,7 +476,7 @@ struct HttpHeaders {
} }
impl HttpHeaders { impl HttpHeaders {
fn try_parse(headers: &hyper1::http::HeaderMap) -> Result<Self, SqlOverHttpError> { fn try_parse(headers: &hyper::http::HeaderMap) -> Result<Self, SqlOverHttpError> {
// Determine the output options. Default behaviour is 'false'. Anything that is not // Determine the output options. Default behaviour is 'false'. Anything that is not
// strictly 'true' assumed to be false. // strictly 'true' assumed to be false.
let raw_output = headers.get(&RAW_TEXT_OUTPUT) == Some(&HEADER_VALUE_TRUE); let raw_output = headers.get(&RAW_TEXT_OUTPUT) == Some(&HEADER_VALUE_TRUE);
@@ -529,7 +529,7 @@ async fn handle_inner(
ctx: &RequestMonitoring, ctx: &RequestMonitoring,
request: Request<Incoming>, request: Request<Incoming>,
backend: Arc<PoolingBackend>, backend: Arc<PoolingBackend>,
) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, SqlOverHttpError> { ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, SqlOverHttpError> {
let _requeset_gauge = Metrics::get() let _requeset_gauge = Metrics::get()
.proxy .proxy
.connection_requests .connection_requests
@@ -577,7 +577,7 @@ async fn handle_db_inner(
conn_info: ConnInfo, conn_info: ConnInfo,
auth: AuthData, auth: AuthData,
backend: Arc<PoolingBackend>, backend: Arc<PoolingBackend>,
) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, SqlOverHttpError> { ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, SqlOverHttpError> {
// //
// Determine the destination and connection params // Determine the destination and connection params
// //
@@ -744,7 +744,7 @@ async fn handle_auth_broker_inner(
conn_info: ConnInfo, conn_info: ConnInfo,
jwt: String, jwt: String,
backend: Arc<PoolingBackend>, backend: Arc<PoolingBackend>,
) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, SqlOverHttpError> { ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, SqlOverHttpError> {
backend backend
.authenticate_with_jwt( .authenticate_with_jwt(
ctx, ctx,

View File

@@ -12,7 +12,7 @@ use anyhow::Context as _;
use bytes::{Buf, BufMut, Bytes, BytesMut}; use bytes::{Buf, BufMut, Bytes, BytesMut};
use framed_websockets::{Frame, OpCode, WebSocketServer}; use framed_websockets::{Frame, OpCode, WebSocketServer};
use futures::{Sink, Stream}; use futures::{Sink, Stream};
use hyper1::upgrade::OnUpgrade; use hyper::upgrade::OnUpgrade;
use hyper_util::rt::TokioIo; use hyper_util::rt::TokioIo;
use pin_project_lite::pin_project; use pin_project_lite::pin_project;

View File

@@ -485,49 +485,51 @@ async fn upload_events_chunk(
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::{ use super::*;
net::TcpListener,
sync::{Arc, Mutex},
};
use crate::{http, BranchId, EndpointId};
use anyhow::Error; use anyhow::Error;
use chrono::Utc; use chrono::Utc;
use consumption_metrics::{Event, EventChunk}; use consumption_metrics::{Event, EventChunk};
use hyper::{ use http_body_util::BodyExt;
service::{make_service_fn, service_fn}, use hyper::{body::Incoming, server::conn::http1, service::service_fn, Request, Response};
Body, Response, use hyper_util::rt::TokioIo;
}; use std::sync::{Arc, Mutex};
use tokio::net::TcpListener;
use url::Url; use url::Url;
use super::*;
use crate::{http, BranchId, EndpointId};
#[tokio::test] #[tokio::test]
async fn metrics() { async fn metrics() {
let listener = TcpListener::bind("0.0.0.0:0").unwrap(); type Report = EventChunk<'static, Event<Ids, String>>;
let reports: Arc<Mutex<Vec<Report>>> = Arc::default();
let reports = Arc::new(Mutex::new(vec![])); let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
let reports2 = reports.clone(); let addr = listener.local_addr().unwrap();
tokio::spawn({
let server = hyper::server::Server::from_tcp(listener) let reports = reports.clone();
.unwrap() async move {
.serve(make_service_fn(move |_| { loop {
let reports = reports.clone(); if let Ok((stream, _addr)) = listener.accept().await {
async move {
Ok::<_, Error>(service_fn(move |req| {
let reports = reports.clone(); let reports = reports.clone();
async move { http1::Builder::new()
let bytes = hyper::body::to_bytes(req.into_body()).await?; .serve_connection(
let events: EventChunk<'static, Event<Ids, String>> = TokioIo::new(stream),
serde_json::from_slice(&bytes)?; service_fn(move |req: Request<Incoming>| {
reports.lock().unwrap().push(events); let reports = reports.clone();
Ok::<_, Error>(Response::new(Body::from(vec![]))) async move {
} let bytes = req.into_body().collect().await?.to_bytes();
})) let events = serde_json::from_slice(&bytes)?;
reports.lock().unwrap().push(events);
Ok::<_, Error>(Response::new(String::new()))
}
}),
)
.await
.unwrap();
}
} }
})); }
let addr = server.local_addr(); });
tokio::spawn(server);
let metrics = Metrics::default(); let metrics = Metrics::default();
let client = http::new_client(); let client = http::new_client();
@@ -536,7 +538,7 @@ mod tests {
// no counters have been registered // no counters have been registered
collect_metrics_iteration(&metrics.endpoints, &client, &endpoint, "foo", now, now).await; collect_metrics_iteration(&metrics.endpoints, &client, &endpoint, "foo", now, now).await;
let r = std::mem::take(&mut *reports2.lock().unwrap()); let r = std::mem::take(&mut *reports.lock().unwrap());
assert!(r.is_empty()); assert!(r.is_empty());
// register a new counter // register a new counter
@@ -548,7 +550,7 @@ mod tests {
// the counter should be observed despite 0 egress // the counter should be observed despite 0 egress
collect_metrics_iteration(&metrics.endpoints, &client, &endpoint, "foo", now, now).await; collect_metrics_iteration(&metrics.endpoints, &client, &endpoint, "foo", now, now).await;
let r = std::mem::take(&mut *reports2.lock().unwrap()); let r = std::mem::take(&mut *reports.lock().unwrap());
assert_eq!(r.len(), 1); assert_eq!(r.len(), 1);
assert_eq!(r[0].events.len(), 1); assert_eq!(r[0].events.len(), 1);
assert_eq!(r[0].events[0].value, 0); assert_eq!(r[0].events[0].value, 0);
@@ -558,7 +560,7 @@ mod tests {
// egress should be observered // egress should be observered
collect_metrics_iteration(&metrics.endpoints, &client, &endpoint, "foo", now, now).await; collect_metrics_iteration(&metrics.endpoints, &client, &endpoint, "foo", now, now).await;
let r = std::mem::take(&mut *reports2.lock().unwrap()); let r = std::mem::take(&mut *reports.lock().unwrap());
assert_eq!(r.len(), 1); assert_eq!(r.len(), 1);
assert_eq!(r[0].events.len(), 1); assert_eq!(r[0].events.len(), 1);
assert_eq!(r[0].events[0].value, 1); assert_eq!(r[0].events[0].value, 1);
@@ -568,7 +570,7 @@ mod tests {
// we do not observe the counter // we do not observe the counter
collect_metrics_iteration(&metrics.endpoints, &client, &endpoint, "foo", now, now).await; collect_metrics_iteration(&metrics.endpoints, &client, &endpoint, "foo", now, now).await;
let r = std::mem::take(&mut *reports2.lock().unwrap()); let r = std::mem::take(&mut *reports.lock().unwrap());
assert!(r.is_empty()); assert!(r.is_empty());
// counter is unregistered // counter is unregistered

View File

@@ -97,5 +97,8 @@ select = [
"I", # isort "I", # isort
"W", # pycodestyle "W", # pycodestyle
"B", # bugbear "B", # bugbear
"UP032", # f-string "UP", # pyupgrade
] ]
[tool.ruff.lint.pyupgrade]
keep-runtime-typing = true # Remove this stanza when we require Python 3.10

View File

@@ -23,6 +23,7 @@ crc32c.workspace = true
fail.workspace = true fail.workspace = true
hex.workspace = true hex.workspace = true
humantime.workspace = true humantime.workspace = true
http.workspace = true
hyper0.workspace = true hyper0.workspace = true
futures.workspace = true futures.workspace = true
once_cell.workspace = true once_cell.workspace = true

View File

@@ -12,8 +12,8 @@ use metrics::{
core::{AtomicU64, Collector, Desc, GenericCounter, GenericGaugeVec, Opts}, core::{AtomicU64, Collector, Desc, GenericCounter, GenericGaugeVec, Opts},
proto::MetricFamily, proto::MetricFamily,
register_histogram_vec, register_int_counter, register_int_counter_pair, register_histogram_vec, register_int_counter, register_int_counter_pair,
register_int_counter_pair_vec, register_int_counter_vec, Gauge, HistogramVec, IntCounter, register_int_counter_pair_vec, register_int_counter_vec, register_int_gauge, Gauge,
IntCounterPair, IntCounterPairVec, IntCounterVec, IntGaugeVec, HistogramVec, IntCounter, IntCounterPair, IntCounterPairVec, IntCounterVec, IntGaugeVec,
}; };
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
@@ -231,6 +231,14 @@ pub(crate) static EVICTION_EVENTS_COMPLETED: Lazy<IntCounterVec> = Lazy::new(||
.expect("Failed to register metric") .expect("Failed to register metric")
}); });
pub static NUM_EVICTED_TIMELINES: Lazy<IntGauge> = Lazy::new(|| {
register_int_gauge!(
"safekeeper_evicted_timelines",
"Number of currently evicted timelines"
)
.expect("Failed to register metric")
});
pub const LABEL_UNKNOWN: &str = "unknown"; pub const LABEL_UNKNOWN: &str = "unknown";
/// Labels for traffic metrics. /// Labels for traffic metrics.

View File

@@ -2,21 +2,29 @@ use utils::lsn::Lsn;
use crate::timeline_manager::StateSnapshot; use crate::timeline_manager::StateSnapshot;
/// Get oldest LSN we still need to keep. We hold WAL till it is consumed /// Get oldest LSN we still need to keep.
/// by all of 1) pageserver (remote_consistent_lsn) 2) peers 3) s3 ///
/// offloading. /// We hold WAL till it is consumed by
/// While it is safe to use inmem values for determining horizon, /// 1) pageserver (remote_consistent_lsn)
/// we use persistent to make possible normal states less surprising. /// 2) s3 offloading.
/// All segments covering LSNs before horizon_lsn can be removed. /// 3) Additionally we must store WAL since last local commit_lsn because
/// that's where we start looking for last WAL record on start.
///
/// If some peer safekeeper misses data it will fetch it from the remote
/// storage. While it is safe to use inmem values for determining horizon, we
/// use persistent to make possible normal states less surprising. All segments
/// covering LSNs before horizon_lsn can be removed.
pub(crate) fn calc_horizon_lsn(state: &StateSnapshot, extra_horizon_lsn: Option<Lsn>) -> Lsn { pub(crate) fn calc_horizon_lsn(state: &StateSnapshot, extra_horizon_lsn: Option<Lsn>) -> Lsn {
use std::cmp::min; use std::cmp::min;
let mut horizon_lsn = min( let mut horizon_lsn = state.cfile_remote_consistent_lsn;
state.cfile_remote_consistent_lsn,
state.cfile_peer_horizon_lsn,
);
// we don't want to remove WAL that is not yet offloaded to s3 // we don't want to remove WAL that is not yet offloaded to s3
horizon_lsn = min(horizon_lsn, state.cfile_backup_lsn); horizon_lsn = min(horizon_lsn, state.cfile_backup_lsn);
// Min by local commit_lsn to be able to begin reading WAL from somewhere on
// sk start. Technically we don't allow local commit_lsn to be higher than
// flush_lsn, but let's be double safe by including it as well.
horizon_lsn = min(horizon_lsn, state.cfile_commit_lsn);
horizon_lsn = min(horizon_lsn, state.flush_lsn);
if let Some(extra_horizon_lsn) = extra_horizon_lsn { if let Some(extra_horizon_lsn) = extra_horizon_lsn {
horizon_lsn = min(horizon_lsn, extra_horizon_lsn); horizon_lsn = min(horizon_lsn, extra_horizon_lsn);
} }

View File

@@ -631,13 +631,19 @@ impl Timeline {
return Err(e); return Err(e);
} }
self.bootstrap(conf, broker_active_set, partial_backup_rate_limiter); self.bootstrap(
shared_state,
conf,
broker_active_set,
partial_backup_rate_limiter,
);
Ok(()) Ok(())
} }
/// Bootstrap new or existing timeline starting background tasks. /// Bootstrap new or existing timeline starting background tasks.
pub fn bootstrap( pub fn bootstrap(
self: &Arc<Timeline>, self: &Arc<Timeline>,
_shared_state: &mut WriteGuardSharedState<'_>,
conf: &SafeKeeperConf, conf: &SafeKeeperConf,
broker_active_set: Arc<TimelinesSet>, broker_active_set: Arc<TimelinesSet>,
partial_backup_rate_limiter: RateLimiter, partial_backup_rate_limiter: RateLimiter,

View File

@@ -15,7 +15,9 @@ use tracing::{debug, info, instrument, warn};
use utils::crashsafe::durable_rename; use utils::crashsafe::durable_rename;
use crate::{ use crate::{
metrics::{EvictionEvent, EVICTION_EVENTS_COMPLETED, EVICTION_EVENTS_STARTED}, metrics::{
EvictionEvent, EVICTION_EVENTS_COMPLETED, EVICTION_EVENTS_STARTED, NUM_EVICTED_TIMELINES,
},
rate_limit::rand_duration, rate_limit::rand_duration,
timeline_manager::{Manager, StateSnapshot}, timeline_manager::{Manager, StateSnapshot},
wal_backup, wal_backup,
@@ -93,6 +95,7 @@ impl Manager {
} }
info!("successfully evicted timeline"); info!("successfully evicted timeline");
NUM_EVICTED_TIMELINES.inc();
} }
/// Attempt to restore evicted timeline from remote storage; it must be /// Attempt to restore evicted timeline from remote storage; it must be
@@ -128,6 +131,7 @@ impl Manager {
tokio::time::Instant::now() + rand_duration(&self.conf.eviction_min_resident); tokio::time::Instant::now() + rand_duration(&self.conf.eviction_min_resident);
info!("successfully restored evicted timeline"); info!("successfully restored evicted timeline");
NUM_EVICTED_TIMELINES.dec();
} }
} }

View File

@@ -25,7 +25,10 @@ use utils::lsn::Lsn;
use crate::{ use crate::{
control_file::{FileStorage, Storage}, control_file::{FileStorage, Storage},
metrics::{MANAGER_ACTIVE_CHANGES, MANAGER_ITERATIONS_TOTAL, MISC_OPERATION_SECONDS}, metrics::{
MANAGER_ACTIVE_CHANGES, MANAGER_ITERATIONS_TOTAL, MISC_OPERATION_SECONDS,
NUM_EVICTED_TIMELINES,
},
rate_limit::{rand_duration, RateLimiter}, rate_limit::{rand_duration, RateLimiter},
recovery::recovery_main, recovery::recovery_main,
remove_wal::calc_horizon_lsn, remove_wal::calc_horizon_lsn,
@@ -47,7 +50,7 @@ pub(crate) struct StateSnapshot {
pub(crate) remote_consistent_lsn: Lsn, pub(crate) remote_consistent_lsn: Lsn,
// persistent control file values // persistent control file values
pub(crate) cfile_peer_horizon_lsn: Lsn, pub(crate) cfile_commit_lsn: Lsn,
pub(crate) cfile_remote_consistent_lsn: Lsn, pub(crate) cfile_remote_consistent_lsn: Lsn,
pub(crate) cfile_backup_lsn: Lsn, pub(crate) cfile_backup_lsn: Lsn,
@@ -70,7 +73,7 @@ impl StateSnapshot {
commit_lsn: state.inmem.commit_lsn, commit_lsn: state.inmem.commit_lsn,
backup_lsn: state.inmem.backup_lsn, backup_lsn: state.inmem.backup_lsn,
remote_consistent_lsn: state.inmem.remote_consistent_lsn, remote_consistent_lsn: state.inmem.remote_consistent_lsn,
cfile_peer_horizon_lsn: state.peer_horizon_lsn, cfile_commit_lsn: state.commit_lsn,
cfile_remote_consistent_lsn: state.remote_consistent_lsn, cfile_remote_consistent_lsn: state.remote_consistent_lsn,
cfile_backup_lsn: state.backup_lsn, cfile_backup_lsn: state.backup_lsn,
flush_lsn: read_guard.sk.flush_lsn(), flush_lsn: read_guard.sk.flush_lsn(),
@@ -251,6 +254,11 @@ pub async fn main_task(
mgr.recovery_task = Some(tokio::spawn(recovery_main(tli, mgr.conf.clone()))); mgr.recovery_task = Some(tokio::spawn(recovery_main(tli, mgr.conf.clone())));
} }
// If timeline is evicted, reflect that in the metric.
if mgr.is_offloaded {
NUM_EVICTED_TIMELINES.inc();
}
let last_state = 'outer: loop { let last_state = 'outer: loop {
MANAGER_ITERATIONS_TOTAL.inc(); MANAGER_ITERATIONS_TOTAL.inc();
@@ -367,6 +375,11 @@ pub async fn main_task(
mgr.update_wal_removal_end(res); mgr.update_wal_removal_end(res);
} }
// If timeline is deleted while evicted decrement the gauge.
if mgr.tli.is_cancelled() && mgr.is_offloaded {
NUM_EVICTED_TIMELINES.dec();
}
mgr.set_status(Status::Finished); mgr.set_status(Status::Finished);
} }

View File

@@ -165,12 +165,14 @@ impl GlobalTimelines {
match Timeline::load_timeline(&conf, ttid) { match Timeline::load_timeline(&conf, ttid) {
Ok(timeline) => { Ok(timeline) => {
let tli = Arc::new(timeline); let tli = Arc::new(timeline);
let mut shared_state = tli.write_shared_state().await;
TIMELINES_STATE TIMELINES_STATE
.lock() .lock()
.unwrap() .unwrap()
.timelines .timelines
.insert(ttid, tli.clone()); .insert(ttid, tli.clone());
tli.bootstrap( tli.bootstrap(
&mut shared_state,
&conf, &conf,
broker_active_set.clone(), broker_active_set.clone(),
partial_backup_rate_limiter.clone(), partial_backup_rate_limiter.clone(),
@@ -213,6 +215,7 @@ impl GlobalTimelines {
match Timeline::load_timeline(&conf, ttid) { match Timeline::load_timeline(&conf, ttid) {
Ok(timeline) => { Ok(timeline) => {
let tli = Arc::new(timeline); let tli = Arc::new(timeline);
let mut shared_state = tli.write_shared_state().await;
// TODO: prevent concurrent timeline creation/loading // TODO: prevent concurrent timeline creation/loading
{ {
@@ -227,8 +230,13 @@ impl GlobalTimelines {
state.timelines.insert(ttid, tli.clone()); state.timelines.insert(ttid, tli.clone());
} }
tli.bootstrap(&conf, broker_active_set, partial_backup_rate_limiter); tli.bootstrap(
&mut shared_state,
&conf,
broker_active_set,
partial_backup_rate_limiter,
);
drop(shared_state);
Ok(tli) Ok(tli)
} }
// If we can't load a timeline, it's bad. Caller will figure it out. // If we can't load a timeline, it's bad. Caller will figure it out.

View File

@@ -17,7 +17,9 @@ use std::time::Duration;
use postgres_ffi::v14::xlog_utils::XLogSegNoOffsetToRecPtr; use postgres_ffi::v14::xlog_utils::XLogSegNoOffsetToRecPtr;
use postgres_ffi::XLogFileName; use postgres_ffi::XLogFileName;
use postgres_ffi::{XLogSegNo, PG_TLI}; use postgres_ffi::{XLogSegNo, PG_TLI};
use remote_storage::{GenericRemoteStorage, ListingMode, RemotePath, StorageMetadata}; use remote_storage::{
DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath, StorageMetadata,
};
use tokio::fs::File; use tokio::fs::File;
use tokio::select; use tokio::select;
@@ -503,8 +505,12 @@ pub async fn read_object(
let cancel = CancellationToken::new(); let cancel = CancellationToken::new();
let opts = DownloadOpts {
byte_start: std::ops::Bound::Included(offset),
..Default::default()
};
let download = storage let download = storage
.download_storage_object(Some((offset, None)), file_path, &cancel) .download(file_path, &opts, &cancel)
.await .await
.with_context(|| { .with_context(|| {
format!("Failed to open WAL segment download stream for remote path {file_path:?}") format!("Failed to open WAL segment download stream for remote path {file_path:?}")

Some files were not shown because too many files have changed in this diff Show More