Compare commits

..

81 Commits

Author SHA1 Message Date
Christian Schwarz
5d6254cbb7 use tokio-epoll-uring impl 2025-05-09 10:58:00 +02:00
Christian Schwarz
8c809e3586 Merge remote-tracking branch 'origin/main' into problame/fallocate 2025-05-09 10:37:13 +02:00
Conrad Ludgate
bef5954fd7 feat(proxy): track SNI usage by protocol, including for http (#11863)
## Problem

We want to see how many users of the legacy serverless driver are still
using the old URL for SQL-over-HTTP traffic.

## Summary of changes

Adds a protocol field to the connections_by_sni metric. Ensures it's
incremented for sql-over-http.
2025-05-08 16:46:57 +00:00
Christian Schwarz
8477d15f95 feat(direct IO): remove special case in test suite for compat tests (#11864)
PR
- https://github.com/neondatabase/neon/pull/11558
adds special treatment for compat snapshot binaries which don't
understand the `direct-rw` mode.

A new compat snapshot has been published since, so,
we can remove the special case.

refs:
- fixes https://github.com/neondatabase/neon/issues/11598
2025-05-08 16:11:45 +00:00
Arpad Müller
622b3b2993 Fixes for enabling --timelines-onto-safekeepers in tests (#11854)
Second PR with fixes extracted from #11712, relating to
`--timelines-onto-safekeepers`. Does the following:

* Moves safekeeper registration to `neon_local` instead of the test
fixtures
* Pass safekeeper JWT token if `--timelines-onto-safekeepers` is enabled
* Allow some warnings related to offline safekeepers (similarly to how
we allow them for offline pageservers)
* Enable generations on the compute's config if
`--timelines-onto-safekeepers` is enabled
* fix parallel `pull_timeline` race condition (the one that #11786 put
for later)

Fixes #11424
Part of #11670
2025-05-08 15:13:11 +00:00
Santosh Pingale
659366060d Reuse remote_client from the SnapshotDownloader instead of recreating in download function (#11812)
## Problem
At the moment, remote_client and target are recreated in download
function. We could reuse it from SnapshotDownloader instance. This isn't
a problem per se, just a quality of life improvement but it caught my
attention when we were trying out snapshot downloading in one of the
older version and ran into a curious case of s3 clients behaving in two
different manners. One client that used `force_path_style` and other one
didn't.

**Logs from this run:**
```
2025-05-02T12:56:22.384626Z DEBUG /data/snappie/2739e7da34e625e3934ef0b76fa12483/timelines/d44b831adb0a6ba96792dc3a5cc30910/000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000014E8F20-00000000014E8F99-00000001 requires download...
2025-05-02T12:56:22.384689Z DEBUG invoke{service=s3 operation=ListObjectVersions sdk_invocation_id=7315885}:apply_configuration: timeout settings for this operation: TimeoutConfig { connect_timeout: Set(3.1s), read_timeout: Disabled, operation_timeout: Disabled, operation_attempt_timeout: Disabled }
2025-05-02T12:56:22.384730Z DEBUG invoke{service=s3 operation=ListObjectVersions sdk_invocation_id=7315885}:try_op: entering 'serialization' phase
2025-05-02T12:56:22.384784Z DEBUG invoke{service=s3 operation=ListObjectVersions sdk_invocation_id=7315885}:try_op: entering 'before transmit' phase
2025-05-02T12:56:22.384813Z DEBUG invoke{service=s3 operation=ListObjectVersions sdk_invocation_id=7315885}:try_op: retry strategy has OKed initial request
2025-05-02T12:56:22.384841Z DEBUG invoke{service=s3 operation=ListObjectVersions sdk_invocation_id=7315885}:try_op: beginning attempt #1
2025-05-02T12:56:22.384870Z DEBUG invoke{service=s3 operation=ListObjectVersions sdk_invocation_id=7315885}:try_op:try_attempt: resolving endpoint endpoint_params=EndpointResolverParams(TypeErasedBox[!Clone]:Params { bucket: Some("bucket"), region: Some("eu-north-1"), use_fips: false, use_dual_stack: false, endpoint: Some("https://s3.self-hosted.company.com"), force_path_style: false, accelerate: false, use_global_endpoint: false, use_object_lambda_endpoint: None, key: None, prefix: Some("/pageserver/tenants/2739e7da34e625e3934ef0b76fa12483/timelines/d44b831adb0a6ba96792dc3a5cc30910/000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000014E8F20-00000000014E8F99-00000001"), copy_source: None, disable_access_points: None, disable_multi_region_access_points: false, use_arn_region: None, use_s3_express_control_endpoint: None, disable_s3_express_session_auth: None }) endpoint_prefix=None
2025-05-02T12:56:22.384979Z DEBUG invoke{service=s3 operation=ListObjectVersions sdk_invocation_id=7315885}:try_op:try_attempt: will use endpoint Endpoint { url: "https://neon.s3.self-hosted.company.com", headers: {}, properties: {"authSchemes": Array([Object({"signingRegion": String("eu-north-1"), "disableDoubleEncoding": Bool(true), "name": String("sigv4"), "signingName": String("s3")})])} }
2025-05-02T12:56:22.385042Z DEBUG invoke{service=s3 operation=ListObjectVersions sdk_invocation_id=7315885}:try_op:try_attempt:lazy_load_identity:provide_credentials{provider=default_chain}: loaded credentials provider=Environment
2025-05-02T12:56:22.385066Z DEBUG invoke{service=s3 operation=ListObjectVersions sdk_invocation_id=7315885}:try_op:try_attempt:lazy_load_identity: identity cache miss occurred; added new identity (took 35.958µs) new_expiration=2025-05-02T13:11:22.385028Z valid_for=899.999961437s partition=IdentityCachePartition(5)
2025-05-02T12:56:22.385090Z DEBUG invoke{service=s3 operation=ListObjectVersions sdk_invocation_id=7315885}:try_op:try_attempt: loaded identity
2025-05-02T12:56:22.385162Z DEBUG invoke{service=s3 operation=ListObjectVersions sdk_invocation_id=7315885}:try_op:try_attempt: entering 'transmit' phase
2025-05-02T12:56:22.385211Z DEBUG invoke{service=s3 operation=ListObjectVersions sdk_invocation_id=7315885}:try_op:try_attempt: new TCP connector created in 361ns
2025-05-02T12:56:22.385288Z DEBUG resolving host="neon.s3.self-hosted.company.com"
2025-05-02T12:56:22.390796Z DEBUG invoke{service=s3 operation=ListObjectVersions sdk_invocation_id=7315885}:try_op:try_attempt: encountered orchestrator error; halting
```
2025-05-08 14:09:15 +00:00
Christian Schwarz
42d93031a1 fixup(#11819): broken macOS build (#11861)
refs
- fixes https://github.com/neondatabase/neon/issues/11860
2025-05-08 11:48:29 +00:00
Mark Novikov
d22377c754 Skip event triggers in dump-restore (#11794)
## Problem

Data import fails if the src db has any event triggers, because those
can only be restored by a superuser. Specifically imports from Heroku
and Supabase are guaranteed to fail.

Closes https://github.com/neondatabase/cloud/issues/27353

## Summary of changes

Depends on `pg_dump` patches per each supported PostgreSQL version:
- https://github.com/neondatabase/postgres/pull/630
- https://github.com/neondatabase/postgres/pull/629
- https://github.com/neondatabase/postgres/pull/627
- https://github.com/neondatabase/postgres/pull/628
2025-05-08 11:04:28 +00:00
Erik Grinaker
6c70789cfd storcon: increase drain+fill secondary warmup timeout from 20 to 30 seconds (#11848)
## Problem

During deployment drains/fills, we often see the storage controller
giving up on warmups after 20 seconds, when the warmup is nearly
complete (~90%). This can cause latency spikes for migrated tenants if
they block on layer downloads.

Touches https://github.com/neondatabase/cloud/issues/26193.

## Summary of changes

Increase the drain and fill secondary warmup timeout from 20 to 30
seconds.
2025-05-08 10:14:41 +00:00
Dmitrii Kovalkov
7e55497e13 tests: flush wal before waiting for last record lsn (#11726)
## Problem
Compute may flush WAL on page boundaries, leaving some records partially
flushed for a long time.
It leads to `wait_for_last_flush_lsn` stuck waiting for this partial
LSN.
- Closes: https://github.com/neondatabase/cloud/issues/27876

## Summary of changes
- Flush WAL via CHECKPOINT after requesting current_wal_lsn to make sure
that the record we point to is flushed in full
- Use proper endpoint in
`test_timeline_detach_with_aux_files_with_detach_v1`
2025-05-08 10:00:45 +00:00
Vlad Lazar
40f32ea326 pageserver: refactor import flow and add job concurrency limiting (#11816)
## Problem

Import code is one big block. Separating planning and execution will
help with reporting
progress of import to storcon (building block for resuming import).

## Summary of changes

Split up the import into planning and execution.
A concurrency limit driven by PS config is also added.
2025-05-08 09:19:14 +00:00
Christian Schwarz
1d1502bc16 fix(pageserver): flush task cancelled errors during timeline shutdown (#11853)
# Refs
- fixes https://github.com/neondatabase/neon/issues/11762

# Problem

PR #10993 introduced internal retries for BufferedWriter flushes.
PR #11052 added cancellation sensitivity to that retry loop.
That cancellation sensitivity is an error path that didn't exist before.

The result is that during timeline shutdown, after we
`Timeline::cancel`, compaction can now fail with error `flush task
cancelled`.
The problem with that:
1. We mis-classify this as an `error!`-worthy event.
2. This causes tests to become flaky because the error is not in global
`allowed_errors`.

Technically we also trip the `compaction_circuit_breaker` because the
resulting `CompactionError` is variant `::Other`.
But since this is Timeline shutdown, is doesn't matter practically
speaking.

# Solution / Changes

- Log the anyhow stack trace when classifying a compaction error as
`error!`.
  This was helpful to identify sources of `flush task cancelled` errors.
We only log at `error!` level in exceptional circumstances, so, it's ok
to have bit verbose logs.
- Introduce typed errors along the `BufferedWriter::write_*`=>
`BlobWriter::write_blob`
=> `{Delta,Image}LayerWriter::put_*` =>
`Split{Delta,Image}LayerWriter::put_{value,image}` chain.
- Proper mapping to `CompactionError`/`CreateImageLayersError` via new
`From` impls.

I am usually opposed to any magic `From` impls, but, it's how most of
the compaction code
works today.

# Testing

The symptoms are most prevalent in
`test_runner/regress/test_branch_and_gc.py::test_branch_and_gc`.
Before this PR, I was able to reproduce locally 1 or 2 times per 400
runs using
`DEFAULT_PG_VERSION=15 BUILD_TYPE=release poetry run pytest --count 400
-n 8`.
After this PR, it doesn't reproduce anymore after 2000 runs.

# Future Work

Technically the ingest path is also exposed to this new source of errors
because `InMemoryLayer` is backed by `BufferedWriter`.
But we haven't seen it occur in flaky tests yet.
Details and a fix in
- https://github.com/neondatabase/neon/pull/11851
2025-05-08 06:57:53 +00:00
Christian Schwarz
7eb85c56ac tokio-epoll-uring: avoid warn! noise due to ECANCELED during shutdowns (#11819)
# Problem

Before this PR, `test_pageserver_catchup_while_compute_down` would
occasionally fail due to scary-looking WARN log line

```
WARN ephemeral_file_buffered_writer{...}:flush_attempt{attempt=1}: \
 error flushing buffered writer buffer to disk, retrying after backoff err=Operation canceled (os error 125)
```

After lengthy investigation, the conclusion is that this is likely due
to a kernel bug related due to io_uring async workers (io-wq) and
signals.
The main indicator is that the error only ever happens in correlation
with pageserver shtudown when SIGTERM is received.
There is a fix that is merged in 6.14
kernels (`io-wq: backoff when retrying worker creation`).
However, even when I revert that patch, the issue is not reproducible
on 6.14, so, it remains a speculation.

It was ruled out that the ECANCELED is due to the executor thread
exiting before the async worker starts processing the operation.

# Solution

The workaround in this issue is to retry the operation on ECANCELED
once.
Retries are safe because the low-level io_engine operations are
idempotent.
(We don't use O_APPEND and I can't think of another flag that would make
 the APIs covered by this patch not idempotent.)

# Testing

With this PR, the warn! log no longer happens on [my reproducer
setup](https://github.com/neondatabase/neon/issues/11446#issuecomment-2843015111).
And the new rate-limited `info!`-level log line informing about the
internal retry shows up instead, as expected.

# Refs
- fixes https://github.com/neondatabase/neon/issues/11446
2025-05-08 06:33:29 +00:00
Dmitrii Kovalkov
24d62c647f storcon: add missing switch_timeline_membership method to sk client (#11850)
## Problem

`switch_timeline_membership` is implemented on safekeeper's server side,
but the is missing in the client.

- Part of https://github.com/neondatabase/neon/issues/11823

## Summary of changes
- Add `switch_timeline_membership` method to `SafekeeperClient`
2025-05-07 17:00:41 +00:00
Shockingly Good
4d2e4b19c3 fix(compute) Correct the PGXN s3 gateway URL. (#11796)
Corrects the postgres extension s3 gateway address to
be not just a domain name but a full base URL.

To make the code more readable, the option is renamed
to "remote_ext_base_url", while keeping the old name
also accessible by providing a clap argument alias.

Also provides a very simple and, perhaps, even redundant
unit test to confirm the logic behind parsing of the
corresponding CLI argument.

## Problem

As it is clearly stated in
https://github.com/neondatabase/cloud/issues/26005, using of the short
version of the domain name might work for now, but in the future, we
should get rid of using the `default` namespace and this is where it
will, most likely, break down.

## Summary of changes

The changes adjust the domain name of the extension s3 gateway to use
the proper base url format instead of the just domain name assuming the
"default" namespace and add a new CLI argument name for to reflect the
change and the expectance.
2025-05-07 16:34:08 +00:00
Alexey Kondratov
0691b73f53 fix(compute): Enforce cloud_admin role in compute_ctl connections (#11827)
## Problem

Users can override some configuration parameters on the DB level with
`ALTER DATABASE ... SET ...`. Some of these overrides, like `role` or
`default_transaction_read_only`, affect `compute_ctl`'s ability to
configure the DB schema properly.

## Summary of changes

Enforce `role=cloud_admin`, `statement_timeout=0`, and move
`default_transaction_read_only=off` override from control plane [1] to
`compute_ctl`. Also, enforce `search_path=public` just in case, although
we do not call any functions in user databases.

[1]:
133dd8c4db/goapp/controlplane/internal/pkg/compute/provisioner/provisioner_common.go (L70)

Fixes https://github.com/neondatabase/cloud/issues/28532
2025-05-07 12:14:24 +00:00
Vlad Lazar
3cf5e1386c pageserver: fix rough edges of pageserver tracing (#11842)
## Problem

There's a few rough edges around PS tracing.

## Summary of changes

* include compute request id in pageserver trace
* use the get page specific context for GET_REL_SIZE and GET_BATCH
* fix assertion in download layer trace


![image](https://github.com/user-attachments/assets/2ff6779c-7c2d-4102-8013-ada8203aa42f)
2025-05-07 10:13:26 +00:00
Alex Chi Z.
608afc3055 fix(scrubber): log download error (#11833)
## Problem

We use `head_object` to determine whether an object exists or not.
However, it does not always error due to a missing object.

## Summary of changes

Log the error so that we can have a better idea what's going on with the
scrubber errors in prod.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
2025-05-07 09:21:17 +00:00
Tristan Partin
0ef6851219 Make the audience claim in compute JWTs a vector (#11845)
According to RFC 7519, `aud` is generally an array of StringOrURI, but
in special cases may be a single StringOrURI value. To accomodate future
control plane work where a single token may work for multiple services,
make the claim a vector.

Link: https://www.rfc-editor.org/rfc/rfc7519#section-4.1.3

Signed-off-by: Tristan Partin <tristan@neon.tech>
2025-05-06 22:19:15 +00:00
Mikhail
5c356c63eb endpoint_storage compute_ctl integration (#11550)
Add `/lfc/(prewarm|offload)` routes to `compute_ctl` which interact with
endpoint storage.

Add `prewarm_lfc_on_startup` spec option which, if enabled, downloads
LFC prewarm data on compute startup.

Resolves: https://github.com/neondatabase/cloud/issues/26343
2025-05-06 22:02:12 +00:00
Suhas Thalanki
384e3df2ad fix: pinned anon extension to v2.1.0 (#11844)
## Problem

Currently the setup for `anon` v2 in the compute image downloads the
latest version of the extension. This can be problematic as on a compute
start/restart it can download a version that is newer than what we have
tested and potentially break things, hence not giving us the ability to
control when the extension is updated.

We were also using `v2.2.0`, which is not ready for production yet and
has been clarified by the maintainer.

Additional context:
https://gitlab.com/dalibo/postgresql_anonymizer/-/issues/530

## Summary of changes

Changed the URL from which we download the `anon` extension to point to
`v2.1.0` instead of `latest`.
2025-05-06 21:52:15 +00:00
Tristan Partin
f9b3a2e059 Add scoping to compute_ctl JWT claims (#11639)
Currently we only have an admin scope which allows a user to bypass the
compute_id check. When the admin scope is provided, validate the
audience of the JWT to be "compute".

Closes: https://github.com/neondatabase/cloud/issues/27614

Signed-off-by: Tristan Partin <tristan@neon.tech>
2025-05-06 19:51:10 +00:00
Jakub Kołodziejczak
79ee78ea32 feat(compute): enable audit logs for pg_session_jwt extension (#11829)
related to https://github.com/neondatabase/cloud/issues/28480
related to https://github.com/neondatabase/pg_session_jwt/pull/36

cc @MihaiBojin @conradludgate @lneves12
2025-05-06 15:18:50 +00:00
Erik Grinaker
0e0ad073bf storcon: fix split aborts removing other tenants (#11837)
## Problem

When aborting a split, the code accidentally removes all other tenant
shards from the in-memory map that have the same shard count as the
aborted split, causing "tenant not found" errors. It will recover on a
storcon restart, when it loads the persisted state. This issue has been
present for at least a year.

Resolves https://github.com/neondatabase/cloud/issues/28589.

## Summary of changes

Only remove shards belonging to the relevant tenant when aborting a
split.

Also adds a regression test.
2025-05-06 13:57:34 +00:00
Alex Chi Z.
6827f2f58c fix(pageserver): only keep iter_with_options API, improve docs in gc-compact (#11804)
## Problem

Address comments in https://github.com/neondatabase/neon/pull/11709

## Summary of changes

- remove `iter` API, users always need to specify buffer size depending
on the expected memory usage.
- several doc improvements

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
Co-authored-by: Christian Schwarz <christian@neon.tech>
2025-05-06 12:27:16 +00:00
Peter Bendel
c82e363ed9 cleanup orphan projects created by python tests, too (#11836)
## Problem

- some projects are created during GitHub workflows but not by action
project_create but by python test scripts.

If the python test fails the project is not deleted

## Summary of changes

- make sure we cleanup those python created projects a few days after
they are no longer used, too
2025-05-06 12:26:13 +00:00
Alexander Bayandin
50dc2fae77 compute-node.Dockerfile: remove layer with duplicated name (#11807)
## Problem

Two `rust-extensions-build-pgrx14` layers were added independently in
two different PRs, and the layers are exactly the same

## Summary of changes
- Remove one of `rust-extensions-build-pgrx14` layers
2025-05-06 10:52:21 +00:00
Folke Behrens
62ac5b94b3 proxy: Include the exp/nbf timestamps in the errors (#11828)
## Problem

It's difficult to tell when the JWT expired from current logs and error
messages.

## Summary of changes

Add exp/nbf timestamps to the respective error variants.
Also use checked_add when deserializing a SystemTime from JWT.

Related to INC-509
2025-05-06 09:28:25 +00:00
Konstantin Knizhnik
f0e7b3e0ef Use unlogged build for gist_indexsortbuild_flush_ready_pages (#11753)
## Problem

See https://github.com/neondatabase/neon/issues/11718

GIST index can be constructed in two ways: GIST_SORTED_BUILD and
GIST_BUFFERING.
We used unlogged build in the second case but not in the first.

## Summary of changes

Use unlogged build in `gist_indexsortbuild_flush_ready_pages`

Correspondent Postgres PRsL:
https://github.com/neondatabase/postgres/pull/624
https://github.com/neondatabase/postgres/pull/625
https://github.com/neondatabase/postgres/pull/626

---------

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
Co-authored-by: Heikki Linnakangas <heikki@neon.tech>
2025-05-06 07:24:27 +00:00
Dmitrii Kovalkov
c6ff18affc cosmetics(pgxn/neon): WP code small clean up (#11824)
## Problem
Some small cosmetic changes I made while reading the code. Should not
affect anything.

## Summary of changes
- Remove `n_votes` field because it's not used anymore
- Explicitly initialize `safekeepers_generation` with
`INVALID_GENERATION` if the generation is not present (the struct is
zero-initialized anyway, but the explicit initialization is better IMHO)
- Access SafekeeperId via pointer `sk_id` created above
2025-05-06 06:51:51 +00:00
Heikki Linnakangas
16ca74a3f4 Add SAFETY comment on libc::sysconf() call (#11581)
I got an 'undocumented_unsafe_blocks' clippy warning about it. Not sure
why I got the warning now and not before, but in any case a comment is a
good idea.
2025-05-06 06:49:23 +00:00
Peter Bendel
cb67f9a651 delete orphan left over projects (#11826)
## Problem

sometimes our benchmarking GitHub workflow is terminated by side-effects
beyond our control (e.g. GitHub runner looses connection to server) and
then we have left-over Neon projects created during the workflow

[Example where GitHub runner lost connection and project was not
deleted](https://github.com/neondatabase/neon/actions/runs/14017400543/job/39244816485)

Fixes https://github.com/neondatabase/cloud/issues/28546

## Summary of changes

- Add a cleanup step that cleans up left-over projects
- also give each project created during workflows a name that references
the testcase and GitHub runid

## Example run (test of new job steps)


https://github.com/neondatabase/neon/actions/runs/14837092399/job/41650741922#step:6:63

---------

Co-authored-by: a-masterov <72613290+a-masterov@users.noreply.github.com>
2025-05-05 14:30:13 +00:00
devin-ai-integration[bot]
baf425a2cd [pageserver/virtual_file] impr: Improve OpenOptions API ergonomics (#11789)
# Improve OpenOptions API ergonomics

Closes #11787

This PR improves the OpenOptions API ergonomics by:

1. Making OpenOptions methods take and return owned Self instead of &mut
self
2. Changing VirtualFile::open_with_options_v2 to take an owned
OpenOptions
3. Removing unnecessary .clone() and .to_owned() calls

These changes make the API more idiomatic Rust by leveraging the builder
pattern with owned values, which is cleaner and more ergonomic than the
previous approach.

Link to Devin run:
https://app.devin.ai/sessions/c2a4b24f7aca40a3b3777f4259bf8ee1
Requested by: christian@neon.tech

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: christian@neon.tech <christian@neon.tech>
2025-05-05 13:06:37 +00:00
Alex Chi Z.
0b243242df fix(test): allow flush error in gc-compaction tests (#11822)
## Problem

Part of https://github.com/neondatabase/neon/issues/11762

## Summary of changes

While #11762 needs some work to refactor the error propagating thing, we
can do a hacky fix for the gc-compaction tests to allow flush error
during shutdown. It does not affect correctness.

Signed-off-by: Alex Chi Z <chi@neon.tech>
2025-05-05 12:15:22 +00:00
Conrad Ludgate
6131d86ec9 proxy: allow invalid SNI (#11792)
## Problem

Some PrivateLink customers are unable to use Private DNS. As such they
use an invalid domain name to address Neon. We currently are rejecting
those connections because we cannot resolve the correct certificate.

## Summary of changes

1. Ensure a certificate is always returned.
2. If there is an SNI field, use endpoint fallback if it doesn't match.

I suggest reviewing each commit separately.
2025-05-05 11:18:55 +00:00
Konstantin Knizhnik
4b9087651c Checked that stored LwLSN >= FirstNormalUnloggedLSN (#11750)
## Problem

Undo unintended change 60b9fb1baf

## Summary of changes

Add assert that we are not storing fake LSN in LwLSN.

---------

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
2025-05-02 19:27:59 +00:00
Konstantin Knizhnik
79699aebc8 Reserve in file descriptor pool sockets used for connections to page servers (#11798)
## Problem

See https://github.com/neondatabase/neon/issues/11790

The neon extension opens extensions to the pageservers, which consumes
file descriptors. Postgres has a mechanism to count how many FDs are in
use, but it doesn't know about those FDs. We should call
ReserveExternalFD() or AcquireExternalFD() to account for them.

## Summary of changes

Call `ReserveExternalFD()` for each shard

---------

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
Co-authored-by: Mikhail Kot <mikhail@neon.tech>
2025-05-02 14:36:10 +00:00
Alexander Bayandin
22290eb7ba CI: notify relevant team about release deploy failures (#11797)
## Problem

We notify only Storage team about failed deploys, but Compute and Proxy
teams can also benefit from that

## Summary of changes
- Adjust `notify-storage-release-deploy-failure` to notify the relevant
team about failed deploy
2025-05-02 12:46:21 +00:00
Alex Chi Z.
bbc35e10b8 fix(test): increase timeouts for some tests (#11781)
## Problem

Those tests are timing out more frequently after
https://github.com/neondatabase/neon/pull/11585

## Summary of changes

Increase timeout for `test_pageserver_gc_compaction_smoke`

Increase rollback wait timeout for `test_tx_abort_with_many_relations`

Signed-off-by: Alex Chi Z <chi@neon.tech>
2025-05-01 18:36:26 +00:00
Alex Chi Z.
ae2c3ac12f test: revert relsizev2 config (#11759)
## Problem

part of https://github.com/neondatabase/neon/issues/9516

One thing I realized in the past few months is that "no-way-back" things
like this are scary to roll out without a fine-grained rollout infra.
The plan was to flip the flag in the repo and roll it out soon, but I
don't think rolling out would happen in the near future. So I'd rather
revert the flag to avoid creating a discrepancy between staging and the
regress tests.

## Summary of changes

Not using rel_size_v2 by default in unit tests; we still have a few
tests to explicitly test the new format so we still get some test
coverages.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
2025-05-01 17:51:10 +00:00
Vlad Lazar
16d594b7b3 pagectl: list layers for given key in decreasing LSN order (#11799)
Adds an extra key CLI arg to `pagectl layer list-layer`. When provided,
only layers with key ranges containing the key will be listed in
decreasing LSN order (indices are preserved for `dump-layer`).
2025-05-01 15:56:43 +00:00
Suhas Thalanki
f999632327 Adding anon v2 support to the dockerfile (#11313)
## Problem

Removed `anon` v1 support as described here:
https://github.com/neondatabase/cloud/issues/22663

Adding `anon` v2 support to re-introduce the `pg_anon` extension. 
Related Issues: https://github.com/neondatabase/cloud/issues/20456


## Summary of changes

Adding `anon` v2 support by building it in the dockerfile
2025-05-01 15:22:01 +00:00
Shockingly Good
5bd850d15a Fix the leaked tracing context for the "compute_monitor:run". (#11791)
Removes the leaked tracing context for the "compute_monitor:run" log,
which either inherited the "start_compute" span or also the HTTP request
context.

## Problem

The problem is that the context of the monitor's trace is unnecessarily
populated with the span data inherited from previously within the same
thread.

## Summary of changes

The context is completely reset by moving the span from the thread
spawning the monitor into the thread where the monitor will actually
start working.

Addresses https://github.com/neondatabase/cloud/issues/28145

## Examples

### Before
```
2025-04-30T16:39:05.840298Z  INFO start_compute:compute_monitor:run: compute is not running, waiting before monitoring activity
```

### After

```
2025-04-30T16:39:05.840298Z  INFO compute_monitor:run: compute is not running, waiting before monitoring activity
```
2025-05-01 09:09:10 +00:00
Dmitrii Kovalkov
1b789e8d7c fix(pgxn/neon): Use proper member size in TermsCollectedMset and VotesCollectedMset (#11785)
## Problem
`TermsCollectedMset` and `VotesCollectedMset` accept a MemberSet
argument to find a quorum in. It may be either `wp->mconf.members` or
`wp->mconf.new_members`. But the loops inside always use
`wp->mconf.members.len`.

If the sizes of member sets are different, it may lead to these
functions not scanning all the safekeepers from `mset`.

We are not planning to change the member set size dynamically now, but
it's worth fixing anyway.

- Part of https://github.com/neondatabase/neon/issues/11669

## Summary of changes
- Use proper size of member set in `TermsCollectedMset` and
`VotesCollectedMset`
2025-04-30 16:50:21 +00:00
Arpad Müller
bec7427d9e pull_timeline and sk logging fixes (#11786)
This patch contains some fixes of issues I ran into for #11712:

* make `pull_timeline` return success for timeline that already exists.
This follows general API design of storage components: API endpoints are
retryable and converge to a status code, instead of starting to error.
We change the `pull_timeline`'s return type a little bit, because we
might not actually have a source sk to pull from. Note that the fix is
not enough, there is still a race when two `pull_timeline` instances
happen in parallel: we might try to enter both pulled timelines at the
same time. That can be fixed later.
* make `pull_timeline` support one safekeeper being down. In general, if
one safekeeper is down, that's not a problem. the added comment explains
a potential situation (found in the `test_lagging_sk` test for example)
* don't log very long errors when computes try to connect to safekeepers
that don't have the timeline yet, if `allow_timeline_creation` is false.
That flag is enabled when a sk connection string with generation numbers
is passed to the compute, so we'll hit this code path more often. E.g.
when a safekeeper missed a timeline creation, but the compute connects
to it first before the `pull_timeline` gets requested by the storcon
reconciler: this is a perfectly normal situation. So don't log the whole
error backtrace, and don't log it on the error log level, but only on
info.

part of #11670
2025-04-30 16:24:01 +00:00
Alex Chi Z.
e2db76b9be feat(pageserver): ondemand download reason observability (#11780)
## Problem

Part of https://github.com/neondatabase/neon/issues/11615

## Summary of changes

We don't understand the root cause of why we get resident size surge
every now and then. This patch adds observability for that, and in the
next week, we might have a better understanding of what's going on.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
2025-04-30 16:04:00 +00:00
Alex Chi Z.
6b4b8e0d8b fix(pageserver): do not increase basebackup err counter when shutdown (#11778)
## Problem

We occasionally see basebackup errors alerts but there were no errors
logged. Looking at the code, the only codepath that will cause this is
shutting down.

## Summary of changes

Do not increase any counter (ok/err) when basebackup request gets
cancelled due to shutdowns.

Signed-off-by: Alex Chi Z <chi@neon.tech>
2025-04-30 15:50:12 +00:00
Konstantin Knizhnik
1d68577fbd Check target slot state in prefetch_wait_for (#11779)
## Problem

See https://neondb.slack.com/archives/C04DGM6SMTM/p1745599814030679

Assume the following scenario: prefetch_wait_for is doing
`CHECK_FOR_INTERRUPTS` which tries to load prefetch responses.
In case of error is calls pageserver_disconnect which aborts all
in-flight requests. But such failure is not detected by
`prefetch_wait_for` which returns true. As a result
`communicator_read_at_lsnv` assumes that slot is received, but as far as
asserts are disables at prod, it is not actually checked.
Then it tries to interpret response and ... *SIGSEGV*

## Summary of changes

Check target slot state  in `prefetch_wait_for`.

Resolves https://github.com/neondatabase/cloud/issues/28258

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
2025-04-30 12:44:59 +00:00
Arseny Sher
60f63c076f Make safekeeper proto version 3 default (#11518)
## Problem

We have been running compute <-> sk protocol version 3 for a while on
staging with no issues observed, and want to fully migrate to it
eventually.

## Summary of changes

Let's make v3 the default.

ref https://github.com/neondatabase/neon/issues/10326

---------

Co-authored-by: Arpad Müller <arpad@neon.tech>
2025-04-30 12:23:20 +00:00
Mikhail Kot
8da4ec9740 Postgres metrics for stuck getpage requests (#11710)
https://github.com/neondatabase/neon/issues/10327
Resolves: #11720 

New metrics:
- `compute_getpage_stuck_requests_total`
- `compute_getpage_max_inflight_stuck_time_ms`
2025-04-30 12:01:41 +00:00
Em Sharnoff
b48404952d Bump vm-builder: v0.42.2 -> v0.46.0 (#11782)
Bumped to pick up the changes from neondatabase/autoscaling#1366 —
specifically including `uname` in the logs.

Other changes included:

* neondatabase/autoscaling#1301
* neondatabase/autoscaling#1296
2025-04-30 11:32:25 +00:00
devin-ai-integration[bot]
1d06172d59 pageserver: remove resident size from billing metrics (#11699)
This is a rebase of PR #10739 by @henryliu2014 on the current main
branch.

## Problem

pageserver: remove resident size from billing metrics

Fixes #10388

## Summary of changes

The following changes have been made to remove resident size from
billing metrics:

* removed the metric "resident_size" and related codes in
consumption_metrics/metrics.rs
* removed the item of the description of metric "resident_size" in
consumption_metrics.md
* refactored the metric "resident_size" related test case

Requested by: John Spray (john@neon.tech)

---------

Co-authored-by: liuheqing <hq.liu@qq.com>
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: John Spray <john@neon.tech>
2025-04-29 18:34:56 +00:00
Elizabeth Murray
a08c1a23eb Upgrade the pgrag version in the compute Dockerfile. (#11687)
Update the compute Dockerfile to use a new version of pgrag. The new
version of pgrag uses the latest pgrx, and has a fix that terminates
background workers on postmaster exit.
2025-04-29 16:50:18 +00:00
John Spray
a2adc7dbd3 storcon: avoid multiple initdbs when shard 0 has stale locations (#11760)
## Problem

In #11727 I overlooked the case of multiple attached locations for shard
0.

I misread the code and thought `create_one` acts on one location, but it
actually acts on one _shard_, which is potentially multiple locations.

This was not a regression, but it meant that the fix was incomplete.

## Summary of changes

- In `create_one`, when updating shard zero, have any "other" locations
use the initdb from shard 0
2025-04-29 15:31:52 +00:00
Vlad Lazar
768a580373 pageserver: add not modified since lsn to get page span (#11774)
It's useful when debugging.
2025-04-29 14:07:23 +00:00
Folke Behrens
09247de8d5 proxy: Enable JSON logging by default (#11772)
This does not affect local_proxy.
2025-04-29 13:11:24 +00:00
Arpad Müller
0b35929211 Make SafekeeperReconciler parallel via semaphore (#11757)
Right now we only support running one reconciliation per safekeeper.
This is of course usually way below of what a safekeeper can do.
Therefore, introduce a semaphore and spawn the tasks asynchronously as
they come in.

Part of #11670
2025-04-29 12:46:15 +00:00
Ivan Efremov
b3db7f66ac fix(compute): Change the local_proxy log level (#11770)
Related to the INC-496
2025-04-29 11:49:16 +00:00
Christian Schwarz
6146a82771 incorporate findings from research 2025-04-29 13:27:39 +02:00
a-masterov
498d852bde Fix the empty region if run on schedule (#11764)
## Problem

When the workflow ran on a schedule, the `region_id` input was not set.
As a result, an empty region value was used, which caused errors during
execution.

## Summary of Changes

- Added fallback logic to set a default region (`aws-us-east-2`) when
`region_id` is not provided.
- Ensures the workflow works correctly both when triggered manually
(`workflow_dispatch`) and on schedule (`cron`).
2025-04-29 09:12:14 +00:00
Busra Kugler
7f8b1d79c0 Replace dorny/paths-filter with step-security maintained version (#11663)
## Problem
Our CI/CD security tool StepSecurity maintains safer forks of popular
GitHub Actions with low security scores. We're replacing
dorny/paths-filter with the maintained step-security/paths-filter
version to reduce risk of supply chain breaches and potential CVEs.

## Summary of changes
replace
```uses: dorny/paths-filter@de90cc6fb3 ```  with ```uses: step-security/paths-filter@v3```

This PR will fix: neondatabase/cloud#26141
2025-04-29 09:02:01 +00:00
JC Grünhage
d15f2ff57a fix(lint-release-pr): adjust lint and action to match (#11766)
## Problem
The `lint-release-pr` workflow run for
https://github.com/neondatabase/neon/pull/11763 failed, because the new
action did not match the lint.

## Summary of changes
Include time in expected merge message regex.
2025-04-29 08:56:44 +00:00
Konstantin Knizhnik
3593356c10 Prewarm sql api (#11742)
## Problem

Continue work on prewarm, see 
https://github.com/neondatabase/neon/pull/11740
https://github.com/neondatabase/neon/pull/11741

## Summary of changes

Add SQL API to prewarm

---------

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
2025-04-29 06:44:28 +00:00
Tristan Partin
9e8ab2ab4f Skip remote extensions WITH_LIB test when sanitizers are enabled (#11758)
In order for the test to work when sanitizers are enabled, we would need
to compile the dummy Postgres extension with the same sanitizer flags
that we compile Postgres and the neon extension with. Doing this work
would be a little more than trivial, so skipping is the best option, at
least for now.

Signed-off-by: Tristan Partin <tristan@neon.tech>
2025-04-28 19:13:35 +00:00
Alex Chi Z.
c1ff7db187 fix(pageserver): consider tombstones in replorigin (#11752)
## Problem

We didn't consider tombstones in replorigin read path in the past. This
was fine because tombstones are stored as LSN::Invalid before we
universally define what the tombstone is for sparse keyspaces.

Now we remove non-inherited keys during detach ancestor and write the
universal tombstone "empty image". So we need to consider it across all
the read paths.

related: https://github.com/neondatabase/neon/pull/11299

## Summary of changes

Empty value gets ignored for replorigin scans.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
2025-04-28 18:54:26 +00:00
Konstantin Knizhnik
6d6b83e737 Prewarm implementation (#11741)
## Problem

Continue work on prewarm started in PR
https://github.com/neondatabase/neon/pull/11740

## Summary of changes

Implement prewarm using prefetch

---------

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
2025-04-28 18:17:03 +00:00
John Spray
0482690534 pageserver: make control_plane_api & generations fully mandatory (#10715)
## Problem

We had retained the ability to run in a generation-less mode to support
test_generations_upgrade, which was replaced with a cleaner backward
compat test in https://github.com/neondatabase/neon/pull/10701

## Summary of changes

- Remove all the special cases for "if no generation" or "if no control
plane api"
- Make control_plane_api config mandatory

---------

Co-authored-by: Arpad Müller <arpad-m@users.noreply.github.com>
2025-04-28 17:24:55 +00:00
Tristan Partin
a750026c2e Fix compiler warning in libpagestore.c when WITH_SANITIZERS=yes (#11755)
Postgres has a nice self-documenting macro called pg_unreachable() when
you want to assert that a location in code won't be hit.

Warning in question:

```
/home/tristan957/Projects/work/neon//pgxn/neon/libpagestore.c: In function ‘pageserver_connect’:
/home/tristan957/Projects/work/neon//pgxn/neon/libpagestore.c:739:1: warning: control reaches end of non-void function [-Wreturn-type]
  739 | }
      | ^
```

Signed-off-by: Tristan Partin <tristan@neon.tech>
2025-04-28 17:09:48 +00:00
John Spray
998d2c2ce9 storcon: use shard 0's initdb for timeline creation (#11727)
## Problem

In princple, pageservers with different postgres binaries might generate
different initdbs, resulting in inconsistency between shards. To avoid
that, we should have shard 0 generate the initdb and other shards re-use
it.

Fixes: https://github.com/neondatabase/neon/issues/11340

## Summary of changes

- For shards with index greater than zero, set
`existing_initdb_timeline_id` in timeline creation to consume the
existing initdb rather than creating a new one
2025-04-28 16:43:35 +00:00
JC Grünhage
b1fa68f659 impr(ci): switch release PR creation over to use python based action (#11679)
## Problem
Our different repositories had both had code to achieve very similar
results in terms of release PR creation, but they were structured
differently and had different extensions. This was likely to cause
maintainability problems in the long run.

## Summary of changes
Switch to a python cli based composite action for creating the release
PRs that will also be introduced in our other repos later.

## To Do
- [ ] Adjust our docs to reflect the changes from this.
2025-04-28 16:37:36 +00:00
devin-ai-integration[bot]
84bc3380cc Remove SAFEKEEPER_AUTH_TOKEN env var parsing from safekeeper (#11698)
# Remove SAFEKEEPER_AUTH_TOKEN env var parsing from safekeeper

This PR is a follow-up to #11443 that removes the parsing of the
`SAFEKEEPER_AUTH_TOKEN` environment variable from the safekeeper
codebase while keeping the `auth_token_path` CLI flag functionality.

## Changes:
- Removed code that checks for the `SAFEKEEPER_AUTH_TOKEN` environment
variable
- Updated comments to reflect that only the `auth_token_path` CLI flag
is now used

As mentioned in PR #11443, the environment variable approach was planned
to be deprecated and removed in favor of the file-based approach, which
is more secure since environment variables can be quite public in both
procfs and unit files.

Link to Devin run:
https://app.devin.ai/sessions/d6f56cf1b4164ea9880a9a06358a58ac

Requested by: arpad@neon.tech

---------

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: arpad@neon.tech <arpad@neon.tech>
Co-authored-by: Arpad Müller <arpad-m@users.noreply.github.com>
2025-04-28 15:34:47 +00:00
Alex Chi Z.
11f6044338 fix(pageserver): report synthetic size = 1 if all tls offloaded (2) (#11731)
## Problem

https://github.com/neondatabase/neon/pull/11648 did this for resident
size instead of synthetic size.

## Summary of changes

Report synthetic_size == 1 if all timelines are offloaded.

Signed-off-by: Alex Chi Z <chi@neon.tech>
2025-04-28 13:45:45 +00:00
Konstantin Knizhnik
692c0f3fb8 Prepare to prewarm support (#11740)
## Problem

See
(original prewarm implementation)
https://github.com/neondatabase/neon/pull/9197
(functions for storing/restoring LFC state)
https://github.com/neondatabase/neon/pull/9587
(store prefetch results in LFC)
https://github.com/neondatabase/neon/pull/10442

## Summary of changes

Preparation for prewarm implementation.

---------

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
2025-04-28 13:24:18 +00:00
Christian Schwarz
80a07f6319 WIP: fallocate files before writing 2025-04-28 15:18:17 +02:00
Alexander Bayandin
2b1d2a55d6 CI: fix typo oicd -> oidc (#11747)
## Problem

It's OIDC (OpenID Connect), not OICD

## Summary of changes
- Rename actions input `aws-oicd-role-arn` -> `aws-oidc-role-arn`
2025-04-28 12:44:28 +00:00
Konstantin Knizhnik
60b9fb1baf Ignore unlogged LSNs in set last written LSN (#11743)
## Problem

See https://github.com/neondatabase/neon/issues/11718
and https://neondb.slack.com/archives/C033RQ5SPDH/p1745122797538509

GIST other indexes performing "unlogged build" are using so called fake
LSNs - not a real LSN, but something like 0/1. Been stored in lwlsn
cache they cause incorrect lookup at PS.

## Summary of changes

Do not store fake LSNs in LwLSN hash.

Co-authored-by: Konstantin Knizhnik <knizhnik@neon.tech>
2025-04-28 12:16:29 +00:00
Erik Grinaker
606f14034e pageserver: improve pageserver_smgr_query_seconds buckets (#11680)
## Problem

The `pageserver_smgr_query_seconds` buckets are too coarse, using powers
of 10: 1 µs, 10 µs, 100 µs, 1 ms, 10 ms, 100 ms, 1 s, 10 s, 100 s. This
is one of our most crucial latency metrics, and needs better resolution.

Touches #11594.

## Summary of changes

This patch uses buckets with better resolution around 1 ms (the typical
latency):

* 0.6 ms
* 1 ms
* 3 ms
* 6 ms
* 10 ms
* 30 ms
* 100 ms
* 1 s
* 3 s

These will be the same as the compute's `compute_getpage_wait_seconds`,
to make them comparable across the compute and Pageserver:
https://github.com/neondatabase/flux-fleet/pull/579. We sacrifice
buckets above 3 s, since these can already be considered "too slow".

This does not change the previously used `CRITICAL_OP_BUCKETS`, which is
also used for other operations on different timescales (e.g. LSN waits).
We should consider replacing this with more appropriate buckets for
specific operations, since it covers a large span with low resolution.
2025-04-28 11:52:44 +00:00
Conrad Ludgate
32393b4393 pg-sni-router: support compute TLS on different port (#11732)
## Problem

pg-sni-router isn't aware of compute TLS

## Summary of changes

If connections come in on port 4433, we require TLS to compute from
pg-sni-router
2025-04-28 11:29:44 +00:00
Alexander Bayandin
1a29f5672a CI(check-macos-build): trigger workflow automatically for PRs (#11706)
## Problem

- if-conditions for the `check-macos-build` workflow don't trigger it on
PRs with relevant changes (in Rust code or Postgres submodules).
- Jobs in the workflow depend on the presence of a cache, which is not
guaranteed.

## Summary of changes
- Fix if-conditions
- Use artifacts on top of cache whenever the workflow depends on it —
the cache might not be available
2025-04-28 09:03:10 +00:00
a-masterov
b8d47b5acf Run the extensions' tests on staging (#11164)
## Problem
We currently don't run end-to-end tests for PostgreSQL extensions on our
cloud infrastructure, which means we might miss problems that only occur
in a real cloud environment.

## Summary of changes
- Added a workflow to run extension tests against a cloud staging
instance
- Set up proper project configuration for extension testing
- Implemented test execution with appropriate environment settings
- Added error handling and reporting for test failures

---------

Co-authored-by: Alexander Bayandin <alexander@neon.tech>
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
2025-04-28 08:13:49 +00:00
Alexander Lakhin
97e01ae6fd Add workflow to run particular test(s) N times (#11050)
## Problem
Provide an easy way to run particular test(s) N times on CI.

## Summary of changes

* Allow for passing the test selection and the number of test runs to
the existing "Build and Test Locally" workflow
* Allow for running multiple selected tests by the "Pytest regression
tests" step
* Introduce a new workflow to run specified test(s) several times
* Store results in a separate database to distinguish between testing
tests for stability and usual testing
2025-04-28 04:04:37 +00:00
211 changed files with 4277 additions and 1444 deletions

View File

@@ -33,9 +33,14 @@ config-variables:
- REMOTE_STORAGE_AZURE_CONTAINER
- REMOTE_STORAGE_AZURE_REGION
- SLACK_CICD_CHANNEL_ID
- SLACK_COMPUTE_CHANNEL_ID
- SLACK_ON_CALL_DEVPROD_STREAM
- SLACK_ON_CALL_QA_STAGING_STREAM
- SLACK_ON_CALL_STORAGE_STAGING_STREAM
- SLACK_ONCALL_COMPUTE_GROUP
- SLACK_ONCALL_PROXY_GROUP
- SLACK_ONCALL_STORAGE_GROUP
- SLACK_PROXY_CHANNEL_ID
- SLACK_RUST_CHANNEL_ID
- SLACK_STORAGE_CHANNEL_ID
- SLACK_UPCOMING_RELEASE_CHANNEL_ID

View File

@@ -7,7 +7,7 @@ inputs:
type: boolean
required: false
default: false
aws-oicd-role-arn:
aws-oidc-role-arn:
description: 'OIDC role arn to interract with S3'
required: true
@@ -88,7 +88,7 @@ runs:
if: ${{ !cancelled() }}
with:
aws-region: eu-central-1
role-to-assume: ${{ inputs.aws-oicd-role-arn }}
role-to-assume: ${{ inputs.aws-oidc-role-arn }}
role-duration-seconds: 3600 # 1 hour should be more than enough to upload report
# Potentially we could have several running build for the same key (for example, for the main branch), so we use improvised lock for this

View File

@@ -8,7 +8,7 @@ inputs:
unique-key:
description: 'string to distinguish different results in the same run'
required: true
aws-oicd-role-arn:
aws-oidc-role-arn:
description: 'OIDC role arn to interract with S3'
required: true
@@ -39,7 +39,7 @@ runs:
if: ${{ !cancelled() }}
with:
aws-region: eu-central-1
role-to-assume: ${{ inputs.aws-oicd-role-arn }}
role-to-assume: ${{ inputs.aws-oidc-role-arn }}
role-duration-seconds: 3600 # 1 hour should be more than enough to upload report
- name: Upload test results

View File

@@ -15,7 +15,7 @@ inputs:
prefix:
description: "S3 prefix. Default is '${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
required: false
aws-oicd-role-arn:
aws-oidc-role-arn:
description: 'OIDC role arn to interract with S3'
required: true
@@ -25,7 +25,7 @@ runs:
- uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ inputs.aws-oicd-role-arn }}
role-to-assume: ${{ inputs.aws-oidc-role-arn }}
role-duration-seconds: 3600
- name: Download artifact

View File

@@ -49,6 +49,10 @@ inputs:
description: 'A JSON object with project settings'
required: false
default: '{}'
default_endpoint_settings:
description: 'A JSON object with the default endpoint settings'
required: false
default: '{}'
outputs:
dsn:
@@ -66,9 +70,9 @@ runs:
# A shell without `set -x` to not to expose password/dsn in logs
shell: bash -euo pipefail {0}
run: |
project=$(curl \
res=$(curl \
"https://${API_HOST}/api/v2/projects" \
--fail \
-w "%{http_code}" \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}" \
@@ -83,6 +87,15 @@ runs:
\"settings\": ${PROJECT_SETTINGS}
}
}")
code=${res: -3}
if [[ ${code} -ge 400 ]]; then
echo Request failed with error code ${code}
echo ${res::-3}
exit 1
else
project=${res::-3}
fi
# Mask password
echo "::add-mask::$(echo $project | jq --raw-output '.roles[] | select(.name != "web_access") | .password')"
@@ -126,6 +139,22 @@ runs:
-H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \
-d "{\"scheduling\": \"Essential\"}"
fi
# XXX
# This is a workaround for the default endpoint settings, which currently do not allow some settings in the public API.
# https://github.com/neondatabase/cloud/issues/27108
if [[ -n ${DEFAULT_ENDPOINT_SETTINGS} && ${DEFAULT_ENDPOINT_SETTINGS} != "{}" ]] ; then
PROJECT_DATA=$(curl -X GET \
"https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/projects/${project_id}" \
-H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \
-d "{\"scheduling\": \"Essential\"}"
)
NEW_DEFAULT_ENDPOINT_SETTINGS=$(echo ${PROJECT_DATA} | jq -rc ".project.default_endpoint_settings + ${DEFAULT_ENDPOINT_SETTINGS}")
curl -X POST --fail \
"https://${API_HOST}/regions/${REGION_ID}/api/v1/admin/projects/${project_id}/default_endpoint_settings" \
-H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer ${ADMIN_API_KEY}" \
--data "${NEW_DEFAULT_ENDPOINT_SETTINGS}"
fi
env:
API_HOST: ${{ inputs.api_host }}
@@ -142,3 +171,4 @@ runs:
PSQL: ${{ inputs.psql_path }}
LD_LIBRARY_PATH: ${{ inputs.libpq_lib_path }}
PROJECT_SETTINGS: ${{ inputs.project_settings }}
DEFAULT_ENDPOINT_SETTINGS: ${{ inputs.default_endpoint_settings }}

View File

@@ -53,7 +53,7 @@ inputs:
description: 'benchmark durations JSON'
required: false
default: '{}'
aws-oicd-role-arn:
aws-oidc-role-arn:
description: 'OIDC role arn to interract with S3'
required: true
@@ -66,7 +66,7 @@ runs:
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}${{ inputs.sanitizers == 'enabled' && '-sanitized' || '' }}-artifact
path: /tmp/neon
aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}
- name: Download Neon binaries for the previous release
if: inputs.build_type != 'remote'
@@ -75,7 +75,7 @@ runs:
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact
path: /tmp/neon-previous
prefix: latest
aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}
- name: Download compatibility snapshot
if: inputs.build_type != 'remote'
@@ -87,7 +87,7 @@ runs:
# The lack of compatibility snapshot (for example, for the new Postgres version)
# shouldn't fail the whole job. Only relevant test should fail.
skip-if-does-not-exist: true
aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}
- name: Checkout
if: inputs.needs_postgres_source == 'true'
@@ -228,13 +228,13 @@ runs:
# The lack of compatibility snapshot shouldn't fail the job
# (for example if we didn't run the test for non build-and-test workflow)
skip-if-does-not-exist: true
aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}
- uses: aws-actions/configure-aws-credentials@v4
if: ${{ !cancelled() }}
with:
aws-region: eu-central-1
role-to-assume: ${{ inputs.aws-oicd-role-arn }}
role-to-assume: ${{ inputs.aws-oidc-role-arn }}
role-duration-seconds: 3600 # 1 hour should be more than enough to upload report
- name: Upload test results
@@ -243,4 +243,4 @@ runs:
with:
report-dir: /tmp/test_output/allure/results
unique-key: ${{ inputs.build_type }}-${{ inputs.pg_version }}-${{ runner.arch }}
aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}

View File

@@ -14,11 +14,11 @@ runs:
name: coverage-data-artifact
path: /tmp/coverage
skip-if-does-not-exist: true # skip if there's no previous coverage to download
aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}
- name: Upload coverage data
uses: ./.github/actions/upload
with:
name: coverage-data-artifact
path: /tmp/coverage
aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
aws-oidc-role-arn: ${{ inputs.aws-oidc-role-arn }}

View File

@@ -14,7 +14,7 @@ inputs:
prefix:
description: "S3 prefix. Default is '${GITHUB_SHA}/${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
required: false
aws-oicd-role-arn:
aws-oidc-role-arn:
description: "the OIDC role arn for aws auth"
required: false
default: ""
@@ -61,7 +61,7 @@ runs:
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: eu-central-1
role-to-assume: ${{ inputs.aws-oicd-role-arn }}
role-to-assume: ${{ inputs.aws-oidc-role-arn }}
role-duration-seconds: 3600
- name: Upload artifact

View File

@@ -41,7 +41,7 @@ echo "Merge base of ${MAIN_BRANCH} and ${RELEASE_BRANCH}: ${MERGE_BASE}"
LAST_COMMIT=$(git rev-parse HEAD)
MERGE_COMMIT_MESSAGE=$(git log -1 --format=%s "${LAST_COMMIT}")
EXPECTED_MESSAGE_REGEX="^$COMPONENT release [0-9]{4}-[0-9]{2}-[0-9]{2}$"
EXPECTED_MESSAGE_REGEX="^$COMPONENT release [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2} UTC$"
if ! [[ "${MERGE_COMMIT_MESSAGE}" =~ ${EXPECTED_MESSAGE_REGEX} ]]; then
report_error "Merge commit message does not match expected pattern: '<component> release YYYY-MM-DD'

View File

@@ -81,7 +81,7 @@ jobs:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
# we create a table that has one row for each database that we want to restore with the status whether the restore is done
- name: Create benchmark_restore_status table if it does not exist

View File

@@ -28,6 +28,16 @@ on:
required: false
default: 'disabled'
type: string
test-selection:
description: 'specification of selected test(s) to run'
required: false
default: ''
type: string
test-run-count:
description: 'number of runs to perform for selected tests'
required: false
default: 1
type: number
defaults:
run:
@@ -313,7 +323,7 @@ jobs:
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}${{ inputs.sanitizers == 'enabled' && '-sanitized' || '' }}-artifact
path: /tmp/neon
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Check diesel schema
if: inputs.build-type == 'release' && inputs.arch == 'x64'
@@ -381,14 +391,15 @@ jobs:
run_with_real_s3: true
real_s3_bucket: neon-github-ci-tests
real_s3_region: eu-central-1
rerun_failed: true
rerun_failed: ${{ inputs.test-run-count == 1 }}
pg_version: ${{ matrix.pg_version }}
sanitizers: ${{ inputs.sanitizers }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
# `--session-timeout` is equal to (timeout-minutes - 10 minutes) * 60 seconds.
# Attempt to stop tests gracefully to generate test reports
# until they are forcibly stopped by the stricter `timeout-minutes` limit.
extra_params: --session-timeout=${{ inputs.sanitizers != 'enabled' && 3000 || 10200 }}
extra_params: --session-timeout=${{ inputs.sanitizers != 'enabled' && 3000 || 10200 }} --count=${{ inputs.test-run-count }}
${{ inputs.test-selection != '' && format('-k "{0}"', inputs.test-selection) || '' }}
env:
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty

View File

@@ -1,103 +0,0 @@
name: Create Release PR
on:
workflow_call:
inputs:
component-name:
description: 'Component name'
required: true
type: string
source-branch:
description: 'Source branch'
required: true
type: string
secrets:
ci-access-token:
description: 'CI access token'
required: true
defaults:
run:
shell: bash -euo pipefail {0}
permissions:
contents: read
jobs:
create-release-branch:
runs-on: ubuntu-22.04
permissions:
contents: write # for `git push`
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
with:
egress-policy: audit
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
ref: ${{ inputs.source-branch }}
fetch-depth: 0
- name: Set variables
id: vars
env:
COMPONENT_NAME: ${{ inputs.component-name }}
RELEASE_BRANCH: >-
${{
false
|| inputs.component-name == 'Storage' && 'release'
|| inputs.component-name == 'Proxy' && 'release-proxy'
|| inputs.component-name == 'Compute' && 'release-compute'
}}
run: |
now_date=$(date -u +'%Y-%m-%d')
now_time=$(date -u +'%H-%M-%Z')
{
echo "title=${COMPONENT_NAME} release ${now_date}"
echo "rc-branch=rc/${RELEASE_BRANCH}/${now_date}_${now_time}"
echo "release-branch=${RELEASE_BRANCH}"
} | tee -a ${GITHUB_OUTPUT}
- name: Configure git
run: |
git config user.name "github-actions[bot]"
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
- name: Create RC branch
env:
RELEASE_BRANCH: ${{ steps.vars.outputs.release-branch }}
RC_BRANCH: ${{ steps.vars.outputs.rc-branch }}
TITLE: ${{ steps.vars.outputs.title }}
run: |
git switch -c "${RC_BRANCH}"
# Manually create a merge commit on the current branch, keeping the
# tree and setting the parents to the current HEAD and the HEAD of the
# release branch. This commit is what we'll fast-forward the release
# branch to when merging the release branch.
# For details on why, look at
# https://docs.neon.build/overview/repositories/neon.html#background-on-commit-history-of-release-prs
current_tree=$(git rev-parse 'HEAD^{tree}')
release_head=$(git rev-parse "origin/${RELEASE_BRANCH}")
current_head=$(git rev-parse HEAD)
merge_commit=$(git commit-tree -p "${current_head}" -p "${release_head}" -m "${TITLE}" "${current_tree}")
# Fast-forward the current branch to the newly created merge_commit
git merge --ff-only ${merge_commit}
git push origin "${RC_BRANCH}"
- name: Create a PR into ${{ steps.vars.outputs.release-branch }}
env:
GH_TOKEN: ${{ secrets.ci-access-token }}
RC_BRANCH: ${{ steps.vars.outputs.rc-branch }}
RELEASE_BRANCH: ${{ steps.vars.outputs.release-branch }}
TITLE: ${{ steps.vars.outputs.title }}
run: |
gh pr create --title "${TITLE}" \
--body "" \
--head "${RC_BRANCH}" \
--base "${RELEASE_BRANCH}"

View File

@@ -53,6 +53,77 @@ concurrency:
cancel-in-progress: true
jobs:
cleanup:
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
credentials:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
options: --init
env:
ORG_ID: org-solitary-dew-09443886
LIMIT: 100
SEARCH: "GITHUB_RUN_ID="
BASE_URL: https://console-stage.neon.build/api/v2
DRY_RUN: "false" # Set to "true" to just test out the workflow
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
with:
egress-policy: audit
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Cleanup inactive Neon projects left over from prior runs
env:
API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
run: |
set -euo pipefail
NOW=$(date -u +%s)
DAYS_AGO=$((NOW - 5 * 86400))
REQUEST_URL="$BASE_URL/projects?limit=$LIMIT&search=$(printf '%s' "$SEARCH" | jq -sRr @uri)&org_id=$ORG_ID"
echo "Requesting project list from:"
echo "$REQUEST_URL"
response=$(curl -s -X GET "$REQUEST_URL" \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}" )
echo "Response:"
echo "$response" | jq .
projects_to_delete=$(echo "$response" | jq --argjson cutoff "$DAYS_AGO" '
.projects[]
| select(.compute_last_active_at != null)
| select((.compute_last_active_at | fromdateiso8601) < $cutoff)
| {id, name, compute_last_active_at}
')
if [ -z "$projects_to_delete" ]; then
echo "No projects eligible for deletion."
exit 0
fi
echo "Projects that will be deleted:"
echo "$projects_to_delete" | jq -r '.id'
if [ "$DRY_RUN" = "false" ]; then
echo "$projects_to_delete" | jq -r '.id' | while read -r project_id; do
echo "Deleting project: $project_id"
curl -s -X DELETE "$BASE_URL/projects/$project_id" \
--header "Accept: application/json" \
--header "Content-Type: application/json" \
--header "Authorization: Bearer ${API_KEY}"
done
else
echo "Dry run enabled — no projects were deleted."
fi
bench:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
permissions:
@@ -114,7 +185,7 @@ jobs:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Create Neon Project
id: create-neon-project
@@ -132,7 +203,7 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
pg_version: ${{ env.PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
# Set --sparse-ordering option of pytest-order plugin
# to ensure tests are running in order of appears in the file.
# It's important for test_perf_pgbench.py::test_pgbench_remote_* tests
@@ -165,7 +236,7 @@ jobs:
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
@@ -222,8 +293,8 @@ jobs:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Verify that cumulative statistics are preserved
uses: ./.github/actions/run-python-test-set
with:
@@ -233,7 +304,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 3600
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -282,7 +353,7 @@ jobs:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Run Logical Replication benchmarks
uses: ./.github/actions/run-python-test-set
@@ -293,7 +364,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 5400
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -310,7 +381,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 5400
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -322,7 +393,7 @@ jobs:
uses: ./.github/actions/allure-report-generate
with:
store-test-results-into-db: true
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
@@ -505,7 +576,7 @@ jobs:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Create Neon Project
if: contains(fromJSON('["neonvm-captest-new", "neonvm-captest-new-many-tables", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform)
@@ -557,7 +628,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_perf_many_relations
pg_version: ${{ env.PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -573,7 +644,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init
pg_version: ${{ env.PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -588,7 +659,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update
pg_version: ${{ env.PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -603,7 +674,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only
pg_version: ${{ env.PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -621,7 +692,7 @@ jobs:
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
@@ -694,7 +765,7 @@ jobs:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Set up Connection String
id: set-up-connstr
@@ -726,7 +797,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgvector_indexing
pg_version: ${{ env.PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -741,7 +812,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600
pg_version: ${{ env.PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -752,7 +823,7 @@ jobs:
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
@@ -828,7 +899,7 @@ jobs:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Set up Connection String
id: set-up-connstr
@@ -871,7 +942,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 43200 -k test_clickbench
pg_version: ${{ env.PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -885,7 +956,7 @@ jobs:
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
@@ -954,7 +1025,7 @@ jobs:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Get Connstring Secret Name
run: |
@@ -1003,7 +1074,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_tpch
pg_version: ${{ env.PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -1015,7 +1086,7 @@ jobs:
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
@@ -1078,7 +1149,7 @@ jobs:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Set up Connection String
id: set-up-connstr
@@ -1121,7 +1192,7 @@ jobs:
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_user_examples
pg_version: ${{ env.PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -1132,7 +1203,7 @@ jobs:
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}

View File

@@ -34,11 +34,10 @@ permissions:
jobs:
build-pgxn:
if: |
(inputs.pg_versions != '[]' || inputs.rebuild_everything) && (
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
github.ref_name == 'main'
)
inputs.pg_versions != '[]' || inputs.rebuild_everything ||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
github.ref_name == 'main'
timeout-minutes: 30
runs-on: macos-15
strategy:
@@ -100,13 +99,21 @@ jobs:
run: |
make postgres-headers-${{ matrix.postgres-version }} -j$(sysctl -n hw.ncpu)
- name: Upload "pg_install/${{ matrix.postgres-version }}" artifact
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: pg_install--${{ matrix.postgres-version }}
path: pg_install/${{ matrix.postgres-version }}
# The artifact is supposed to be used by the next job in the same workflow,
# so theres no need to store it for too long.
retention-days: 1
build-walproposer-lib:
if: |
(inputs.pg_versions != '[]' || inputs.rebuild_everything) && (
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
github.ref_name == 'main'
)
inputs.pg_versions != '[]' || inputs.rebuild_everything ||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
github.ref_name == 'main'
timeout-minutes: 30
runs-on: macos-15
needs: [build-pgxn]
@@ -127,12 +134,11 @@ jobs:
id: pg_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) | tee -a "${GITHUB_OUTPUT}"
- name: Cache postgres v17 build
id: cache_pg
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
- name: Download "pg_install/v17" artifact
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: pg_install--v17
path: pg_install/v17
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache walproposer-lib
id: cache_walproposer_lib
@@ -163,13 +169,21 @@ jobs:
run:
make walproposer-lib -j$(sysctl -n hw.ncpu)
- name: Upload "pg_install/build/walproposer-lib" artifact
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: pg_install--build--walproposer-lib
path: pg_install/build/walproposer-lib
# The artifact is supposed to be used by the next job in the same workflow,
# so theres no need to store it for too long.
retention-days: 1
cargo-build:
if: |
(inputs.pg_versions != '[]' || inputs.rebuild_rust_code || inputs.rebuild_everything) && (
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
github.ref_name == 'main'
)
inputs.pg_versions != '[]' || inputs.rebuild_rust_code || inputs.rebuild_everything ||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
github.ref_name == 'main'
timeout-minutes: 30
runs-on: macos-15
needs: [build-pgxn, build-walproposer-lib]
@@ -188,45 +202,43 @@ jobs:
with:
submodules: true
- name: Set pg v14 for caching
id: pg_rev_v14
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) | tee -a "${GITHUB_OUTPUT}"
- name: Set pg v15 for caching
id: pg_rev_v15
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) | tee -a "${GITHUB_OUTPUT}"
- name: Set pg v16 for caching
id: pg_rev_v16
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) | tee -a "${GITHUB_OUTPUT}"
- name: Set pg v17 for caching
id: pg_rev_v17
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) | tee -a "${GITHUB_OUTPUT}"
- name: Cache postgres v14 build
id: cache_pg
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
- name: Download "pg_install/v14" artifact
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: pg_install--v14
path: pg_install/v14
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v14-${{ steps.pg_rev_v14.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v15 build
id: cache_pg_v15
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
path: pg_install/v15
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v15-${{ steps.pg_rev_v15.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v16 build
id: cache_pg_v16
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
path: pg_install/v16
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v16-${{ steps.pg_rev_v16.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v17 build
id: cache_pg_v17
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
path: pg_install/v17
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-v17-${{ steps.pg_rev_v17.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache cargo deps (only for v17)
- name: Download "pg_install/v15" artifact
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: pg_install--v15
path: pg_install/v15
- name: Download "pg_install/v16" artifact
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: pg_install--v16
path: pg_install/v16
- name: Download "pg_install/v17" artifact
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: pg_install--v17
path: pg_install/v17
- name: Download "pg_install/build/walproposer-lib" artifact
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
with:
name: pg_install--build--walproposer-lib
path: pg_install/build/walproposer-lib
# `actions/download-artifact` doesn't preserve permissions:
# https://github.com/actions/download-artifact?tab=readme-ov-file#permission-loss
- name: Make pg_install/v*/bin/* executable
run: |
chmod +x pg_install/v*/bin/*
- name: Cache cargo deps
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
path: |
@@ -236,13 +248,6 @@ jobs:
target
key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust
- name: Cache walproposer-lib
id: cache_walproposer_lib
uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
with:
path: pg_install/build/walproposer-lib
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev_v17.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Install build dependencies
run: |
brew install flex bison openssl protobuf icu4c
@@ -252,8 +257,8 @@ jobs:
echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
- name: Run cargo build (only for v17)
- name: Run cargo build
run: cargo build --all --release -j$(sysctl -n hw.ncpu)
- name: Check that no warnings are produced (only for v17)
- name: Check that no warnings are produced
run: ./run_clippy.sh

View File

@@ -0,0 +1,120 @@
name: Build and Run Selected Test
on:
workflow_dispatch:
inputs:
test-selection:
description: 'Specification of selected test(s), as accepted by pytest -k'
required: true
type: string
run-count:
description: 'Number of test runs to perform'
required: true
type: number
archs:
description: 'Archs to run tests on, e. g.: ["x64", "arm64"]'
default: '["x64"]'
required: true
type: string
build-types:
description: 'Build types to run tests on, e. g.: ["debug", "release"]'
default: '["release"]'
required: true
type: string
pg-versions:
description: 'Postgres versions to use for testing, e.g,: [{"pg_version":"v16"}, {"pg_version":"v17"}])'
default: '[{"pg_version":"v17"}]'
required: true
type: string
defaults:
run:
shell: bash -euxo pipefail {0}
env:
RUST_BACKTRACE: 1
COPT: '-Werror'
jobs:
meta:
uses: ./.github/workflows/_meta.yml
with:
github-event-name: ${{ github.event_name }}
github-event-json: ${{ toJSON(github.event) }}
build-and-test-locally:
needs: [ meta ]
strategy:
fail-fast: false
matrix:
arch: ${{ fromJson(inputs.archs) }}
build-type: ${{ fromJson(inputs.build-types) }}
uses: ./.github/workflows/_build-and-test-locally.yml
with:
arch: ${{ matrix.arch }}
build-tools-image: ghcr.io/neondatabase/build-tools:pinned-bookworm
build-tag: ${{ needs.meta.outputs.build-tag }}
build-type: ${{ matrix.build-type }}
test-cfg: ${{ inputs.pg-versions }}
test-selection: ${{ inputs.test-selection }}
test-run-count: ${{ fromJson(inputs.run-count) }}
secrets: inherit
create-test-report:
needs: [ build-and-test-locally ]
if: ${{ !cancelled() }}
permissions:
id-token: write # aws-actions/configure-aws-credentials
statuses: write
contents: write
pull-requests: write
outputs:
report-url: ${{ steps.create-allure-report.outputs.report-url }}
runs-on: [ self-hosted, small ]
container:
image: ghcr.io/neondatabase/build-tools:pinned-bookworm
credentials:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
options: --init
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
with:
egress-policy: audit
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Create Allure report
if: ${{ !cancelled() }}
id: create-allure-report
uses: ./.github/actions/allure-report-generate
with:
store-test-results-into-db: true
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_DEV }}
- uses: actions/github-script@v7
if: ${{ !cancelled() }}
with:
# Retry script for 5XX server errors: https://github.com/actions/github-script#retries
retries: 5
script: |
const report = {
reportUrl: "${{ steps.create-allure-report.outputs.report-url }}",
reportJsonUrl: "${{ steps.create-allure-report.outputs.report-json-url }}",
}
const coverage = {}
const script = require("./scripts/comment-test-report.js")
await script({
github,
context,
fetch,
report,
coverage,
})

View File

@@ -69,7 +69,7 @@ jobs:
submodules: true
- name: Check for file changes
uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2
uses: step-security/paths-filter@v3
id: files-changed
with:
token: ${{ secrets.GITHUB_TOKEN }}
@@ -317,7 +317,7 @@ jobs:
extra_params: --splits 5 --group ${{ matrix.pytest_split_group }}
benchmark_durations: ${{ needs.get-benchmarks-durations.outputs.json }}
pg_version: v16
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -384,7 +384,7 @@ jobs:
uses: ./.github/actions/allure-report-generate
with:
store-test-results-into-db: true
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
@@ -451,14 +451,14 @@ jobs:
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ matrix.build_type }}-artifact
path: /tmp/neon
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Get coverage artifact
uses: ./.github/actions/download
with:
name: coverage-data-artifact
path: /tmp/coverage
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Merge coverage data
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
@@ -824,7 +824,7 @@ jobs:
- pg: v17
debian: bookworm
env:
VM_BUILDER_VERSION: v0.42.2
VM_BUILDER_VERSION: v0.46.0
steps:
- name: Harden the runner (Audit all outbound calls)
@@ -1434,10 +1434,10 @@ jobs:
;;
esac
notify-storage-release-deploy-failure:
needs: [ deploy ]
notify-release-deploy-failure:
needs: [ meta, deploy ]
# We want this to run even if (transitive) dependencies are skipped, because deploy should really be successful on release branch workflow runs.
if: github.ref_name == 'release' && needs.deploy.result != 'success' && always()
if: contains(fromJSON('["storage-release", "compute-release", "proxy-release"]'), needs.meta.outputs.run-kind) && needs.deploy.result != 'success' && always()
runs-on: ubuntu-22.04
steps:
- name: Harden the runner (Audit all outbound calls)
@@ -1445,15 +1445,40 @@ jobs:
with:
egress-policy: audit
- name: Post release-deploy failure to team-storage slack channel
- name: Post release-deploy failure to team slack channel
uses: slackapi/slack-github-action@485a9d42d3a73031f12ec201c457e2162c45d02d # v2.0.0
env:
TEAM_ONCALL: >-
${{
fromJSON(format('{
"storage-release": "<!subteam^{0}|@oncall-storage>",
"compute-release": "<!subteam^{1}|@oncall-compute>",
"proxy-release": "<!subteam^{2}|@oncall-proxy>"
}',
vars.SLACK_ONCALL_STORAGE_GROUP,
vars.SLACK_ONCALL_COMPUTE_GROUP,
vars.SLACK_ONCALL_PROXY_GROUP
))[needs.meta.outputs.run-kind]
}}
CHANNEL: >-
${{
fromJSON(format('{
"storage-release": "{0}",
"compute-release": "{1}",
"proxy-release": "{2}"
}',
vars.SLACK_STORAGE_CHANNEL_ID,
vars.SLACK_COMPUTE_CHANNEL_ID,
vars.SLACK_PROXY_CHANNEL_ID
))[needs.meta.outputs.run-kind]
}}
with:
method: chat.postMessage
token: ${{ secrets.SLACK_BOT_TOKEN }}
payload: |
channel: ${{ vars.SLACK_STORAGE_CHANNEL_ID }}
channel: ${{ env.CHANNEL }}
text: |
🔴 <!subteam^S06CJ87UMNY|@oncall-storage>: deploy job on release branch had unexpected status "${{ needs.deploy.result }}" <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>.
🔴 ${{ env.TEAM_ONCALL }}: deploy job on release branch had unexpected status "${{ needs.deploy.result }}" <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>.
# The job runs on `release` branch and copies compatibility data and Neon artifact from the last *release PR* to the latest directory
promote-compatibility-data:

View File

@@ -117,7 +117,7 @@ jobs:
uses: ./.github/actions/allure-report-generate
with:
store-test-results-into-db: true
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}

112
.github/workflows/cloud-extensions.yml vendored Normal file
View File

@@ -0,0 +1,112 @@
name: Cloud Extensions Test
on:
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '45 1 * * *' # run once a day, timezone is utc
workflow_dispatch: # adds ability to run this manually
inputs:
region_id:
description: 'Project region id. If not set, the default region will be used'
required: false
default: 'aws-us-east-2'
defaults:
run:
shell: bash -euxo pipefail {0}
permissions:
id-token: write # aws-actions/configure-aws-credentials
statuses: write
contents: write
jobs:
regress:
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
strategy:
fail-fast: false
matrix:
pg-version: [16, 17]
runs-on: [ self-hosted, small ]
container:
# We use the neon-test-extensions image here as it contains the source code for the extensions.
image: ghcr.io/neondatabase/neon-test-extensions-v${{ matrix.pg-version }}:latest
credentials:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
options: --init
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
with:
egress-policy: audit
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Evaluate the settings
id: project-settings
run: |
if [[ $((${{ matrix.pg-version }})) -lt 17 ]]; then
ULID=ulid
else
ULID=pgx_ulid
fi
LIBS=timescaledb:rag_bge_small_en_v15,rag_jina_reranker_v1_tiny_en:$ULID
settings=$(jq -c -n --arg libs $LIBS '{preload_libraries:{use_defaults:false,enabled_libraries:($libs| split(":"))}}')
echo settings=$settings >> $GITHUB_OUTPUT
- name: Create Neon Project
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
region_id: ${{ inputs.region_id || 'aws-us-east-2' }}
postgres_version: ${{ matrix.pg-version }}
project_settings: ${{ steps.project-settings.outputs.settings }}
# We need these settings to get the expected output results.
# We cannot use the environment variables e.g. PGTZ due to
# https://github.com/neondatabase/neon/issues/1287
default_endpoint_settings: >
{
"pg_settings": {
"DateStyle": "Postgres,MDY",
"TimeZone": "America/Los_Angeles",
"compute_query_id": "off",
"neon.allow_unstable_extensions": "on"
}
}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
admin_api_key: ${{ secrets.NEON_STAGING_ADMIN_API_KEY }}
- name: Run the regression tests
run: /run-tests.sh -r /ext-src
env:
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
SKIP: "pg_hint_plan-src,pg_repack-src,pg_cron-src,plpgsql_check-src"
- name: Delete Neon Project
if: ${{ always() }}
uses: ./.github/actions/neon-project-delete
with:
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1
with:
channel-id: ${{ vars.SLACK_ON_CALL_QA_STAGING_STREAM }}
slack-message: |
Periodic extensions test on staging: ${{ job.status }}
<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

View File

@@ -89,7 +89,7 @@ jobs:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Create a new branch
id: create-branch
@@ -105,7 +105,7 @@ jobs:
test_selection: cloud_regress
pg_version: ${{matrix.pg-version}}
extra_params: -m remote_cluster
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{steps.create-branch.outputs.dsn}}
@@ -122,7 +122,7 @@ jobs:
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}

View File

@@ -32,7 +32,7 @@ jobs:
fail-fast: false # allow other variants to continue even if one fails
matrix:
include:
- target_project: new_empty_project_stripe_size_2048
- target_project: new_empty_project_stripe_size_2048
stripe_size: 2048 # 16 MiB
postgres_version: 16
disable_sharding: false
@@ -98,7 +98,7 @@ jobs:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Create Neon Project
if: ${{ startsWith(matrix.target_project, 'new_empty_project') }}
@@ -110,10 +110,10 @@ jobs:
compute_units: '[7, 7]' # we want to test large compute here to avoid compute-side bottleneck
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
shard_split_project: ${{ matrix.stripe_size != null && 'true' || 'false' }}
admin_api_key: ${{ secrets.NEON_STAGING_ADMIN_API_KEY }}
admin_api_key: ${{ secrets.NEON_STAGING_ADMIN_API_KEY }}
shard_count: 8
stripe_size: ${{ matrix.stripe_size }}
disable_sharding: ${{ matrix.disable_sharding }}
disable_sharding: ${{ matrix.disable_sharding }}
- name: Initialize Neon project
if: ${{ startsWith(matrix.target_project, 'new_empty_project') }}
@@ -171,7 +171,7 @@ jobs:
extra_params: -s -m remote_cluster --timeout 86400 -k test_ingest_performance_using_pgcopydb
pg_version: v${{ matrix.postgres_version }}
save_perf_report: true
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_INGEST_SOURCE_CONNSTR: ${{ secrets.BENCHMARK_INGEST_SOURCE_CONNSTR }}
TARGET_PROJECT_TYPE: ${{ matrix.target_project }}

View File

@@ -33,9 +33,9 @@ jobs:
fail-fast: false # allow other variants to continue even if one fails
matrix:
include:
- target: new_branch
- target: new_branch
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
- target: reuse_branch
- target: reuse_branch
custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100
max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results
permissions:
@@ -43,7 +43,7 @@ jobs:
statuses: write
id-token: write # aws-actions/configure-aws-credentials
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "1h" # todo update to > 1 h
TEST_PG_BENCH_DURATIONS_MATRIX: "1h" # todo update to > 1 h
TEST_PGBENCH_CUSTOM_SCRIPTS: ${{ matrix.custom_scripts }}
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
PG_VERSION: 16 # pre-determined by pre-determined project
@@ -85,7 +85,7 @@ jobs:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Create Neon Branch for large tenant
if: ${{ matrix.target == 'new_branch' }}
@@ -129,7 +129,7 @@ jobs:
${PSQL} "${BENCHMARK_CONNSTR}" -c "SET statement_timeout = 0; DELETE FROM webhook.incoming_webhooks WHERE created_at > '2025-02-27 23:59:59+00';"
echo "$(date '+%Y-%m-%d %H:%M:%S') - Finished deleting rows in table webhook.incoming_webhooks from prior runs"
- name: Benchmark pgbench with custom-scripts
- name: Benchmark pgbench with custom-scripts
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
@@ -138,7 +138,7 @@ jobs:
save_perf_report: true
extra_params: -m remote_cluster --timeout 7200 -k test_perf_oltp_large_tenant_pgbench
pg_version: ${{ env.PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -153,7 +153,7 @@ jobs:
save_perf_report: true
extra_params: -m remote_cluster --timeout 172800 -k test_perf_oltp_large_tenant_maintenance
pg_version: ${{ env.PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr_without_pooler }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -179,8 +179,8 @@ jobs:
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@fcfb566f8b0aab22203f066d80ca1d7e4b5d05b3 # v1.27.1

View File

@@ -53,7 +53,7 @@ jobs:
submodules: true
- name: Check for Postgres changes
uses: dorny/paths-filter@1441771bbfdd59dcd748680ee64ebd8faab1a242 #v3
uses: step-security/paths-filter@v3
id: files_changed
with:
token: ${{ github.token }}
@@ -69,10 +69,6 @@ jobs:
check-macos-build:
needs: [ check-permissions, files-changed ]
if: |
contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
github.ref_name == 'main'
uses: ./.github/workflows/build-macos.yml
with:
pg_versions: ${{ needs.files-changed.outputs.postgres_changes }}

View File

@@ -147,7 +147,7 @@ jobs:
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
with:
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}

View File

@@ -103,7 +103,7 @@ jobs:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Create Neon Project
id: create-neon-project
@@ -122,7 +122,7 @@ jobs:
run_in_parallel: false
extra_params: -m remote_cluster
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
@@ -139,7 +139,7 @@ jobs:
uses: ./.github/actions/allure-report-generate
with:
store-test-results-into-db: true
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
@@ -178,7 +178,7 @@ jobs:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Create Neon Project
id: create-neon-project
@@ -195,7 +195,7 @@ jobs:
run_in_parallel: false
extra_params: -m remote_cluster
pg_version: ${{ env.DEFAULT_PG_VERSION }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
@@ -212,7 +212,7 @@ jobs:
uses: ./.github/actions/allure-report-generate
with:
store-test-results-into-db: true
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}

View File

@@ -66,7 +66,7 @@ jobs:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
- name: Run tests
uses: ./.github/actions/run-python-test-set
@@ -76,7 +76,7 @@ jobs:
run_in_parallel: false
extra_params: -m remote_cluster
pg_version: ${{ matrix.pg-version }}
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
RANDOM_SEED: ${{ inputs.random_seed }}
@@ -88,6 +88,6 @@ jobs:
uses: ./.github/actions/allure-report-generate
with:
store-test-results-into-db: true
aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
env:
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}

12
.github/workflows/release-compute.yml vendored Normal file
View File

@@ -0,0 +1,12 @@
name: Create compute release PR
on:
schedule:
- cron: '0 7 * * FRI'
jobs:
create-release-pr:
uses: ./.github/workflows/release.yml
with:
component: compute
secrets: inherit

12
.github/workflows/release-proxy.yml vendored Normal file
View File

@@ -0,0 +1,12 @@
name: Create proxy release PR
on:
schedule:
- cron: '0 6 * * TUE'
jobs:
create-release-pr:
uses: ./.github/workflows/release.yml
with:
component: proxy
secrets: inherit

12
.github/workflows/release-storage.yml vendored Normal file
View File

@@ -0,0 +1,12 @@
name: Create storage release PR
on:
schedule:
- cron: '0 6 * * FRI'
jobs:
create-release-pr:
uses: ./.github/workflows/release.yml
with:
component: storage
secrets: inherit

View File

@@ -1,25 +1,34 @@
name: Create Release Branch
name: Create release PR
on:
schedule:
# It should be kept in sync with if-condition in jobs
- cron: '0 6 * * TUE' # Proxy release
- cron: '0 6 * * FRI' # Storage release
- cron: '0 7 * * FRI' # Compute release
workflow_dispatch:
inputs:
create-storage-release-branch:
type: boolean
description: 'Create Storage release PR'
component:
description: "Component to release"
required: true
type: choice
options:
- compute
- proxy
- storage
cherry-pick:
description: "Commits to cherry-pick (space separated, makes this a hotfix based on previous release)"
required: false
create-proxy-release-branch:
type: boolean
description: 'Create Proxy release PR'
required: false
create-compute-release-branch:
type: boolean
description: 'Create Compute release PR'
type: string
default: ''
workflow_call:
inputs:
component:
description: "Component to release"
required: true
type: string
cherry-pick:
description: "Commits to cherry-pick (space separated, makes this a hotfix based on previous release)"
required: false
type: string
default: ''
# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
permissions: {}
@@ -29,41 +38,31 @@ defaults:
shell: bash -euo pipefail {0}
jobs:
create-storage-release-branch:
if: ${{ github.event.schedule == '0 6 * * FRI' || inputs.create-storage-release-branch }}
create-release-pr:
runs-on: ubuntu-22.04
permissions:
contents: write
uses: ./.github/workflows/_create-release-pr.yml
with:
component-name: 'Storage'
source-branch: ${{ github.ref_name }}
secrets:
ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }}
steps:
- name: Harden the runner (Audit all outbound calls)
uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
with:
egress-policy: audit
create-proxy-release-branch:
if: ${{ github.event.schedule == '0 6 * * TUE' || inputs.create-proxy-release-branch }}
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
permissions:
contents: write
- name: Configure git
run: |
git config user.name "github-actions[bot]"
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
uses: ./.github/workflows/_create-release-pr.yml
with:
component-name: 'Proxy'
source-branch: ${{ github.ref_name }}
secrets:
ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }}
create-compute-release-branch:
if: ${{ github.event.schedule == '0 7 * * FRI' || inputs.create-compute-release-branch }}
permissions:
contents: write
uses: ./.github/workflows/_create-release-pr.yml
with:
component-name: 'Compute'
source-branch: ${{ github.ref_name }}
secrets:
ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }}
- name: Create release PR
uses: neondatabase/dev-actions/release-pr@290dec821d86fa8a93f019e8c69720f5865b5677
with:
component: ${{ inputs.component }}
cherry-pick: ${{ inputs.cherry-pick }}
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}

6
Cargo.lock generated
View File

@@ -1284,6 +1284,7 @@ name = "compute_tools"
version = "0.1.0"
dependencies = [
"anyhow",
"async-compression",
"aws-config",
"aws-sdk-kms",
"aws-sdk-s3",
@@ -1420,6 +1421,7 @@ dependencies = [
"clap",
"comfy-table",
"compute_api",
"endpoint_storage",
"futures",
"http-utils",
"humantime",
@@ -7197,7 +7199,7 @@ dependencies = [
[[package]]
name = "tokio-epoll-uring"
version = "0.1.0"
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#781989bb540a1408b0b93daa1e9d1fa452195497"
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=devin%2F1746710596-expose-fallocate-modes#8bd6d0e99d4937096acfe40b64fd63aa9ad2e2ea"
dependencies = [
"futures",
"nix 0.26.4",
@@ -7808,7 +7810,7 @@ dependencies = [
[[package]]
name = "uring-common"
version = "0.1.0"
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#781989bb540a1408b0b93daa1e9d1fa452195497"
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=devin%2F1746710596-expose-fallocate-modes#8bd6d0e99d4937096acfe40b64fd63aa9ad2e2ea"
dependencies = [
"bytes",
"io-uring",

View File

@@ -187,7 +187,7 @@ thiserror = "1.0"
tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] }
tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
tokio = { version = "1.43.1", features = ["macros"] }
tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "devin/1746710596-expose-fallocate-modes" }
tokio-io-timeout = "1.2.0"
tokio-postgres-rustls = "0.12.0"
tokio-rustls = { version = "0.26.0", default-features = false, features = ["tls12", "ring"]}
@@ -243,6 +243,7 @@ azure_storage_blobs = { git = "https://github.com/neondatabase/azure-sdk-for-rus
## Local libraries
compute_api = { version = "0.1", path = "./libs/compute_api/" }
consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
endpoint_storage = { version = "0.0.1", path = "./endpoint_storage/" }
http-utils = { version = "0.1", path = "./libs/http-utils/" }
metrics = { version = "0.1", path = "./libs/metrics/" }
pageserver = { path = "./pageserver" }

View File

@@ -1085,6 +1085,23 @@ RUN cargo install --locked --version 0.12.9 cargo-pgrx && \
USER root
#########################################################################################
#
# Layer "rust extensions pgrx14"
#
# Version 14 is now required by a few
# This layer should be used as a base for new pgrx extensions,
# and eventually get merged with `rust-extensions-build`
#
#########################################################################################
FROM pg-build-nonroot-with-cargo AS rust-extensions-build-pgrx14
ARG PG_VERSION
RUN cargo install --locked --version 0.14.1 cargo-pgrx && \
/bin/bash -c 'cargo pgrx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'
USER root
#########################################################################################
#
# Layers "pg-onnx-build" and "pgrag-build"
@@ -1100,11 +1117,11 @@ RUN wget https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.18.1.tar.
mkdir onnxruntime-src && cd onnxruntime-src && tar xzf ../onnxruntime.tar.gz --strip-components=1 -C . && \
echo "#nothing to test here" > neon-test.sh
RUN wget https://github.com/neondatabase-labs/pgrag/archive/refs/tags/v0.0.0.tar.gz -O pgrag.tar.gz && \
echo "2cbe394c1e74fc8bcad9b52d5fbbfb783aef834ca3ce44626cfd770573700bb4 pgrag.tar.gz" | sha256sum --check && \
RUN wget https://github.com/neondatabase-labs/pgrag/archive/refs/tags/v0.1.1.tar.gz -O pgrag.tar.gz && \
echo "087b2ecd11ba307dc968042ef2e9e43dc04d9ba60e8306e882c407bbe1350a50 pgrag.tar.gz" | sha256sum --check && \
mkdir pgrag-src && cd pgrag-src && tar xzf ../pgrag.tar.gz --strip-components=1 -C .
FROM rust-extensions-build-pgrx12 AS pgrag-build
FROM rust-extensions-build-pgrx14 AS pgrag-build
COPY --from=pgrag-src /ext-src/ /ext-src/
# Install build-time dependencies
@@ -1124,19 +1141,19 @@ RUN . venv/bin/activate && \
WORKDIR /ext-src/pgrag-src
RUN cd exts/rag && \
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
sed -i 's/pgrx = "0.14.1"/pgrx = { version = "0.14.1", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
cargo pgrx install --release && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/rag.control
RUN cd exts/rag_bge_small_en_v15 && \
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
sed -i 's/pgrx = "0.14.1"/pgrx = { version = "0.14.1", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
ORT_LIB_LOCATION=/ext-src/onnxruntime-src/build/Linux \
REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/bge_small_en_v15.onnx \
cargo pgrx install --release --features remote_onnx && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/rag_bge_small_en_v15.control
RUN cd exts/rag_jina_reranker_v1_tiny_en && \
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
sed -i 's/pgrx = "0.14.1"/pgrx = { version = "0.14.1", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
ORT_LIB_LOCATION=/ext-src/onnxruntime-src/build/Linux \
REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/jina_reranker_v1_tiny_en.onnx \
cargo pgrx install --release --features remote_onnx && \
@@ -1305,8 +1322,8 @@ ARG PG_VERSION
# Do not update without approve from proxy team
# Make sure the version is reflected in proxy/src/serverless/local_conn_pool.rs
WORKDIR /ext-src
RUN wget https://github.com/neondatabase/pg_session_jwt/archive/refs/tags/v0.3.0.tar.gz -O pg_session_jwt.tar.gz && \
echo "19be2dc0b3834d643706ed430af998bb4c2cdf24b3c45e7b102bb3a550e8660c pg_session_jwt.tar.gz" | sha256sum --check && \
RUN wget https://github.com/neondatabase/pg_session_jwt/archive/refs/tags/v0.3.1.tar.gz -O pg_session_jwt.tar.gz && \
echo "62fec9e472cb805c53ba24a0765afdb8ea2720cfc03ae7813e61687b36d1b0ad pg_session_jwt.tar.gz" | sha256sum --check && \
mkdir pg_session_jwt-src && cd pg_session_jwt-src && tar xzf ../pg_session_jwt.tar.gz --strip-components=1 -C . && \
sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.9", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
sed -i 's/version = "0.12.6"/version = "0.12.9"/g' pgrx-tests/Cargo.toml && \
@@ -1319,6 +1336,40 @@ COPY --from=pg_session_jwt-src /ext-src/ /ext-src/
WORKDIR /ext-src/pg_session_jwt-src
RUN cargo pgrx install --release
#########################################################################################
#
# Layer "pg-anon-pg-build"
# compile anon extension
#
#########################################################################################
FROM pg-build AS pg_anon-src
ARG PG_VERSION
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
WORKDIR /ext-src
COPY compute/patches/anon_v2.patch .
# This is an experimental extension, never got to real production.
# !Do not remove! It can be present in shared_preload_libraries and compute will fail to start if library is not found.
ENV PATH="/usr/local/pgsql/bin/:$PATH"
RUN wget https://gitlab.com/dalibo/postgresql_anonymizer/-/archive/2.1.0/postgresql_anonymizer-latest.tar.gz -O pg_anon.tar.gz && \
echo "48e7f5ae2f1ca516df3da86c5c739d48dd780a4e885705704ccaad0faa89d6c0 pg_anon.tar.gz" | sha256sum --check && \
mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . && \
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt && \
sed -i 's/pgrx = "0.14.1"/pgrx = { version = "=0.14.1", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
patch -p1 < /ext-src/anon_v2.patch
FROM rust-extensions-build-pgrx14 AS pg-anon-pg-build
ARG PG_VERSION
COPY --from=pg_anon-src /ext-src/ /ext-src/
WORKDIR /ext-src
RUN cd pg_anon-src && \
make -j $(getconf _NPROCESSORS_ONLN) extension PG_CONFIG=/usr/local/pgsql/bin/pg_config PGVER=pg$(echo "$PG_VERSION" | sed 's/^v//') && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config PGVER=pg$(echo "$PG_VERSION" | sed 's/^v//') && \
chmod -R a+r ../pg_anon-src && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control;
########################################################################################
#########################################################################################
#
# Layer "wal2json-build"
@@ -1615,6 +1666,7 @@ COPY --from=pg_uuidv7-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_roaringbitmap-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_semver-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=wal2json-build /usr/local/pgsql /usr/local/pgsql
COPY --from=pg-anon-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_ivm-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_partman-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg_mooncake-build /usr/local/pgsql/ /usr/local/pgsql/
@@ -1800,8 +1852,8 @@ COPY compute/patches/pg_repack.patch /ext-src
RUN cd /ext-src/pg_repack-src && patch -p1 </ext-src/pg_repack.patch && rm -f /ext-src/pg_repack.patch
COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
RUN apt-get update && apt-get install -y libtap-parser-sourcehandler-pgtap-perl\
&& apt clean && rm -rf /ext-src/*.tar.gz /var/lib/apt/lists/*
RUN apt-get update && apt-get install -y libtap-parser-sourcehandler-pgtap-perl jq \
&& apt clean && rm -rf /ext-src/*.tar.gz /ext-src/*.patch /var/lib/apt/lists/*
ENV PATH=/usr/local/pgsql/bin:$PATH
ENV PGHOST=compute
ENV PGPORT=55433

View File

@@ -23,6 +23,8 @@
import 'sql_exporter/getpage_prefetch_requests_total.libsonnet',
import 'sql_exporter/getpage_prefetches_buffered.libsonnet',
import 'sql_exporter/getpage_sync_requests_total.libsonnet',
import 'sql_exporter/compute_getpage_stuck_requests_total.libsonnet',
import 'sql_exporter/compute_getpage_max_inflight_stuck_time_ms.libsonnet',
import 'sql_exporter/getpage_wait_seconds_bucket.libsonnet',
import 'sql_exporter/getpage_wait_seconds_count.libsonnet',
import 'sql_exporter/getpage_wait_seconds_sum.libsonnet',

View File

@@ -0,0 +1,9 @@
{
metric_name: 'compute_getpage_max_inflight_stuck_time_ms',
type: 'gauge',
help: 'Max wait time for stuck requests among all backends. Includes only active stuck requests, terminated or disconnected ones are not accounted for',
values: [
'compute_getpage_max_inflight_stuck_time_ms',
],
query_ref: 'neon_perf_counters',
}

View File

@@ -0,0 +1,9 @@
{
metric_name: 'compute_getpage_stuck_requests_total',
type: 'counter',
help: 'Total number of Getpage requests left without an answer for more than pageserver_response_log_timeout but less than pageserver_response_disconnect_timeout',
values: [
'compute_getpage_stuck_requests_total',
],
query_ref: 'neon_perf_counters',
}

View File

@@ -9,6 +9,8 @@ SELECT d.* FROM pg_catalog.jsonb_to_record((SELECT jb FROM c)) AS d(
getpage_wait_seconds_sum numeric,
getpage_prefetch_requests_total numeric,
getpage_sync_requests_total numeric,
compute_getpage_stuck_requests_total numeric,
compute_getpage_max_inflight_stuck_time_ms numeric,
getpage_prefetch_misses_total numeric,
getpage_prefetch_discards_total numeric,
getpage_prefetches_buffered numeric,

View File

@@ -0,0 +1,129 @@
diff --git a/sql/anon.sql b/sql/anon.sql
index 0cdc769..f6cc950 100644
--- a/sql/anon.sql
+++ b/sql/anon.sql
@@ -1141,3 +1141,8 @@ $$
-- TODO : https://en.wikipedia.org/wiki/L-diversity
-- TODO : https://en.wikipedia.org/wiki/T-closeness
+
+-- NEON Patches
+
+GRANT ALL ON SCHEMA anon to neon_superuser;
+GRANT ALL ON ALL TABLES IN SCHEMA anon TO neon_superuser;
diff --git a/sql/init.sql b/sql/init.sql
index 7da6553..9b6164b 100644
--- a/sql/init.sql
+++ b/sql/init.sql
@@ -74,50 +74,49 @@ $$
SECURITY LABEL FOR anon ON FUNCTION anon.load_csv IS 'UNTRUSTED';
--- load fake data from a given path
-CREATE OR REPLACE FUNCTION anon.init(
- datapath TEXT
-)
+CREATE OR REPLACE FUNCTION anon.load_fake_data()
RETURNS BOOLEAN
AS $$
DECLARE
- datapath_check TEXT;
success BOOLEAN;
+ sharedir TEXT;
+ datapath TEXT;
BEGIN
- IF anon.is_initialized() THEN
- RAISE NOTICE 'The anon extension is already initialized.';
- RETURN TRUE;
- END IF;
+ datapath := '/extension/anon/';
+ -- find the local extension directory
+ SELECT setting INTO sharedir
+ FROM pg_catalog.pg_config
+ WHERE name = 'SHAREDIR';
SELECT bool_or(results) INTO success
FROM unnest(array[
- anon.load_csv('anon.identifiers_category',datapath||'/identifiers_category.csv'),
- anon.load_csv('anon.identifier',datapath ||'/identifier.csv'),
- anon.load_csv('anon.address',datapath ||'/address.csv'),
- anon.load_csv('anon.city',datapath ||'/city.csv'),
- anon.load_csv('anon.company',datapath ||'/company.csv'),
- anon.load_csv('anon.country',datapath ||'/country.csv'),
- anon.load_csv('anon.email', datapath ||'/email.csv'),
- anon.load_csv('anon.first_name',datapath ||'/first_name.csv'),
- anon.load_csv('anon.iban',datapath ||'/iban.csv'),
- anon.load_csv('anon.last_name',datapath ||'/last_name.csv'),
- anon.load_csv('anon.postcode',datapath ||'/postcode.csv'),
- anon.load_csv('anon.siret',datapath ||'/siret.csv'),
- anon.load_csv('anon.lorem_ipsum',datapath ||'/lorem_ipsum.csv')
+ anon.load_csv('anon.identifiers_category',sharedir || datapath || '/identifiers_category.csv'),
+ anon.load_csv('anon.identifier',sharedir || datapath || '/identifier.csv'),
+ anon.load_csv('anon.address',sharedir || datapath || '/address.csv'),
+ anon.load_csv('anon.city',sharedir || datapath || '/city.csv'),
+ anon.load_csv('anon.company',sharedir || datapath || '/company.csv'),
+ anon.load_csv('anon.country',sharedir || datapath || '/country.csv'),
+ anon.load_csv('anon.email', sharedir || datapath || '/email.csv'),
+ anon.load_csv('anon.first_name',sharedir || datapath || '/first_name.csv'),
+ anon.load_csv('anon.iban',sharedir || datapath || '/iban.csv'),
+ anon.load_csv('anon.last_name',sharedir || datapath || '/last_name.csv'),
+ anon.load_csv('anon.postcode',sharedir || datapath || '/postcode.csv'),
+ anon.load_csv('anon.siret',sharedir || datapath || '/siret.csv'),
+ anon.load_csv('anon.lorem_ipsum',sharedir || datapath || '/lorem_ipsum.csv')
]) results;
RETURN success;
-
END;
$$
- LANGUAGE PLPGSQL
+ LANGUAGE plpgsql
VOLATILE
RETURNS NULL ON NULL INPUT
- PARALLEL UNSAFE -- because load_csv is unsafe
- SECURITY INVOKER
+ PARALLEL UNSAFE -- because of the EXCEPTION
+ SECURITY DEFINER
SET search_path=''
;
-SECURITY LABEL FOR anon ON FUNCTION anon.init(TEXT) IS 'UNTRUSTED';
+
+SECURITY LABEL FOR anon ON FUNCTION anon.load_fake_data IS 'UNTRUSTED';
-- People tend to forget the anon.init() step
-- This is a friendly notice for them
@@ -144,7 +143,7 @@ SECURITY LABEL FOR anon ON FUNCTION anon.notice_if_not_init IS 'UNTRUSTED';
CREATE OR REPLACE FUNCTION anon.load(TEXT)
RETURNS BOOLEAN AS
$$
- SELECT anon.init($1);
+ SELECT anon.init();
$$
LANGUAGE SQL
VOLATILE
@@ -159,16 +158,16 @@ SECURITY LABEL FOR anon ON FUNCTION anon.load(TEXT) IS 'UNTRUSTED';
CREATE OR REPLACE FUNCTION anon.init()
RETURNS BOOLEAN
AS $$
- WITH conf AS (
- -- find the local extension directory
- SELECT setting AS sharedir
- FROM pg_catalog.pg_config
- WHERE name = 'SHAREDIR'
- )
- SELECT anon.init(conf.sharedir || '/extension/anon/')
- FROM conf;
+BEGIN
+ IF anon.is_initialized() THEN
+ RAISE NOTICE 'The anon extension is already initialized.';
+ RETURN TRUE;
+ END IF;
+
+ RETURN anon.load_fake_data();
+END;
$$
- LANGUAGE SQL
+ LANGUAGE plpgsql
VOLATILE
PARALLEL UNSAFE -- because init is unsafe
SECURITY INVOKER

View File

@@ -22,7 +22,7 @@ commands:
- name: local_proxy
user: postgres
sysvInitAction: respawn
shell: 'RUST_LOG="info,proxy::serverless::sql_over_http=warn" /usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
shell: 'RUST_LOG="error" /usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
- name: postgres-exporter
user: nobody
sysvInitAction: respawn

View File

@@ -22,7 +22,7 @@ commands:
- name: local_proxy
user: postgres
sysvInitAction: respawn
shell: 'RUST_LOG="info,proxy::serverless::sql_over_http=warn" /usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
shell: 'RUST_LOG="error" /usr/local/bin/local_proxy --config-path /etc/local_proxy/config.json --pid-path /etc/local_proxy/pid --http 0.0.0.0:10432'
- name: postgres-exporter
user: nobody
sysvInitAction: respawn

View File

@@ -10,6 +10,7 @@ default = []
testing = ["fail/failpoints"]
[dependencies]
async-compression.workspace = true
base64.workspace = true
aws-config.workspace = true
aws-sdk-s3.workspace = true

View File

@@ -60,12 +60,16 @@ use utils::failpoint_support;
// Compatibility hack: if the control plane specified any remote-ext-config
// use the default value for extension storage proxy gateway.
// Remove this once the control plane is updated to pass the gateway URL
fn parse_remote_ext_config(arg: &str) -> Result<String> {
if arg.starts_with("http") {
Ok(arg.trim_end_matches('/').to_string())
fn parse_remote_ext_base_url(arg: &str) -> Result<String> {
const FALLBACK_PG_EXT_GATEWAY_BASE_URL: &str =
"http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local";
Ok(if arg.starts_with("http") {
arg
} else {
Ok("http://pg-ext-s3-gateway".to_string())
FALLBACK_PG_EXT_GATEWAY_BASE_URL
}
.to_owned())
}
#[derive(Parser)]
@@ -74,8 +78,10 @@ struct Cli {
#[arg(short = 'b', long, default_value = "postgres", env = "POSTGRES_PATH")]
pub pgbin: String,
#[arg(short = 'r', long, value_parser = parse_remote_ext_config)]
pub remote_ext_config: Option<String>,
/// The base URL for the remote extension storage proxy gateway.
/// Should be in the form of `http(s)://<gateway-hostname>[:<port>]`.
#[arg(short = 'r', long, value_parser = parse_remote_ext_base_url, alias = "remote-ext-config")]
pub remote_ext_base_url: Option<String>,
/// The port to bind the external listening HTTP server to. Clients running
/// outside the compute will talk to the compute through this port. Keep
@@ -164,7 +170,7 @@ fn main() -> Result<()> {
pgversion: get_pg_version_string(&cli.pgbin),
external_http_port: cli.external_http_port,
internal_http_port: cli.internal_http_port,
ext_remote_storage: cli.remote_ext_config.clone(),
remote_ext_base_url: cli.remote_ext_base_url.clone(),
resize_swap_on_bind: cli.resize_swap_on_bind,
set_disk_quota_for_fs: cli.set_disk_quota_for_fs,
#[cfg(target_os = "linux")]
@@ -265,4 +271,18 @@ mod test {
fn verify_cli() {
Cli::command().debug_assert()
}
#[test]
fn parse_pg_ext_gateway_base_url() {
let arg = "http://pg-ext-s3-gateway2";
let result = super::parse_remote_ext_base_url(arg).unwrap();
assert_eq!(result, arg);
let arg = "pg-ext-s3-gateway";
let result = super::parse_remote_ext_base_url(arg).unwrap();
assert_eq!(
result,
"http://pg-ext-s3-gateway.pg-ext-s3-gateway.svc.cluster.local"
);
}
}

View File

@@ -348,6 +348,7 @@ async fn run_dump_restore(
"--no-security-labels".to_string(),
"--no-subscriptions".to_string(),
"--no-tablespaces".to_string(),
"--no-event-triggers".to_string(),
// format
"--format".to_string(),
"directory".to_string(),

View File

@@ -1,17 +1,10 @@
use std::collections::HashMap;
use std::os::unix::fs::{PermissionsExt, symlink};
use std::path::Path;
use std::process::{Command, Stdio};
use std::str::FromStr;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, Condvar, Mutex, RwLock};
use std::time::{Duration, Instant};
use std::{env, fs};
use anyhow::{Context, Result};
use chrono::{DateTime, Utc};
use compute_api::privilege::Privilege;
use compute_api::responses::{ComputeConfig, ComputeCtlConfig, ComputeMetrics, ComputeStatus};
use compute_api::responses::{
ComputeConfig, ComputeCtlConfig, ComputeMetrics, ComputeStatus, LfcOffloadState,
LfcPrewarmState,
};
use compute_api::spec::{
ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PgIdent,
};
@@ -25,6 +18,16 @@ use postgres;
use postgres::NoTls;
use postgres::error::SqlState;
use remote_storage::{DownloadError, RemotePath};
use std::collections::HashMap;
use std::net::SocketAddr;
use std::os::unix::fs::{PermissionsExt, symlink};
use std::path::Path;
use std::process::{Command, Stdio};
use std::str::FromStr;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, Condvar, Mutex, RwLock};
use std::time::{Duration, Instant};
use std::{env, fs};
use tokio::spawn;
use tracing::{Instrument, debug, error, info, instrument, warn};
use utils::id::{TenantId, TimelineId};
@@ -92,7 +95,7 @@ pub struct ComputeNodeParams {
pub internal_http_port: u16,
/// the address of extension storage proxy gateway
pub ext_remote_storage: Option<String>,
pub remote_ext_base_url: Option<String>,
}
/// Compute node info shared across several `compute_ctl` threads.
@@ -150,6 +153,9 @@ pub struct ComputeState {
/// set up the span relationship ourselves.
pub startup_span: Option<tracing::span::Span>,
pub lfc_prewarm_state: LfcPrewarmState,
pub lfc_offload_state: LfcOffloadState,
pub metrics: ComputeMetrics,
}
@@ -163,6 +169,8 @@ impl ComputeState {
pspec: None,
startup_span: None,
metrics: ComputeMetrics::default(),
lfc_prewarm_state: LfcPrewarmState::default(),
lfc_offload_state: LfcOffloadState::default(),
}
}
@@ -198,6 +206,8 @@ pub struct ParsedSpec {
pub pageserver_connstr: String,
pub safekeeper_connstrings: Vec<String>,
pub storage_auth_token: Option<String>,
pub endpoint_storage_addr: Option<SocketAddr>,
pub endpoint_storage_token: Option<String>,
}
impl TryFrom<ComputeSpec> for ParsedSpec {
@@ -251,6 +261,18 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
.or(Err("invalid timeline id"))?
};
let endpoint_storage_addr: Option<SocketAddr> = spec
.endpoint_storage_addr
.clone()
.or_else(|| spec.cluster.settings.find("neon.endpoint_storage_addr"))
.unwrap_or_default()
.parse()
.ok();
let endpoint_storage_token = spec
.endpoint_storage_token
.clone()
.or_else(|| spec.cluster.settings.find("neon.endpoint_storage_token"));
Ok(ParsedSpec {
spec,
pageserver_connstr,
@@ -258,6 +280,8 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
storage_auth_token,
tenant_id,
timeline_id,
endpoint_storage_addr,
endpoint_storage_token,
})
}
}
@@ -305,11 +329,39 @@ struct StartVmMonitorResult {
impl ComputeNode {
pub fn new(params: ComputeNodeParams, config: ComputeConfig) -> Result<Self> {
let connstr = params.connstr.as_str();
let conn_conf = postgres::config::Config::from_str(connstr)
let mut conn_conf = postgres::config::Config::from_str(connstr)
.context("cannot build postgres config from connstr")?;
let tokio_conn_conf = tokio_postgres::config::Config::from_str(connstr)
let mut tokio_conn_conf = tokio_postgres::config::Config::from_str(connstr)
.context("cannot build tokio postgres config from connstr")?;
// Users can set some configuration parameters per database with
// ALTER DATABASE ... SET ...
//
// There are at least these parameters:
//
// - role=some_other_role
// - default_transaction_read_only=on
// - statement_timeout=1, i.e., 1ms, which will cause most of the queries to fail
// - search_path=non_public_schema, this should be actually safe because
// we don't call any functions in user databases, but better to always reset
// it to public.
//
// that can affect `compute_ctl` and prevent it from properly configuring the database schema.
// Unset them via connection string options before connecting to the database.
// N.B. keep it in sync with `ZENITH_OPTIONS` in `get_maintenance_client()`.
//
// TODO(ololobus): we currently pass `-c default_transaction_read_only=off` from control plane
// as well. After rolling out this code, we can remove this parameter from control plane.
// In the meantime, double-passing is fine, the last value is applied.
// See: <https://github.com/neondatabase/cloud/blob/133dd8c4dbbba40edfbad475bf6a45073ca63faf/goapp/controlplane/internal/pkg/compute/provisioner/provisioner_common.go#L70>
const EXTRA_OPTIONS: &str = "-c role=cloud_admin -c default_transaction_read_only=off -c search_path=public -c statement_timeout=0";
let options = match conn_conf.get_options() {
Some(options) => format!("{} {}", options, EXTRA_OPTIONS),
None => EXTRA_OPTIONS.to_string(),
};
conn_conf.options(&options);
tokio_conn_conf.options(&options);
let mut new_state = ComputeState::new();
if let Some(spec) = config.spec {
let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow::anyhow!(msg))?;
@@ -736,6 +788,9 @@ impl ComputeNode {
// Log metrics so that we can search for slow operations in logs
info!(?metrics, postmaster_pid = %postmaster_pid, "compute start finished");
if pspec.spec.prewarm_lfc_on_startup {
self.prewarm_lfc();
}
Ok(())
}
@@ -1422,15 +1477,20 @@ impl ComputeNode {
Err(e) => match e.code() {
Some(&SqlState::INVALID_PASSWORD)
| Some(&SqlState::INVALID_AUTHORIZATION_SPECIFICATION) => {
// Connect with zenith_admin if cloud_admin could not authenticate
// Connect with `zenith_admin` if `cloud_admin` could not authenticate
info!(
"cannot connect to postgres: {}, retrying with `zenith_admin` username",
"cannot connect to Postgres: {}, retrying with 'zenith_admin' username",
e
);
let mut zenith_admin_conf = postgres::config::Config::from(conf.clone());
zenith_admin_conf.application_name("compute_ctl:apply_config");
zenith_admin_conf.user("zenith_admin");
// It doesn't matter what were the options before, here we just want
// to connect and create a new superuser role.
const ZENITH_OPTIONS: &str = "-c role=zenith_admin -c default_transaction_read_only=off -c search_path=public -c statement_timeout=0";
zenith_admin_conf.options(ZENITH_OPTIONS);
let mut client =
zenith_admin_conf.connect(NoTls)
.context("broken cloud_admin credential: tried connecting with cloud_admin but could not authenticate, and zenith_admin does not work either")?;
@@ -1596,9 +1656,7 @@ impl ComputeNode {
self.pg_reload_conf()?;
if spec.mode == ComputeMode::Primary {
let mut conf =
tokio_postgres::Config::from_str(self.params.connstr.as_str()).unwrap();
conf.application_name("apply_config");
let conf = self.get_tokio_conn_conf(Some("compute_ctl:reconfigure"));
let conf = Arc::new(conf);
let spec = Arc::new(spec.clone());
@@ -1838,9 +1896,9 @@ LIMIT 100",
real_ext_name: String,
ext_path: RemotePath,
) -> Result<u64, DownloadError> {
let ext_remote_storage =
let remote_ext_base_url =
self.params
.ext_remote_storage
.remote_ext_base_url
.as_ref()
.ok_or(DownloadError::BadInput(anyhow::anyhow!(
"Remote extensions storage is not configured",
@@ -1902,7 +1960,7 @@ LIMIT 100",
let download_size = extension_server::download_extension(
&real_ext_name,
&ext_path,
ext_remote_storage,
remote_ext_base_url,
&self.params.pgbin,
)
.await
@@ -2011,7 +2069,7 @@ LIMIT 100",
&self,
spec: &ComputeSpec,
) -> Result<RemoteExtensionMetrics> {
if self.params.ext_remote_storage.is_none() {
if self.params.remote_ext_base_url.is_none() {
return Ok(RemoteExtensionMetrics {
num_ext_downloaded: 0,
largest_ext_size: 0,

View File

@@ -0,0 +1,202 @@
use crate::compute::ComputeNode;
use anyhow::{Context, Result, bail};
use async_compression::tokio::bufread::{ZstdDecoder, ZstdEncoder};
use compute_api::responses::LfcOffloadState;
use compute_api::responses::LfcPrewarmState;
use http::StatusCode;
use reqwest::Client;
use std::sync::Arc;
use tokio::{io::AsyncReadExt, spawn};
use tracing::{error, info};
#[derive(serde::Serialize, Default)]
pub struct LfcPrewarmStateWithProgress {
#[serde(flatten)]
base: LfcPrewarmState,
total: i32,
prewarmed: i32,
skipped: i32,
}
/// A pair of url and a token to query endpoint storage for LFC prewarm-related tasks
struct EndpointStoragePair {
url: String,
token: String,
}
const KEY: &str = "lfc_state";
impl TryFrom<&crate::compute::ParsedSpec> for EndpointStoragePair {
type Error = anyhow::Error;
fn try_from(pspec: &crate::compute::ParsedSpec) -> Result<Self, Self::Error> {
let Some(ref endpoint_id) = pspec.spec.endpoint_id else {
bail!("pspec.endpoint_id missing")
};
let Some(ref base_uri) = pspec.endpoint_storage_addr else {
bail!("pspec.endpoint_storage_addr missing")
};
let tenant_id = pspec.tenant_id;
let timeline_id = pspec.timeline_id;
let url = format!("http://{base_uri}/{tenant_id}/{timeline_id}/{endpoint_id}/{KEY}");
let Some(ref token) = pspec.endpoint_storage_token else {
bail!("pspec.endpoint_storage_token missing")
};
let token = token.clone();
Ok(EndpointStoragePair { url, token })
}
}
impl ComputeNode {
// If prewarm failed, we want to get overall number of segments as well as done ones.
// However, this function should be reliable even if querying postgres failed.
pub async fn lfc_prewarm_state(&self) -> LfcPrewarmStateWithProgress {
info!("requesting LFC prewarm state from postgres");
let mut state = LfcPrewarmStateWithProgress::default();
{
state.base = self.state.lock().unwrap().lfc_prewarm_state.clone();
}
let client = match ComputeNode::get_maintenance_client(&self.tokio_conn_conf).await {
Ok(client) => client,
Err(err) => {
error!(%err, "connecting to postgres");
return state;
}
};
let row = match client
.query_one("select * from get_prewarm_info()", &[])
.await
{
Ok(row) => row,
Err(err) => {
error!(%err, "querying LFC prewarm status");
return state;
}
};
state.total = row.try_get(0).unwrap_or_default();
state.prewarmed = row.try_get(1).unwrap_or_default();
state.skipped = row.try_get(2).unwrap_or_default();
state
}
pub fn lfc_offload_state(&self) -> LfcOffloadState {
self.state.lock().unwrap().lfc_offload_state.clone()
}
/// Returns false if there is a prewarm request ongoing, true otherwise
pub fn prewarm_lfc(self: &Arc<Self>) -> bool {
crate::metrics::LFC_PREWARM_REQUESTS.inc();
{
let state = &mut self.state.lock().unwrap().lfc_prewarm_state;
if let LfcPrewarmState::Prewarming =
std::mem::replace(state, LfcPrewarmState::Prewarming)
{
return false;
}
}
let cloned = self.clone();
spawn(async move {
let Err(err) = cloned.prewarm_impl().await else {
cloned.state.lock().unwrap().lfc_prewarm_state = LfcPrewarmState::Completed;
return;
};
error!(%err);
cloned.state.lock().unwrap().lfc_prewarm_state = LfcPrewarmState::Failed {
error: err.to_string(),
};
});
true
}
fn endpoint_storage_pair(&self) -> Result<EndpointStoragePair> {
let state = self.state.lock().unwrap();
state.pspec.as_ref().unwrap().try_into()
}
async fn prewarm_impl(&self) -> Result<()> {
let EndpointStoragePair { url, token } = self.endpoint_storage_pair()?;
info!(%url, "requesting LFC state from endpoint storage");
let request = Client::new().get(&url).bearer_auth(token);
let res = request.send().await.context("querying endpoint storage")?;
let status = res.status();
if status != StatusCode::OK {
bail!("{status} querying endpoint storage")
}
let mut uncompressed = Vec::new();
let lfc_state = res
.bytes()
.await
.context("getting request body from endpoint storage")?;
ZstdDecoder::new(lfc_state.iter().as_slice())
.read_to_end(&mut uncompressed)
.await
.context("decoding LFC state")?;
let uncompressed_len = uncompressed.len();
info!(%url, "downloaded LFC state, uncompressed size {uncompressed_len}, loading into postgres");
ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
.await
.context("connecting to postgres")?
.query_one("select prewarm_local_cache($1)", &[&uncompressed])
.await
.context("loading LFC state into postgres")
.map(|_| ())
}
/// Returns false if there is an offload request ongoing, true otherwise
pub fn offload_lfc(self: &Arc<Self>) -> bool {
crate::metrics::LFC_OFFLOAD_REQUESTS.inc();
{
let state = &mut self.state.lock().unwrap().lfc_offload_state;
if let LfcOffloadState::Offloading =
std::mem::replace(state, LfcOffloadState::Offloading)
{
return false;
}
}
let cloned = self.clone();
spawn(async move {
let Err(err) = cloned.offload_lfc_impl().await else {
cloned.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Completed;
return;
};
error!(%err);
cloned.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Failed {
error: err.to_string(),
};
});
true
}
async fn offload_lfc_impl(&self) -> Result<()> {
let EndpointStoragePair { url, token } = self.endpoint_storage_pair()?;
info!(%url, "requesting LFC state from postgres");
let mut compressed = Vec::new();
ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
.await
.context("connecting to postgres")?
.query_one("select get_local_cache_state()", &[])
.await
.context("querying LFC state")?
.try_get::<usize, &[u8]>(0)
.context("deserializing LFC state")
.map(ZstdEncoder::new)?
.read_to_end(&mut compressed)
.await
.context("compressing LFC state")?;
let compressed_len = compressed.len();
info!(%url, "downloaded LFC state, compressed size {compressed_len}, writing to endpoint storage");
let request = Client::new().put(url).bearer_auth(token).body(compressed);
match request.send().await {
Ok(res) if res.status() == StatusCode::OK => Ok(()),
Ok(res) => bail!("Error writing to endpoint storage: {}", res.status()),
Err(err) => Err(err).context("writing to endpoint storage"),
}
}
}

View File

@@ -223,6 +223,9 @@ pub fn write_postgres_conf(
// TODO: tune this after performance testing
writeln!(file, "pgaudit.log_rotation_age=5")?;
// Enable audit logs for pg_session_jwt extension
writeln!(file, "pg_session_jwt.audit_log=on")?;
// Add audit shared_preload_libraries, if they are not present.
//
// The caller who sets the flag is responsible for ensuring that the necessary

View File

@@ -158,14 +158,14 @@ fn parse_pg_version(human_version: &str) -> PostgresMajorVersion {
pub async fn download_extension(
ext_name: &str,
ext_path: &RemotePath,
ext_remote_storage: &str,
remote_ext_base_url: &str,
pgbin: &str,
) -> Result<u64> {
info!("Download extension {:?} from {:?}", ext_name, ext_path);
// TODO add retry logic
let download_buffer =
match download_extension_tar(ext_remote_storage, &ext_path.to_string()).await {
match download_extension_tar(remote_ext_base_url, &ext_path.to_string()).await {
Ok(buffer) => buffer,
Err(error_message) => {
return Err(anyhow::anyhow!(
@@ -272,8 +272,8 @@ pub fn create_control_files(remote_extensions: &RemoteExtSpec, pgbin: &str) {
// Do request to extension storage proxy, e.g.,
// curl http://pg-ext-s3-gateway/latest/v15/extensions/anon.tar.zst
// using HTTP GET and return the response body as bytes.
async fn download_extension_tar(ext_remote_storage: &str, ext_path: &str) -> Result<Bytes> {
let uri = format!("{}/{}", ext_remote_storage, ext_path);
async fn download_extension_tar(remote_ext_base_url: &str, ext_path: &str) -> Result<Bytes> {
let uri = format!("{}/{}", remote_ext_base_url, ext_path);
let filename = Path::new(ext_path)
.file_name()
.unwrap_or_else(|| std::ffi::OsStr::new("unknown"))

View File

@@ -1,12 +1,10 @@
use std::collections::HashSet;
use anyhow::{Result, anyhow};
use axum::{RequestExt, body::Body};
use axum_extra::{
TypedHeader,
headers::{Authorization, authorization::Bearer},
};
use compute_api::requests::ComputeClaims;
use compute_api::requests::{COMPUTE_AUDIENCE, ComputeClaims, ComputeClaimsScope};
use futures::future::BoxFuture;
use http::{Request, Response, StatusCode};
use jsonwebtoken::{Algorithm, DecodingKey, TokenData, Validation, jwk::JwkSet};
@@ -25,13 +23,14 @@ pub(in crate::http) struct Authorize {
impl Authorize {
pub fn new(compute_id: String, jwks: JwkSet) -> Self {
let mut validation = Validation::new(Algorithm::EdDSA);
// Nothing is currently required
validation.required_spec_claims = HashSet::new();
validation.validate_exp = true;
// Unused by the control plane
validation.validate_aud = false;
// Unused by the control plane
validation.validate_nbf = false;
// Unused by the control plane
validation.validate_aud = false;
validation.set_audience(&[COMPUTE_AUDIENCE]);
// Nothing is currently required
validation.set_required_spec_claims(&[] as &[&str; 0]);
Self {
compute_id,
@@ -64,11 +63,47 @@ impl AsyncAuthorizeRequest<Body> for Authorize {
Err(e) => return Err(JsonResponse::error(StatusCode::UNAUTHORIZED, e)),
};
if data.claims.compute_id != compute_id {
return Err(JsonResponse::error(
StatusCode::UNAUTHORIZED,
"invalid compute ID in authorization token claims",
));
match data.claims.scope {
// TODO: We should validate audience for every token, but
// instead of this ad-hoc validation, we should turn
// [`Validation::validate_aud`] on. This is merely a stopgap
// while we roll out `aud` deployment. We return a 401
// Unauthorized because when we eventually do use
// [`Validation`], we will hit the above `Err` match arm which
// returns 401 Unauthorized.
Some(ComputeClaimsScope::Admin) => {
let Some(ref audience) = data.claims.audience else {
return Err(JsonResponse::error(
StatusCode::UNAUTHORIZED,
"missing audience in authorization token claims",
));
};
if !audience.iter().any(|a| a == COMPUTE_AUDIENCE) {
return Err(JsonResponse::error(
StatusCode::UNAUTHORIZED,
"invalid audience in authorization token claims",
));
}
}
// If the scope is not [`ComputeClaimsScope::Admin`], then we
// must validate the compute_id
_ => {
let Some(ref claimed_compute_id) = data.claims.compute_id else {
return Err(JsonResponse::error(
StatusCode::FORBIDDEN,
"missing compute_id in authorization token claims",
));
};
if *claimed_compute_id != compute_id {
return Err(JsonResponse::error(
StatusCode::FORBIDDEN,
"invalid compute ID in authorization token claims",
));
}
}
}
// Make claims available to any subsequent middleware or request

View File

@@ -22,7 +22,7 @@ pub(in crate::http) async fn download_extension(
State(compute): State<Arc<ComputeNode>>,
) -> Response {
// Don't even try to download extensions if no remote storage is configured
if compute.params.ext_remote_storage.is_none() {
if compute.params.remote_ext_base_url.is_none() {
return JsonResponse::error(
StatusCode::PRECONDITION_FAILED,
"remote storage is not configured",

View File

@@ -0,0 +1,39 @@
use crate::compute_prewarm::LfcPrewarmStateWithProgress;
use crate::http::JsonResponse;
use axum::response::{IntoResponse, Response};
use axum::{Json, http::StatusCode};
use compute_api::responses::LfcOffloadState;
type Compute = axum::extract::State<std::sync::Arc<crate::compute::ComputeNode>>;
pub(in crate::http) async fn prewarm_state(compute: Compute) -> Json<LfcPrewarmStateWithProgress> {
Json(compute.lfc_prewarm_state().await)
}
// Following functions are marked async for axum, as it's more convenient than wrapping these
// in async lambdas at call site
pub(in crate::http) async fn offload_state(compute: Compute) -> Json<LfcOffloadState> {
Json(compute.lfc_offload_state())
}
pub(in crate::http) async fn prewarm(compute: Compute) -> Response {
if compute.prewarm_lfc() {
StatusCode::ACCEPTED.into_response()
} else {
JsonResponse::error(
StatusCode::TOO_MANY_REQUESTS,
"Multiple requests for prewarm are not allowed",
)
}
}
pub(in crate::http) async fn offload(compute: Compute) -> Response {
if compute.offload_lfc() {
StatusCode::ACCEPTED.into_response()
} else {
JsonResponse::error(
StatusCode::TOO_MANY_REQUESTS,
"Multiple requests for prewarm offload are not allowed",
)
}
}

View File

@@ -11,6 +11,7 @@ pub(in crate::http) mod extensions;
pub(in crate::http) mod failpoints;
pub(in crate::http) mod grants;
pub(in crate::http) mod insights;
pub(in crate::http) mod lfc;
pub(in crate::http) mod metrics;
pub(in crate::http) mod metrics_json;
pub(in crate::http) mod status;

View File

@@ -23,7 +23,7 @@ use super::{
middleware::authorize::Authorize,
routes::{
check_writability, configure, database_schema, dbs_and_roles, extension_server, extensions,
grants, insights, metrics, metrics_json, status, terminate,
grants, insights, lfc, metrics, metrics_json, status, terminate,
},
};
use crate::compute::ComputeNode;
@@ -85,6 +85,8 @@ impl From<&Server> for Router<Arc<ComputeNode>> {
Router::<Arc<ComputeNode>>::new().route("/metrics", get(metrics::get_metrics));
let authenticated_router = Router::<Arc<ComputeNode>>::new()
.route("/lfc/prewarm", get(lfc::prewarm_state).post(lfc::prewarm))
.route("/lfc/offload", get(lfc::offload_state).post(lfc::offload))
.route("/check_writability", post(check_writability::is_writable))
.route("/configure", post(configure::configure))
.route("/database_schema", get(database_schema::get_schema_dump))

View File

@@ -11,6 +11,7 @@ pub mod http;
pub mod logger;
pub mod catalog;
pub mod compute;
pub mod compute_prewarm;
pub mod disk_quota;
pub mod extension_server;
pub mod installed_extensions;

View File

@@ -1,7 +1,7 @@
use metrics::core::{AtomicF64, AtomicU64, Collector, GenericCounter, GenericGauge};
use metrics::proto::MetricFamily;
use metrics::{
IntCounterVec, IntGaugeVec, UIntGaugeVec, register_gauge, register_int_counter,
IntCounter, IntCounterVec, IntGaugeVec, UIntGaugeVec, register_gauge, register_int_counter,
register_int_counter_vec, register_int_gauge_vec, register_uint_gauge_vec,
};
use once_cell::sync::Lazy;
@@ -97,6 +97,24 @@ pub(crate) static PG_TOTAL_DOWNTIME_MS: Lazy<GenericCounter<AtomicU64>> = Lazy::
.expect("failed to define a metric")
});
/// Needed as neon.file_cache_prewarm_batch == 0 doesn't mean we never tried to prewarm.
/// On the other hand, LFC_PREWARMED_PAGES is excessive as we can GET /lfc/prewarm
pub(crate) static LFC_PREWARM_REQUESTS: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
"compute_ctl_lfc_prewarm_requests_total",
"Total number of LFC prewarm requests made by compute_ctl",
)
.expect("failed to define a metric")
});
pub(crate) static LFC_OFFLOAD_REQUESTS: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
"compute_ctl_lfc_offload_requests_total",
"Total number of LFC offload requests made by compute_ctl",
)
.expect("failed to define a metric")
});
pub fn collect() -> Vec<MetricFamily> {
let mut metrics = COMPUTE_CTL_UP.collect();
metrics.extend(INSTALLED_EXTENSIONS.collect());
@@ -106,5 +124,7 @@ pub fn collect() -> Vec<MetricFamily> {
metrics.extend(AUDIT_LOG_DIR_SIZE.collect());
metrics.extend(PG_CURR_DOWNTIME_MS.collect());
metrics.extend(PG_TOTAL_DOWNTIME_MS.collect());
metrics.extend(LFC_PREWARM_REQUESTS.collect());
metrics.extend(LFC_OFFLOAD_REQUESTS.collect());
metrics
}

View File

@@ -424,10 +424,10 @@ pub fn launch_monitor(compute: &Arc<ComputeNode>) -> thread::JoinHandle<()> {
experimental,
};
let span = span!(Level::INFO, "compute_monitor");
thread::Builder::new()
.name("compute-monitor".into())
.spawn(move || {
let span = span!(Level::INFO, "compute_monitor");
let _enter = span.enter();
monitor.run();
})

View File

@@ -30,6 +30,7 @@ mod pg_helpers_tests {
r#"fsync = off
wal_level = logical
hot_standby = on
prewarm_lfc_on_startup = off
neon.safekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'
wal_log_hints = on
log_connections = on

View File

@@ -41,7 +41,7 @@ storage_broker.workspace = true
http-utils.workspace = true
utils.workspace = true
whoami.workspace = true
endpoint_storage.workspace = true
compute_api.workspace = true
workspace_hack.workspace = true
tracing.workspace = true

View File

@@ -16,10 +16,11 @@ use std::time::Duration;
use anyhow::{Context, Result, anyhow, bail};
use clap::Parser;
use compute_api::requests::ComputeClaimsScope;
use compute_api::spec::ComputeMode;
use control_plane::broker::StorageBroker;
use control_plane::endpoint::ComputeControlPlane;
use control_plane::endpoint_storage::{ENDPOINT_STORAGE_DEFAULT_PORT, EndpointStorage};
use control_plane::endpoint_storage::{ENDPOINT_STORAGE_DEFAULT_ADDR, EndpointStorage};
use control_plane::local_env;
use control_plane::local_env::{
EndpointStorageConf, InitForceMode, LocalEnv, NeonBroker, NeonLocalInitConf,
@@ -643,9 +644,10 @@ struct EndpointStartCmdArgs {
#[clap(
long,
help = "Configure the remote extensions storage proxy gateway to request for extensions."
help = "Configure the remote extensions storage proxy gateway URL to request for extensions.",
alias = "remote-ext-config"
)]
remote_ext_config: Option<String>,
remote_ext_base_url: Option<String>,
#[clap(
long,
@@ -705,6 +707,9 @@ struct EndpointStopCmdArgs {
struct EndpointGenerateJwtCmdArgs {
#[clap(help = "Postgres endpoint id")]
endpoint_id: String,
#[clap(short = 's', long, help = "Scope to generate the JWT with", value_parser = ComputeClaimsScope::from_str)]
scope: Option<ComputeClaimsScope>,
}
#[derive(clap::Subcommand)]
@@ -1018,7 +1023,7 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
})
.collect(),
endpoint_storage: EndpointStorageConf {
port: ENDPOINT_STORAGE_DEFAULT_PORT,
listen_addr: ENDPOINT_STORAGE_DEFAULT_ADDR,
},
pg_distrib_dir: None,
neon_distrib_dir: None,
@@ -1410,9 +1415,16 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
EndpointCmd::Start(args) => {
let endpoint_id = &args.endpoint_id;
let pageserver_id = args.endpoint_pageserver_id;
let remote_ext_config = &args.remote_ext_config;
let remote_ext_base_url = &args.remote_ext_base_url;
let safekeepers_generation = args.safekeepers_generation.map(SafekeeperGeneration::new);
let default_generation = env
.storage_controller
.timelines_onto_safekeepers
.then_some(1);
let safekeepers_generation = args
.safekeepers_generation
.or(default_generation)
.map(SafekeeperGeneration::new);
// If --safekeepers argument is given, use only the listed
// safekeeper nodes; otherwise all from the env.
let safekeepers = if let Some(safekeepers) = parse_safekeepers(&args.safekeepers)? {
@@ -1484,14 +1496,29 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
None
};
let exp = (std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH)?
+ Duration::from_secs(86400))
.as_secs();
let claims = endpoint_storage::claims::EndpointStorageClaims {
tenant_id: endpoint.tenant_id,
timeline_id: endpoint.timeline_id,
endpoint_id: endpoint_id.to_string(),
exp,
};
let endpoint_storage_token = env.generate_auth_token(&claims)?;
let endpoint_storage_addr = env.endpoint_storage.listen_addr.to_string();
println!("Starting existing endpoint {endpoint_id}...");
endpoint
.start(
&auth_token,
endpoint_storage_token,
endpoint_storage_addr,
safekeepers_generation,
safekeepers,
pageservers,
remote_ext_config.as_ref(),
remote_ext_base_url.as_ref(),
stripe_size.0 as usize,
args.create_test_user,
args.start_timeout,
@@ -1540,12 +1567,16 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
endpoint.stop(&args.mode, args.destroy)?;
}
EndpointCmd::GenerateJwt(args) => {
let endpoint_id = &args.endpoint_id;
let endpoint = cplane
.endpoints
.get(endpoint_id)
.with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
let jwt = endpoint.generate_jwt()?;
let endpoint = {
let endpoint_id = &args.endpoint_id;
cplane
.endpoints
.get(endpoint_id)
.with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?
};
let jwt = endpoint.generate_jwt(args.scope)?;
print!("{jwt}");
}

View File

@@ -45,7 +45,9 @@ use std::sync::Arc;
use std::time::{Duration, Instant};
use anyhow::{Context, Result, anyhow, bail};
use compute_api::requests::{ComputeClaims, ConfigurationRequest};
use compute_api::requests::{
COMPUTE_AUDIENCE, ComputeClaims, ComputeClaimsScope, ConfigurationRequest,
};
use compute_api::responses::{
ComputeConfig, ComputeCtlConfig, ComputeStatus, ComputeStatusResponse, TlsConfig,
};
@@ -630,9 +632,17 @@ impl Endpoint {
}
/// Generate a JWT with the correct claims.
pub fn generate_jwt(&self) -> Result<String> {
pub fn generate_jwt(&self, scope: Option<ComputeClaimsScope>) -> Result<String> {
self.env.generate_auth_token(&ComputeClaims {
compute_id: self.endpoint_id.clone(),
audience: match scope {
Some(ComputeClaimsScope::Admin) => Some(vec![COMPUTE_AUDIENCE.to_owned()]),
_ => None,
},
compute_id: match scope {
Some(ComputeClaimsScope::Admin) => None,
_ => Some(self.endpoint_id.clone()),
},
scope,
})
}
@@ -640,10 +650,12 @@ impl Endpoint {
pub async fn start(
&self,
auth_token: &Option<String>,
endpoint_storage_token: String,
endpoint_storage_addr: String,
safekeepers_generation: Option<SafekeeperGeneration>,
safekeepers: Vec<NodeId>,
pageservers: Vec<(Host, u16)>,
remote_ext_config: Option<&String>,
remote_ext_base_url: Option<&String>,
shard_stripe_size: usize,
create_test_user: bool,
start_timeout: Duration,
@@ -733,6 +745,9 @@ impl Endpoint {
drop_subscriptions_before_start: self.drop_subscriptions_before_start,
audit_log_level: ComputeAudit::Disabled,
logs_export_host: None::<String>,
endpoint_storage_addr: Some(endpoint_storage_addr),
endpoint_storage_token: Some(endpoint_storage_token),
prewarm_lfc_on_startup: false,
};
// this strange code is needed to support respec() in tests
@@ -810,8 +825,8 @@ impl Endpoint {
.stderr(logfile.try_clone()?)
.stdout(logfile);
if let Some(remote_ext_config) = remote_ext_config {
cmd.args(["--remote-ext-config", remote_ext_config]);
if let Some(remote_ext_base_url) = remote_ext_base_url {
cmd.args(["--remote-ext-base-url", remote_ext_base_url]);
}
let child = cmd.spawn()?;
@@ -903,7 +918,7 @@ impl Endpoint {
self.external_http_address.port()
),
)
.bearer_auth(self.generate_jwt()?)
.bearer_auth(self.generate_jwt(None::<ComputeClaimsScope>)?)
.send()
.await?;
@@ -980,7 +995,7 @@ impl Endpoint {
self.external_http_address.port()
))
.header(CONTENT_TYPE.as_str(), "application/json")
.bearer_auth(self.generate_jwt()?)
.bearer_auth(self.generate_jwt(None::<ComputeClaimsScope>)?)
.body(
serde_json::to_string(&ConfigurationRequest {
spec,

View File

@@ -3,17 +3,19 @@ use crate::local_env::LocalEnv;
use anyhow::{Context, Result};
use camino::Utf8PathBuf;
use std::io::Write;
use std::net::SocketAddr;
use std::time::Duration;
/// Directory within .neon which will be used by default for LocalFs remote storage.
pub const ENDPOINT_STORAGE_REMOTE_STORAGE_DIR: &str = "local_fs_remote_storage/endpoint_storage";
pub const ENDPOINT_STORAGE_DEFAULT_PORT: u16 = 9993;
pub const ENDPOINT_STORAGE_DEFAULT_ADDR: SocketAddr =
SocketAddr::new(std::net::IpAddr::V4(std::net::Ipv4Addr::LOCALHOST), 9993);
pub struct EndpointStorage {
pub bin: Utf8PathBuf,
pub data_dir: Utf8PathBuf,
pub pemfile: Utf8PathBuf,
pub port: u16,
pub addr: SocketAddr,
}
impl EndpointStorage {
@@ -22,7 +24,7 @@ impl EndpointStorage {
bin: Utf8PathBuf::from_path_buf(env.endpoint_storage_bin()).unwrap(),
data_dir: Utf8PathBuf::from_path_buf(env.endpoint_storage_data_dir()).unwrap(),
pemfile: Utf8PathBuf::from_path_buf(env.public_key_path.clone()).unwrap(),
port: env.endpoint_storage.port,
addr: env.endpoint_storage.listen_addr,
}
}
@@ -31,7 +33,7 @@ impl EndpointStorage {
}
fn listen_addr(&self) -> Utf8PathBuf {
format!("127.0.0.1:{}", self.port).into()
format!("{}:{}", self.addr.ip(), self.addr.port()).into()
}
pub fn init(&self) -> Result<()> {

View File

@@ -20,7 +20,9 @@ use utils::auth::encode_from_key_file;
use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId};
use crate::broker::StorageBroker;
use crate::endpoint_storage::{ENDPOINT_STORAGE_REMOTE_STORAGE_DIR, EndpointStorage};
use crate::endpoint_storage::{
ENDPOINT_STORAGE_DEFAULT_ADDR, ENDPOINT_STORAGE_REMOTE_STORAGE_DIR, EndpointStorage,
};
use crate::pageserver::{PAGESERVER_REMOTE_STORAGE_DIR, PageServerNode};
use crate::safekeeper::SafekeeperNode;
@@ -151,10 +153,10 @@ pub struct NeonLocalInitConf {
pub generate_local_ssl_certs: bool,
}
#[derive(Serialize, Default, Deserialize, PartialEq, Eq, Clone, Debug)]
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
#[serde(default)]
pub struct EndpointStorageConf {
pub port: u16,
pub listen_addr: SocketAddr,
}
/// Broker config for cluster internal communication.
@@ -241,6 +243,14 @@ impl Default for NeonStorageControllerConf {
}
}
impl Default for EndpointStorageConf {
fn default() -> Self {
Self {
listen_addr: ENDPOINT_STORAGE_DEFAULT_ADDR,
}
}
}
impl NeonBroker {
pub fn client_url(&self) -> Url {
let url = if let Some(addr) = self.listen_https_addr {

View File

@@ -112,7 +112,7 @@ impl SafekeeperNode {
}
/// Initializes a safekeeper node by creating all necessary files,
/// e.g. SSL certificates.
/// e.g. SSL certificates and JWT token file.
pub fn initialize(&self) -> anyhow::Result<()> {
if self.env.generate_local_ssl_certs {
self.env.generate_ssl_cert(
@@ -120,6 +120,17 @@ impl SafekeeperNode {
&self.datadir_path().join("server.key"),
)?;
}
// Generate a token file for authentication with other safekeepers
if self.conf.auth_enabled {
let token = self
.env
.generate_auth_token(&Claims::new(None, Scope::SafekeeperData))?;
let token_path = self.datadir_path().join("peer_jwt_token");
std::fs::write(token_path, token)?;
}
Ok(())
}
@@ -218,14 +229,26 @@ impl SafekeeperNode {
args.push(format!("--ssl-ca-file={}", ssl_ca_file.to_str().unwrap()));
}
if self.conf.auth_enabled {
let token_path = self.datadir_path().join("peer_jwt_token");
let token_path_str = token_path
.to_str()
.with_context(|| {
format!("Token path {token_path:?} cannot be represented as a unicode string")
})?
.to_owned();
args.extend(["--auth-token-path".to_owned(), token_path_str]);
}
args.extend_from_slice(extra_opts);
let env_variables = Vec::new();
background_process::start_process(
&format!("safekeeper-{id}"),
&datadir,
&self.env.safekeeper_bin(),
&args,
self.safekeeper_env_variables()?,
env_variables,
background_process::InitialPidFile::Expect(self.pid_file()),
retry_timeout,
|| async {
@@ -239,18 +262,6 @@ impl SafekeeperNode {
.await
}
fn safekeeper_env_variables(&self) -> anyhow::Result<Vec<(String, String)>> {
// Generate a token to connect from safekeeper to peers
if self.conf.auth_enabled {
let token = self
.env
.generate_auth_token(&Claims::new(None, Scope::SafekeeperData))?;
Ok(vec![("SAFEKEEPER_AUTH_TOKEN".to_owned(), token)])
} else {
Ok(Vec::new())
}
}
///
/// Stop the server.
///

View File

@@ -10,7 +10,8 @@ use camino::{Utf8Path, Utf8PathBuf};
use hyper0::Uri;
use nix::unistd::Pid;
use pageserver_api::controller_api::{
NodeConfigureRequest, NodeDescribeResponse, NodeRegisterRequest, TenantCreateRequest,
NodeConfigureRequest, NodeDescribeResponse, NodeRegisterRequest,
SafekeeperSchedulingPolicyRequest, SkSchedulingPolicy, TenantCreateRequest,
TenantCreateResponse, TenantLocateResponse,
};
use pageserver_api::models::{
@@ -20,7 +21,7 @@ use pageserver_api::shard::TenantShardId;
use pageserver_client::mgmt_api::ResponseErrorMessageExt;
use pem::Pem;
use postgres_backend::AuthType;
use reqwest::Method;
use reqwest::{Method, Response};
use serde::de::DeserializeOwned;
use serde::{Deserialize, Serialize};
use tokio::process::Command;
@@ -570,6 +571,11 @@ impl StorageController {
let peer_jwt_token = encode_from_key_file(&peer_claims, private_key)
.expect("failed to generate jwt token");
args.push(format!("--peer-jwt-token={peer_jwt_token}"));
let claims = Claims::new(None, Scope::SafekeeperData);
let jwt_token =
encode_from_key_file(&claims, private_key).expect("failed to generate jwt token");
args.push(format!("--safekeeper-jwt-token={jwt_token}"));
}
if let Some(public_key) = &self.public_key {
@@ -614,6 +620,10 @@ impl StorageController {
self.env.base_data_dir.display()
));
if self.env.safekeepers.iter().any(|sk| sk.auth_enabled) && self.private_key.is_none() {
anyhow::bail!("Safekeeper set up for auth but no private key specified");
}
if self.config.timelines_onto_safekeepers {
args.push("--timelines-onto-safekeepers".to_string());
}
@@ -640,6 +650,10 @@ impl StorageController {
)
.await?;
if self.config.timelines_onto_safekeepers {
self.register_safekeepers().await?;
}
Ok(())
}
@@ -743,6 +757,23 @@ impl StorageController {
where
RQ: Serialize + Sized,
RS: DeserializeOwned + Sized,
{
let response = self.dispatch_inner(method, path, body).await?;
Ok(response
.json()
.await
.map_err(pageserver_client::mgmt_api::Error::ReceiveBody)?)
}
/// Simple HTTP request wrapper for calling into storage controller
async fn dispatch_inner<RQ>(
&self,
method: reqwest::Method,
path: String,
body: Option<RQ>,
) -> anyhow::Result<Response>
where
RQ: Serialize + Sized,
{
// In the special case of the `storage_controller start` subcommand, we wish
// to use the API endpoint of the newly started storage controller in order
@@ -785,10 +816,31 @@ impl StorageController {
let response = builder.send().await?;
let response = response.error_from_body().await?;
Ok(response
.json()
.await
.map_err(pageserver_client::mgmt_api::Error::ReceiveBody)?)
Ok(response)
}
/// Register the safekeepers in the storage controller
#[instrument(skip(self))]
async fn register_safekeepers(&self) -> anyhow::Result<()> {
for sk in self.env.safekeepers.iter() {
let sk_id = sk.id;
let body = serde_json::json!({
"id": sk_id,
"created_at": "2023-10-25T09:11:25Z",
"updated_at": "2024-08-28T11:32:43Z",
"region_id": "aws-us-east-2",
"host": "127.0.0.1",
"port": sk.pg_port,
"http_port": sk.http_port,
"https_port": sk.https_port,
"version": 5957,
"availability_zone_id": format!("us-east-2b-{sk_id}"),
});
self.upsert_safekeeper(sk_id, body).await?;
self.safekeeper_scheduling_policy(sk_id, SkSchedulingPolicy::Active)
.await?;
}
Ok(())
}
/// Call into the attach_hook API, for use before handing out attachments to pageservers
@@ -816,6 +868,42 @@ impl StorageController {
Ok(response.generation)
}
#[instrument(skip(self))]
pub async fn upsert_safekeeper(
&self,
node_id: NodeId,
request: serde_json::Value,
) -> anyhow::Result<()> {
let resp = self
.dispatch_inner::<serde_json::Value>(
Method::POST,
format!("control/v1/safekeeper/{node_id}"),
Some(request),
)
.await?;
if !resp.status().is_success() {
anyhow::bail!(
"setting scheduling policy unsuccessful for safekeeper {node_id}: {}",
resp.status()
);
}
Ok(())
}
#[instrument(skip(self))]
pub async fn safekeeper_scheduling_policy(
&self,
node_id: NodeId,
scheduling_policy: SkSchedulingPolicy,
) -> anyhow::Result<()> {
self.dispatch::<SafekeeperSchedulingPolicyRequest, ()>(
Method::POST,
format!("control/v1/safekeeper/{node_id}/scheduling_policy"),
Some(SafekeeperSchedulingPolicyRequest { scheduling_policy }),
)
.await
}
#[instrument(skip(self))]
pub async fn inspect(
&self,

View File

@@ -65,7 +65,7 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
docker compose cp "${TMPDIR}/data" compute:/postgres/contrib/file_fdw/data
rm -rf "${TMPDIR}"
# Apply patches
docker compose exec -i neon-test-extensions bash -c "(cd /postgres && patch -p1)" <"../compute/patches/contrib_pg${pg_version}.patch"
docker compose exec -T neon-test-extensions bash -c "(cd /postgres && patch -p1)" <"../compute/patches/contrib_pg${pg_version}.patch"
# We are running tests now
rm -f testout.txt testout_contrib.txt
docker compose exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,postgis-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src,rag_jina_reranker_v1_tiny_en-src,rag_bge_small_en_v15-src \

View File

@@ -0,0 +1,99 @@
# PostgreSQL Extensions for Testing
This directory contains PostgreSQL extensions used primarily for:
1. Testing extension upgrades between different Compute versions
2. Running regression tests with regular users (mostly for cloud instances)
## Directory Structure
Each extension directory follows a standard structure:
- `extension-name-src/` - Directory containing test files for the extension
- `test-upgrade.sh` - Script for testing upgrade scenarios
- `regular-test.sh` - Script for testing with regular users
- Additional test files depending on the extension
## Available Extensions
This directory includes the following extensions:
- `hll-src` - HyperLogLog, a fixed-size data structure for approximating cardinality
- `hypopg-src` - Extension to create hypothetical indexes
- `ip4r-src` - IPv4/v6 and subnet data types
- `pg_cron-src` - Run periodic jobs in PostgreSQL
- `pg_graphql-src` - GraphQL support for PostgreSQL
- `pg_hint_plan-src` - Execution plan hints
- `pg_ivm-src` - Incremental view maintenance
- `pg_jsonschema-src` - JSON Schema validation
- `pg_repack-src` - Reorganize tables with minimal locks
- `pg_roaringbitmap-src` - Roaring bitmap implementation
- `pg_semver-src` - Semantic version data type
- `pg_session_jwt-src` - JWT authentication for PostgreSQL
- `pg_tiktoken-src` - OpenAI Tiktoken tokenizer
- `pg_uuidv7-src` - UUIDv7 implementation for PostgreSQL
- `pgjwt-src` - JWT tokens for PostgreSQL
- `pgrag-src` - Retrieval Augmented Generation for PostgreSQL
- `pgtap-src` - Unit testing framework for PostgreSQL
- `pgvector-src` - Vector similarity search
- `pgx_ulid-src` - ULID data type
- `plv8-src` - JavaScript language for PostgreSQL stored procedures
- `postgresql-unit-src` - SI units for PostgreSQL
- `prefix-src` - Prefix matching for strings
- `rag_bge_small_en_v15-src` - BGE embedding model for RAG
- `rag_jina_reranker_v1_tiny_en-src` - Jina reranker model for RAG
- `rum-src` - RUM access method for text search
## Usage
### Extension Upgrade Testing
The extensions in this directory are used by the `test-upgrade.sh` script to test upgrading extensions between different versions of Neon Compute nodes. The script:
1. Creates a database with extensions installed on an old Compute version
2. Creates timelines for each extension
3. Switches to a new Compute version and tests the upgrade process
4. Verifies extension functionality after upgrade
### Regular User Testing
For testing with regular users (particularly for cloud instances), each extension directory typically contains a `regular-test.sh` script that:
1. Drops the database if it exists
2. Creates a fresh test database
3. Installs the extension
4. Runs regression tests
A note about pg_regress: Since pg_regress attempts to set `lc_messages` for the database by default, which is forbidden for regular users, we create databases manually and use the `--use-existing` option to bypass this limitation.
### CI Workflows
Two main workflows use these extensions:
1. **Cloud Extensions Test** - Tests extensions on Neon cloud projects
2. **Force Test Upgrading of Extension** - Tests upgrading extensions between different Compute versions
These workflows are integrated into the build-and-test pipeline through shell scripts:
- `docker_compose_test.sh` - Tests extensions in a Docker Compose environment
- `test_extensions_upgrade.sh` - Tests extension upgrades between different Compute versions
## Adding New Extensions
To add a new extension for testing:
1. Create a directory named `extension-name-src` in this directory
2. Add at minimum:
- `regular-test.sh` for testing with regular users
- If `regular-test.sh` doesn't exist, the system will look for `neon-test.sh`
- If neither exists, it will try to run `make installcheck`
- `test-upgrade.sh` is only needed if you want to test upgrade scenarios
3. Update the list of extensions in the `test_extensions_upgrade.sh` script if needed for upgrade testing
### Patching Extension Sources
If you need to patch the extension sources:
1. Place the patch file in the extension's directory
2. Apply the patch in the appropriate script (`test-upgrade.sh`, `neon-test.sh`, `regular-test.sh`, or `Makefile`)
3. The patch will be applied during the testing process

View File

@@ -0,0 +1,7 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
dropdb --if-exists contrib_regression
createdb contrib_regression
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression setup add_agg agg_oob auto_sparse card_op cast_shape copy_binary cumulative_add_cardinality_correction cumulative_add_comprehensive_promotion cumulative_add_sparse_edge cumulative_add_sparse_random cumulative_add_sparse_step cumulative_union_comprehensive cumulative_union_explicit_explicit cumulative_union_explicit_promotion cumulative_union_probabilistic_probabilistic cumulative_union_sparse_full_representation cumulative_union_sparse_promotion cumulative_union_sparse_sparse disable_hashagg equal explicit_thresh hash hash_any meta_func murmur_bigint murmur_bytea nosparse notequal scalar_oob storedproc transaction typmod typmod_insert union_op

View File

@@ -0,0 +1,7 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
dropdb --if-exists contrib_regression
createdb contrib_regression
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --inputdir=test --dbname=contrib_regression hypopg hypo_brin hypo_index_part hypo_include hypo_hash hypo_hide_index

View File

@@ -0,0 +1,7 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
dropdb --if-exist contrib_regression
createdb contrib_regression
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression ip4r ip4r-softerr ip4r-v11

View File

@@ -0,0 +1,7 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
dropdb --if-exist contrib_regression
createdb contrib_regression
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression pg_cron-test

View File

@@ -0,0 +1,23 @@
#!/bin/bash
set -ex
cd "$(dirname "${0}")"
PGXS="$(dirname "$(pg_config --pgxs)" )"
REGRESS="${PGXS}/../test/regress/pg_regress"
TESTDIR="test"
TESTS=$(ls "${TESTDIR}/sql" | sort )
TESTS=${TESTS//\.sql/}
TESTS=${TESTS/empty_mutations/}
TESTS=${TESTS/function_return_row_is_selectable/}
TESTS=${TESTS/issue_300/}
TESTS=${TESTS/permissions_connection_column/}
TESTS=${TESTS/permissions_functions/}
TESTS=${TESTS/permissions_node_column/}
TESTS=${TESTS/permissions_table_level/}
TESTS=${TESTS/permissions_types/}
TESTS=${TESTS/row_level_security/}
TESTS=${TESTS/sqli_connection/}
dropdb --if-exist contrib_regression
createdb contrib_regression
psql -v ON_ERROR_STOP=1 -f test/fixtures.sql -d contrib_regression
${REGRESS} --use-existing --dbname=contrib_regression --inputdir=${TESTDIR} ${TESTS}

View File

@@ -0,0 +1,7 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
dropdb --if-exist contrib_regression
createdb contrib_regression
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --encoding=UTF8 --dbname=contrib_regression init base_plan pg_hint_plan ut-init ut-A ut-S ut-J ut-L ut-G ut-R ut-fdw ut-W ut-T ut-fini hints_anywhere plpgsql oldextversions

View File

@@ -0,0 +1,9 @@
#!/bin/sh
set -ex
dropdb --if-exist contrib_regression
createdb contrib_regression
cd "$(dirname ${0})"
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
patch -p1 <regular.patch
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression pg_ivm create_immv refresh_immv
patch -R -p1 <regular.patch

View File

@@ -0,0 +1,309 @@
diff --git a/expected/pg_ivm.out b/expected/pg_ivm.out
index e8798ee..4081680 100644
--- a/expected/pg_ivm.out
+++ b/expected/pg_ivm.out
@@ -1363,61 +1363,6 @@ SELECT * FROM mv ORDER BY i;
| 2 | 4 | 2 | 2 | 2
(1 row)
-ROLLBACK;
--- IMMV containing user defined type
-BEGIN;
-CREATE TYPE mytype;
-CREATE FUNCTION mytype_in(cstring)
- RETURNS mytype AS 'int4in'
- LANGUAGE INTERNAL STRICT IMMUTABLE;
-NOTICE: return type mytype is only a shell
-CREATE FUNCTION mytype_out(mytype)
- RETURNS cstring AS 'int4out'
- LANGUAGE INTERNAL STRICT IMMUTABLE;
-NOTICE: argument type mytype is only a shell
-CREATE TYPE mytype (
- LIKE = int4,
- INPUT = mytype_in,
- OUTPUT = mytype_out
-);
-CREATE FUNCTION mytype_eq(mytype, mytype)
- RETURNS bool AS 'int4eq'
- LANGUAGE INTERNAL STRICT IMMUTABLE;
-CREATE FUNCTION mytype_lt(mytype, mytype)
- RETURNS bool AS 'int4lt'
- LANGUAGE INTERNAL STRICT IMMUTABLE;
-CREATE FUNCTION mytype_cmp(mytype, mytype)
- RETURNS integer AS 'btint4cmp'
- LANGUAGE INTERNAL STRICT IMMUTABLE;
-CREATE OPERATOR = (
- leftarg = mytype, rightarg = mytype,
- procedure = mytype_eq);
-CREATE OPERATOR < (
- leftarg = mytype, rightarg = mytype,
- procedure = mytype_lt);
-CREATE OPERATOR CLASS mytype_ops
- DEFAULT FOR TYPE mytype USING btree AS
- OPERATOR 1 <,
- OPERATOR 3 = ,
- FUNCTION 1 mytype_cmp(mytype,mytype);
-CREATE TABLE t_mytype (x mytype);
-SELECT create_immv('mv_mytype',
- 'SELECT * FROM t_mytype');
-NOTICE: could not create an index on immv "mv_mytype" automatically
-DETAIL: This target list does not have all the primary key columns, or this view does not contain GROUP BY or DISTINCT clause.
-HINT: Create an index on the immv for efficient incremental maintenance.
- create_immv
--------------
- 0
-(1 row)
-
-INSERT INTO t_mytype VALUES ('1'::mytype);
-SELECT * FROM mv_mytype;
- x
----
- 1
-(1 row)
-
ROLLBACK;
-- outer join is not supported
SELECT create_immv('mv(a,b)',
@@ -1510,112 +1455,6 @@ SELECT create_immv('mv_ivm_only_values1', 'values(1)');
ERROR: VALUES is not supported on incrementally maintainable materialized view
SELECT create_immv('mv_ivm_only_values2', 'SELECT * FROM (values(1)) AS tmp');
ERROR: VALUES is not supported on incrementally maintainable materialized view
--- views containing base tables with Row Level Security
-DROP USER IF EXISTS ivm_admin;
-NOTICE: role "ivm_admin" does not exist, skipping
-DROP USER IF EXISTS ivm_user;
-NOTICE: role "ivm_user" does not exist, skipping
-CREATE USER ivm_admin;
-CREATE USER ivm_user;
---- create a table with RLS
-SET SESSION AUTHORIZATION ivm_admin;
-CREATE TABLE rls_tbl(id int, data text, owner name);
-INSERT INTO rls_tbl VALUES
- (1,'foo','ivm_user'),
- (2,'bar','postgres');
-CREATE TABLE num_tbl(id int, num text);
-INSERT INTO num_tbl VALUES
- (1,'one'),
- (2,'two'),
- (3,'three'),
- (4,'four'),
- (5,'five'),
- (6,'six');
---- Users can access only their own rows
-CREATE POLICY rls_tbl_policy ON rls_tbl FOR SELECT TO PUBLIC USING(owner = current_user);
-ALTER TABLE rls_tbl ENABLE ROW LEVEL SECURITY;
-GRANT ALL on rls_tbl TO PUBLIC;
-GRANT ALL on num_tbl TO PUBLIC;
---- create a view owned by ivm_user
-SET SESSION AUTHORIZATION ivm_user;
-SELECT create_immv('ivm_rls', 'SELECT * FROM rls_tbl');
-NOTICE: could not create an index on immv "ivm_rls" automatically
-DETAIL: This target list does not have all the primary key columns, or this view does not contain GROUP BY or DISTINCT clause.
-HINT: Create an index on the immv for efficient incremental maintenance.
- create_immv
--------------
- 1
-(1 row)
-
-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;
- id | data | owner
-----+------+----------
- 1 | foo | ivm_user
-(1 row)
-
-RESET SESSION AUTHORIZATION;
---- inserts rows owned by different users
-INSERT INTO rls_tbl VALUES
- (3,'baz','ivm_user'),
- (4,'qux','postgres');
-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;
- id | data | owner
-----+------+----------
- 1 | foo | ivm_user
- 3 | baz | ivm_user
-(2 rows)
-
---- combination of diffent kinds of commands
-WITH
- i AS (INSERT INTO rls_tbl VALUES(5,'quux','postgres'), (6,'corge','ivm_user')),
- u AS (UPDATE rls_tbl SET owner = 'postgres' WHERE id = 1),
- u2 AS (UPDATE rls_tbl SET owner = 'ivm_user' WHERE id = 2)
-SELECT;
---
-(1 row)
-
-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;
- id | data | owner
-----+-------+----------
- 2 | bar | ivm_user
- 3 | baz | ivm_user
- 6 | corge | ivm_user
-(3 rows)
-
----
-SET SESSION AUTHORIZATION ivm_user;
-SELECT create_immv('ivm_rls2', 'SELECT * FROM rls_tbl JOIN num_tbl USING(id)');
-NOTICE: could not create an index on immv "ivm_rls2" automatically
-DETAIL: This target list does not have all the primary key columns, or this view does not contain GROUP BY or DISTINCT clause.
-HINT: Create an index on the immv for efficient incremental maintenance.
- create_immv
--------------
- 3
-(1 row)
-
-RESET SESSION AUTHORIZATION;
-WITH
- x AS (UPDATE rls_tbl SET data = data || '_2' where id in (3,4)),
- y AS (UPDATE num_tbl SET num = num || '_2' where id in (3,4))
-SELECT;
---
-(1 row)
-
-SELECT * FROM ivm_rls2 ORDER BY 1,2,3;
- id | data | owner | num
-----+-------+----------+---------
- 2 | bar | ivm_user | two
- 3 | baz_2 | ivm_user | three_2
- 6 | corge | ivm_user | six
-(3 rows)
-
-DROP TABLE rls_tbl CASCADE;
-NOTICE: drop cascades to 2 other objects
-DETAIL: drop cascades to table ivm_rls
-drop cascades to table ivm_rls2
-DROP TABLE num_tbl CASCADE;
-DROP USER ivm_user;
-DROP USER ivm_admin;
-- automatic index creation
BEGIN;
CREATE TABLE base_a (i int primary key, j int);
diff --git a/sql/pg_ivm.sql b/sql/pg_ivm.sql
index d3c1a01..203213d 100644
--- a/sql/pg_ivm.sql
+++ b/sql/pg_ivm.sql
@@ -454,53 +454,6 @@ DELETE FROM base_t WHERE v = 5;
SELECT * FROM mv ORDER BY i;
ROLLBACK;
--- IMMV containing user defined type
-BEGIN;
-
-CREATE TYPE mytype;
-CREATE FUNCTION mytype_in(cstring)
- RETURNS mytype AS 'int4in'
- LANGUAGE INTERNAL STRICT IMMUTABLE;
-CREATE FUNCTION mytype_out(mytype)
- RETURNS cstring AS 'int4out'
- LANGUAGE INTERNAL STRICT IMMUTABLE;
-CREATE TYPE mytype (
- LIKE = int4,
- INPUT = mytype_in,
- OUTPUT = mytype_out
-);
-
-CREATE FUNCTION mytype_eq(mytype, mytype)
- RETURNS bool AS 'int4eq'
- LANGUAGE INTERNAL STRICT IMMUTABLE;
-CREATE FUNCTION mytype_lt(mytype, mytype)
- RETURNS bool AS 'int4lt'
- LANGUAGE INTERNAL STRICT IMMUTABLE;
-CREATE FUNCTION mytype_cmp(mytype, mytype)
- RETURNS integer AS 'btint4cmp'
- LANGUAGE INTERNAL STRICT IMMUTABLE;
-
-CREATE OPERATOR = (
- leftarg = mytype, rightarg = mytype,
- procedure = mytype_eq);
-CREATE OPERATOR < (
- leftarg = mytype, rightarg = mytype,
- procedure = mytype_lt);
-
-CREATE OPERATOR CLASS mytype_ops
- DEFAULT FOR TYPE mytype USING btree AS
- OPERATOR 1 <,
- OPERATOR 3 = ,
- FUNCTION 1 mytype_cmp(mytype,mytype);
-
-CREATE TABLE t_mytype (x mytype);
-SELECT create_immv('mv_mytype',
- 'SELECT * FROM t_mytype');
-INSERT INTO t_mytype VALUES ('1'::mytype);
-SELECT * FROM mv_mytype;
-
-ROLLBACK;
-
-- outer join is not supported
SELECT create_immv('mv(a,b)',
'SELECT a.i, b.i FROM mv_base_a a LEFT JOIN mv_base_b b ON a.i=b.i');
@@ -579,71 +532,6 @@ SELECT create_immv('mv_ivm31', 'SELECT sum(i)/sum(j) FROM mv_base_a');
SELECT create_immv('mv_ivm_only_values1', 'values(1)');
SELECT create_immv('mv_ivm_only_values2', 'SELECT * FROM (values(1)) AS tmp');
-
--- views containing base tables with Row Level Security
-DROP USER IF EXISTS ivm_admin;
-DROP USER IF EXISTS ivm_user;
-CREATE USER ivm_admin;
-CREATE USER ivm_user;
-
---- create a table with RLS
-SET SESSION AUTHORIZATION ivm_admin;
-CREATE TABLE rls_tbl(id int, data text, owner name);
-INSERT INTO rls_tbl VALUES
- (1,'foo','ivm_user'),
- (2,'bar','postgres');
-CREATE TABLE num_tbl(id int, num text);
-INSERT INTO num_tbl VALUES
- (1,'one'),
- (2,'two'),
- (3,'three'),
- (4,'four'),
- (5,'five'),
- (6,'six');
-
---- Users can access only their own rows
-CREATE POLICY rls_tbl_policy ON rls_tbl FOR SELECT TO PUBLIC USING(owner = current_user);
-ALTER TABLE rls_tbl ENABLE ROW LEVEL SECURITY;
-GRANT ALL on rls_tbl TO PUBLIC;
-GRANT ALL on num_tbl TO PUBLIC;
-
---- create a view owned by ivm_user
-SET SESSION AUTHORIZATION ivm_user;
-SELECT create_immv('ivm_rls', 'SELECT * FROM rls_tbl');
-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;
-RESET SESSION AUTHORIZATION;
-
---- inserts rows owned by different users
-INSERT INTO rls_tbl VALUES
- (3,'baz','ivm_user'),
- (4,'qux','postgres');
-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;
-
---- combination of diffent kinds of commands
-WITH
- i AS (INSERT INTO rls_tbl VALUES(5,'quux','postgres'), (6,'corge','ivm_user')),
- u AS (UPDATE rls_tbl SET owner = 'postgres' WHERE id = 1),
- u2 AS (UPDATE rls_tbl SET owner = 'ivm_user' WHERE id = 2)
-SELECT;
-SELECT id, data, owner FROM ivm_rls ORDER BY 1,2,3;
-
----
-SET SESSION AUTHORIZATION ivm_user;
-SELECT create_immv('ivm_rls2', 'SELECT * FROM rls_tbl JOIN num_tbl USING(id)');
-RESET SESSION AUTHORIZATION;
-
-WITH
- x AS (UPDATE rls_tbl SET data = data || '_2' where id in (3,4)),
- y AS (UPDATE num_tbl SET num = num || '_2' where id in (3,4))
-SELECT;
-SELECT * FROM ivm_rls2 ORDER BY 1,2,3;
-
-DROP TABLE rls_tbl CASCADE;
-DROP TABLE num_tbl CASCADE;
-
-DROP USER ivm_user;
-DROP USER ivm_admin;
-
-- automatic index creation
BEGIN;
CREATE TABLE base_a (i int primary key, j int);

View File

@@ -1,8 +1,13 @@
EXTENSION = pg_jsonschema
DATA = pg_jsonschema--1.0.sql
REGRESS = jsonschema_valid_api jsonschema_edge_cases
REGRESS_OPTS = --load-extension=pg_jsonschema
PG_CONFIG ?= pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
PG_REGRESS := $(dir $(PGXS))../../src/test/regress/pg_regress
.PHONY installcheck:
installcheck:
dropdb --if-exists contrib_regression
createdb contrib_regression
psql -d contrib_regression -c "CREATE EXTENSION $(EXTENSION)"
$(PG_REGRESS) --use-existing --dbname=contrib_regression $(REGRESS)

View File

@@ -0,0 +1,7 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
dropdb --if-exist contrib_regression
createdb contrib_regression
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression roaringbitmap

View File

@@ -0,0 +1,12 @@
#!/bin/bash
set -ex
# For v16 it's required to create a type which is impossible without superuser access
# do not run this test so far
if [[ "${PG_VERSION}" = v16 ]]; then
exit 0
fi
cd "$(dirname ${0})"
dropdb --if-exist contrib_regression
createdb contrib_regression
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --inputdir=test --dbname=contrib_regression base corpus

View File

@@ -6,4 +6,10 @@ export PGOPTIONS = -c pg_session_jwt.jwk={"crv":"Ed25519","kty":"OKP","x":"R_Abz
PG_CONFIG ?= pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
PG_REGRESS := $(dir $(PGXS))../../src/test/regress/pg_regress
.PHONY installcheck:
installcheck:
dropdb --if-exists contrib_regression
createdb contrib_regression
psql -d contrib_regression -c "CREATE EXTENSION $(EXTENSION)"
$(PG_REGRESS) --use-existing --dbname=contrib_regression $(REGRESS)

View File

@@ -12,6 +12,7 @@ ERROR: invalid JWT encoding
-- Test creating a session with an expired JWT
SELECT auth.jwt_session_init('eyJhbGciOiJFZERTQSJ9.eyJleHAiOjE3NDI1NjQ0MzIsImlhdCI6MTc0MjU2NDI1MiwianRpIjo0MjQyNDIsInN1YiI6InVzZXIxMjMifQ.A6FwKuaSduHB9O7Gz37g0uoD_U9qVS0JNtT7YABGVgB7HUD1AMFc9DeyhNntWBqncg8k5brv-hrNTuUh5JYMAw');
ERROR: Token used after it has expired
DETAIL: exp=1742564432
-- Test creating a session with a valid JWT
SELECT auth.jwt_session_init('eyJhbGciOiJFZERTQSJ9.eyJleHAiOjQ4OTYxNjQyNTIsImlhdCI6MTc0MjU2NDI1MiwianRpIjo0MzQzNDMsInN1YiI6InVzZXIxMjMifQ.2TXVgjb6JSUq6_adlvp-m_SdOxZSyGS30RS9TLB0xu2N83dMSs2NybwE1NMU8Fb0tcAZR_ET7M2rSxbTrphfCg');
jwt_session_init

View File

@@ -5,4 +5,6 @@ REGRESS = pg_tiktoken
installcheck: regression-test
regression-test:
$(PG_REGRESS) --inputdir=. --outputdir=. --dbname=contrib_regression $(REGRESS)
dropdb --if-exists contrib_regression
createdb contrib_regression
$(PG_REGRESS) --inputdir=. --outputdir=. --use-existing --dbname=contrib_regression $(REGRESS)

View File

@@ -0,0 +1,7 @@
#!/bin/sh
set -ex
cd "$(dirname "${0}")"
dropdb --if-exist contrib_regression
createdb contrib_regression
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --inputdir=test --dbname=contrib_regression 001_setup 002_uuid_generate_v7 003_uuid_v7_to_timestamptz 004_uuid_timestamptz_to_v7 005_uuid_v7_to_timestamp 006_uuid_timestamp_to_v7

View File

@@ -1,4 +1,6 @@
#!/bin/bash
set -ex
cd "$(dirname "${0}")"
pg_prove test.sql
dropdb --if-exists contrib_regression
createdb contrib_regression
pg_prove -d contrib_regression test.sql

View File

@@ -0,0 +1,8 @@
#!/bin/sh
set -ex
cd "$(dirname "${0}")"
dropdb --if-exist contrib_regression
createdb contrib_regression
psql -d contrib_regression -c "CREATE EXTENSION vector" -c "CREATE EXTENSION rag"
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --load-extension=vector --load-extension=rag --dbname=contrib_regression basic_functions text_processing api_keys chunking_functions document_processing embedding_api_functions voyageai_functions

View File

@@ -0,0 +1,10 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
make installcheck || true
dropdb --if-exist contrib_regression
createdb contrib_regression
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
sed -i '/hastap/d' test/build/run.sch
sed -Ei 's/\b(aretap|enumtap|ownership|privs|usergroup)\b//g' test/build/run.sch
${PG_REGRESS} --use-existing --dbname=contrib_regression --inputdir=./ --bindir='/usr/local/pgsql/bin' --inputdir=test --max-connections=879 --schedule test/schedule/main.sch --schedule test/build/run.sch

View File

@@ -0,0 +1,8 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
dropdb --if-exist contrib_regression
createdb contrib_regression
psql -d contrib_regression -c "CREATE EXTENSION vector"
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --inputdir=test --use-existing --dbname=contrib_regression bit btree cast copy halfvec hnsw_bit hnsw_halfvec hnsw_sparsevec hnsw_vector ivfflat_bit ivfflat_halfvec ivfflat_vector sparsevec vector_type

View File

@@ -4,13 +4,21 @@ PGFILEDESC = "pgx_ulid - ULID type for PostgreSQL"
PG_CONFIG ?= pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
PG_REGRESS = $(dir $(PGXS))/../../src/test/regress/pg_regress
PG_MAJOR_VERSION := $(word 2, $(subst ., , $(shell $(PG_CONFIG) --version)))
ifeq ($(shell test $(PG_MAJOR_VERSION) -lt 17; echo $$?),0)
REGRESS_OPTS = --load-extension=ulid
REGRESS = 00_ulid_generation 01_ulid_conversions 03_ulid_errors
EXTNAME = ulid
else
REGRESS_OPTS = --load-extension=pgx_ulid
REGRESS = 00_ulid_generation 01_ulid_conversions 02_ulid_conversions 03_ulid_errors
EXTNAME = pgx_ulid
endif
include $(PGXS)
.PHONY: installcheck
installcheck: regression-test
regression-test:
dropdb --if-exists contrib_regression
createdb contrib_regression
psql -d contrib_regression -c "CREATE EXTENSION $(EXTNAME)"
$(PG_REGRESS) --inputdir=. --outputdir=. --use-existing --dbname=contrib_regression $(REGRESS)

View File

@@ -0,0 +1,12 @@
#!/bin/bash
set -ex
cd "$(dirname ${0})"
dropdb --if-exist contrib_regression
createdb contrib_regression
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
REGRESS="$(make -n installcheck | awk '{print substr($0,index($0,"init-extension"));}')"
REGRESS="${REGRESS/startup_perms/}"
REGRESS="${REGRESS/startup /}"
REGRESS="${REGRESS/find_function_perms/}"
REGRESS="${REGRESS/guc/}"
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression ${REGRESS}

View File

@@ -0,0 +1,7 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
dropdb --if-exist contrib_regression
createdb contrib_regression
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression extension tables unit binary unicode prefix units time temperature functions language_functions round derived compare aggregate iec custom crosstab convert

View File

@@ -0,0 +1,7 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
dropdb --if-exist contrib_regression
createdb contrib_regression
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --use-existing --inputdir=./ --bindir='/usr/local/pgsql/bin' --dbname=contrib_regression create_extension prefix falcon explain queries

View File

@@ -3,8 +3,13 @@ MODULE_big = rag_bge_small_en_v15
OBJS = $(patsubst %.rs,%.o,$(wildcard src/*.rs))
REGRESS = basic_functions embedding_functions basic_functions_enhanced embedding_functions_enhanced
REGRESS_OPTS = --load-extension=vector --load-extension=rag_bge_small_en_v15
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
PG_REGRESS := $(dir $(PGXS))../../src/test/regress/pg_regress
.PHONY installcheck:
installcheck:
dropdb --if-exists contrib_regression
createdb contrib_regression
psql -d contrib_regression -c "CREATE EXTENSION vector" -c "CREATE EXTENSION rag_bge_small_en_v15"
$(PG_REGRESS) --use-existing --dbname=contrib_regression $(REGRESS)

View File

@@ -3,8 +3,13 @@ MODULE_big = rag_jina_reranker_v1_tiny_en
OBJS = $(patsubst %.rs,%.o,$(wildcard src/*.rs))
REGRESS = reranking_functions reranking_functions_enhanced
REGRESS_OPTS = --load-extension=vector --load-extension=rag_jina_reranker_v1_tiny_en
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
PG_REGRESS := $(dir $(PGXS))../../src/test/regress/pg_regress
.PHONY installcheck:
installcheck:
dropdb --if-exists contrib_regression
createdb contrib_regression
psql -d contrib_regression -c "CREATE EXTENSION vector" -c "CREATE EXTENSION rag_jina_reranker_v1_tiny_en"
$(PG_REGRESS) --use-existing --dbname=contrib_regression $(REGRESS)

View File

@@ -1,25 +1,27 @@
-- Reranking function tests
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon');
rerank_distance
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);
round
--------
0.8989
(1 row)
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat',
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);
array
-----------------
0.8989152
{0.8989,1.3018}
(1 row)
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
rerank_distance
-----------------------
{0.8989152,1.3018152}
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);
round
---------
-0.8989
(1 row)
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon');
rerank_score
--------------
-0.8989152
(1 row)
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
rerank_score
-------------------------
{-0.8989152,-1.3018152}
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat',
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) as x);
array
-------------------
{-0.8989,-1.3018}
(1 row)

View File

@@ -1,41 +1,41 @@
-- Reranking function tests - single passage
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon');
rerank_distance
-----------------
0.8989152
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);
round
--------
0.8989
(1 row)
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the tanks fired at the buildings');
rerank_distance
-----------------
1.3018152
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the tanks fired at the buildings')::NUMERIC,4);
round
--------
1.3018
(1 row)
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('query about cats', 'information about felines');
rerank_distance
-----------------
1.3133051
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('query about cats', 'information about felines')::NUMERIC,4);
round
--------
1.3133
(1 row)
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('', 'empty query test');
rerank_distance
-----------------
0.7075559
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('', 'empty query test')::NUMERIC,4);
round
--------
0.7076
(1 row)
-- Reranking function tests - array of passages
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat',
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
rerank_distance
-----------------------
{0.8989152,1.3018152}
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat',
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);
array
-----------------
{0.8989,1.3018}
(1 row)
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('query about programming',
ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases']);
rerank_distance
------------------------------------
{0.16591403,0.33475375,0.10132827}
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('query about programming',
ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases'])) AS x);
array
------------------------
{0.1659,0.3348,0.1013}
(1 row)
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('empty array test', ARRAY[]::text[]);
@@ -45,43 +45,43 @@ SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('empty array test', ARRAY[]:
(1 row)
-- Reranking score function tests - single passage
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon');
rerank_score
--------------
-0.8989152
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);
round
---------
-0.8989
(1 row)
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the tanks fired at the buildings');
rerank_score
--------------
-1.3018152
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the tanks fired at the buildings')::NUMERIC,4);
round
---------
-1.3018
(1 row)
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('query about cats', 'information about felines');
rerank_score
--------------
-1.3133051
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('query about cats', 'information about felines')::NUMERIC,4);
round
---------
-1.3133
(1 row)
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('', 'empty query test');
rerank_score
--------------
-0.7075559
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('', 'empty query test')::NUMERIC,4);
round
---------
-0.7076
(1 row)
-- Reranking score function tests - array of passages
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat',
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
rerank_score
-------------------------
{-0.8989152,-1.3018152}
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat',
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);
array
-------------------
{-0.8989,-1.3018}
(1 row)
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('query about programming',
ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases']);
rerank_score
---------------------------------------
{-0.16591403,-0.33475375,-0.10132827}
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('query about programming',
ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases'])) AS x);
array
---------------------------
{-0.1659,-0.3348,-0.1013}
(1 row)
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('empty array test', ARRAY[]::text[]);

View File

@@ -1,8 +1,10 @@
-- Reranking function tests
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon');
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat',
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon');
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat',
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) as x);

View File

@@ -1,35 +1,35 @@
-- Reranking function tests - single passage
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon');
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the tanks fired at the buildings');
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat', 'the tanks fired at the buildings')::NUMERIC,4);
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('query about cats', 'information about felines');
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('query about cats', 'information about felines')::NUMERIC,4);
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('', 'empty query test');
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_distance('', 'empty query test')::NUMERIC,4);
-- Reranking function tests - array of passages
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat',
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('the cat sat on the mat',
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('query about programming',
ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases']);
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_distance('query about programming',
ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases'])) AS x);
SELECT rag_jina_reranker_v1_tiny_en.rerank_distance('empty array test', ARRAY[]::text[]);
-- Reranking score function tests - single passage
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon');
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the baboon played with the balloon')::NUMERIC,4);
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the tanks fired at the buildings');
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat', 'the tanks fired at the buildings')::NUMERIC,4);
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('query about cats', 'information about felines');
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('query about cats', 'information about felines')::NUMERIC,4);
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('', 'empty query test');
SELECT ROUND(rag_jina_reranker_v1_tiny_en.rerank_score('', 'empty query test')::NUMERIC,4);
-- Reranking score function tests - array of passages
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat',
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings']);
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('the cat sat on the mat',
ARRAY['the baboon played with the balloon', 'the tanks fired at the buildings'])) AS x);
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('query about programming',
ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases']);
SELECT ARRAY(SELECT ROUND(x::NUMERIC,4) FROM unnest(rag_jina_reranker_v1_tiny_en.rerank_score('query about programming',
ARRAY['Python is a programming language', 'Java is also a programming language', 'SQL is used for databases'])) AS x);
SELECT rag_jina_reranker_v1_tiny_en.rerank_score('empty array test', ARRAY[]::text[]);

View File

@@ -0,0 +1,7 @@
#!/bin/sh
set -ex
cd "$(dirname ${0})"
dropdb --if-exist contrib_regression
createdb contrib_regression
PG_REGRESS=$(dirname "$(pg_config --pgxs)")/../test/regress/pg_regress
${PG_REGRESS} --inputdir=./ --bindir='/usr/local/pgsql/bin' --use-existing --dbname=contrib_regression rum rum_hash ruminv timestamp orderby orderby_hash altorder altorder_hash limits int2 int4 int8 float4 float8 money oid time timetz date interval macaddr inet cidr text varchar char bytea bit varbit numeric rum_weight expr array

View File

@@ -3,3 +3,5 @@ pg_distrib_dir='/usr/local/'
listen_pg_addr='0.0.0.0:6400'
listen_http_addr='0.0.0.0:9898'
remote_storage={ endpoint='http://minio:9000', bucket_name='neon', bucket_region='eu-north-1', prefix_in_bucket='/pageserver' }
control_plane_api='http://0.0.0.0:6666' # No storage controller in docker compose, specify a junk address
control_plane_emergency_mode=true

44
docker-compose/run-tests.sh Normal file → Executable file
View File

@@ -1,6 +1,42 @@
#!/bin/bash
set -x
if [[ -v BENCHMARK_CONNSTR ]]; then
uri_no_proto="${BENCHMARK_CONNSTR#postgres://}"
uri_no_proto="${uri_no_proto#postgresql://}"
if [[ $uri_no_proto == *\?* ]]; then
base="${uri_no_proto%%\?*}" # before '?'
else
base="$uri_no_proto"
fi
if [[ $base =~ ^([^:]+):([^@]+)@([^:/]+):?([0-9]*)/(.+)$ ]]; then
export PGUSER="${BASH_REMATCH[1]}"
export PGPASSWORD="${BASH_REMATCH[2]}"
export PGHOST="${BASH_REMATCH[3]}"
export PGPORT="${BASH_REMATCH[4]:-5432}"
export PGDATABASE="${BASH_REMATCH[5]}"
echo export PGUSER="${BASH_REMATCH[1]}"
echo export PGPASSWORD="${BASH_REMATCH[2]}"
echo export PGHOST="${BASH_REMATCH[3]}"
echo export PGPORT="${BASH_REMATCH[4]:-5432}"
echo export PGDATABASE="${BASH_REMATCH[5]}"
else
echo "Invalid PostgreSQL base URI"
exit 1
fi
fi
REGULAR_USER=false
while getopts r arg; do
case $arg in
r)
REGULAR_USER=true
shift $((OPTIND-1))
;;
*) :
;;
esac
done
extdir=${1}
cd "${extdir}" || exit 2
@@ -12,6 +48,11 @@ for d in ${LIST}; do
FAILED="${d} ${FAILED}"
break
fi
if [[ ${REGULAR_USER} = true ]] && [ -f "${d}"/regular-test.sh ]; then
"${d}/regular-test.sh" || FAILED="${d} ${FAILED}"
continue
fi
if [ -f "${d}/neon-test.sh" ]; then
"${d}/neon-test.sh" || FAILED="${d} ${FAILED}"
else
@@ -19,5 +60,8 @@ for d in ${LIST}; do
fi
done
[ -z "${FAILED}" ] && exit 0
for d in ${FAILED}; do
cat "$(find $d -name regression.diffs)"
done
echo "${FAILED}"
exit 1

View File

@@ -38,11 +38,6 @@ Currently, the following metrics are collected:
Amount of WAL produced , by a timeline, i.e. last_record_lsn
This is an absolute, per-timeline metric.
- `resident_size`
Size of all the layer files in the tenant's directory on disk on the pageserver.
This is an absolute, per-tenant metric.
- `remote_storage_size`
Size of the remote storage (S3) directory.

View File

@@ -343,7 +343,7 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
TimelineId::from_array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 7]);
const ENDPOINT_ID: &str = "ep-winter-frost-a662z3vg";
fn token() -> String {
let claims = endpoint_storage::Claims {
let claims = endpoint_storage::claims::EndpointStorageClaims {
tenant_id: TENANT_ID,
timeline_id: TIMELINE_ID,
endpoint_id: ENDPOINT_ID.into(),
@@ -489,16 +489,8 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH
}
fn delete_prefix_token(uri: &str) -> String {
use serde::Serialize;
let parts = uri.split("/").collect::<Vec<&str>>();
#[derive(Serialize)]
struct PrefixClaims {
tenant_id: TenantId,
timeline_id: Option<TimelineId>,
endpoint_id: Option<endpoint_storage::EndpointId>,
exp: u64,
}
let claims = PrefixClaims {
let claims = endpoint_storage::claims::DeletePrefixClaims {
tenant_id: parts.get(1).map(|c| c.parse().unwrap()).unwrap(),
timeline_id: parts.get(2).map(|c| c.parse().unwrap()),
endpoint_id: parts.get(3).map(ToString::to_string),

View File

@@ -0,0 +1,52 @@
use serde::{Deserialize, Serialize};
use std::fmt::Display;
use utils::id::{EndpointId, TenantId, TimelineId};
/// Claims to add, remove, or retrieve endpoint data. Used by compute_ctl
#[derive(Deserialize, Serialize, PartialEq)]
pub struct EndpointStorageClaims {
pub tenant_id: TenantId,
pub timeline_id: TimelineId,
pub endpoint_id: EndpointId,
pub exp: u64,
}
/// Claims to remove tenant, timeline, or endpoint data. Used by control plane
#[derive(Deserialize, Serialize, PartialEq)]
pub struct DeletePrefixClaims {
pub tenant_id: TenantId,
/// None when tenant is deleted (endpoint_id is also None in this case)
pub timeline_id: Option<TimelineId>,
/// None when timeline is deleted
pub endpoint_id: Option<EndpointId>,
pub exp: u64,
}
impl Display for EndpointStorageClaims {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"EndpointClaims(tenant_id={} timeline_id={} endpoint_id={} exp={})",
self.tenant_id, self.timeline_id, self.endpoint_id, self.exp
)
}
}
impl Display for DeletePrefixClaims {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"DeletePrefixClaims(tenant_id={} timeline_id={} endpoint_id={}, exp={})",
self.tenant_id,
self.timeline_id
.as_ref()
.map(ToString::to_string)
.unwrap_or("".to_string()),
self.endpoint_id
.as_ref()
.map(ToString::to_string)
.unwrap_or("".to_string()),
self.exp
)
}
}

View File

@@ -1,3 +1,5 @@
pub mod claims;
use crate::claims::{DeletePrefixClaims, EndpointStorageClaims};
use anyhow::Result;
use axum::extract::{FromRequestParts, Path};
use axum::response::{IntoResponse, Response};
@@ -13,7 +15,7 @@ use std::result::Result as StdResult;
use std::sync::Arc;
use tokio_util::sync::CancellationToken;
use tracing::{debug, error};
use utils::id::{TenantId, TimelineId};
use utils::id::{EndpointId, TenantId, TimelineId};
// simplified version of utils::auth::JwtAuth
pub struct JwtAuth {
@@ -79,26 +81,6 @@ pub struct Storage {
pub max_upload_file_limit: usize,
}
pub type EndpointId = String; // If needed, reuse small string from proxy/src/types.rc
#[derive(Deserialize, Serialize, PartialEq)]
pub struct Claims {
pub tenant_id: TenantId,
pub timeline_id: TimelineId,
pub endpoint_id: EndpointId,
pub exp: u64,
}
impl Display for Claims {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"Claims(tenant_id {} timeline_id {} endpoint_id {} exp {})",
self.tenant_id, self.timeline_id, self.endpoint_id, self.exp
)
}
}
#[derive(Deserialize, Serialize)]
struct KeyRequest {
tenant_id: TenantId,
@@ -107,6 +89,13 @@ struct KeyRequest {
path: String,
}
#[derive(Deserialize, Serialize, PartialEq)]
struct PrefixKeyRequest {
tenant_id: TenantId,
timeline_id: Option<TimelineId>,
endpoint_id: Option<EndpointId>,
}
#[derive(Debug, PartialEq)]
pub struct S3Path {
pub path: RemotePath,
@@ -165,7 +154,7 @@ impl FromRequestParts<Arc<Storage>> for S3Path {
.extract::<TypedHeader<Authorization<Bearer>>>()
.await
.map_err(|e| bad_request(e, "invalid token"))?;
let claims: Claims = state
let claims: EndpointStorageClaims = state
.auth
.decode(bearer.token())
.map_err(|e| bad_request(e, "decoding token"))?;
@@ -178,7 +167,7 @@ impl FromRequestParts<Arc<Storage>> for S3Path {
path.endpoint_id.clone()
};
let route = Claims {
let route = EndpointStorageClaims {
tenant_id: path.tenant_id,
timeline_id: path.timeline_id,
endpoint_id,
@@ -193,38 +182,13 @@ impl FromRequestParts<Arc<Storage>> for S3Path {
}
}
#[derive(Deserialize, Serialize, PartialEq)]
pub struct PrefixKeyPath {
pub tenant_id: TenantId,
pub timeline_id: Option<TimelineId>,
pub endpoint_id: Option<EndpointId>,
}
impl Display for PrefixKeyPath {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"PrefixKeyPath(tenant_id {} timeline_id {} endpoint_id {})",
self.tenant_id,
self.timeline_id
.as_ref()
.map(ToString::to_string)
.unwrap_or("".to_string()),
self.endpoint_id
.as_ref()
.map(ToString::to_string)
.unwrap_or("".to_string())
)
}
}
#[derive(Debug, PartialEq)]
pub struct PrefixS3Path {
pub path: RemotePath,
}
impl From<&PrefixKeyPath> for PrefixS3Path {
fn from(path: &PrefixKeyPath) -> Self {
impl From<&DeletePrefixClaims> for PrefixS3Path {
fn from(path: &DeletePrefixClaims) -> Self {
let timeline_id = path
.timeline_id
.as_ref()
@@ -250,21 +214,27 @@ impl FromRequestParts<Arc<Storage>> for PrefixS3Path {
state: &Arc<Storage>,
) -> Result<Self, Self::Rejection> {
let Path(path) = parts
.extract::<Path<PrefixKeyPath>>()
.extract::<Path<PrefixKeyRequest>>()
.await
.map_err(|e| bad_request(e, "invalid route"))?;
let TypedHeader(Authorization(bearer)) = parts
.extract::<TypedHeader<Authorization<Bearer>>>()
.await
.map_err(|e| bad_request(e, "invalid token"))?;
let claims: PrefixKeyPath = state
let claims: DeletePrefixClaims = state
.auth
.decode(bearer.token())
.map_err(|e| bad_request(e, "invalid token"))?;
if path != claims {
return Err(unauthorized(path, claims));
let route = DeletePrefixClaims {
tenant_id: path.tenant_id,
timeline_id: path.timeline_id,
endpoint_id: path.endpoint_id,
exp: claims.exp,
};
if route != claims {
return Err(unauthorized(route, claims));
}
Ok((&path).into())
Ok((&route).into())
}
}
@@ -297,7 +267,7 @@ mod tests {
#[test]
fn s3_path() {
let auth = Claims {
let auth = EndpointStorageClaims {
tenant_id: TENANT_ID,
timeline_id: TIMELINE_ID,
endpoint_id: ENDPOINT_ID.into(),
@@ -327,10 +297,11 @@ mod tests {
#[test]
fn prefix_s3_path() {
let mut path = PrefixKeyPath {
let mut path = DeletePrefixClaims {
tenant_id: TENANT_ID,
timeline_id: None,
endpoint_id: None,
exp: 0,
};
let prefix_path = |s: String| RemotePath::from_string(&s).unwrap();
assert_eq!(

Some files were not shown because too many files have changed in this diff Show More