mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-21 23:20:40 +00:00
Fix race condition leading to panic in remote storage sync thread.
The SyncQueue consisted of a tokio mpsc channel, and an atomic counter to keep track of how many items there are in the channel. Updating the atomic counter was racy, and sometimes the consumer would decrement the counter before the producer had incremented it, leading to integer wraparound to usize::MAX. Calling Vec::with_capacity(usize::MAX) leads to a panic. To fix, replace the channel with a VecDeque protected by a Mutex, and a condition variable for signaling. Now that the queue is now protected by standard blocking Mutex and Condvar, refactor the functions touching it to be sync, not async. A theoretical downside of this is that the calls to push items to the queue and the storage sync thread that drains the queue might now need to wait, if another thread is busy manipulating the queue. I believe that's OK; the lock isn't held for very long, and these operations are made in background threads, not in the hot GetPage@LSN path, so they're not very latency-sensitive. Fixes #1719. Also add a test case.
This commit is contained in:
@@ -119,7 +119,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn delete_timeline_negative() -> anyhow::Result<()> {
|
||||
let harness = RepoHarness::create("delete_timeline_negative")?;
|
||||
let (sync_queue, _) = SyncQueue::new(NonZeroUsize::new(100).unwrap());
|
||||
let sync_queue = SyncQueue::new(NonZeroUsize::new(100).unwrap());
|
||||
let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID);
|
||||
let storage = LocalFs::new(
|
||||
tempdir()?.path().to_path_buf(),
|
||||
@@ -152,7 +152,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn delete_timeline() -> anyhow::Result<()> {
|
||||
let harness = RepoHarness::create("delete_timeline")?;
|
||||
let (sync_queue, _) = SyncQueue::new(NonZeroUsize::new(100).unwrap());
|
||||
let sync_queue = SyncQueue::new(NonZeroUsize::new(100).unwrap());
|
||||
|
||||
let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID);
|
||||
let layer_files = ["a", "b", "c", "d"];
|
||||
|
||||
@@ -286,7 +286,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn download_timeline() -> anyhow::Result<()> {
|
||||
let harness = RepoHarness::create("download_timeline")?;
|
||||
let (sync_queue, _) = SyncQueue::new(NonZeroUsize::new(100).unwrap());
|
||||
let sync_queue = SyncQueue::new(NonZeroUsize::new(100).unwrap());
|
||||
|
||||
let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID);
|
||||
let layer_files = ["a", "b", "layer_to_skip", "layer_to_keep_locally"];
|
||||
@@ -385,7 +385,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn download_timeline_negatives() -> anyhow::Result<()> {
|
||||
let harness = RepoHarness::create("download_timeline_negatives")?;
|
||||
let (sync_queue, _) = SyncQueue::new(NonZeroUsize::new(100).unwrap());
|
||||
let sync_queue = SyncQueue::new(NonZeroUsize::new(100).unwrap());
|
||||
let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID);
|
||||
let storage = LocalFs::new(tempdir()?.path().to_owned(), harness.conf.workdir.clone())?;
|
||||
|
||||
|
||||
@@ -240,7 +240,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn regular_layer_upload() -> anyhow::Result<()> {
|
||||
let harness = RepoHarness::create("regular_layer_upload")?;
|
||||
let (sync_queue, _) = SyncQueue::new(NonZeroUsize::new(100).unwrap());
|
||||
let sync_queue = SyncQueue::new(NonZeroUsize::new(100).unwrap());
|
||||
let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID);
|
||||
|
||||
let layer_files = ["a", "b"];
|
||||
@@ -327,7 +327,7 @@ mod tests {
|
||||
#[tokio::test]
|
||||
async fn layer_upload_after_local_fs_update() -> anyhow::Result<()> {
|
||||
let harness = RepoHarness::create("layer_upload_after_local_fs_update")?;
|
||||
let (sync_queue, _) = SyncQueue::new(NonZeroUsize::new(100).unwrap());
|
||||
let sync_queue = SyncQueue::new(NonZeroUsize::new(100).unwrap());
|
||||
let sync_id = ZTenantTimelineId::new(harness.tenant_id, TIMELINE_ID);
|
||||
|
||||
let layer_files = ["a1", "b1"];
|
||||
|
||||
Reference in New Issue
Block a user