diff --git a/src/core/segment.rs b/src/core/segment.rs index 6d6d07db5..16cb214d2 100644 --- a/src/core/segment.rs +++ b/src/core/segment.rs @@ -125,11 +125,11 @@ mod tests { { let _file_protection = segment.protect_from_delete(SegmentComponent::POSTINGS); assert!(directory.exists(&*path)); - directory.garbage_collect(living_files.clone()); + directory.garbage_collect(|| { living_files.clone() }); assert!(directory.exists(&*path)); } - directory.garbage_collect(living_files); + directory.garbage_collect(|| { living_files }); assert!(!directory.exists(&*path)); } diff --git a/src/directory/managed_directory.rs b/src/directory/managed_directory.rs index 51e6fd61b..5f4e7e773 100644 --- a/src/directory/managed_directory.rs +++ b/src/directory/managed_directory.rs @@ -116,7 +116,7 @@ impl ManagedDirectory { /// If a file cannot be deleted (for permission reasons for instance) /// an error is simply logged, and the file remains in the list of managed /// files. - pub fn garbage_collect(&mut self, living_files: HashSet) { + pub fn garbage_collect HashSet >(&mut self, get_living_files: L) { info!("Garbage collect"); let mut files_to_delete = vec![]; { @@ -125,6 +125,17 @@ impl ManagedDirectory { self.meta_informations .read() .expect("Managed directory rlock poisoned in garbage collect."); + + // It is crucial to get the living files after acquiring the + // read lock of meta informations. That way, we + // avoid the following scenario. + // + // 1) we get the list of living files. + // 2) someone creates a new file. + // 3) we start garbage collection and remove this file + // even though it is a living file. + let living_files = get_living_files(); + for managed_path in &meta_informations_rlock.managed_paths { if !living_files.contains(managed_path) { files_to_delete.push(managed_path.clone()); @@ -316,7 +327,7 @@ mod tests { { let living_files: HashSet = [TEST_PATH1.to_owned()].into_iter().cloned().collect(); - managed_directory.garbage_collect(living_files); + managed_directory.garbage_collect(|| { living_files }); } { assert!(managed_directory.exists(*TEST_PATH1)); @@ -332,7 +343,7 @@ mod tests { } { let living_files: HashSet = HashSet::new(); - managed_directory.garbage_collect(living_files); + managed_directory.garbage_collect(|| { living_files }); } { assert!(!managed_directory.exists(*TEST_PATH1)); @@ -355,7 +366,7 @@ mod tests { assert!(managed_directory.exists(*TEST_PATH1)); let _mmap_read = managed_directory.open_read(*TEST_PATH1).unwrap(); - managed_directory.garbage_collect(living_files.clone()); + managed_directory.garbage_collect(|| { living_files.clone() }); if cfg!(target_os = "windows") { // On Windows, gc should try and fail the file as it is mmapped. assert!(managed_directory.exists(*TEST_PATH1)); @@ -363,7 +374,7 @@ mod tests { drop(_mmap_read); // The file should still be in the list of managed file and // eventually be deleted once mmap is released. - managed_directory.garbage_collect(living_files); + managed_directory.garbage_collect(|| { living_files }); assert!(!managed_directory.exists(*TEST_PATH1)); } else { assert!(!managed_directory.exists(*TEST_PATH1)); @@ -387,11 +398,11 @@ mod tests { { let _file_protection = managed_directory.protect_file_from_delete(*TEST_PATH1); - managed_directory.garbage_collect(living_files.clone()); + managed_directory.garbage_collect(|| { living_files.clone() }); assert!(managed_directory.exists(*TEST_PATH1)); } - managed_directory.garbage_collect(living_files.clone()); + managed_directory.garbage_collect(|| { living_files.clone() }); assert!(!managed_directory.exists(*TEST_PATH1)); diff --git a/src/indexer/segment_updater.rs b/src/indexer/segment_updater.rs index 76bca46ec..feeb33d03 100644 --- a/src/indexer/segment_updater.rs +++ b/src/indexer/segment_updater.rs @@ -266,9 +266,10 @@ impl SegmentUpdater { fn garbage_collect_files_exec(&self) { info!("Running garbage collection"); - let living_files = self.0.segment_manager.list_files(); let mut index = self.0.index.clone(); - index.directory_mut().garbage_collect(living_files); + index.directory_mut().garbage_collect(|| { + self.0.segment_manager.list_files() + }); } pub fn commit(&self, opstamp: u64) -> Result<()> {