From 494e92ca59744b8e721bb68d5cdffb5bd189867b Mon Sep 17 00:00:00 2001 From: PSeitz Date: Mon, 22 Aug 2022 01:52:12 -0700 Subject: [PATCH] fix issue in composite (#1456) The file offsets were recorded incorrectly in some cases, e.g. when the recording looked like this [(Field 1, Index 0, Offset 0), (Field 1, Index 1, Offset 14), (Field 0, Index 0, Offset 14)]. The last file is offset 14 to end of file for field 0. But the data was converted to a vec and sorted, which changes the last file to Field 1. --- src/directory/composite_file.rs | 78 +++++++++++++++++++++++++++------ 1 file changed, 65 insertions(+), 13 deletions(-) diff --git a/src/directory/composite_file.rs b/src/directory/composite_file.rs index 7743620e1..9fc31e649 100644 --- a/src/directory/composite_file.rs +++ b/src/directory/composite_file.rs @@ -38,7 +38,7 @@ impl BinarySerializable for FileAddr { /// A `CompositeWrite` is used to write a `CompositeFile`. pub struct CompositeWrite { write: CountingWriter, - offsets: HashMap, + offsets: Vec<(FileAddr, u64)>, } impl CompositeWrite { @@ -47,7 +47,7 @@ impl CompositeWrite { pub fn wrap(w: W) -> CompositeWrite { CompositeWrite { write: CountingWriter::wrap(w), - offsets: HashMap::new(), + offsets: Vec::new(), } } @@ -60,8 +60,8 @@ impl CompositeWrite { pub fn for_field_with_idx(&mut self, field: Field, idx: usize) -> &mut CountingWriter { let offset = self.write.written_bytes(); let file_addr = FileAddr::new(field, idx); - assert!(!self.offsets.contains_key(&file_addr)); - self.offsets.insert(file_addr, offset); + assert!(!self.offsets.iter().any(|el| el.0 == file_addr)); + self.offsets.push((file_addr, offset)); &mut self.write } @@ -73,16 +73,8 @@ impl CompositeWrite { let footer_offset = self.write.written_bytes(); VInt(self.offsets.len() as u64).serialize(&mut self.write)?; - let mut offset_fields: Vec<_> = self - .offsets - .iter() - .map(|(file_addr, offset)| (*offset, *file_addr)) - .collect(); - - offset_fields.sort(); - let mut prev_offset = 0; - for (offset, file_addr) in offset_fields { + for (file_addr, offset) in self.offsets { VInt((offset - prev_offset) as u64).serialize(&mut self.write)?; file_addr.serialize(&mut self.write)?; prev_offset = offset; @@ -106,6 +98,14 @@ pub struct CompositeFile { offsets_index: HashMap>, } +impl std::fmt::Debug for CompositeFile { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CompositeFile") + .field("offsets_index", &self.offsets_index) + .finish() + } +} + impl CompositeFile { /// Opens a composite file stored in a given /// `FileSlice`. @@ -233,4 +233,56 @@ mod test { } Ok(()) } + + #[test] + fn test_composite_file_bug() -> crate::Result<()> { + let path = Path::new("test_path"); + let directory = RamDirectory::create(); + { + let w = directory.open_write(path).unwrap(); + let mut composite_write = CompositeWrite::wrap(w); + let mut write = composite_write.for_field_with_idx(Field::from_field_id(1u32), 0); + VInt(32431123u64).serialize(&mut write)?; + write.flush()?; + let write = composite_write.for_field_with_idx(Field::from_field_id(1u32), 1); + write.flush()?; + + let mut write = composite_write.for_field_with_idx(Field::from_field_id(0u32), 0); + VInt(1_000_000).serialize(&mut write)?; + write.flush()?; + + composite_write.close()?; + } + { + let r = directory.open_read(path)?; + let composite_file = CompositeFile::open(&r)?; + { + let file = composite_file + .open_read_with_idx(Field::from_field_id(1u32), 0) + .unwrap() + .read_bytes()?; + let mut file0_buf = file.as_slice(); + let payload_0 = VInt::deserialize(&mut file0_buf)?.0; + assert_eq!(file0_buf.len(), 0); + assert_eq!(payload_0, 32431123u64); + } + { + let file = composite_file + .open_read_with_idx(Field::from_field_id(1u32), 1) + .unwrap() + .read_bytes()?; + let file = file.as_slice(); + assert_eq!(file.len(), 0); + } + { + let file = composite_file + .open_read_with_idx(Field::from_field_id(0u32), 0) + .unwrap() + .read_bytes()?; + let file = file.as_slice(); + assert_eq!(file.len(), 3); + } + } + Ok(()) + } }