fix bug with new sstable index format (#1953)

This commit is contained in:
trinity-1686a
2023-03-22 10:22:36 +01:00
committed by GitHub
parent 1a35f6573d
commit 482b4155e8
6 changed files with 34 additions and 17 deletions

View File

@@ -17,7 +17,7 @@ fn test_dataframe_writer_str() {
assert_eq!(columnar.num_columns(), 1);
let cols: Vec<DynamicColumnHandle> = columnar.read_columns("my_string").unwrap();
assert_eq!(cols.len(), 1);
assert_eq!(cols[0].num_bytes(), 88);
assert_eq!(cols[0].num_bytes(), 89);
}
#[test]
@@ -31,7 +31,7 @@ fn test_dataframe_writer_bytes() {
assert_eq!(columnar.num_columns(), 1);
let cols: Vec<DynamicColumnHandle> = columnar.read_columns("my_string").unwrap();
assert_eq!(cols.len(), 1);
assert_eq!(cols[0].num_bytes(), 88);
assert_eq!(cols[0].num_bytes(), 89);
}
#[test]

View File

@@ -130,7 +130,7 @@ mod tests {
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 94);
assert_eq!(file.len(), 95);
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
let column = fast_field_readers
.u64("field")
@@ -180,7 +180,7 @@ mod tests {
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 122);
assert_eq!(file.len(), 123);
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
let col = fast_field_readers
.u64("field")
@@ -213,7 +213,7 @@ mod tests {
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 95);
assert_eq!(file.len(), 96);
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
let fast_field_reader = fast_field_readers
.u64("field")
@@ -245,7 +245,7 @@ mod tests {
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 4490);
assert_eq!(file.len(), 4491);
{
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
let col = fast_field_readers
@@ -278,7 +278,7 @@ mod tests {
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 266);
assert_eq!(file.len(), 267);
{
let fast_field_readers = FastFieldReaders::open(file, schema).unwrap();
@@ -772,7 +772,7 @@ mod tests {
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 103);
assert_eq!(file.len(), 104);
let fast_field_readers = FastFieldReaders::open(file, schema).unwrap();
let bool_col = fast_field_readers.bool("field_bool").unwrap();
assert_eq!(bool_col.first(0), Some(true));
@@ -804,7 +804,7 @@ mod tests {
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 115);
assert_eq!(file.len(), 116);
let readers = FastFieldReaders::open(file, schema).unwrap();
let bool_col = readers.bool("field_bool").unwrap();
for i in 0..25 {
@@ -829,7 +829,7 @@ mod tests {
write.terminate().unwrap();
}
let file = directory.open_read(path).unwrap();
assert_eq!(file.len(), 105);
assert_eq!(file.len(), 106);
let fastfield_readers = FastFieldReaders::open(file, schema).unwrap();
let col = fastfield_readers.bool("field_bool").unwrap();
assert_eq!(col.first(0), None);

View File

@@ -100,13 +100,14 @@ Note: there is no ambiguity between both representation as Add is always guarant
### IndexValue
```
+------------+-------+-------+-----+
| EntryCount | Entry | Entry | ... |
+------------+-------+-------+-----+
|---( # of entries)---|
+------------+----------+-------+-------+-----+
| EntryCount | StartPos | Entry | Entry | ... |
+------------+----------+-------+-------+-----+
|---( # of entries)---|
```
- EntryCount(VInt): number of entries
- StartPos(VInt): the start position (byte offset) of the first (data) block referenced by this (index) block
- Entry (IndexEntry)
### Entry

View File

@@ -397,8 +397,9 @@ mod test {
// end of block
0u8, 0u8, 0u8, 0u8, // no more blocks
// index
6u8, 0u8, 0u8, 0u8, // block len
7u8, 0u8, 0u8, 0u8, // block len
1, // num blocks
0, // offset
11, // len of 1st block
0, // first ord of 1st block
32, 17, 20, // keep 0 push 2 | 17 20

View File

@@ -151,6 +151,7 @@ impl SSTableIndexBuilder {
sstable_writer.write_suffix(keep_len, &block.last_key_or_greater[keep_len..]);
sstable_writer.write_value(&block.block_addr);
sstable_writer.flush_block_if_required()?;
previous_key.clear();
previous_key.extend_from_slice(&block.last_key_or_greater);
@@ -184,7 +185,7 @@ mod tests {
#[test]
fn test_sstable_index() {
let mut sstable_builder = SSTableIndexBuilder::default();
sstable_builder.add_block(b"aaa", 0..20, 0u64);
sstable_builder.add_block(b"aaa", 10..20, 0u64);
sstable_builder.add_block(b"bbbbbbb", 20..30, 5u64);
sstable_builder.add_block(b"ccc", 30..40, 10u64);
sstable_builder.add_block(b"dddd", 40..50, 15u64);

View File

@@ -21,7 +21,7 @@ impl ValueReader for IndexValueReader {
let num_vals = deserialize_vint_u64(&mut data) as usize;
self.vals.clear();
let mut first_ordinal = 0u64;
let mut prev_start = 0usize;
let mut prev_start = deserialize_vint_u64(&mut data) as usize;
for _ in 0..num_vals {
let len = deserialize_vint_u64(&mut data);
let delta_ordinal = deserialize_vint_u64(&mut data);
@@ -53,6 +53,14 @@ impl ValueWriter for IndexValueWriter {
fn serialize_block(&self, output: &mut Vec<u8>) {
let mut prev_ord = 0u64;
vint::serialize_into_vec(self.vals.len() as u64, output);
let start_pos = if let Some(block_addr) = self.vals.first() {
block_addr.byte_range.start as u64
} else {
0
};
vint::serialize_into_vec(start_pos, output);
// TODO use array_windows when it gets stabilized
for elem in self.vals.windows(2) {
let [current, next] = elem else {
@@ -114,5 +122,11 @@ mod tests {
first_ordinal: 10,
},
]);
crate::value::tests::test_value_reader_writer::<_, IndexValueReader, IndexValueWriter>(&[
BlockAddr {
byte_range: 5..10,
first_ordinal: 2,
},
]);
}
}