mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-05 01:50:42 +00:00
fix bug with new sstable index format (#1953)
This commit is contained in:
@@ -17,7 +17,7 @@ fn test_dataframe_writer_str() {
|
||||
assert_eq!(columnar.num_columns(), 1);
|
||||
let cols: Vec<DynamicColumnHandle> = columnar.read_columns("my_string").unwrap();
|
||||
assert_eq!(cols.len(), 1);
|
||||
assert_eq!(cols[0].num_bytes(), 88);
|
||||
assert_eq!(cols[0].num_bytes(), 89);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -31,7 +31,7 @@ fn test_dataframe_writer_bytes() {
|
||||
assert_eq!(columnar.num_columns(), 1);
|
||||
let cols: Vec<DynamicColumnHandle> = columnar.read_columns("my_string").unwrap();
|
||||
assert_eq!(cols.len(), 1);
|
||||
assert_eq!(cols[0].num_bytes(), 88);
|
||||
assert_eq!(cols[0].num_bytes(), 89);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -130,7 +130,7 @@ mod tests {
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
|
||||
assert_eq!(file.len(), 94);
|
||||
assert_eq!(file.len(), 95);
|
||||
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
|
||||
let column = fast_field_readers
|
||||
.u64("field")
|
||||
@@ -180,7 +180,7 @@ mod tests {
|
||||
write.terminate().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 122);
|
||||
assert_eq!(file.len(), 123);
|
||||
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
|
||||
let col = fast_field_readers
|
||||
.u64("field")
|
||||
@@ -213,7 +213,7 @@ mod tests {
|
||||
write.terminate().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 95);
|
||||
assert_eq!(file.len(), 96);
|
||||
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
|
||||
let fast_field_reader = fast_field_readers
|
||||
.u64("field")
|
||||
@@ -245,7 +245,7 @@ mod tests {
|
||||
write.terminate().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 4490);
|
||||
assert_eq!(file.len(), 4491);
|
||||
{
|
||||
let fast_field_readers = FastFieldReaders::open(file, SCHEMA.clone()).unwrap();
|
||||
let col = fast_field_readers
|
||||
@@ -278,7 +278,7 @@ mod tests {
|
||||
write.terminate().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 266);
|
||||
assert_eq!(file.len(), 267);
|
||||
|
||||
{
|
||||
let fast_field_readers = FastFieldReaders::open(file, schema).unwrap();
|
||||
@@ -772,7 +772,7 @@ mod tests {
|
||||
write.terminate().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 103);
|
||||
assert_eq!(file.len(), 104);
|
||||
let fast_field_readers = FastFieldReaders::open(file, schema).unwrap();
|
||||
let bool_col = fast_field_readers.bool("field_bool").unwrap();
|
||||
assert_eq!(bool_col.first(0), Some(true));
|
||||
@@ -804,7 +804,7 @@ mod tests {
|
||||
write.terminate().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 115);
|
||||
assert_eq!(file.len(), 116);
|
||||
let readers = FastFieldReaders::open(file, schema).unwrap();
|
||||
let bool_col = readers.bool("field_bool").unwrap();
|
||||
for i in 0..25 {
|
||||
@@ -829,7 +829,7 @@ mod tests {
|
||||
write.terminate().unwrap();
|
||||
}
|
||||
let file = directory.open_read(path).unwrap();
|
||||
assert_eq!(file.len(), 105);
|
||||
assert_eq!(file.len(), 106);
|
||||
let fastfield_readers = FastFieldReaders::open(file, schema).unwrap();
|
||||
let col = fastfield_readers.bool("field_bool").unwrap();
|
||||
assert_eq!(col.first(0), None);
|
||||
|
||||
@@ -100,13 +100,14 @@ Note: there is no ambiguity between both representation as Add is always guarant
|
||||
|
||||
### IndexValue
|
||||
```
|
||||
+------------+-------+-------+-----+
|
||||
| EntryCount | Entry | Entry | ... |
|
||||
+------------+-------+-------+-----+
|
||||
|---( # of entries)---|
|
||||
+------------+----------+-------+-------+-----+
|
||||
| EntryCount | StartPos | Entry | Entry | ... |
|
||||
+------------+----------+-------+-------+-----+
|
||||
|---( # of entries)---|
|
||||
```
|
||||
|
||||
- EntryCount(VInt): number of entries
|
||||
- StartPos(VInt): the start pos of the first (data) block referenced by this (index) block
|
||||
- Entry (IndexEntry)
|
||||
|
||||
### Entry
|
||||
|
||||
@@ -397,8 +397,9 @@ mod test {
|
||||
// end of block
|
||||
0u8, 0u8, 0u8, 0u8, // no more blocks
|
||||
// index
|
||||
6u8, 0u8, 0u8, 0u8, // block len
|
||||
7u8, 0u8, 0u8, 0u8, // block len
|
||||
1, // num blocks
|
||||
0, // offset
|
||||
11, // len of 1st block
|
||||
0, // first ord of 1st block
|
||||
32, 17, 20, // keep 0 push 2 | 17 20
|
||||
|
||||
@@ -151,6 +151,7 @@ impl SSTableIndexBuilder {
|
||||
|
||||
sstable_writer.write_suffix(keep_len, &block.last_key_or_greater[keep_len..]);
|
||||
sstable_writer.write_value(&block.block_addr);
|
||||
sstable_writer.flush_block_if_required()?;
|
||||
|
||||
previous_key.clear();
|
||||
previous_key.extend_from_slice(&block.last_key_or_greater);
|
||||
@@ -184,7 +185,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_sstable_index() {
|
||||
let mut sstable_builder = SSTableIndexBuilder::default();
|
||||
sstable_builder.add_block(b"aaa", 0..20, 0u64);
|
||||
sstable_builder.add_block(b"aaa", 10..20, 0u64);
|
||||
sstable_builder.add_block(b"bbbbbbb", 20..30, 5u64);
|
||||
sstable_builder.add_block(b"ccc", 30..40, 10u64);
|
||||
sstable_builder.add_block(b"dddd", 40..50, 15u64);
|
||||
|
||||
@@ -21,7 +21,7 @@ impl ValueReader for IndexValueReader {
|
||||
let num_vals = deserialize_vint_u64(&mut data) as usize;
|
||||
self.vals.clear();
|
||||
let mut first_ordinal = 0u64;
|
||||
let mut prev_start = 0usize;
|
||||
let mut prev_start = deserialize_vint_u64(&mut data) as usize;
|
||||
for _ in 0..num_vals {
|
||||
let len = deserialize_vint_u64(&mut data);
|
||||
let delta_ordinal = deserialize_vint_u64(&mut data);
|
||||
@@ -53,6 +53,14 @@ impl ValueWriter for IndexValueWriter {
|
||||
fn serialize_block(&self, output: &mut Vec<u8>) {
|
||||
let mut prev_ord = 0u64;
|
||||
vint::serialize_into_vec(self.vals.len() as u64, output);
|
||||
|
||||
let start_pos = if let Some(block_addr) = self.vals.first() {
|
||||
block_addr.byte_range.start as u64
|
||||
} else {
|
||||
0
|
||||
};
|
||||
vint::serialize_into_vec(start_pos, output);
|
||||
|
||||
// TODO use array_windows when it gets stabilized
|
||||
for elem in self.vals.windows(2) {
|
||||
let [current, next] = elem else {
|
||||
@@ -114,5 +122,11 @@ mod tests {
|
||||
first_ordinal: 10,
|
||||
},
|
||||
]);
|
||||
crate::value::tests::test_value_reader_writer::<_, IndexValueReader, IndexValueWriter>(&[
|
||||
BlockAddr {
|
||||
byte_range: 5..10,
|
||||
first_ordinal: 2,
|
||||
},
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user