Compare commits

...

1 Commits

Author SHA1 Message Date
Pascal Seitz
6c53f9f37f Fix off by one in optional index
Fixes #2293

Fixes an off by one error in the metadata resize of the optional index
when loading the index.

Merge variables with the same meaning but different names
2024-01-09 15:09:49 +08:00
4 changed files with 36 additions and 9 deletions

View File

@@ -110,6 +110,9 @@ impl<T: PartialOrd + Copy + Debug + Send + Sync + 'static> Column<T> {
} }
/// Get the docids of values which are in the provided value range. /// Get the docids of values which are in the provided value range.
///
/// # Panic
/// Panics if a value in the selected_docid_range range is larger than the number of documents.
#[inline] #[inline]
pub fn get_docids_for_value_range( pub fn get_docids_for_value_range(
&self, &self,

View File

@@ -126,6 +126,8 @@ impl ColumnIndex {
} }
} }
/// # Panic
/// Panics if a value in the doc_id range is larger than the number of documents.
pub fn docid_range_to_rowids(&self, doc_id: Range<DocId>) -> Range<RowId> { pub fn docid_range_to_rowids(&self, doc_id: Range<DocId>) -> Range<RowId> {
match self { match self {
ColumnIndex::Empty { .. } => 0..0, ColumnIndex::Empty { .. } => 0..0,

View File

@@ -21,8 +21,6 @@ const DENSE_BLOCK_THRESHOLD: u32 =
const ELEMENTS_PER_BLOCK: u32 = u16::MAX as u32 + 1; const ELEMENTS_PER_BLOCK: u32 = u16::MAX as u32 + 1;
const BLOCK_SIZE: RowId = 1 << 16;
#[derive(Copy, Clone, Debug)] #[derive(Copy, Clone, Debug)]
struct BlockMeta { struct BlockMeta {
non_null_rows_before_block: u32, non_null_rows_before_block: u32,
@@ -109,8 +107,8 @@ struct RowAddr {
#[inline(always)] #[inline(always)]
fn row_addr_from_row_id(row_id: RowId) -> RowAddr { fn row_addr_from_row_id(row_id: RowId) -> RowAddr {
RowAddr { RowAddr {
block_id: (row_id / BLOCK_SIZE) as u16, block_id: (row_id / ELEMENTS_PER_BLOCK) as u16,
in_block_row_id: (row_id % BLOCK_SIZE) as u16, in_block_row_id: (row_id % ELEMENTS_PER_BLOCK) as u16,
} }
} }
@@ -490,8 +488,9 @@ fn deserialize_optional_index_block_metadatas(
start_byte_offset += block_variant.num_bytes_in_block(); start_byte_offset += block_variant.num_bytes_in_block();
non_null_rows_before_block += num_non_null_rows; non_null_rows_before_block += num_non_null_rows;
} }
let last_block = row_addr_from_row_id(num_rows).block_id;
block_metas.resize( block_metas.resize(
((num_rows + BLOCK_SIZE - 1) / BLOCK_SIZE) as usize, last_block as usize + 1, // +1 since last block is an index
BlockMeta { BlockMeta {
non_null_rows_before_block, non_null_rows_before_block,
start_byte_offset, start_byte_offset,

View File

@@ -3,6 +3,29 @@ use proptest::strategy::Strategy;
use proptest::{prop_oneof, proptest}; use proptest::{prop_oneof, proptest};
use super::*; use super::*;
use crate::{ColumnarReader, ColumnarWriter, DynamicColumnHandle};
#[test]
fn test_optional_index_bug_2293() {
test_optional_index_with_num_docs(ELEMENTS_PER_BLOCK - 1);
test_optional_index_with_num_docs(ELEMENTS_PER_BLOCK);
test_optional_index_with_num_docs(ELEMENTS_PER_BLOCK + 1);
}
fn test_optional_index_with_num_docs(num_docs: u32) {
let mut dataframe_writer = ColumnarWriter::default();
dataframe_writer.record_numerical(100, "score", 80i64);
let mut buffer: Vec<u8> = Vec::new();
dataframe_writer
.serialize(num_docs, None, &mut buffer)
.unwrap();
let columnar = ColumnarReader::open(buffer).unwrap();
assert_eq!(columnar.num_columns(), 1);
let cols: Vec<DynamicColumnHandle> = columnar.read_columns("score").unwrap();
assert_eq!(cols.len(), 1);
let col = cols[0].open().unwrap();
col.column_index().docid_range_to_rowids(0..num_docs);
}
#[test] #[test]
fn test_dense_block_threshold() { fn test_dense_block_threshold() {
@@ -35,7 +58,7 @@ proptest! {
#[test] #[test]
fn test_with_random_sets_simple() { fn test_with_random_sets_simple() {
let vals = 10..BLOCK_SIZE * 2; let vals = 10..ELEMENTS_PER_BLOCK * 2;
let mut out: Vec<u8> = Vec::new(); let mut out: Vec<u8> = Vec::new();
serialize_optional_index(&vals, 100, &mut out).unwrap(); serialize_optional_index(&vals, 100, &mut out).unwrap();
let null_index = open_optional_index(OwnedBytes::new(out)).unwrap(); let null_index = open_optional_index(OwnedBytes::new(out)).unwrap();
@@ -171,7 +194,7 @@ fn test_optional_index_rank() {
test_optional_index_rank_aux(&[0u32, 1u32]); test_optional_index_rank_aux(&[0u32, 1u32]);
let mut block = Vec::new(); let mut block = Vec::new();
block.push(3u32); block.push(3u32);
block.extend((0..BLOCK_SIZE).map(|i| i + BLOCK_SIZE + 1)); block.extend((0..ELEMENTS_PER_BLOCK).map(|i| i + ELEMENTS_PER_BLOCK + 1));
test_optional_index_rank_aux(&block); test_optional_index_rank_aux(&block);
} }
@@ -185,8 +208,8 @@ fn test_optional_index_iter_empty_one() {
fn test_optional_index_iter_dense_block() { fn test_optional_index_iter_dense_block() {
let mut block = Vec::new(); let mut block = Vec::new();
block.push(3u32); block.push(3u32);
block.extend((0..BLOCK_SIZE).map(|i| i + BLOCK_SIZE + 1)); block.extend((0..ELEMENTS_PER_BLOCK).map(|i| i + ELEMENTS_PER_BLOCK + 1));
test_optional_index_iter_aux(&block, 3 * BLOCK_SIZE); test_optional_index_iter_aux(&block, 3 * ELEMENTS_PER_BLOCK);
} }
#[test] #[test]