num_vals to FastFieldCodecReader

This commit is contained in:
Pascal Seitz
2022-08-24 17:18:43 +02:00
parent 3a9727aa91
commit 4a6f36937c
4 changed files with 23 additions and 2 deletions

View File

@@ -16,23 +16,26 @@ pub struct BitpackedReader {
bit_unpacker: BitUnpacker,
pub min_value_u64: u64,
pub max_value_u64: u64,
pub num_vals: u64,
}
impl FastFieldCodecReader for BitpackedReader {
/// Opens a fast field given a file.
fn open_from_bytes(bytes: OwnedBytes) -> io::Result<Self> {
let footer_offset = bytes.len() - 16;
let footer_offset = bytes.len() - 24;
let (data, mut footer) = bytes.split(footer_offset);
let min_value = u64::deserialize(&mut footer)?;
let amplitude = u64::deserialize(&mut footer)?;
let num_vals = u64::deserialize(&mut footer)?;
let max_value = min_value + amplitude;
let num_bits = compute_num_bits(amplitude);
let bit_unpacker = BitUnpacker::new(num_bits);
Ok(BitpackedReader {
data,
bit_unpacker,
min_value_u64: min_value,
max_value_u64: max_value,
bit_unpacker,
num_vals,
})
}
#[inline]
@@ -47,11 +50,16 @@ impl FastFieldCodecReader for BitpackedReader {
fn max_value(&self) -> u64 {
self.max_value_u64
}
#[inline]
fn num_vals(&self) -> u64 {
self.num_vals
}
}
pub struct BitpackedSerializerLegacy<'a, W: 'a + Write> {
bit_packer: BitPacker,
write: &'a mut W,
min_value: u64,
num_vals: u64,
amplitude: u64,
num_bits: u8,
}
@@ -78,6 +86,7 @@ impl<'a, W: Write> BitpackedSerializerLegacy<'a, W> {
bit_packer,
write,
min_value,
num_vals: 0,
amplitude,
num_bits,
})
@@ -88,12 +97,14 @@ impl<'a, W: Write> BitpackedSerializerLegacy<'a, W> {
let val_to_write: u64 = val - self.min_value;
self.bit_packer
.write(val_to_write, self.num_bits, &mut self.write)?;
self.num_vals += 1;
Ok(())
}
pub fn close_field(mut self) -> io::Result<()> {
self.bit_packer.close(&mut self.write)?;
self.min_value.serialize(&mut self.write)?;
self.amplitude.serialize(&mut self.write)?;
self.num_vals.serialize(&mut self.write)?;
Ok(())
}
}

View File

@@ -182,6 +182,10 @@ impl FastFieldCodecReader for BlockwiseLinearReader {
fn max_value(&self) -> u64 {
self.footer.max_value
}
#[inline]
fn num_vals(&self) -> u64 {
self.footer.num_vals
}
}
/// Same as LinearSerializer, but working on chunks of CHUNK_SIZE elements.

View File

@@ -18,6 +18,7 @@ pub trait FastFieldCodecReader: Sized {
fn get_u64(&self, doc: u64) -> u64;
fn min_value(&self) -> u64;
fn max_value(&self) -> u64;
fn num_vals(&self) -> u64;
}
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
@@ -183,6 +184,7 @@ mod tests {
let actual_compression = out.len() as f32 / (data.len() as f32 * 8.0);
let reader = R::open_from_bytes(OwnedBytes::new(out)).unwrap();
assert_eq!(reader.num_vals(), data.len() as u64);
for (doc, orig_val) in data.iter().enumerate() {
let val = reader.get_u64(doc as u64);
if val != *orig_val {

View File

@@ -89,6 +89,10 @@ impl FastFieldCodecReader for LinearReader {
fn max_value(&self) -> u64 {
self.footer.max_value
}
#[inline]
fn num_vals(&self) -> u64 {
self.footer.num_vals
}
}
/// Fastfield serializer, which tries to guess values by linear interpolation