Merge pull request #1493 from quickwit-oss/remove_vec_impl

remove Column impl on Vec
This commit is contained in:
PSeitz
2022-08-29 07:54:33 -07:00
committed by GitHub
3 changed files with 45 additions and 30 deletions

View File

@@ -90,45 +90,32 @@ pub struct FastFieldStats {
pub num_vals: u64,
}
impl<'a> Column for &'a [u64] {
struct VecColum<'a>(&'a [u64]);
impl<'a> Column for VecColum<'a> {
fn get_val(&self, position: u64) -> u64 {
self[position as usize]
self.0[position as usize]
}
fn iter<'b>(&'b self) -> Box<dyn Iterator<Item = u64> + 'b> {
Box::new((self as &[u64]).iter().cloned())
Box::new(self.0.iter().cloned())
}
fn min_value(&self) -> u64 {
self.iter().min().unwrap_or(0)
self.0.iter().min().cloned().unwrap_or(0)
}
fn max_value(&self) -> u64 {
self.iter().max().unwrap_or(0)
self.0.iter().max().cloned().unwrap_or(0)
}
fn num_vals(&self) -> u64 {
self.len() as u64
self.0.len() as u64
}
}
impl Column for Vec<u64> {
fn get_val(&self, position: u64) -> u64 {
self[position as usize]
}
fn iter<'b>(&'b self) -> Box<dyn Iterator<Item = u64> + 'b> {
Box::new((self as &[u64]).iter().cloned())
}
fn min_value(&self) -> u64 {
self.iter().min().unwrap_or(0)
}
fn max_value(&self) -> u64 {
self.iter().max().unwrap_or(0)
}
fn num_vals(&self) -> u64 {
self.len() as u64
// Conversion from a borrowed `u64` slice into the `VecColum` wrapper.
// The slice is only borrowed (lifetime `'a`); nothing is copied.
impl<'a> From<&'a [u64]> for VecColum<'a> {
// Wraps `data` in the tuple struct without touching its contents.
fn from(data: &'a [u64]) -> Self {
Self(data)
}
}
@@ -145,10 +132,10 @@ mod tests {
data: &[u64],
name: &str,
) -> Option<(f32, f32)> {
let estimation = Codec::estimate(&data)?;
let estimation = Codec::estimate(&VecColum::from(data))?;
let mut out: Vec<u8> = Vec::new();
Codec::serialize(&mut out, &data).unwrap();
Codec::serialize(&mut out, &VecColum::from(data)).unwrap();
let actual_compression = out.len() as f32 / (data.len() as f32 * 8.0);
@@ -234,6 +221,7 @@ mod tests {
#[test]
fn estimation_good_interpolation_case() {
let data = (10..=20000_u64).collect::<Vec<_>>();
let data: VecColum = data.as_slice().into();
let linear_interpol_estimation = LinearCodec::estimate(&data).unwrap();
assert_le!(linear_interpol_estimation, 0.01);
@@ -247,8 +235,9 @@ mod tests {
}
#[test]
fn estimation_test_bad_interpolation_case() {
let data = vec![200, 10, 10, 10, 10, 1000, 20];
let data: &[u64] = &[200, 10, 10, 10, 10, 1000, 20];
let data: VecColum = data.into();
let linear_interpol_estimation = LinearCodec::estimate(&data).unwrap();
assert_le!(linear_interpol_estimation, 0.32);
@@ -259,6 +248,7 @@ mod tests {
fn estimation_test_bad_interpolation_case_monotonically_increasing() {
let mut data: Vec<u64> = (200..=20000_u64).collect();
data.push(1_000_000);
let data: VecColum = data.as_slice().into();
// in this case the linear interpolation cannot actually be worse than bitpacking,
// but the estimator adds some threshold, which leads to a worse estimate

View File

@@ -3,9 +3,33 @@ extern crate prettytable;
use fastfield_codecs::bitpacked::BitpackedCodec;
use fastfield_codecs::blockwise_linear::BlockwiseLinearCodec;
use fastfield_codecs::linear::LinearCodec;
use fastfield_codecs::{FastFieldCodec, FastFieldCodecType, FastFieldStats};
use fastfield_codecs::{Column, FastFieldCodec, FastFieldCodecType, FastFieldStats};
use prettytable::{Cell, Row, Table};
/// Newtype around a borrowed slice of `u64` values, so that the slice can be
/// passed where a `Column` implementation is expected.
struct Data<'a>(&'a [u64]);
impl<'a> Column for Data<'a> {
/// Returns the value stored at `position`.
///
/// Uses plain slice indexing, so an out-of-range `position` panics.
fn get_val(&self, position: u64) -> u64 {
self.0[position as usize]
}
/// Returns a boxed iterator yielding every value of the slice by value, in order.
fn iter<'b>(&'b self) -> Box<dyn Iterator<Item = u64> + 'b> {
Box::new(self.0.iter().cloned())
}
/// Returns the smallest value in the slice, or `0` when the slice is empty.
fn min_value(&self) -> u64 {
*self.0.iter().min().unwrap_or(&0)
}
/// Returns the largest value in the slice, or `0` when the slice is empty.
fn max_value(&self) -> u64 {
*self.0.iter().max().unwrap_or(&0)
}
/// Returns the number of values in the slice.
fn num_vals(&self) -> u64 {
self.0.len() as u64
}
}
fn main() {
let mut table = Table::new();
@@ -86,10 +110,11 @@ pub fn get_codec_test_data_sets() -> Vec<(Vec<u64>, &'static str)> {
pub fn serialize_with_codec<C: FastFieldCodec>(
data: &[u64],
) -> Option<(f32, f32, FastFieldCodecType)> {
let data = Data(data);
let estimation = C::estimate(&data)?;
let mut out = Vec::new();
C::serialize(&mut out, &data).unwrap();
let actual_compression = out.len() as f32 / (data.len() * 8) as f32;
let actual_compression = out.len() as f32 / (data.num_vals() * 8) as f32;
Some((estimation, actual_compression, C::CODEC_TYPE))
}

View File

@@ -133,7 +133,7 @@ impl TermOrdinalMapping {
fn max_term_ord(&self) -> TermOrdinal {
self.per_segment_new_term_ordinals
.iter()
.flat_map(|term_ordinals| term_ordinals.iter().max())
.flat_map(|term_ordinals| term_ordinals.iter().max().cloned())
.max()
.unwrap_or_default()
}
@@ -784,7 +784,7 @@ impl IndexMerger {
let new_doc_id: DocId =
self.offsets
.iter()
.position(|offset| offset > pos)
.position(|&offset| offset > pos)
.expect("pos is out of bounds") as DocId
- 1u32;