Mirror of https://github.com/quickwit-oss/tantivy.git, synced 2026-01-07 17:42:55 +00:00
Merge pull request #1493 from quickwit-oss/remove_vec_impl
remove Column impl on Vec
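Note (editorial): for context, here is the trait surface that the wrapper types below implement. This is a minimal sketch inferred only from the method signatures visible in this diff, not copied from the crate; the real `Column` trait in fastfield_codecs may carry additional items.

pub trait Column {
    /// Returns the value stored at `position`.
    fn get_val(&self, position: u64) -> u64;
    /// Iterates over all values, yielding them by value.
    fn iter<'b>(&'b self) -> Box<dyn Iterator<Item = u64> + 'b>;
    fn min_value(&self) -> u64;
    fn max_value(&self) -> u64;
    fn num_vals(&self) -> u64;
}

The change replaces the impls on `&[u64]` and `Vec<u64>` with dedicated newtype wrappers (`VecColum` in the codec crate, `Data` in the benchmark binary), so test and benchmark helpers wrap their slices explicitly instead of relying on std types implementing `Column`.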
@@ -90,45 +90,32 @@ pub struct FastFieldStats {
     pub num_vals: u64,
 }
 
-impl<'a> Column for &'a [u64] {
+struct VecColum<'a>(&'a [u64]);
+
+impl<'a> Column for VecColum<'a> {
     fn get_val(&self, position: u64) -> u64 {
-        self[position as usize]
+        self.0[position as usize]
     }
 
     fn iter<'b>(&'b self) -> Box<dyn Iterator<Item = u64> + 'b> {
-        Box::new((self as &[u64]).iter().cloned())
+        Box::new(self.0.iter().cloned())
     }
 
     fn min_value(&self) -> u64 {
-        self.iter().min().unwrap_or(0)
+        self.0.iter().min().cloned().unwrap_or(0)
     }
 
     fn max_value(&self) -> u64 {
-        self.iter().max().unwrap_or(0)
+        self.0.iter().max().cloned().unwrap_or(0)
     }
 
     fn num_vals(&self) -> u64 {
-        self.len() as u64
+        self.0.len() as u64
     }
 }
 
-impl Column for Vec<u64> {
-    fn get_val(&self, position: u64) -> u64 {
-        self[position as usize]
-    }
-    fn iter<'b>(&'b self) -> Box<dyn Iterator<Item = u64> + 'b> {
-        Box::new((self as &[u64]).iter().cloned())
-    }
-    fn min_value(&self) -> u64 {
-        self.iter().min().unwrap_or(0)
-    }
-
-    fn max_value(&self) -> u64 {
-        self.iter().max().unwrap_or(0)
-    }
-
-    fn num_vals(&self) -> u64 {
-        self.len() as u64
+impl<'a> From<&'a [u64]> for VecColum<'a> {
+    fn from(data: &'a [u64]) -> Self {
+        Self(data)
     }
 }
@@ -145,10 +132,10 @@ mod tests {
         data: &[u64],
         name: &str,
     ) -> Option<(f32, f32)> {
-        let estimation = Codec::estimate(&data)?;
+        let estimation = Codec::estimate(&VecColum::from(data))?;
 
         let mut out: Vec<u8> = Vec::new();
-        Codec::serialize(&mut out, &data).unwrap();
+        Codec::serialize(&mut out, &VecColum::from(data)).unwrap();
 
         let actual_compression = out.len() as f32 / (data.len() as f32 * 8.0);
 
@@ -234,6 +221,7 @@ mod tests {
     #[test]
     fn estimation_good_interpolation_case() {
         let data = (10..=20000_u64).collect::<Vec<_>>();
+        let data: VecColum = data.as_slice().into();
 
         let linear_interpol_estimation = LinearCodec::estimate(&data).unwrap();
         assert_le!(linear_interpol_estimation, 0.01);
@@ -247,8 +235,9 @@ mod tests {
     }
     #[test]
     fn estimation_test_bad_interpolation_case() {
-        let data = vec![200, 10, 10, 10, 10, 1000, 20];
+        let data: &[u64] = &[200, 10, 10, 10, 10, 1000, 20];
 
+        let data: VecColum = data.into();
         let linear_interpol_estimation = LinearCodec::estimate(&data).unwrap();
         assert_le!(linear_interpol_estimation, 0.32);
 
@@ -259,6 +248,7 @@ mod tests {
     fn estimation_test_bad_interpolation_case_monotonically_increasing() {
         let mut data: Vec<u64> = (200..=20000_u64).collect();
         data.push(1_000_000);
+        let data: VecColum = data.as_slice().into();
 
         // in this case the linear interpolation can't in fact not be worse than bitpacking,
         // but the estimator adds some threshold, which leads to estimated worse behavior
 
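Note (editorial): the test updates above all follow the same conversion pattern. A standalone illustration of those idioms; the `VecColum` definition here simply mirrors the one added in the first hunk.

struct VecColum<'a>(&'a [u64]);

impl<'a> From<&'a [u64]> for VecColum<'a> {
    fn from(data: &'a [u64]) -> Self {
        Self(data)
    }
}

fn main() {
    // From an owned Vec<u64>: borrow it as a slice first.
    let owned: Vec<u64> = (10..=20).collect();
    let col: VecColum = owned.as_slice().into();

    // From a slice: a plain .into() or an explicit From call.
    let borrowed: &[u64] = &[200, 10, 10, 10, 10, 1000, 20];
    let col2 = VecColum::from(borrowed);

    println!("{} and {} values wrapped", col.0.len(), col2.0.len());
}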
@@ -3,9 +3,33 @@ extern crate prettytable;
 use fastfield_codecs::bitpacked::BitpackedCodec;
 use fastfield_codecs::blockwise_linear::BlockwiseLinearCodec;
 use fastfield_codecs::linear::LinearCodec;
-use fastfield_codecs::{FastFieldCodec, FastFieldCodecType, FastFieldStats};
+use fastfield_codecs::{Column, FastFieldCodec, FastFieldCodecType, FastFieldStats};
 use prettytable::{Cell, Row, Table};
 
+struct Data<'a>(&'a [u64]);
+
+impl<'a> Column for Data<'a> {
+    fn get_val(&self, position: u64) -> u64 {
+        self.0[position as usize]
+    }
+
+    fn iter<'b>(&'b self) -> Box<dyn Iterator<Item = u64> + 'b> {
+        Box::new(self.0.iter().cloned())
+    }
+
+    fn min_value(&self) -> u64 {
+        *self.0.iter().min().unwrap_or(&0)
+    }
+
+    fn max_value(&self) -> u64 {
+        *self.0.iter().max().unwrap_or(&0)
+    }
+
+    fn num_vals(&self) -> u64 {
+        self.0.len() as u64
+    }
+}
+
 fn main() {
     let mut table = Table::new();
 
@@ -86,10 +110,11 @@ pub fn get_codec_test_data_sets() -> Vec<(Vec<u64>, &'static str)> {
 pub fn serialize_with_codec<C: FastFieldCodec>(
     data: &[u64],
 ) -> Option<(f32, f32, FastFieldCodecType)> {
+    let data = Data(data);
     let estimation = C::estimate(&data)?;
     let mut out = Vec::new();
     C::serialize(&mut out, &data).unwrap();
-    let actual_compression = out.len() as f32 / (data.len() * 8) as f32;
+    let actual_compression = out.len() as f32 / (data.num_vals() * 8) as f32;
     Some((estimation, actual_compression, C::CODEC_TYPE))
 }
 
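Note (editorial): in the benchmark helper above, `let data = Data(data);` shadows the `&[u64]` binding, so slice methods such as `len()` are no longer directly available and the compression ratio is computed from `num_vals()` instead. A tiny sketch of that detail; the inherent `num_vals` here stands in for the trait method provided by the `Column` impl above.

struct Data<'a>(&'a [u64]);

impl<'a> Data<'a> {
    fn num_vals(&self) -> u64 {
        self.0.len() as u64
    }
}

fn main() {
    let raw: &[u64] = &[1, 2, 3, 4];
    let data = Data(raw); // shadows the slice; `data.len()` would no longer compile
    let out_len = 16usize; // stand-in for the serialized byte length
    let ratio = out_len as f32 / (data.num_vals() * 8) as f32;
    println!("compression ratio: {ratio}");
}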
@@ -133,7 +133,7 @@ impl TermOrdinalMapping {
     fn max_term_ord(&self) -> TermOrdinal {
         self.per_segment_new_term_ordinals
             .iter()
-            .flat_map(|term_ordinals| term_ordinals.iter().max())
+            .flat_map(|term_ordinals| term_ordinals.iter().max().cloned())
             .max()
             .unwrap_or_default()
     }
@@ -784,7 +784,7 @@ impl IndexMerger {
             let new_doc_id: DocId =
                 self.offsets
                     .iter()
-                    .position(|offset| offset > pos)
+                    .position(|&offset| offset > pos)
                     .expect("pos is out of bounds") as DocId
                     - 1u32;
 
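Note (editorial): the two merger changes above are presumably fallout from removing the `Column` impl on `Vec<u64>`: with that impl gone, `.iter()` on a `Vec<u64>` no longer resolves to the trait's by-value iterator but to the standard slice iterator, which yields `&u64`. Hence the `.cloned()` after the inner `max()` and the `|&offset|` destructuring pattern. A standalone illustration with made-up data:

fn main() {
    // The inner max() over a slice iterator yields Option<&u64>, so values are
    // copied out with .cloned() before the outer max().
    let per_segment: Vec<Vec<u64>> = vec![vec![3, 7, 2], vec![9, 1]];
    let max_ord: u64 = per_segment
        .iter()
        .flat_map(|ords| ords.iter().max().cloned())
        .max()
        .unwrap_or_default();
    assert_eq!(max_ord, 9);

    // Same story for position(): the closure destructures the &u64 reference.
    let offsets: Vec<u64> = vec![0, 10, 20, 30];
    let pos = 15u64;
    let new_idx = offsets
        .iter()
        .position(|&offset| offset > pos)
        .expect("pos is out of bounds")
        - 1;
    assert_eq!(new_idx, 1);
}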