From 64f08a1a5c451c2ee99ef37757432f930c13f4fb Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Fri, 16 Sep 2022 14:42:27 +0900 Subject: [PATCH] Hiding useless symbols and removing code. (#1522) --- fastfield_codecs/src/lib.rs | 21 ++++++----------- fastfield_codecs/src/main.rs | 12 +--------- src/fastfield/mod.rs | 2 +- src/fastfield/serializer/mod.rs | 2 +- src/fastfield/writer.rs | 22 ++++++++--------- src/indexer/merger.rs | 42 +++++++++++++++------------------ 6 files changed, 39 insertions(+), 62 deletions(-) diff --git a/fastfield_codecs/src/lib.rs b/fastfield_codecs/src/lib.rs index 9a11f986a..bf4a1ad34 100644 --- a/fastfield_codecs/src/lib.rs +++ b/fastfield_codecs/src/lib.rs @@ -17,7 +17,7 @@ use serialize::Header; mod bitpacked; mod blockwise_linear; -pub(crate) mod line; +mod line; mod linear; mod monotonic_mapping; @@ -25,12 +25,13 @@ mod column; mod gcd; mod serialize; -pub use self::bitpacked::BitpackedCodec; -pub use self::blockwise_linear::BlockwiseLinearCodec; +use self::bitpacked::BitpackedCodec; +use self::blockwise_linear::BlockwiseLinearCodec; pub use self::column::{monotonic_map_column, Column, VecColumn}; -pub use self::linear::LinearCodec; +use self::linear::LinearCodec; pub use self::monotonic_mapping::MonotonicallyMappableToU64; -pub use self::serialize::{estimate, serialize, serialize_and_load, NormalizedHeader}; +use self::serialize::NormalizedHeader; +pub use self::serialize::{estimate, serialize, serialize_and_load}; #[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)] #[repr(u8)] @@ -100,7 +101,7 @@ fn open_specific_codec( /// The FastFieldSerializerEstimate trait is required on all variants /// of fast field compressions, to decide which one to choose. -pub trait FastFieldCodec: 'static { +trait FastFieldCodec: 'static { /// A codex needs to provide a unique name and id, which is /// used for debugging and de/serialization. const CODEC_TYPE: FastFieldCodecType; @@ -132,14 +133,6 @@ pub const ALL_CODEC_TYPES: [FastFieldCodecType; 3] = [ FastFieldCodecType::Linear, ]; -#[derive(Debug, Clone)] -/// Statistics are used in codec detection and stored in the fast field footer. -pub struct FastFieldStats { - pub min_value: u64, - pub max_value: u64, - pub num_vals: u64, -} - #[cfg(test)] mod tests { use proptest::prelude::*; diff --git a/fastfield_codecs/src/main.rs b/fastfield_codecs/src/main.rs index 91f18649f..082d2c4bc 100644 --- a/fastfield_codecs/src/main.rs +++ b/fastfield_codecs/src/main.rs @@ -1,6 +1,6 @@ #[macro_use] extern crate prettytable; -use fastfield_codecs::{Column, FastFieldCodecType, FastFieldStats, VecColumn}; +use fastfield_codecs::{Column, FastFieldCodecType, VecColumn}; use prettytable::{Cell, Row, Table}; fn main() { @@ -90,13 +90,3 @@ pub fn serialize_with_codec( let actual_compression = out.len() as f32 / (col.num_vals() * 8) as f32; Some((estimation, actual_compression, codec_type)) } - -pub fn stats_from_vec(data: &[u64]) -> FastFieldStats { - let min_value = data.iter().cloned().min().unwrap_or(0); - let max_value = data.iter().cloned().max().unwrap_or(0); - FastFieldStats { - min_value, - max_value, - num_vals: data.len() as u64, - } -} diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index 1513f9937..936d8b8a0 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -30,7 +30,7 @@ pub(crate) use self::multivalued::MultivalueStartIndex; pub use self::multivalued::{MultiValuedFastFieldReader, MultiValuedFastFieldWriter}; pub use self::readers::FastFieldReaders; pub(crate) use self::readers::{type_and_cardinality, FastType}; -pub use self::serializer::{Column, CompositeFastFieldSerializer, FastFieldStats}; +pub use self::serializer::{Column, CompositeFastFieldSerializer}; pub use self::writer::{FastFieldsWriter, IntFastFieldWriter}; use crate::schema::{Cardinality, FieldType, Type, Value}; use crate::{DateTime, DocId}; diff --git a/src/fastfield/serializer/mod.rs b/src/fastfield/serializer/mod.rs index adf830d5e..6efe3e28e 100644 --- a/src/fastfield/serializer/mod.rs +++ b/src/fastfield/serializer/mod.rs @@ -1,6 +1,6 @@ use std::io::{self, Write}; -pub use fastfield_codecs::{Column, FastFieldStats}; +pub use fastfield_codecs::Column; use fastfield_codecs::{FastFieldCodecType, MonotonicallyMappableToU64, ALL_CODEC_TYPES}; use crate::directory::{CompositeWrite, WritePtr}; diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index d93772b7e..dd673fcd2 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -7,7 +7,6 @@ use fnv::FnvHashMap; use tantivy_bitpacker::BlockedBitpacker; use super::multivalued::MultiValuedFastFieldWriter; -use super::serializer::FastFieldStats; use super::FastFieldType; use crate::fastfield::{BytesFastFieldWriter, CompositeFastFieldSerializer}; use crate::indexer::doc_id_mapping::DocIdMapping; @@ -360,16 +359,12 @@ impl IntFastFieldWriter { (self.val_min, self.val_max) }; - let stats = FastFieldStats { - min_value: min, - max_value: max, - num_vals: self.val_count as u64, - }; - let fastfield_accessor = WriterFastFieldAccessProvider { doc_id_map, vals: &self.vals, - stats, + min_value: min, + max_value: max, + num_vals: self.val_count as u64, }; serializer.create_auto_detect_u64_fast_field(self.field, fastfield_accessor)?; @@ -382,8 +377,11 @@ impl IntFastFieldWriter { struct WriterFastFieldAccessProvider<'map, 'bitp> { doc_id_map: Option<&'map DocIdMapping>, vals: &'bitp BlockedBitpacker, - stats: FastFieldStats, + min_value: u64, + max_value: u64, + num_vals: u64, } + impl<'map, 'bitp> Column for WriterFastFieldAccessProvider<'map, 'bitp> { /// Return the value associated to the given doc. /// @@ -417,14 +415,14 @@ impl<'map, 'bitp> Column for WriterFastFieldAccessProvider<'map, 'bitp> { } fn min_value(&self) -> u64 { - self.stats.min_value + self.min_value } fn max_value(&self) -> u64 { - self.stats.max_value + self.max_value } fn num_vals(&self) -> u64 { - self.stats.num_vals + self.num_vals } } diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 827f421c6..7b25f3f34 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -11,8 +11,7 @@ use crate::core::{Segment, SegmentReader}; use crate::docset::{DocSet, TERMINATED}; use crate::error::DataCorruption; use crate::fastfield::{ - AliveBitSet, Column, CompositeFastFieldSerializer, FastFieldStats, MultiValueLength, - MultiValuedFastFieldReader, + AliveBitSet, Column, CompositeFastFieldSerializer, MultiValueLength, MultiValuedFastFieldReader, }; use crate::fieldnorm::{FieldNormReader, FieldNormReaders, FieldNormsSerializer, FieldNormsWriter}; use crate::indexer::doc_id_mapping::{expect_field_id_for_sort_field, SegmentDocIdMapping}; @@ -359,16 +358,13 @@ impl IndexMerger { }) .collect::>(); - let stats = FastFieldStats { - min_value, - max_value, - num_vals: doc_id_mapping.len() as u64, - }; #[derive(Clone)] struct SortedDocIdFieldAccessProvider<'a> { doc_id_mapping: &'a SegmentDocIdMapping, fast_field_readers: &'a Vec>>, - stats: FastFieldStats, + min_value: u64, + max_value: u64, + num_vals: u64, } impl<'a> Column for SortedDocIdFieldAccessProvider<'a> { fn get_val(&self, doc: u64) -> u64 { @@ -391,21 +387,23 @@ impl IndexMerger { ) } fn min_value(&self) -> u64 { - self.stats.min_value + self.min_value } fn max_value(&self) -> u64 { - self.stats.max_value + self.max_value } fn num_vals(&self) -> u64 { - self.stats.num_vals + self.num_vals } } let fastfield_accessor = SortedDocIdFieldAccessProvider { doc_id_mapping, fast_field_readers: &fast_field_readers, - stats, + min_value, + max_value, + num_vals: doc_id_mapping.len() as u64, }; fast_field_serializer.create_auto_detect_u64_fast_field(field, fastfield_accessor)?; @@ -705,17 +703,13 @@ impl IndexMerger { } // We can now initialize our serializer, and push it the different values - let stats = FastFieldStats { - max_value, - num_vals: num_vals as u64, - min_value, - }; - struct SortedDocIdMultiValueAccessProvider<'a> { doc_id_mapping: &'a SegmentDocIdMapping, fast_field_readers: &'a Vec>, offsets: Vec, - stats: FastFieldStats, + min_value: u64, + max_value: u64, + num_vals: u64, } impl<'a> Column for SortedDocIdMultiValueAccessProvider<'a> { fn get_val(&self, pos: u64) -> u64 { @@ -757,22 +751,24 @@ impl IndexMerger { ) } fn min_value(&self) -> u64 { - self.stats.min_value + self.min_value } fn max_value(&self) -> u64 { - self.stats.max_value + self.max_value } fn num_vals(&self) -> u64 { - self.stats.num_vals + self.num_vals } } let fastfield_accessor = SortedDocIdMultiValueAccessProvider { doc_id_mapping, fast_field_readers: &ff_readers, offsets, - stats, + min_value, + max_value, + num_vals: num_vals as u64, }; fast_field_serializer.create_auto_detect_u64_fast_field_with_idx( field,