diff --git a/fastfield_codecs/Cargo.toml b/fastfield_codecs/Cargo.toml index a53a8ceeb..85106f2c5 100644 --- a/fastfield_codecs/Cargo.toml +++ b/fastfield_codecs/Cargo.toml @@ -17,7 +17,7 @@ rand = {version="0.8.3", optional= true} fastdivide = "0.4" log = "0.4" itertools = { version = "0.10.3" } -measure_time = { version="0.8.2", optional=true} +measure_time = { version="0.8.2" } [dev-dependencies] more-asserts = "0.3.0" @@ -25,7 +25,7 @@ proptest = "1.0.0" rand = "0.8.3" [features] -bin = ["prettytable-rs", "rand", "measure_time"] +bin = ["prettytable-rs", "rand"] default = ["bin"] unstable = [] diff --git a/fastfield_codecs/src/serialize.rs b/fastfield_codecs/src/serialize.rs index 92f55f5d0..0e9ea0a15 100644 --- a/fastfield_codecs/src/serialize.rs +++ b/fastfield_codecs/src/serialize.rs @@ -23,7 +23,8 @@ use std::sync::Arc; use common::{BinarySerializable, VInt}; use fastdivide::DividerU64; -use log::warn; +use log::{debug, warn}; +use measure_time::debug_time; use ownedbytes::OwnedBytes; use crate::bitpacked::BitpackedCodec; @@ -183,6 +184,7 @@ fn detect_codec( ) -> Option { let mut estimations = Vec::new(); for &codec in codecs { + debug_time!("estimate time for codec: {:?}", codec); let estimation_opt = match codec { FastFieldCodecType::Bitpacked => BitpackedCodec::estimate(&column), FastFieldCodecType::Linear => LinearCodec::estimate(&column), @@ -202,6 +204,7 @@ fn detect_codec( // codecs estimations.retain(|estimation| !estimation.0.is_nan() && estimation.0 != f32::MAX); estimations.sort_by(|(score_left, _), (score_right, _)| score_left.total_cmp(score_right)); + debug!("Chosen Codec {:?}", estimations.first()?.1); Some(estimations.first()?.1) } @@ -210,6 +213,12 @@ fn serialize_given_codec( codec_type: FastFieldCodecType, output: &mut impl io::Write, ) -> io::Result<()> { + debug_time!( + "Serialize time for codec: {:?}, num_vals {}", + codec_type, + column.num_vals() + ); + match codec_type { FastFieldCodecType::Bitpacked => { BitpackedCodec::serialize(&column, output)?; diff --git a/src/fastfield/multivalued/writer.rs b/src/fastfield/multivalued/writer.rs index 212b49616..b9bd0d403 100644 --- a/src/fastfield/multivalued/writer.rs +++ b/src/fastfield/multivalued/writer.rs @@ -3,6 +3,7 @@ use std::sync::Mutex; use fastfield_codecs::{Column, MonotonicallyMappableToU64, VecColumn}; use fnv::FnvHashMap; +use measure_time::debug_time; use crate::fastfield::{value_to_u64, CompositeFastFieldSerializer, FastFieldType}; use crate::indexer::doc_id_mapping::DocIdMapping; @@ -146,6 +147,13 @@ impl MultiValuedFastFieldWriter { { self.doc_index.push(self.vals.len() as u64); let col = VecColumn::from(&self.doc_index[..]); + + debug_time!( + "segment-write-multi-fast-field-idx, num_vals {}, field_id {:?}", + col.num_vals(), + self.field() + ); + if let Some(doc_id_map) = doc_id_map { let multi_value_start_index = MultivalueStartIndex::new(&col, doc_id_map); serializer.create_auto_detect_u64_fast_field_with_idx( @@ -158,6 +166,12 @@ impl MultiValuedFastFieldWriter { } } { + debug_time!( + "segment-write-multi-fast-field-values, num_vals {}, field_id {:?}", + self.vals.len(), + self.field() + ); + // Writing the values themselves. // TODO FIXME: Use less memory. let mut values: Vec = Vec::new(); diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index a7d70635b..22a814ed2 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -4,6 +4,7 @@ use std::io; use common; use fastfield_codecs::{Column, MonotonicallyMappableToU64}; use fnv::FnvHashMap; +use measure_time::debug_time; use tantivy_bitpacker::BlockedBitpacker; use super::multivalued::MultiValuedFastFieldWriter; @@ -367,6 +368,8 @@ impl IntFastFieldWriter { num_vals: self.val_count as u64, }; + debug_time!("segment-write-single-field, field_id {:?}", self.field()); + serializer.create_auto_detect_u64_fast_field(self.field, fastfield_accessor)?; Ok(()) diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 210584e8c..97a01b347 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -250,7 +250,11 @@ impl IndexMerger { mut term_ord_mappings: HashMap, doc_id_mapping: &SegmentDocIdMapping, ) -> crate::Result<()> { - debug_time!("write-fast-fields"); + debug_time!( + "merge-all-fast-fields, num_segments {}, num docs new segment {}", + self.readers.len(), + doc_id_mapping.len() + ); for (field, field_entry) in self.schema.fields() { let field_type = field_entry.field_type(); @@ -311,6 +315,12 @@ impl IndexMerger { doc_id_mapping: &SegmentDocIdMapping, ) -> crate::Result<()> { let fast_field_accessor = SortedDocIdColumn::new(&self.readers, doc_id_mapping, field); + debug_time!( + "merge-single-fast-field, num_vals {}, num_segments {}, field_id {:?}", + fast_field_accessor.num_vals(), + self.readers.len(), + field + ); fast_field_serializer.create_auto_detect_u64_fast_field(field, fast_field_accessor)?; Ok(()) @@ -458,6 +468,12 @@ impl IndexMerger { fast_field_serializer: &mut CompositeFastFieldSerializer, doc_id_mapping: &SegmentDocIdMapping, ) -> crate::Result> { + debug_time!( + "merge-multi-fast-field-idx, num_segments {}, field_id {:?}", + self.readers.len(), + field + ); + let reader_ordinal_and_field_accessors = self .readers .iter() @@ -571,6 +587,13 @@ impl IndexMerger { let fastfield_accessor = SortedDocIdMultiValueColumn::new(&self.readers, doc_id_mapping, &offsets, field); + debug_time!( + "merge-multi-fast-field-values, num_vals {}, num_segments {}, field_id {:?}", + fastfield_accessor.num_vals(), + self.readers.len(), + field + ); + fast_field_serializer.create_auto_detect_u64_fast_field_with_idx( field, fastfield_accessor,