mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-03 15:52:55 +00:00
Compare commits
2 Commits
optional_c
...
debug_time
| Author | SHA1 | Date | |
|---|---|---|---|
| | dbd3aed24a | | |
| | 7707b8a6e1 | | |
@@ -17,7 +17,7 @@ rand = {version="0.8.3", optional= true}
|
|||||||
fastdivide = "0.4"
|
fastdivide = "0.4"
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
itertools = { version = "0.10.3" }
|
itertools = { version = "0.10.3" }
|
||||||
measure_time = { version="0.8.2", optional=true}
|
measure_time = { version="0.8.2" }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
more-asserts = "0.3.0"
|
more-asserts = "0.3.0"
|
||||||
@@ -25,7 +25,7 @@ proptest = "1.0.0"
|
|||||||
rand = "0.8.3"
|
rand = "0.8.3"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
bin = ["prettytable-rs", "rand", "measure_time"]
|
bin = ["prettytable-rs", "rand"]
|
||||||
default = ["bin"]
|
default = ["bin"]
|
||||||
unstable = []
|
unstable = []
|
||||||
|
|
||||||
|
|||||||
@@ -23,7 +23,8 @@ use std::sync::Arc;
|
|||||||
|
|
||||||
use common::{BinarySerializable, VInt};
|
use common::{BinarySerializable, VInt};
|
||||||
use fastdivide::DividerU64;
|
use fastdivide::DividerU64;
|
||||||
use log::warn;
|
use log::{trace, warn};
|
||||||
|
use measure_time::trace_time;
|
||||||
use ownedbytes::OwnedBytes;
|
use ownedbytes::OwnedBytes;
|
||||||
|
|
||||||
use crate::bitpacked::BitpackedCodec;
|
use crate::bitpacked::BitpackedCodec;
|
||||||
@@ -183,6 +184,7 @@ fn detect_codec(
|
|||||||
) -> Option<FastFieldCodecType> {
|
) -> Option<FastFieldCodecType> {
|
||||||
let mut estimations = Vec::new();
|
let mut estimations = Vec::new();
|
||||||
for &codec in codecs {
|
for &codec in codecs {
|
||||||
|
trace_time!("estimate time for codec: {:?}", codec);
|
||||||
let estimation_opt = match codec {
|
let estimation_opt = match codec {
|
||||||
FastFieldCodecType::Bitpacked => BitpackedCodec::estimate(&column),
|
FastFieldCodecType::Bitpacked => BitpackedCodec::estimate(&column),
|
||||||
FastFieldCodecType::Linear => LinearCodec::estimate(&column),
|
FastFieldCodecType::Linear => LinearCodec::estimate(&column),
|
||||||
@@ -202,6 +204,7 @@ fn detect_codec(
|
|||||||
// codecs
|
// codecs
|
||||||
estimations.retain(|estimation| !estimation.0.is_nan() && estimation.0 != f32::MAX);
|
estimations.retain(|estimation| !estimation.0.is_nan() && estimation.0 != f32::MAX);
|
||||||
estimations.sort_by(|(score_left, _), (score_right, _)| score_left.total_cmp(score_right));
|
estimations.sort_by(|(score_left, _), (score_right, _)| score_left.total_cmp(score_right));
|
||||||
|
trace!("Chosen Codec {:?}", estimations.first()?.1);
|
||||||
Some(estimations.first()?.1)
|
Some(estimations.first()?.1)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -210,6 +213,12 @@ fn serialize_given_codec(
|
|||||||
codec_type: FastFieldCodecType,
|
codec_type: FastFieldCodecType,
|
||||||
output: &mut impl io::Write,
|
output: &mut impl io::Write,
|
||||||
) -> io::Result<()> {
|
) -> io::Result<()> {
|
||||||
|
trace_time!(
|
||||||
|
"Serialize time for codec: {:?}, num_vals {}",
|
||||||
|
codec_type,
|
||||||
|
column.num_vals()
|
||||||
|
);
|
||||||
|
|
||||||
match codec_type {
|
match codec_type {
|
||||||
FastFieldCodecType::Bitpacked => {
|
FastFieldCodecType::Bitpacked => {
|
||||||
BitpackedCodec::serialize(&column, output)?;
|
BitpackedCodec::serialize(&column, output)?;
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ use std::sync::Mutex;
|
|||||||
|
|
||||||
use fastfield_codecs::{Column, MonotonicallyMappableToU64, VecColumn};
|
use fastfield_codecs::{Column, MonotonicallyMappableToU64, VecColumn};
|
||||||
use fnv::FnvHashMap;
|
use fnv::FnvHashMap;
|
||||||
|
use measure_time::{debug_time, trace_time};
|
||||||
|
|
||||||
use crate::fastfield::{value_to_u64, CompositeFastFieldSerializer, FastFieldType};
|
use crate::fastfield::{value_to_u64, CompositeFastFieldSerializer, FastFieldType};
|
||||||
use crate::indexer::doc_id_mapping::DocIdMapping;
|
use crate::indexer::doc_id_mapping::DocIdMapping;
|
||||||
@@ -146,6 +147,13 @@ impl MultiValuedFastFieldWriter {
|
|||||||
{
|
{
|
||||||
self.doc_index.push(self.vals.len() as u64);
|
self.doc_index.push(self.vals.len() as u64);
|
||||||
let col = VecColumn::from(&self.doc_index[..]);
|
let col = VecColumn::from(&self.doc_index[..]);
|
||||||
|
|
||||||
|
trace_time!(
|
||||||
|
"segment-serialize-multi-fast-field-idx, num_vals {}, field_id {:?}",
|
||||||
|
col.num_vals(),
|
||||||
|
self.field()
|
||||||
|
);
|
||||||
|
|
||||||
if let Some(doc_id_map) = doc_id_map {
|
if let Some(doc_id_map) = doc_id_map {
|
||||||
let multi_value_start_index = MultivalueStartIndex::new(&col, doc_id_map);
|
let multi_value_start_index = MultivalueStartIndex::new(&col, doc_id_map);
|
||||||
serializer.create_auto_detect_u64_fast_field_with_idx(
|
serializer.create_auto_detect_u64_fast_field_with_idx(
|
||||||
@@ -158,6 +166,12 @@ impl MultiValuedFastFieldWriter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
|
trace_time!(
|
||||||
|
"segment-serialize-multi-fast-field-values, num_vals {}, field_id {:?}",
|
||||||
|
self.vals.len(),
|
||||||
|
self.field()
|
||||||
|
);
|
||||||
|
|
||||||
// Writing the values themselves.
|
// Writing the values themselves.
|
||||||
// TODO FIXME: Use less memory.
|
// TODO FIXME: Use less memory.
|
||||||
let mut values: Vec<u64> = Vec::new();
|
let mut values: Vec<u64> = Vec::new();
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ use std::io;
|
|||||||
use common;
|
use common;
|
||||||
use fastfield_codecs::{Column, MonotonicallyMappableToU64};
|
use fastfield_codecs::{Column, MonotonicallyMappableToU64};
|
||||||
use fnv::FnvHashMap;
|
use fnv::FnvHashMap;
|
||||||
|
use measure_time::{debug_time, trace_time};
|
||||||
use tantivy_bitpacker::BlockedBitpacker;
|
use tantivy_bitpacker::BlockedBitpacker;
|
||||||
|
|
||||||
use super::multivalued::MultiValuedFastFieldWriter;
|
use super::multivalued::MultiValuedFastFieldWriter;
|
||||||
@@ -215,6 +216,7 @@ impl FastFieldsWriter {
|
|||||||
mapping: &HashMap<Field, FnvHashMap<UnorderedTermId, TermOrdinal>>,
|
mapping: &HashMap<Field, FnvHashMap<UnorderedTermId, TermOrdinal>>,
|
||||||
doc_id_map: Option<&DocIdMapping>,
|
doc_id_map: Option<&DocIdMapping>,
|
||||||
) -> io::Result<()> {
|
) -> io::Result<()> {
|
||||||
|
debug_time!("segment-serialize-all-fast-fields",);
|
||||||
for field_writer in self.term_id_writers {
|
for field_writer in self.term_id_writers {
|
||||||
let field = field_writer.field();
|
let field = field_writer.field();
|
||||||
field_writer.serialize(serializer, mapping.get(&field), doc_id_map)?;
|
field_writer.serialize(serializer, mapping.get(&field), doc_id_map)?;
|
||||||
@@ -367,6 +369,11 @@ impl IntFastFieldWriter {
|
|||||||
num_vals: self.val_count as u64,
|
num_vals: self.val_count as u64,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
trace_time!(
|
||||||
|
"segment-serialize-single-value-field, field_id {:?}",
|
||||||
|
self.field()
|
||||||
|
);
|
||||||
|
|
||||||
serializer.create_auto_detect_u64_fast_field(self.field, fastfield_accessor)?;
|
serializer.create_auto_detect_u64_fast_field(self.field, fastfield_accessor)?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ use std::sync::Arc;
|
|||||||
|
|
||||||
use fastfield_codecs::VecColumn;
|
use fastfield_codecs::VecColumn;
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use measure_time::debug_time;
|
use measure_time::{debug_time, trace_time};
|
||||||
|
|
||||||
use crate::core::{Segment, SegmentReader};
|
use crate::core::{Segment, SegmentReader};
|
||||||
use crate::docset::{DocSet, TERMINATED};
|
use crate::docset::{DocSet, TERMINATED};
|
||||||
@@ -250,7 +250,11 @@ impl IndexMerger {
|
|||||||
mut term_ord_mappings: HashMap<Field, TermOrdinalMapping>,
|
mut term_ord_mappings: HashMap<Field, TermOrdinalMapping>,
|
||||||
doc_id_mapping: &SegmentDocIdMapping,
|
doc_id_mapping: &SegmentDocIdMapping,
|
||||||
) -> crate::Result<()> {
|
) -> crate::Result<()> {
|
||||||
debug_time!("write-fast-fields");
|
debug_time!(
|
||||||
|
"merge-all-fast-fields, num_segments {}, num docs new segment {}",
|
||||||
|
self.readers.len(),
|
||||||
|
doc_id_mapping.len()
|
||||||
|
);
|
||||||
|
|
||||||
for (field, field_entry) in self.schema.fields() {
|
for (field, field_entry) in self.schema.fields() {
|
||||||
let field_type = field_entry.field_type();
|
let field_type = field_entry.field_type();
|
||||||
@@ -311,6 +315,12 @@ impl IndexMerger {
|
|||||||
doc_id_mapping: &SegmentDocIdMapping,
|
doc_id_mapping: &SegmentDocIdMapping,
|
||||||
) -> crate::Result<()> {
|
) -> crate::Result<()> {
|
||||||
let fast_field_accessor = SortedDocIdColumn::new(&self.readers, doc_id_mapping, field);
|
let fast_field_accessor = SortedDocIdColumn::new(&self.readers, doc_id_mapping, field);
|
||||||
|
trace_time!(
|
||||||
|
"merge-single-fast-field, num_vals {}, num_segments {}, field_id {:?}",
|
||||||
|
fast_field_accessor.num_vals(),
|
||||||
|
self.readers.len(),
|
||||||
|
field
|
||||||
|
);
|
||||||
fast_field_serializer.create_auto_detect_u64_fast_field(field, fast_field_accessor)?;
|
fast_field_serializer.create_auto_detect_u64_fast_field(field, fast_field_accessor)?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -458,6 +468,12 @@ impl IndexMerger {
|
|||||||
fast_field_serializer: &mut CompositeFastFieldSerializer,
|
fast_field_serializer: &mut CompositeFastFieldSerializer,
|
||||||
doc_id_mapping: &SegmentDocIdMapping,
|
doc_id_mapping: &SegmentDocIdMapping,
|
||||||
) -> crate::Result<Vec<u64>> {
|
) -> crate::Result<Vec<u64>> {
|
||||||
|
trace_time!(
|
||||||
|
"merge-multi-fast-field-idx, num_segments {}, field_id {:?}",
|
||||||
|
self.readers.len(),
|
||||||
|
field
|
||||||
|
);
|
||||||
|
|
||||||
let reader_ordinal_and_field_accessors = self
|
let reader_ordinal_and_field_accessors = self
|
||||||
.readers
|
.readers
|
||||||
.iter()
|
.iter()
|
||||||
@@ -488,7 +504,7 @@ impl IndexMerger {
|
|||||||
fast_field_serializer: &mut CompositeFastFieldSerializer,
|
fast_field_serializer: &mut CompositeFastFieldSerializer,
|
||||||
doc_id_mapping: &SegmentDocIdMapping,
|
doc_id_mapping: &SegmentDocIdMapping,
|
||||||
) -> crate::Result<()> {
|
) -> crate::Result<()> {
|
||||||
debug_time!("write-term-id-fast-field");
|
trace_time!("write-term-id-fast-field");
|
||||||
|
|
||||||
// Multifastfield consists of 2 fastfields.
|
// Multifastfield consists of 2 fastfields.
|
||||||
// The first serves as an index into the second one and is strictly increasing.
|
// The first serves as an index into the second one and is strictly increasing.
|
||||||
@@ -571,6 +587,13 @@ impl IndexMerger {
|
|||||||
|
|
||||||
let fastfield_accessor =
|
let fastfield_accessor =
|
||||||
SortedDocIdMultiValueColumn::new(&self.readers, doc_id_mapping, &offsets, field);
|
SortedDocIdMultiValueColumn::new(&self.readers, doc_id_mapping, &offsets, field);
|
||||||
|
trace_time!(
|
||||||
|
"merge-multi-fast-field-values, num_vals {}, num_segments {}, field_id {:?}",
|
||||||
|
fastfield_accessor.num_vals(),
|
||||||
|
self.readers.len(),
|
||||||
|
field
|
||||||
|
);
|
||||||
|
|
||||||
fast_field_serializer.create_auto_detect_u64_fast_field_with_idx(
|
fast_field_serializer.create_auto_detect_u64_fast_field_with_idx(
|
||||||
field,
|
field,
|
||||||
fastfield_accessor,
|
fastfield_accessor,
|
||||||
@@ -624,7 +647,7 @@ impl IndexMerger {
|
|||||||
fieldnorm_reader: Option<FieldNormReader>,
|
fieldnorm_reader: Option<FieldNormReader>,
|
||||||
doc_id_mapping: &SegmentDocIdMapping,
|
doc_id_mapping: &SegmentDocIdMapping,
|
||||||
) -> crate::Result<Option<TermOrdinalMapping>> {
|
) -> crate::Result<Option<TermOrdinalMapping>> {
|
||||||
debug_time!("write-postings-for-field");
|
debug_time!("write-postings-for-field {:?}", indexed_field);
|
||||||
let mut positions_buffer: Vec<u32> = Vec::with_capacity(1_000);
|
let mut positions_buffer: Vec<u32> = Vec::with_capacity(1_000);
|
||||||
let mut delta_computer = DeltaComputer::new();
|
let mut delta_computer = DeltaComputer::new();
|
||||||
|
|
||||||
@@ -827,7 +850,7 @@ impl IndexMerger {
|
|||||||
debug!("write-storable-field");
|
debug!("write-storable-field");
|
||||||
|
|
||||||
if !doc_id_mapping.is_trivial() {
|
if !doc_id_mapping.is_trivial() {
|
||||||
debug!("non-trivial-doc-id-mapping");
|
debug!("non-trivial-doc-id-mapping (index is sorted)");
|
||||||
|
|
||||||
let store_readers: Vec<_> = self
|
let store_readers: Vec<_> = self
|
||||||
.readers
|
.readers
|
||||||
@@ -855,7 +878,7 @@ impl IndexMerger {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
debug!("trivial-doc-id-mapping");
|
debug!("trivial-doc-id-mapping (index is not sorted)");
|
||||||
for reader in &self.readers {
|
for reader in &self.readers {
|
||||||
let store_reader = reader.get_store_reader(1)?;
|
let store_reader = reader.get_store_reader(1)?;
|
||||||
if reader.has_deletes()
|
if reader.has_deletes()
|
||||||
|
|||||||
Reference in New Issue
Block a user