Compare commits

..

13 Commits

Author SHA1 Message Date
Paul Masurel
08a723d936 Revert "add dense codec" 2022-12-12 12:00:54 +09:00
PSeitz
a05a0035f8 Merge pull request #1711 from quickwit-oss/sparse_dense_index
add dense codec
2022-12-09 08:48:43 +01:00
Pascal Seitz
976128a412 extend benchmarks 2022-12-09 15:21:25 +08:00
PSeitz
f27b3e312d Apply suggestions from code review
Co-authored-by: Paul Masurel <paul@quickwit.io>
2022-12-09 08:01:56 +01:00
PSeitz
56dea6f08d Apply suggestions from code review
Co-authored-by: Paul Masurel <paul@quickwit.io>
2022-12-09 08:01:02 +01:00
Pascal Seitz
789d29cf45 move code to DenseIndexBlock
improve benchmark
2022-12-09 14:18:26 +08:00
Paul Masurel
a36b50d825 benchmark fix and important optimisation 2022-12-08 18:55:20 +09:00
PSeitz
09f65e5467 Merge pull request #1707 from quickwit-oss/bump_version
bump version
2022-12-08 09:03:47 +01:00
Pascal Seitz
2c2f5c3877 add dense codec 2022-12-08 12:40:32 +08:00
Pascal Seitz
11b01e4141 chore: Release 2022-12-02 16:45:18 +08:00
Pascal Seitz
3e8852c606 revert tant version 2022-12-02 16:44:34 +08:00
Pascal Seitz
725f1ecb80 update cargo.toml 2022-12-02 16:43:17 +08:00
Pascal Seitz
afa27afe7d group workspace deps 2022-12-02 16:31:30 +08:00

View File

@@ -206,7 +206,6 @@ pub struct SegmentHistogramCollector {
field_type: Type,
interval: f64,
offset: f64,
min_doc_count: u64,
first_bucket_num: i64,
bounds: HistogramBounds,
}
@@ -216,30 +215,6 @@ impl SegmentHistogramCollector {
self,
agg_with_accessor: &BucketAggregationWithAccessor,
) -> crate::Result<IntermediateBucketResult> {
// Compute the number of buckets to validate against max num buckets
// Note: We use min_doc_count here, but it's only a lower bound here, since we are on the
// intermediate level and after merging the number of documents of a bucket could exceed
// `min_doc_count`.
{
let cut_off_buckets_front = self
.buckets
.iter()
.take_while(|bucket| bucket.doc_count <= self.min_doc_count)
.count();
let cut_off_buckets_back = self.buckets[cut_off_buckets_front..]
.iter()
.rev()
.take_while(|bucket| bucket.doc_count <= self.min_doc_count)
.count();
let estimate_num_buckets =
self.buckets.len() - cut_off_buckets_front - cut_off_buckets_back;
agg_with_accessor
.bucket_count
.add_count(estimate_num_buckets as u32);
agg_with_accessor.bucket_count.validate_bucket_count()?;
}
let mut buckets = Vec::with_capacity(
self.buckets
.iter()
@@ -276,6 +251,11 @@ impl SegmentHistogramCollector {
);
};
agg_with_accessor
.bucket_count
.add_count(buckets.len() as u32);
agg_with_accessor.bucket_count.validate_bucket_count()?;
Ok(IntermediateBucketResult::Histogram { buckets })
}
@@ -328,7 +308,6 @@ impl SegmentHistogramCollector {
first_bucket_num,
bounds,
sub_aggregations,
min_doc_count: req.min_doc_count(),
})
}
@@ -1542,36 +1521,4 @@ mod tests {
Ok(())
}
#[test]
fn histogram_test_max_buckets_segments() -> crate::Result<()> {
let values = vec![0.0, 70000.0];
let index = get_test_index_from_values(true, &values)?;
let agg_req: Aggregations = vec![(
"my_interval".to_string(),
Aggregation::Bucket(BucketAggregation {
bucket_agg: BucketAggregationType::Histogram(HistogramAggregation {
field: "score_f64".to_string(),
interval: 1.0,
..Default::default()
}),
sub_aggregation: Default::default(),
}),
)]
.into_iter()
.collect();
let res = exec_request(agg_req, &index);
assert_eq!(
res.unwrap_err().to_string(),
"An invalid argument was passed: 'Aborting aggregation because too many buckets were \
created'"
.to_string()
);
Ok(())
}
}