Compare commits

...

3 Commits

Author SHA1 Message Date
Paul Masurel
68a9066d13 Fix format (#2852)
Co-authored-by: Paul Masurel <paul.masurel@datadoghq.com>
2026-03-16 10:43:39 +01:00
Paul Masurel
d02559a4d1 Update time deps to defensively address a vulnerability. (#2850)
Closes #2849

Co-authored-by: Paul Masurel <paul.masurel@datadoghq.com>
2026-03-12 16:47:11 +01:00
Anas Limem
1922abaf33 Fixed integer overflow in segment sorting and merge policy truncation (#2846)
2026-03-12 16:44:38 +01:00
5 changed files with 43 additions and 8 deletions

View File

@@ -47,7 +47,7 @@ rustc-hash = "2.0.0"
thiserror = "2.0.1"
htmlescape = "0.3.1"
fail = { version = "0.5.0", optional = true }
time = { version = "0.3.35", features = ["serde-well-known"] }
time = { version = "0.3.47", features = ["serde-well-known"] }
smallvec = "1.8.0"
rayon = "1.5.2"
lru = "0.16.3"
@@ -86,7 +86,7 @@ futures = "0.3.21"
paste = "1.0.11"
more-asserts = "0.3.1"
rand_distr = "0.5"
time = { version = "0.3.10", features = ["serde-well-known", "macros"] }
time = { version = "0.3.47", features = ["serde-well-known", "macros"] }
postcard = { version = "1.0.4", features = [
"use-std",
], default-features = false }
@@ -201,4 +201,3 @@ harness = false
[[bench]]
name = "regex_all_terms"
harness = false

View File

@@ -15,11 +15,10 @@ repository = "https://github.com/quickwit-oss/tantivy"
byteorder = "1.4.3"
ownedbytes = { version= "0.9", path="../ownedbytes" }
async-trait = "0.1"
time = { version = "0.3.10", features = ["serde-well-known"] }
time = { version = "0.3.47", features = ["serde-well-known"] }
serde = { version = "1.0.136", features = ["derive"] }
[dev-dependencies]
binggan = "0.14.0"
proptest = "1.0.0"
rand = "0.9"

View File

@@ -94,7 +94,7 @@ impl MergePolicy for LogMergePolicy {
fn compute_merge_candidates(&self, segments: &[SegmentMeta]) -> Vec<MergeCandidate> {
let size_sorted_segments = segments
.iter()
.filter(|seg| seg.num_docs() <= (self.max_docs_before_merge as u32))
.filter(|seg| (seg.num_docs() as usize) <= self.max_docs_before_merge)
.sorted_by_key(|seg| std::cmp::Reverse(seg.max_doc()))
.collect::<Vec<&SegmentMeta>>();
@@ -372,4 +372,21 @@ mod tests {
assert_eq!(merge_candidates[0].0.len(), 1);
assert_eq!(merge_candidates[0].0[0], test_input[1].id());
}
// Only meaningful where usize is wider than u32: on narrower targets the limit below
// cannot be represented, and no truncation is possible in the first place.
#[cfg(target_pointer_width = "64")]
#[test]
fn test_max_docs_before_merge_large_value() {
    // Regression test: (max_docs_before_merge as u32) truncates values > u32::MAX.
    // Casting num_docs() to usize instead avoids the truncation.
    //
    // The limit is chosen as u32::MAX + 1 because its low 32 bits are all zero: a
    // truncating cast turns it into 0, so a filter comparing against the truncated
    // value would reject both segments below and this test would fail. A limit such
    // as 5_000_000_000 truncates to 705_032_704, which is still larger than the
    // segment sizes used here and therefore cannot detect the truncation.
    let max_docs_before_merge = (u32::MAX as usize) + 1;
    let mut policy = LogMergePolicy::default();
    policy.set_min_num_segments(2);
    policy.set_max_docs_before_merge(max_docs_before_merge);
    let test_input = vec![
        create_random_segment_meta(100_000),
        create_random_segment_meta(100_000),
    ];
    let result = policy.compute_merge_candidates(&test_input);
    // Both segments should be eligible (100_000 < u32::MAX + 1) and, with
    // min_num_segments set to 2, end up together in a single merge candidate.
    assert_eq!(result.len(), 1);
    assert_eq!(result[0].0.len(), 2);
}
}

View File

@@ -403,7 +403,8 @@ impl SegmentUpdater {
// from the different drives.
//
// Segment 1 from disk 1, Segment 1 from disk 2, etc.
committed_segment_metas.sort_by_key(|segment_meta| -(segment_meta.max_doc() as i32));
committed_segment_metas
.sort_by_key(|segment_meta| std::cmp::Reverse(segment_meta.max_doc()));
let index_meta = IndexMeta {
index_settings: index.settings().clone(),
segments: committed_segment_metas,
@@ -705,6 +706,7 @@ mod tests {
use crate::collector::TopDocs;
use crate::directory::RamDirectory;
use crate::fastfield::AliveBitSet;
use crate::index::{SegmentId, SegmentMetaInventory};
use crate::indexer::merge_policy::tests::MergeWheneverPossible;
use crate::indexer::merger::IndexMerger;
use crate::indexer::segment_updater::merge_filtered_segments;
@@ -712,6 +714,22 @@ mod tests {
use crate::schema::*;
use crate::{Directory, DocAddress, Index, Segment};
#[test]
fn test_segment_sort_large_max_doc() {
    // Regression test for ordering segments by descending max_doc.
    // The former sort key, -(max_doc as i32), overflows for max_doc >= 2^31;
    // wrapping the u32 in std::cmp::Reverse needs no signed conversion at all.
    //
    // NOTE(review): the largest value used here, 2^31 - 1, still fits in an i32, so
    // the former key would order these three inputs the same way. Confirm whether a
    // larger max_doc can legitimately be constructed before tightening this test.
    let largest_max_doc = (1u32 << 31) - 1;
    let segment_meta_inventory = SegmentMetaInventory::default();
    let mut segment_metas: Vec<_> = [100, largest_max_doc, 50_000]
        .into_iter()
        .map(|max_doc| {
            segment_meta_inventory.new_segment_meta(SegmentId::generate_random(), max_doc)
        })
        .collect();
    segment_metas.sort_by_key(|segment_meta| std::cmp::Reverse(segment_meta.max_doc()));
    // Largest segment first, smallest last.
    let sorted_max_docs: Vec<_> = segment_metas
        .iter()
        .map(|segment_meta| segment_meta.max_doc())
        .collect();
    assert_eq!(sorted_max_docs, [largest_max_doc, 50_000, 100]);
}
#[test]
fn test_delete_during_merge() -> crate::Result<()> {
let mut schema_builder = Schema::builder();

View File

@@ -169,8 +169,10 @@ mod macros;
mod future_result;
// Re-exports
pub use columnar;
pub use common::{ByteCount, DateTime};
pub use {columnar, query_grammar, time};
pub use query_grammar;
pub use time;
pub use crate::error::TantivyError;
pub use crate::future_result::FutureResult;