mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-03-16 18:20:42 +00:00
Compare commits
3 Commits
release_ta
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
68a9066d13 | ||
|
|
d02559a4d1 | ||
|
|
1922abaf33 |
@@ -47,7 +47,7 @@ rustc-hash = "2.0.0"
|
||||
thiserror = "2.0.1"
|
||||
htmlescape = "0.3.1"
|
||||
fail = { version = "0.5.0", optional = true }
|
||||
time = { version = "0.3.35", features = ["serde-well-known"] }
|
||||
time = { version = "0.3.47", features = ["serde-well-known"] }
|
||||
smallvec = "1.8.0"
|
||||
rayon = "1.5.2"
|
||||
lru = "0.16.3"
|
||||
@@ -86,7 +86,7 @@ futures = "0.3.21"
|
||||
paste = "1.0.11"
|
||||
more-asserts = "0.3.1"
|
||||
rand_distr = "0.5"
|
||||
time = { version = "0.3.10", features = ["serde-well-known", "macros"] }
|
||||
time = { version = "0.3.47", features = ["serde-well-known", "macros"] }
|
||||
postcard = { version = "1.0.4", features = [
|
||||
"use-std",
|
||||
], default-features = false }
|
||||
@@ -201,4 +201,3 @@ harness = false
|
||||
[[bench]]
|
||||
name = "regex_all_terms"
|
||||
harness = false
|
||||
|
||||
|
||||
@@ -15,11 +15,10 @@ repository = "https://github.com/quickwit-oss/tantivy"
|
||||
byteorder = "1.4.3"
|
||||
ownedbytes = { version= "0.9", path="../ownedbytes" }
|
||||
async-trait = "0.1"
|
||||
time = { version = "0.3.10", features = ["serde-well-known"] }
|
||||
time = { version = "0.3.47", features = ["serde-well-known"] }
|
||||
serde = { version = "1.0.136", features = ["derive"] }
|
||||
|
||||
[dev-dependencies]
|
||||
binggan = "0.14.0"
|
||||
proptest = "1.0.0"
|
||||
rand = "0.9"
|
||||
|
||||
|
||||
@@ -94,7 +94,7 @@ impl MergePolicy for LogMergePolicy {
|
||||
fn compute_merge_candidates(&self, segments: &[SegmentMeta]) -> Vec<MergeCandidate> {
|
||||
let size_sorted_segments = segments
|
||||
.iter()
|
||||
.filter(|seg| seg.num_docs() <= (self.max_docs_before_merge as u32))
|
||||
.filter(|seg| (seg.num_docs() as usize) <= self.max_docs_before_merge)
|
||||
.sorted_by_key(|seg| std::cmp::Reverse(seg.max_doc()))
|
||||
.collect::<Vec<&SegmentMeta>>();
|
||||
|
||||
@@ -372,4 +372,21 @@ mod tests {
|
||||
assert_eq!(merge_candidates[0].0.len(), 1);
|
||||
assert_eq!(merge_candidates[0].0[0], test_input[1].id());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_max_docs_before_merge_large_value() {
|
||||
// Regression test: (max_docs_before_merge as u32) truncates values > u32::MAX.
|
||||
// Casting num_docs() to usize instead avoids the truncation.
|
||||
let mut policy = LogMergePolicy::default();
|
||||
policy.set_min_num_segments(2);
|
||||
policy.set_max_docs_before_merge(5_000_000_000usize);
|
||||
let test_input = vec![
|
||||
create_random_segment_meta(100_000),
|
||||
create_random_segment_meta(100_000),
|
||||
];
|
||||
let result = policy.compute_merge_candidates(&test_input);
|
||||
// Both segments should be eligible (100_000 < 5_000_000_000)
|
||||
assert_eq!(result.len(), 1);
|
||||
assert_eq!(result[0].0.len(), 2);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -403,7 +403,8 @@ impl SegmentUpdater {
|
||||
// from the different drives.
|
||||
//
|
||||
// Segment 1 from disk 1, Segment 1 from disk 2, etc.
|
||||
committed_segment_metas.sort_by_key(|segment_meta| -(segment_meta.max_doc() as i32));
|
||||
committed_segment_metas
|
||||
.sort_by_key(|segment_meta| std::cmp::Reverse(segment_meta.max_doc()));
|
||||
let index_meta = IndexMeta {
|
||||
index_settings: index.settings().clone(),
|
||||
segments: committed_segment_metas,
|
||||
@@ -705,6 +706,7 @@ mod tests {
|
||||
use crate::collector::TopDocs;
|
||||
use crate::directory::RamDirectory;
|
||||
use crate::fastfield::AliveBitSet;
|
||||
use crate::index::{SegmentId, SegmentMetaInventory};
|
||||
use crate::indexer::merge_policy::tests::MergeWheneverPossible;
|
||||
use crate::indexer::merger::IndexMerger;
|
||||
use crate::indexer::segment_updater::merge_filtered_segments;
|
||||
@@ -712,6 +714,22 @@ mod tests {
|
||||
use crate::schema::*;
|
||||
use crate::{Directory, DocAddress, Index, Segment};
|
||||
|
||||
#[test]
|
||||
fn test_segment_sort_large_max_doc() {
|
||||
// Regression test: -(max_doc as i32) overflows for max_doc >= 2^31.
|
||||
// Using std::cmp::Reverse avoids this.
|
||||
let inventory = SegmentMetaInventory::default();
|
||||
let mut metas = vec![
|
||||
inventory.new_segment_meta(SegmentId::generate_random(), 100),
|
||||
inventory.new_segment_meta(SegmentId::generate_random(), (1u32 << 31) - 1),
|
||||
inventory.new_segment_meta(SegmentId::generate_random(), 50_000),
|
||||
];
|
||||
metas.sort_by_key(|m| std::cmp::Reverse(m.max_doc()));
|
||||
assert_eq!(metas[0].max_doc(), (1u32 << 31) - 1);
|
||||
assert_eq!(metas[1].max_doc(), 50_000);
|
||||
assert_eq!(metas[2].max_doc(), 100);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_delete_during_merge() -> crate::Result<()> {
|
||||
let mut schema_builder = Schema::builder();
|
||||
|
||||
@@ -169,8 +169,10 @@ mod macros;
|
||||
mod future_result;
|
||||
|
||||
// Re-exports
|
||||
pub use columnar;
|
||||
pub use common::{ByteCount, DateTime};
|
||||
pub use {columnar, query_grammar, time};
|
||||
pub use query_grammar;
|
||||
pub use time;
|
||||
|
||||
pub use crate::error::TantivyError;
|
||||
pub use crate::future_result::FutureResult;
|
||||
|
||||
Reference in New Issue
Block a user