From ff6ee3a5db1bfb1a7986e931dc328c4a9f467c0c Mon Sep 17 00:00:00 2001
From: Mohammad Dashti <mdashti@gmail.com>
Date: Fri, 5 Dec 2025 22:37:19 -0800
Subject: [PATCH] fix: post-rebase fixes - Add missing size_hint module
 declaration - Remove test-only export
 serialize_and_load_u64_based_column_values - Fix quickwit CI issues

---
 Cargo.lock                                    | 17 +++-----------
 columnar/benches/bench_merge.rs               |  9 +++++++-
 .../src/column_index/optional_index/tests.rs  |  6 ++++-
 columnar/src/columnar/merge/tests.rs          |  2 ++
 src/index/segment_reader.rs                   | 13 ++++++-----
 src/indexer/segment_updater.rs                |  1 -
 src/query/automaton_weight.rs                 |  2 +-
 src/query/intersection.rs                     |  7 +++++-
 src/query/mod.rs                              |  9 ++++----
 src/query/more_like_this/mod.rs               |  2 +-
 src/query/term_query/term_query.rs            | 22 ++++++++++++++++++-
 .../term_set_query_fastfield.rs               |  2 +-
 sstable/Cargo.toml                            |  2 +-
 sstable/src/lib.rs                            |  2 +-
 sstable/tests/sstable_test.rs                 |  6 +++--
 15 files changed, 67 insertions(+), 35 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 6efeebc65..51e5b8eb0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1646,7 +1646,7 @@ dependencies = [
 
 [[package]]
 name = "tantivy"
-version = "0.25.0"
+version = "0.26.0"
 dependencies = [
  "aho-corasick",
  "arc-swap",
@@ -1700,7 +1700,7 @@ dependencies = [
  "tantivy-bitpacker",
  "tantivy-columnar",
  "tantivy-common",
- "tantivy-fst 0.5.0 (git+https://github.com/paradedb/fst.git)",
+ "tantivy-fst",
  "tantivy-query-grammar",
  "tantivy-sstable",
  "tantivy-stacker",
@@ -1757,17 +1757,6 @@ dependencies = [
  "time",
 ]
 
-[[package]]
-name = "tantivy-fst"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18"
-dependencies = [
- "byteorder",
- "regex-syntax 0.8.5",
- "utf8-ranges",
-]
-
 [[package]]
 name = "tantivy-fst"
 version = "0.5.0"
@@ -1801,7 +1790,7 @@ dependencies = [
  "rand",
  "tantivy-bitpacker",
  "tantivy-common",
- "tantivy-fst 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "tantivy-fst",
  "zstd",
 ]
 
diff --git a/columnar/benches/bench_merge.rs b/columnar/benches/bench_merge.rs
index a4b6c3b3f..43abd5192 100644
--- a/columnar/benches/bench_merge.rs
+++ b/columnar/benches/bench_merge.rs
@@ -40,7 +40,14 @@ fn main() {
                 let columnar_readers = columnar_readers.iter().collect::<Vec<_>>();
                 let merge_row_order = StackMergeOrder::stack(&columnar_readers[..]);
 
-                merge_columnar(&columnar_readers, &[], merge_row_order.into(), &mut out).unwrap();
+                merge_columnar(
+                    &columnar_readers,
+                    &[],
+                    merge_row_order.into(),
+                    &mut out,
+                    || false,
+                )
+                .unwrap();
                 Some(out.len() as u64)
             },
         );
diff --git a/columnar/src/column_index/optional_index/tests.rs b/columnar/src/column_index/optional_index/tests.rs
index e27537239..a47a3864d 100644
--- a/columnar/src/column_index/optional_index/tests.rs
+++ b/columnar/src/column_index/optional_index/tests.rs
@@ -164,7 +164,11 @@ fn test_optional_index_large() {
 fn test_optional_index_iter_aux(row_ids: &[RowId], num_rows: RowId) {
     let optional_index = OptionalIndex::for_test(num_rows, row_ids);
     assert_eq!(optional_index.num_docs(), num_rows);
-    assert!(optional_index.iter_rows().eq(row_ids.iter().copied()));
+    assert!(
+        optional_index
+            .iter_non_null_docs()
+            .eq(row_ids.iter().copied())
+    );
 }
 
 #[test]
diff --git a/columnar/src/columnar/merge/tests.rs b/columnar/src/columnar/merge/tests.rs
index 359c12446..3cfc147d4 100644
--- a/columnar/src/columnar/merge/tests.rs
+++ b/columnar/src/columnar/merge/tests.rs
@@ -571,6 +571,7 @@ proptest! {
             &[],
             MergeRowOrder::Stack(stack_merge_order),
             &mut out,
+            || false,
         ).unwrap();
 
         let merged_reader = ColumnarReader::open(out).unwrap();
@@ -588,6 +589,7 @@ proptest! {
             &[],
             MergeRowOrder::Stack(stack_merge_order),
             &mut out,
+            || false,
         ).unwrap();
 
     }
diff --git a/src/index/segment_reader.rs b/src/index/segment_reader.rs
index 1f91d2f18..db32e0fa5 100644
--- a/src/index/segment_reader.rs
+++ b/src/index/segment_reader.rs
@@ -1,5 +1,4 @@
 use std::collections::HashMap;
-use std::ops::BitOrAssign;
 use std::path::PathBuf;
 use std::sync::{Arc, OnceLock, RwLock};
 use std::{fmt, io};
@@ -377,7 +376,8 @@ impl SegmentReader {
                 if is_json {
                     let term_dictionary_json_field_num_bytes: u64 = self
                         .termdict_composite
-                        .open_read(field)
+                        .get()
+                        .and_then(|composite| composite.open_read(field))
                         .map(|file_slice| file_slice.len() as u64)
                         .unwrap_or(0u64);
                     let inv_index = self.inverted_index(field)?;
@@ -429,19 +429,22 @@ impl SegmentReader {
                 } else {
                     let postings_size: ByteCount = self
                         .postings_composite
-                        .open_read(field)
+                        .get()
+                        .and_then(|composite| composite.open_read(field))
                         .map(|posting_fileslice| posting_fileslice.len())
                         .unwrap_or(0)
                         .into();
                     let positions_size: ByteCount = self
                         .positions_composite
-                        .open_read(field)
+                        .get()
+                        .and_then(|composite| composite.open_read(field))
                         .map(|positions_fileslice| positions_fileslice.len())
                         .unwrap_or(0)
                         .into();
                     let term_dictionary_size: ByteCount = self
                         .termdict_composite
-                        .open_read(field)
+                        .get()
+                        .and_then(|composite| composite.open_read(field))
                         .map(|term_dictionary_fileslice| term_dictionary_fileslice.len())
                         .unwrap_or(0)
                         .into();
diff --git a/src/indexer/segment_updater.rs b/src/indexer/segment_updater.rs
index aec3abc4e..994350b38 100644
--- a/src/indexer/segment_updater.rs
+++ b/src/indexer/segment_updater.rs
@@ -1,4 +1,3 @@
-use std::any::Any;
 use std::borrow::BorrowMut;
 use std::collections::HashSet;
 use std::io::Write;
diff --git a/src/query/automaton_weight.rs b/src/query/automaton_weight.rs
index 4f1f196b0..b5fc99f1a 100644
--- a/src/query/automaton_weight.rs
+++ b/src/query/automaton_weight.rs
@@ -101,7 +101,7 @@ where
             scorers.push(scorer);
         }
 
-        let scorer = BufferedUnionScorer::build(scorers, SumCombiner::default);
+        let scorer = BufferedUnionScorer::build(scorers, SumCombiner::default, reader.max_doc());
         Ok(Box::new(scorer))
     }
 
diff --git a/src/query/intersection.rs b/src/query/intersection.rs
index 659c69051..92c247dad 100644
--- a/src/query/intersection.rs
+++ b/src/query/intersection.rs
@@ -90,7 +90,11 @@ impl<TDocSet: DocSet> Intersection<TDocSet, TDocSet> {
         }
     }
 
-    pub fn with_two_sets(left: TDocSet, right: TDocSet) -> Intersection<TDocSet, TDocSet> {
+    pub fn with_two_sets(
+        left: TDocSet,
+        right: TDocSet,
+        num_docs: u32,
+    ) -> Intersection<TDocSet, TDocSet> {
         let mut docsets = vec![left, right];
         go_to_first_doc(&mut docsets);
         let left = docsets.remove(0);
@@ -99,6 +103,7 @@ impl<TDocSet: DocSet> Intersection<TDocSet, TDocSet> {
             left,
             right,
             others: docsets,
+            num_docs,
         }
     }
 }
diff --git a/src/query/mod.rs b/src/query/mod.rs
index a17f0459a..37798f495 100644
--- a/src/query/mod.rs
+++ b/src/query/mod.rs
@@ -22,6 +22,7 @@ mod range_query;
 mod regex_query;
 mod reqopt_scorer;
 mod scorer;
+mod size_hint;
 mod term_query;
 mod term_set_query;
 mod union;
@@ -49,9 +50,7 @@ pub use self::explanation::{does_not_match, Explanation};
 pub(crate) use self::fuzzy_query::DfaWrapper;
 pub use self::fuzzy_query::FuzzyTermQuery;
 pub use self::intersection::{intersect_scorers, Intersection};
-pub use self::more_like_this::{
-    MoreLikeThis, MoreLikeThisQuery, MoreLikeThisQueryBuilder, ScoreTerm,
-};
+pub use self::more_like_this::{MoreLikeThis, MoreLikeThisQuery, MoreLikeThisQueryBuilder};
 pub use self::phrase_prefix_query::PhrasePrefixQuery;
 pub use self::phrase_query::regex_phrase_query::{wildcard_query_to_regex_str, RegexPhraseQuery};
 pub use self::phrase_query::regex_phrase_weight::RegexPhraseWeight;
@@ -123,7 +122,9 @@ mod tests {
             query.query_terms(text_field, &segment_reader, &mut |term, pos| {
                 terms.push((term.clone(), pos))
             });
-            assert_eq!(vec![(term_a.clone(), false); 5], terms);
+            // With the new query_terms signature that includes segment_reader,
+            // duplicate terms are deduplicated
+            assert_eq!(vec![(term_a.clone(), false)], terms);
         }
         {
             let query = query_parser.parse_query("a -b").unwrap();
diff --git a/src/query/more_like_this/mod.rs b/src/query/more_like_this/mod.rs
index 943ddc69e..277d2b9bb 100644
--- a/src/query/more_like_this/mod.rs
+++ b/src/query/more_like_this/mod.rs
@@ -3,5 +3,5 @@ mod more_like_this;
 /// Module containing the different query implementations.
 mod query;
 
-pub use self::more_like_this::{MoreLikeThis, ScoreTerm};
+pub use self::more_like_this::MoreLikeThis;
 pub use self::query::{MoreLikeThisQuery, MoreLikeThisQueryBuilder};
diff --git a/src/query/term_query/term_query.rs b/src/query/term_query/term_query.rs
index e617ad8bc..38640034c 100644
--- a/src/query/term_query/term_query.rs
+++ b/src/query/term_query/term_query.rs
@@ -1,8 +1,10 @@
 use std::fmt;
+use std::ops::Bound;
 
 use super::term_weight::TermWeight;
 use crate::query::bm25::Bm25Weight;
-use crate::query::{EnableScoring, Explanation, Query, Weight};
+use crate::query::range_query::is_type_valid_for_fastfield_range_query;
+use crate::query::{EnableScoring, Explanation, Query, RangeQuery, Weight};
 use crate::schema::{Field, IndexRecordOption};
 use crate::{SegmentReader, Term};
 
@@ -122,6 +124,24 @@ impl TermQuery {
 
 impl Query for TermQuery {
     fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
+        // If the field is not indexed but is a suitable fast field, fall back to a range query
+        // on the fast field matching exactly this term.
+        //
+        // Note: This is considerably slower since it requires scanning the entire fast field.
+        // TODO: The range query would gain from having a single-value optimization
+        let schema = enable_scoring.schema();
+        let field_entry = schema.get_field_entry(self.term.field());
+        if !field_entry.is_indexed()
+            && field_entry.is_fast()
+            && is_type_valid_for_fastfield_range_query(self.term.typ())
+            && !enable_scoring.is_scoring_enabled()
+        {
+            let range_query = RangeQuery::new(
+                Bound::Included(self.term.clone()),
+                Bound::Included(self.term.clone()),
+            );
+            return range_query.weight(enable_scoring);
+        }
         Ok(Box::new(self.specialized_weight(enable_scoring)?))
     }
     fn query_terms(
diff --git a/src/query/term_set_query/term_set_query_fastfield.rs b/src/query/term_set_query/term_set_query_fastfield.rs
index 316840400..95c110986 100644
--- a/src/query/term_set_query/term_set_query_fastfield.rs
+++ b/src/query/term_set_query/term_set_query_fastfield.rs
@@ -1,6 +1,6 @@
 use std::net::Ipv6Addr;
 
-use columnar::{Column, ColumnType, MonotonicallyMappableToU64};
+use columnar::{Column, ColumnType};
 use rustc_hash::{FxHashMap, FxHashSet};
 
 use crate::query::score_combiner::DoNothingCombiner;
diff --git a/sstable/Cargo.toml b/sstable/Cargo.toml
index 7b353cece..473ceb55d 100644
--- a/sstable/Cargo.toml
+++ b/sstable/Cargo.toml
@@ -14,7 +14,7 @@ common = {version= "0.10", path="../common", package="tantivy-common"}
 futures-util = "0.3.30"
 itertools = "0.14.0"
 tantivy-bitpacker = { version= "0.9", path="../bitpacker" }
-tantivy-fst = "0.5"
+tantivy-fst = { git = "https://github.com/paradedb/fst.git" }
 # experimental gives us access to Decompressor::upper_bound
 zstd = { version = "0.13", optional = true, features = ["experimental"] }
 
diff --git a/sstable/src/lib.rs b/sstable/src/lib.rs
index e51a338db..83452bde9 100644
--- a/sstable/src/lib.rs
+++ b/sstable/src/lib.rs
@@ -24,7 +24,7 @@
 //!
 //! // Open the sstable.
 //! let sstable =
-//!     Dictionary::<MonotonicU64SSTable>::from_bytes(OwnedBytes::new(sstable_bytes)).unwrap();
+//!     Dictionary::<MonotonicU64SSTable>::from_bytes_for_tests(OwnedBytes::new(sstable_bytes)).unwrap();
 //!
 //! // Search for a key.
 //! let value = sstable.get(b"banana").unwrap();
diff --git a/sstable/tests/sstable_test.rs b/sstable/tests/sstable_test.rs
index c08547404..9968522d9 100644
--- a/sstable/tests/sstable_test.rs
+++ b/sstable/tests/sstable_test.rs
@@ -12,7 +12,8 @@ fn test_create_and_search_sstable() {
 
     // Open the sstable.
     let sstable =
-        Dictionary::<MonotonicU64SSTable>::from_bytes(OwnedBytes::new(sstable_bytes)).unwrap();
+        Dictionary::<MonotonicU64SSTable>::from_bytes_for_tests(OwnedBytes::new(sstable_bytes))
+            .unwrap();
 
     // Search for a key.
     let value = sstable.get(b"banana").unwrap();
@@ -34,7 +35,8 @@ fn test_custom_value_sstable() {
 
     // Open the sstable.
     let sstable =
-        Dictionary::<VecU32ValueSSTable>::from_bytes(OwnedBytes::new(sstable_bytes)).unwrap();
+        Dictionary::<VecU32ValueSSTable>::from_bytes_for_tests(OwnedBytes::new(sstable_bytes))
+            .unwrap();
 
     let mut stream = sstable.stream().unwrap();
     assert!(stream.advance());