Compare commits


27 Commits

Author SHA1 Message Date
Paul Masurel
4d08713c73 Update CHANGELOG with date range queries 2021-04-19 09:33:56 +09:00
Paul Masurel
570c4fdbb1 Cargo check 2021-04-19 08:54:28 +09:00
Paul Masurel
00506594b9 Removing TermMerger::next().
Closing #933
2021-04-19 08:54:28 +09:00
Paul Masurel
3313fd39fc Edited CHANGELOG 2021-04-19 08:54:28 +09:00
Hardik Prajapati
9dd8c268bd Simplified chain orderings 2021-04-19 08:54:28 +09:00
Hardik Prajapati
942cbfb383 Fixed formatting using cargo fmt 2021-04-19 08:54:28 +09:00
Hardik Prajapati
5d1627e52d Implementation of Ord trait changed for Hit
- This will result in lexicographical ordering of facet in BinaryHeep in case of a tie
2021-04-19 08:54:28 +09:00
Hardik Prajapati
84cad5c1aa Added failing test for tie scenario in topk 2021-04-19 08:54:28 +09:00
Rihards Krišlauks
f58345f0f0 Add a date range query example to QueryParser documentation 2021-04-18 22:13:02 +03:00
Rihards Krišlauks
f518012656 Test flexible bounds in date range queries 2021-04-17 19:30:09 +03:00
Rihards Krišlauks
12fb9a95cb Clean up leftover debug comments 2021-04-17 18:52:44 +03:00
Rihards Krišlauks
55e79e34af Verified that the change in datetime range test was correct
The value that was previously there was 3, and it made the test fail when I
enabled it. Verified that it should indeed have been 2 (the testing
code previously contained an error).
2021-04-17 18:16:52 +03:00
Rihards Krišlauks
1649f31258 Make time zone parsing more strict to match rfc3339 2021-04-17 17:57:46 +03:00
Rihards Krišlauks
7849736d80 Move all of the datetime parsing code into a single function
For readability
2021-04-17 17:23:47 +03:00
Rihards Krišlauks
e58401be78 Implement date range support in the query parser
Tests pass but needs cleanup
2021-04-13 23:32:22 +03:00
Paul Masurel
be1d9e0db7 Marks list_all_segment_metas() as crate private
Closes #1004
2021-04-07 23:39:28 +09:00
Paul Masurel
5743b46457 Merge pull request #1006 from tantivy-search/feat-merge-splits
Implements merging several indices into a brand new index.

Closes #1005
2021-04-07 23:38:14 +09:00
Paul Masurel
e67e5ebd46 Minor syntax changes, and passing a tantivy Directory as argument
Closes #1005.
2021-04-07 23:35:03 +09:00
Evance Souamoro
a550c85369 fixed issues & added test on merge_segments feature 2021-04-06 16:15:09 +00:00
Evance Souamoro
b185df2b22 added a scratch implementation but still need to craft one detail and write a test to validate 2021-04-06 11:48:51 +00:00
Evance Souamoro
f82922b354 added a scratch implementation but still need to craft one detail and write a test to validate 2021-04-06 11:46:17 +00:00
Paul Masurel
86b30d9d7f Cargo fmt 2021-03-31 12:20:31 +09:00
Paul Masurel
f1499d5b3e Cargo fmt 2021-03-31 11:44:03 +09:00
Paul Masurel
30b6828d71 Update actions.md 2021-03-31 10:36:13 +09:00
Paul Masurel
e6b7b7da0a Create actions.md 2021-03-31 10:34:33 +09:00
Paul Masurel
38a20ae269 Renamed SegmentLocalId to SegmentOrdinal for more homogeneity and edited
changelog
2021-03-29 09:25:42 +09:00
Stéphane Campinas
a0ec6e1e9d Expand the DocAddress struct with named fields 2021-03-28 19:00:23 +02:00
33 changed files with 627 additions and 298 deletions

.github/ISSUE_TEMPLATE/actions.md

@@ -0,0 +1,13 @@
---
name: Actions
about: Actions not directly related to producing code.
---
# Actions title
Action description.
e.g.
- benchmark
- investigate and report
- etc.


@@ -3,8 +3,10 @@ Tantivy 0.15.0
- API Changes. Using Range instead of (start, end) in the API and internals (`FileSlice`, `OwnedBytes`, `Snippets`, ...)
This change is breaking but migration is trivial.
- Added a Histogram collector. (@fulmicoton) #994
- Added support for Option<TCollector>. (@fulmicoton)
- DocAddress is now a struct (@scampi) #987
- Bugfix consistent tie break handling in facet's topk (@hardikpnsp) #357
- Date field support for range queries (@rihardsk) #516
Tantivy 0.14.0
=========================
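One of the 0.15.0 entries above is `Collector` support for `Option<TCollector>` (the `impl<TCollector: Collector> Collector for Option<TCollector>` hunk appears further down). A minimal sketch of how that could be used to toggle collection at runtime; the schema is made up, and the `Option`-wrapped fruit type is an assumption based on that hunk, not confirmed here:

```rust
use tantivy::collector::Count;
use tantivy::query::AllQuery;
use tantivy::schema::{Schema, TEXT};
use tantivy::Index;

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    schema_builder.add_text_field("text", TEXT); // hypothetical field
    let index = Index::create_in_ram(schema_builder.build());
    let searcher = index.reader()?.searcher();

    // Only count matches when the caller asked for it.
    let want_count = true;
    let maybe_count = if want_count { Some(Count) } else { None };

    // Presumably the fruit comes back wrapped in an Option as well.
    let count: Option<usize> = searcher.search(&AllQuery, &maybe_count)?;
    println!("count: {:?}", count);
    Ok(())
}
```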


@@ -1,11 +1,11 @@
use super::user_input_ast::{UserInputAST, UserInputBound, UserInputLeaf, UserInputLiteral};
use crate::Occur;
use combine::error::StringStreamError;
use combine::parser::char::{char, digit, letter, space, spaces, string};
use combine::parser::Parser;
use combine::{
attempt, choice, eof, many, many1, one_of, optional, parser, satisfy, skip_many1, value,
};
use combine::{error::StringStreamError, parser::combinator::recognize};
fn field<'a>() -> impl Parser<&'a str, Output = String> {
(
@@ -35,6 +35,62 @@ fn word<'a>() -> impl Parser<&'a str, Output = String> {
})
}
/// Parses a date time according to rfc3339
/// 2015-08-02T18:54:42+02
/// 2021-04-13T19:46:26.266051969+00:00
///
/// NOTE: also accepts 999999-99-99T99:99:99.266051969+99:99
/// We delegate rejecting such invalid dates to the logical AST computation code,
/// which invokes chrono::DateTime::parse_from_rfc3339 on the value to actually parse
/// it (instead of merely extracting the datetime value as a string, as done here).
fn date_time<'a>() -> impl Parser<&'a str, Output = String> {
let two_digits = || recognize::<String, _, _>((digit(), digit()));
// Parses a time zone
// -06:30
// Z
let time_zone = {
let utc = recognize::<String, _, _>(char('Z'));
let offset = recognize((
choice([char('-'), char('+')]),
two_digits(),
char(':'),
two_digits(),
));
utc.or(offset)
};
// Parses a date
// 2010-01-30
let date = {
recognize::<String, _, _>((
many1::<String, _, _>(digit()),
char('-'),
two_digits(),
char('-'),
two_digits(),
))
};
// Parses a time
// 12:30:02
// 19:46:26.266051969
let time = {
recognize::<String, _, _>((
two_digits(),
char(':'),
two_digits(),
char(':'),
two_digits(),
optional((char('.'), many1::<String, _, _>(digit()))),
time_zone,
))
};
recognize((date, char('T'), time))
}
fn term_val<'a>() -> impl Parser<&'a str, Output = String> {
let phrase = char('"').with(many1(satisfy(|c| c != '"'))).skip(char('"'));
phrase.or(word())
@@ -83,7 +139,8 @@ fn spaces1<'a>() -> impl Parser<&'a str, Output = ()> {
/// [a TO *], [a TO c], [abc TO bcd}
fn range<'a>() -> impl Parser<&'a str, Output = UserInputLeaf> {
let range_term_val = || {
word()
attempt(date_time())
.or(word())
.or(negative_number())
.or(char('*').with(value("*".to_string())))
};
@@ -324,6 +381,22 @@ mod test {
error_parse("-1.");
}
#[test]
fn test_date_time() {
let (val, remaining) = date_time()
.parse("2015-08-02T18:54:42+02:30")
.expect("cannot parse date");
assert_eq!(val, "2015-08-02T18:54:42+02:30");
assert_eq!(remaining, "");
assert!(date_time().parse("2015-08-02T18:54:42+02").is_err());
let (val, remaining) = date_time()
.parse("2021-04-13T19:46:26.266051969+00:00")
.expect("cannot parse fractional date");
assert_eq!(val, "2021-04-13T19:46:26.266051969+00:00");
assert_eq!(remaining, "");
}
fn test_parse_query_to_ast_helper(query: &str, expected: &str) {
let query = parse_to_ast().parse(query).unwrap().0;
let query_str = format!("{:?}", query);
@@ -437,25 +510,60 @@ mod test {
#[test]
fn test_range_parser() {
// testing the range() parser separately
let res = range().parse("title: <hello").unwrap().0;
let res = range()
.parse("title: <hello")
.expect("Cannot parse felxible bound word")
.0;
let expected = UserInputLeaf::Range {
field: Some("title".to_string()),
lower: UserInputBound::Unbounded,
upper: UserInputBound::Exclusive("hello".to_string()),
};
let res2 = range().parse("title:{* TO hello}").unwrap().0;
let res2 = range()
.parse("title:{* TO hello}")
.expect("Cannot parse ununbounded to word")
.0;
assert_eq!(res, expected);
assert_eq!(res2, expected);
let expected_weight = UserInputLeaf::Range {
field: Some("weight".to_string()),
lower: UserInputBound::Inclusive("71.2".to_string()),
upper: UserInputBound::Unbounded,
};
let res3 = range().parse("weight: >=71.2").unwrap().0;
let res4 = range().parse("weight:[71.2 TO *}").unwrap().0;
let res3 = range()
.parse("weight: >=71.2")
.expect("Cannot parse flexible bound float")
.0;
let res4 = range()
.parse("weight:[71.2 TO *}")
.expect("Cannot parse float to unbounded")
.0;
assert_eq!(res3, expected_weight);
assert_eq!(res4, expected_weight);
let expected_dates = UserInputLeaf::Range {
field: Some("date_field".to_string()),
lower: UserInputBound::Exclusive("2015-08-02T18:54:42Z".to_string()),
upper: UserInputBound::Inclusive("2021-08-02T18:54:42+02:30".to_string()),
};
let res5 = range()
.parse("date_field:{2015-08-02T18:54:42Z TO 2021-08-02T18:54:42+02:30]")
.expect("Cannot parse date range")
.0;
assert_eq!(res5, expected_dates);
let expected_flexible_dates = UserInputLeaf::Range {
field: Some("date_field".to_string()),
lower: UserInputBound::Unbounded,
upper: UserInputBound::Inclusive("2021-08-02T18:54:42.12345+02:30".to_string()),
};
let res6 = range()
.parse("date_field: <=2021-08-02T18:54:42.12345+02:30")
.expect("Cannot parse date range")
.0;
assert_eq!(res6, expected_flexible_dates);
}
#[test]
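As the doc comment on `date_time()` notes, the grammar only checks the shape of the datetime; rejecting impossible dates is delegated to `chrono::DateTime::parse_from_rfc3339` in the logical AST computation. A standalone sketch of that second stage (plain chrono, outside tantivy):

```rust
use chrono::DateTime;

fn main() {
    // Shape-valid and a real instant: accepted.
    assert!(DateTime::parse_from_rfc3339("2015-08-02T18:54:42+02:30").is_ok());
    // Passes the grammar's shape check, but chrono rejects the impossible date.
    assert!(DateTime::parse_from_rfc3339("999999-99-99T99:99:99.266051969+99:99").is_err());
}
```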


@@ -2,7 +2,7 @@ use super::Collector;
use crate::collector::SegmentCollector;
use crate::DocId;
use crate::Score;
use crate::SegmentLocalId;
use crate::SegmentOrdinal;
use crate::SegmentReader;
/// `CountCollector` collector only counts how many
@@ -45,7 +45,7 @@ impl Collector for Count {
fn for_segment(
&self,
_: SegmentLocalId,
_: SegmentOrdinal,
_: &SegmentReader,
) -> crate::Result<SegmentCountCollector> {
Ok(SegmentCountCollector::default())


@@ -15,7 +15,7 @@ impl Collector for DocSetCollector {
fn for_segment(
&self,
segment_local_id: crate::SegmentLocalId,
segment_local_id: crate::SegmentOrdinal,
_segment: &crate::SegmentReader,
) -> crate::Result<Self::Child> {
Ok(DocSetChildCollector {
@@ -36,7 +36,7 @@ impl Collector for DocSetCollector {
let mut result = HashSet::with_capacity(len);
for (segment_local_id, docs) in segment_fruits {
for doc in docs {
result.insert(DocAddress(segment_local_id, doc));
result.insert(DocAddress::new(segment_local_id, doc));
}
}
Ok(result)


@@ -5,7 +5,7 @@ use crate::schema::Facet;
use crate::schema::Field;
use crate::DocId;
use crate::Score;
use crate::SegmentLocalId;
use crate::SegmentOrdinal;
use crate::SegmentReader;
use std::cmp::Ordering;
use std::collections::btree_map;
@@ -37,7 +37,10 @@ impl<'a> PartialOrd<Hit<'a>> for Hit<'a> {
impl<'a> Ord for Hit<'a> {
fn cmp(&self, other: &Self) -> Ordering {
other.count.cmp(&self.count)
other
.count
.cmp(&self.count)
.then(self.facet.cmp(other.facet))
}
}
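The new `cmp` orders hits by descending count and breaks ties lexicographically on the facet, which is what makes the `BinaryHeap` pops deterministic. A toy reproduction of the comparator (a simplified `Hit` with a `&str` facet, not tantivy's type):

```rust
use std::cmp::Ordering;

// Count descending, then facet ascending as a tie-breaker.
#[derive(PartialEq, Eq)]
struct Hit<'a> {
    count: u64,
    facet: &'a str,
}

impl<'a> Ord for Hit<'a> {
    fn cmp(&self, other: &Self) -> Ordering {
        other
            .count
            .cmp(&self.count)
            .then(self.facet.cmp(&other.facet))
    }
}

impl<'a> PartialOrd for Hit<'a> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

fn main() {
    let a = Hit { count: 2, facet: "/facet/a" };
    let b = Hit { count: 2, facet: "/facet/b" };
    // Equal counts: "/facet/a" wins the tie.
    assert_eq!(a.cmp(&b), Ordering::Less);
}
```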
@@ -262,7 +265,7 @@ impl Collector for FacetCollector {
fn for_segment(
&self,
_: SegmentLocalId,
_: SegmentOrdinal,
reader: &SegmentReader,
) -> crate::Result<FacetSegmentCollector> {
let facet_reader = reader.facet_reader(self.field)?;
@@ -657,6 +660,41 @@ mod tests {
);
}
}
#[test]
fn test_facet_collector_topk_tie_break() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let docs: Vec<Document> = vec![("b", 2), ("a", 2), ("c", 4)]
.into_iter()
.flat_map(|(c, count)| {
let facet = Facet::from(&format!("/facet/{}", c));
let doc = doc!(facet_field => facet);
iter::repeat(doc).take(count)
})
.collect();
let mut index_writer = index.writer_for_tests()?;
for doc in docs {
index_writer.add_document(doc);
}
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let mut facet_collector = FacetCollector::for_field(facet_field);
facet_collector.add_facet("/facet");
let counts: FacetCounts = searcher.search(&AllQuery, &facet_collector)?;
let facets: Vec<(&Facet, u64)> = counts.top_k("/facet", 2);
assert_eq!(
facets,
vec![(&Facet::from("/facet/c"), 4), (&Facet::from("/facet/a"), 2)]
);
Ok(())
}
}
#[cfg(all(test, feature = "unstable"))]


@@ -47,7 +47,7 @@ use crate::{Score, SegmentReader, TantivyError};
/// let top_docs = searcher.search(&query, &no_filter_collector).unwrap();
///
/// assert_eq!(top_docs.len(), 1);
/// assert_eq!(top_docs[0].1, DocAddress(0, 1));
/// assert_eq!(top_docs[0].1, DocAddress::new(0, 1));
///
/// let filter_all_collector: FilterCollector<_, _, u64> = FilterCollector::new(price, &|value| value < 5u64, TopDocs::with_limit(2));
/// let filtered_top_docs = searcher.search(&query, &filter_all_collector).unwrap();


@@ -106,7 +106,7 @@ impl Collector for HistogramCollector {
fn for_segment(
&self,
_segment_local_id: crate::SegmentLocalId,
_segment_local_id: crate::SegmentOrdinal,
segment: &crate::SegmentReader,
) -> crate::Result<Self::Child> {
let ff_reader = segment.fast_fields().u64_lenient(self.field)?;


@@ -86,7 +86,7 @@ See the `custom_collector` example.
use crate::DocId;
use crate::Score;
use crate::SegmentLocalId;
use crate::SegmentOrdinal;
use crate::SegmentReader;
use downcast_rs::impl_downcast;
@@ -155,7 +155,7 @@ pub trait Collector: Sync + Send {
/// on this segment.
fn for_segment(
&self,
segment_local_id: SegmentLocalId,
segment_local_id: SegmentOrdinal,
segment: &SegmentReader,
) -> crate::Result<Self::Child>;
@@ -214,7 +214,7 @@ impl<TCollector: Collector> Collector for Option<TCollector> {
fn for_segment(
&self,
segment_local_id: SegmentLocalId,
segment_local_id: SegmentOrdinal,
segment: &SegmentReader,
) -> crate::Result<Self::Child> {
Ok(if let Some(inner) = self {


@@ -3,7 +3,7 @@ use super::SegmentCollector;
use crate::collector::Fruit;
use crate::DocId;
use crate::Score;
use crate::SegmentLocalId;
use crate::SegmentOrdinal;
use crate::SegmentReader;
use crate::TantivyError;
use std::marker::PhantomData;
@@ -175,7 +175,7 @@ impl<'a> Collector for MultiCollector<'a> {
fn for_segment(
&self,
segment_local_id: SegmentLocalId,
segment_local_id: SegmentOrdinal,
segment: &SegmentReader,
) -> crate::Result<MultiCollectorChild> {
let children = self


@@ -5,7 +5,7 @@ use crate::fastfield::FastFieldReader;
use crate::schema::Field;
use crate::DocId;
use crate::Score;
use crate::SegmentLocalId;
use crate::SegmentOrdinal;
use crate::{DocAddress, Document, Searcher};
use crate::collector::{Count, FilterCollector, TopDocs};
@@ -53,7 +53,7 @@ pub fn test_filter_collector() {
let top_docs = searcher.search(&query, &filter_some_collector).unwrap();
assert_eq!(top_docs.len(), 1);
assert_eq!(top_docs[0].1, DocAddress(0, 1));
assert_eq!(top_docs[0].1, DocAddress::new(0, 1));
let filter_all_collector: FilterCollector<_, _, u64> =
FilterCollector::new(price, &|value| value < 5u64, TopDocs::with_limit(2));
@@ -82,7 +82,7 @@ pub struct TestCollector {
}
pub struct TestSegmentCollector {
segment_id: SegmentLocalId,
segment_id: SegmentOrdinal,
fruit: TestFruit,
}
@@ -108,7 +108,7 @@ impl Collector for TestCollector {
fn for_segment(
&self,
segment_id: SegmentLocalId,
segment_id: SegmentOrdinal,
_reader: &SegmentReader,
) -> crate::Result<TestSegmentCollector> {
Ok(TestSegmentCollector {
@@ -126,7 +126,7 @@ impl Collector for TestCollector {
if fruit.docs().is_empty() {
0
} else {
fruit.docs()[0].segment_ord()
fruit.docs()[0].segment_ord
}
});
let mut docs = vec![];
@@ -143,7 +143,7 @@ impl SegmentCollector for TestSegmentCollector {
type Fruit = TestFruit;
fn collect(&mut self, doc: DocId, score: Score) {
self.fruit.docs.push(DocAddress(self.segment_id, doc));
self.fruit.docs.push(DocAddress::new(self.segment_id, doc));
self.fruit.scores.push(score);
}
@@ -177,7 +177,7 @@ impl Collector for FastFieldTestCollector {
fn for_segment(
&self,
_: SegmentLocalId,
_: SegmentOrdinal,
segment_reader: &SegmentReader,
) -> crate::Result<FastFieldSegmentCollector> {
let reader = segment_reader


@@ -1,6 +1,6 @@
use crate::DocAddress;
use crate::DocId;
use crate::SegmentLocalId;
use crate::SegmentOrdinal;
use crate::SegmentReader;
use std::cmp::Ordering;
use std::collections::BinaryHeap;
@@ -118,7 +118,7 @@ where
pub(crate) fn for_segment<F: PartialOrd>(
&self,
segment_id: SegmentLocalId,
segment_id: SegmentOrdinal,
_: &SegmentReader,
) -> TopSegmentCollector<F> {
TopSegmentCollector::new(segment_id, self.limit + self.offset)
@@ -147,29 +147,32 @@ where
pub(crate) struct TopSegmentCollector<T> {
limit: usize,
heap: BinaryHeap<ComparableDoc<T, DocId>>,
segment_id: u32,
segment_ord: u32,
}
impl<T: PartialOrd> TopSegmentCollector<T> {
fn new(segment_id: SegmentLocalId, limit: usize) -> TopSegmentCollector<T> {
fn new(segment_ord: SegmentOrdinal, limit: usize) -> TopSegmentCollector<T> {
TopSegmentCollector {
limit,
heap: BinaryHeap::with_capacity(limit),
segment_id,
segment_ord,
}
}
}
impl<T: PartialOrd + Clone> TopSegmentCollector<T> {
pub fn harvest(self) -> Vec<(T, DocAddress)> {
let segment_id = self.segment_id;
let segment_ord = self.segment_ord;
self.heap
.into_sorted_vec()
.into_iter()
.map(|comparable_doc| {
(
comparable_doc.feature,
DocAddress(segment_id, comparable_doc.doc),
DocAddress {
segment_ord,
doc_id: comparable_doc.doc,
},
)
})
.collect()
@@ -220,9 +223,9 @@ mod tests {
assert_eq!(
top_collector.harvest(),
vec![
(0.8, DocAddress(0, 1)),
(0.3, DocAddress(0, 5)),
(0.2, DocAddress(0, 3))
(0.8, DocAddress::new(0, 1)),
(0.3, DocAddress::new(0, 5)),
(0.2, DocAddress::new(0, 3))
]
);
}
@@ -238,10 +241,10 @@ mod tests {
assert_eq!(
top_collector.harvest(),
vec![
(0.9, DocAddress(0, 7)),
(0.8, DocAddress(0, 1)),
(0.3, DocAddress(0, 5)),
(0.2, DocAddress(0, 3))
(0.9, DocAddress::new(0, 7)),
(0.8, DocAddress::new(0, 1)),
(0.3, DocAddress::new(0, 5)),
(0.2, DocAddress::new(0, 3))
]
);
}
@@ -276,17 +279,17 @@ mod tests {
let results = collector
.merge_fruits(vec![vec![
(0.9, DocAddress(0, 1)),
(0.8, DocAddress(0, 2)),
(0.7, DocAddress(0, 3)),
(0.6, DocAddress(0, 4)),
(0.5, DocAddress(0, 5)),
(0.9, DocAddress::new(0, 1)),
(0.8, DocAddress::new(0, 2)),
(0.7, DocAddress::new(0, 3)),
(0.6, DocAddress::new(0, 4)),
(0.5, DocAddress::new(0, 5)),
]])
.unwrap();
assert_eq!(
results,
vec![(0.8, DocAddress(0, 2)), (0.7, DocAddress(0, 3)),]
vec![(0.8, DocAddress::new(0, 2)), (0.7, DocAddress::new(0, 3)),]
);
}
@@ -295,10 +298,13 @@ mod tests {
let collector = TopCollector::with_limit(2).and_offset(1);
let results = collector
.merge_fruits(vec![vec![(0.9, DocAddress(0, 1)), (0.8, DocAddress(0, 2))]])
.merge_fruits(vec![vec![
(0.9, DocAddress::new(0, 1)),
(0.8, DocAddress::new(0, 2)),
]])
.unwrap();
assert_eq!(results, vec![(0.8, DocAddress(0, 2)),]);
assert_eq!(results, vec![(0.8, DocAddress::new(0, 2)),]);
}
#[test]
@@ -306,7 +312,10 @@ mod tests {
let collector = TopCollector::with_limit(2).and_offset(20);
let results = collector
.merge_fruits(vec![vec![(0.9, DocAddress(0, 1)), (0.8, DocAddress(0, 2))]])
.merge_fruits(vec![vec![
(0.9, DocAddress::new(0, 1)),
(0.8, DocAddress::new(0, 2)),
]])
.unwrap();
assert_eq!(results, vec![]);


@@ -10,7 +10,7 @@ use crate::schema::Field;
use crate::DocAddress;
use crate::DocId;
use crate::Score;
use crate::SegmentLocalId;
use crate::SegmentOrdinal;
use crate::SegmentReader;
use crate::{collector::custom_score_top_collector::CustomScoreTopCollector, fastfield::FastValue};
use crate::{collector::top_collector::TopSegmentCollector, TantivyError};
@@ -37,7 +37,7 @@ where
fn for_segment(
&self,
segment_local_id: crate::SegmentLocalId,
segment_local_id: crate::SegmentOrdinal,
segment: &SegmentReader,
) -> crate::Result<Self::Child> {
let schema = segment.schema();
@@ -113,8 +113,8 @@ where
/// let query = query_parser.parse_query("diary").unwrap();
/// let top_docs = searcher.search(&query, &TopDocs::with_limit(2)).unwrap();
///
/// assert_eq!(top_docs[0].1, DocAddress(0, 1));
/// assert_eq!(top_docs[1].1, DocAddress(0, 3));
/// assert_eq!(top_docs[0].1, DocAddress::new(0, 1));
/// assert_eq!(top_docs[1].1, DocAddress::new(0, 3));
/// ```
pub struct TopDocs(TopCollector<Score>);
@@ -201,8 +201,8 @@ impl TopDocs {
/// let top_docs = searcher.search(&query, &TopDocs::with_limit(2).and_offset(1)).unwrap();
///
/// assert_eq!(top_docs.len(), 2);
/// assert_eq!(top_docs[0].1, DocAddress(0, 4));
/// assert_eq!(top_docs[1].1, DocAddress(0, 3));
/// assert_eq!(top_docs[0].1, DocAddress::new(0, 4));
/// assert_eq!(top_docs[1].1, DocAddress::new(0, 3));
/// ```
pub fn and_offset(self, offset: usize) -> TopDocs {
TopDocs(self.0.and_offset(offset))
@@ -243,8 +243,8 @@ impl TopDocs {
/// # let query = QueryParser::for_index(&index, vec![title]).parse_query("diary")?;
/// # let top_docs = docs_sorted_by_rating(&reader.searcher(), &query, rating)?;
/// # assert_eq!(top_docs,
/// # vec![(97u64, DocAddress(0u32, 1)),
/// # (80u64, DocAddress(0u32, 3))]);
/// # vec![(97u64, DocAddress::new(0u32, 1)),
/// # (80u64, DocAddress::new(0u32, 3))]);
/// # Ok(())
/// # }
/// /// Searches the document matching the given query, and
@@ -323,8 +323,8 @@ impl TopDocs {
/// # let reader = index.reader()?;
/// # let top_docs = docs_sorted_by_revenue(&reader.searcher(), &AllQuery, rating)?;
/// # assert_eq!(top_docs,
/// # vec![(119_000_000i64, DocAddress(0, 1)),
/// # (92_000_000i64, DocAddress(0, 0))]);
/// # vec![(119_000_000i64, DocAddress::new(0, 1)),
/// # (92_000_000i64, DocAddress::new(0, 0))]);
/// # Ok(())
/// # }
/// /// Searches the document matching the given query, and
@@ -600,7 +600,7 @@ impl Collector for TopDocs {
fn for_segment(
&self,
segment_local_id: SegmentLocalId,
segment_local_id: SegmentOrdinal,
reader: &SegmentReader,
) -> crate::Result<Self::Child> {
let collector = self.0.for_segment(segment_local_id, reader);
@@ -671,7 +671,15 @@ impl Collector for TopDocs {
let fruit = heap
.into_sorted_vec()
.into_iter()
.map(|cid| (cid.feature, DocAddress(segment_ord, cid.doc)))
.map(|cid| {
(
cid.feature,
DocAddress {
segment_ord,
doc_id: cid.doc,
},
)
})
.collect();
Ok(fruit)
}
@@ -741,9 +749,9 @@ mod tests {
assert_results_equals(
&score_docs,
&[
(0.81221175, DocAddress(0u32, 1)),
(0.5376842, DocAddress(0u32, 2)),
(0.48527452, DocAddress(0, 0)),
(0.81221175, DocAddress::new(0u32, 1)),
(0.5376842, DocAddress::new(0u32, 2)),
(0.48527452, DocAddress::new(0, 0)),
],
);
}
@@ -760,7 +768,7 @@ mod tests {
.searcher()
.search(&text_query, &TopDocs::with_limit(4).and_offset(2))
.unwrap();
assert_results_equals(&score_docs[..], &[(0.48527452, DocAddress(0, 0))]);
assert_results_equals(&score_docs[..], &[(0.48527452, DocAddress::new(0, 0))]);
}
#[test]
@@ -778,8 +786,8 @@ mod tests {
assert_results_equals(
&score_docs,
&[
(0.81221175, DocAddress(0u32, 1)),
(0.5376842, DocAddress(0u32, 2)),
(0.81221175, DocAddress::new(0u32, 1)),
(0.5376842, DocAddress::new(0u32, 2)),
],
);
}
@@ -799,8 +807,8 @@ mod tests {
assert_results_equals(
&score_docs[..],
&[
(0.5376842, DocAddress(0u32, 2)),
(0.48527452, DocAddress(0, 0)),
(0.5376842, DocAddress::new(0u32, 2)),
(0.48527452, DocAddress::new(0, 0)),
],
);
}
@@ -864,9 +872,9 @@ mod tests {
assert_eq!(
&top_docs[..],
&[
(64, DocAddress(0, 1)),
(16, DocAddress(0, 2)),
(12, DocAddress(0, 0))
(64, DocAddress::new(0, 1)),
(16, DocAddress::new(0, 2)),
(12, DocAddress::new(0, 0))
]
);
}
@@ -898,8 +906,8 @@ mod tests {
assert_eq!(
&top_docs[..],
&[
(mr_birthday, DocAddress(0, 1)),
(pr_birthday, DocAddress(0, 0)),
(mr_birthday, DocAddress::new(0, 1)),
(pr_birthday, DocAddress::new(0, 0)),
]
);
Ok(())
@@ -927,7 +935,10 @@ mod tests {
let top_docs: Vec<(i64, DocAddress)> = searcher.search(&AllQuery, &top_collector)?;
assert_eq!(
&top_docs[..],
&[(40i64, DocAddress(0, 1)), (-1i64, DocAddress(0, 0)),]
&[
(40i64, DocAddress::new(0, 1)),
(-1i64, DocAddress::new(0, 0)),
]
);
Ok(())
}
@@ -954,7 +965,10 @@ mod tests {
let top_docs: Vec<(f64, DocAddress)> = searcher.search(&AllQuery, &top_collector)?;
assert_eq!(
&top_docs[..],
&[(40f64, DocAddress(0, 1)), (-1.0f64, DocAddress(0, 0)),]
&[
(40f64, DocAddress::new(0, 1)),
(-1.0f64, DocAddress::new(0, 0)),
]
);
Ok(())
}
@@ -1034,7 +1048,7 @@ mod tests {
assert_eq!(
score_docs,
vec![(1, DocAddress(0, 1)), (0, DocAddress(0, 0)),]
vec![(1, DocAddress::new(0, 1)), (0, DocAddress::new(0, 0)),]
);
}
@@ -1056,7 +1070,7 @@ mod tests {
assert_eq!(
score_docs,
vec![(1, DocAddress(0, 1)), (0, DocAddress(0, 0)),]
vec![(1, DocAddress::new(0, 1)), (0, DocAddress::new(0, 0)),]
);
}


@@ -239,7 +239,7 @@ impl Index {
/// Such segments can of course be part of the index,
/// but also they could be segments being currently built or in the middle of a merge
/// operation.
pub fn list_all_segment_metas(&self) -> Vec<SegmentMeta> {
pub(crate) fn list_all_segment_metas(&self) -> Vec<SegmentMeta> {
self.inventory.all()
}


@@ -54,9 +54,8 @@ impl Searcher {
/// The searcher uses the segment ordinal to route
/// the request to the right `Segment`.
pub fn doc(&self, doc_address: DocAddress) -> crate::Result<Document> {
let DocAddress(segment_local_id, doc_id) = doc_address;
let store_reader = &self.store_readers[segment_local_id as usize];
store_reader.get(doc_id)
let store_reader = &self.store_readers[doc_address.segment_ord as usize];
store_reader.get(doc_address.doc_id)
}
/// Access the schema associated to the index of this searcher.


@@ -56,7 +56,7 @@ mod tests {
fn test_stored_bytes() -> crate::Result<()> {
let searcher = create_index_for_test(STORED)?;
assert_eq!(searcher.num_docs(), 1);
let retrieved_doc = searcher.doc(DocAddress(0u32, 0u32))?;
let retrieved_doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
let field = searcher.schema().get_field("string_bytes").unwrap();
let values: Vec<&Value> = retrieved_doc.get_all(field).collect();
assert_eq!(values.len(), 2);
@@ -72,7 +72,7 @@ mod tests {
fn test_non_stored_bytes() -> crate::Result<()> {
let searcher = create_index_for_test(INDEXED)?;
assert_eq!(searcher.num_docs(), 1);
let retrieved_doc = searcher.doc(DocAddress(0u32, 0u32))?;
let retrieved_doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
let field = searcher.schema().get_field("string_bytes").unwrap();
assert!(retrieved_doc.get_first(field).is_none());
Ok(())


@@ -105,7 +105,7 @@ mod tests {
let mut facet_ords = Vec::new();
facet_reader.facet_ords(0u32, &mut facet_ords);
assert_eq!(&facet_ords, &[2u64]);
let doc = searcher.doc(DocAddress(0u32, 0u32))?;
let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
assert_eq!(value, None);
Ok(())
@@ -128,7 +128,7 @@ mod tests {
let mut facet_ords = Vec::new();
facet_reader.facet_ords(0u32, &mut facet_ords);
assert!(facet_ords.is_empty());
let doc = searcher.doc(DocAddress(0u32, 0u32))?;
let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
assert_eq!(value, Some("/a/b".to_string()));
Ok(())
@@ -151,7 +151,7 @@ mod tests {
let mut facet_ords = Vec::new();
facet_reader.facet_ords(0u32, &mut facet_ords);
assert_eq!(&facet_ords, &[2u64]);
let doc = searcher.doc(DocAddress(0u32, 0u32))?;
let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
assert_eq!(value, Some("/a/b".to_string()));
Ok(())
@@ -174,7 +174,7 @@ mod tests {
let mut facet_ords = Vec::new();
facet_reader.facet_ords(0u32, &mut facet_ords);
assert!(facet_ords.is_empty());
let doc = searcher.doc(DocAddress(0u32, 0u32))?;
let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
assert_eq!(value, None);
Ok(())


@@ -147,37 +147,50 @@ mod tests {
}
}
// TODO: support Date range queries
// {
// let parser = QueryParser::for_index(&index, vec![date_field]);
// let range_q = format!("\"{}\"..\"{}\"",
// (first_time_stamp + Duration::seconds(1)).to_rfc3339(),
// (first_time_stamp + Duration::seconds(3)).to_rfc3339()
// );
// let query = parser.parse_query(&range_q)
// .expect("could not parse query");
// let results = searcher.search(&query, &TopDocs::with_limit(5))
// .expect("could not query index");
//
//
// assert_eq!(results.len(), 2);
// for (i, doc_pair) in results.iter().enumerate() {
// let retrieved_doc = searcher.doc(doc_pair.1).expect("cannot fetch doc");
// let offset_sec = match i {
// 0 => 1,
// 1 => 3,
// _ => panic!("should not have more than 2 docs")
// };
// let time_i_val = match i {
// 0 => 2,
// 1 => 3,
// _ => panic!("should not have more than 2 docs")
// };
// assert_eq!(retrieved_doc.get_first(date_field).expect("cannot find value").date_value().timestamp(),
// (first_time_stamp + Duration::seconds(offset_sec)).timestamp());
// assert_eq!(retrieved_doc.get_first(time_i).expect("cannot find value").i64_value(), time_i_val);
// }
// }
{
let parser = QueryParser::for_index(&index, vec![date_field]);
let range_q = format!(
"[{} TO {}]",
(first_time_stamp + Duration::seconds(1)).to_rfc3339(),
(first_time_stamp + Duration::seconds(3)).to_rfc3339()
);
let query = parser.parse_query(&range_q).expect("could not parse query");
let results = searcher
.search(&query, &TopDocs::with_limit(5))
.expect("could not query index");
assert_eq!(results.len(), 2);
for (i, doc_pair) in results.iter().enumerate() {
let retrieved_doc = searcher.doc(doc_pair.1).expect("cannot fetch doc");
let offset_sec = match i {
0 => 1,
1 => 2,
_ => panic!("should not have more than 2 docs"),
};
let time_i_val = match i {
0 => 2,
1 => 3,
_ => panic!("should not have more than 2 docs"),
};
assert_eq!(
retrieved_doc
.get_first(date_field)
.expect("cannot find value")
.date_value()
.expect("value not of Date type")
.timestamp(),
(first_time_stamp + Duration::seconds(offset_sec)).timestamp()
);
assert_eq!(
retrieved_doc
.get_first(time_i)
.expect("cannot find value")
.i64_value()
.expect("value not of i64 type"),
time_i_val
);
}
}
}
#[test]


@@ -798,49 +798,53 @@ mod tests {
{
assert_eq!(
get_doc_ids(vec![Term::from_field_text(text_field, "a")])?,
vec![DocAddress(0, 1), DocAddress(0, 2), DocAddress(0, 4)]
vec![
DocAddress::new(0, 1),
DocAddress::new(0, 2),
DocAddress::new(0, 4)
]
);
assert_eq!(
get_doc_ids(vec![Term::from_field_text(text_field, "af")])?,
vec![DocAddress(0, 0), DocAddress(0, 3)]
vec![DocAddress::new(0, 0), DocAddress::new(0, 3)]
);
assert_eq!(
get_doc_ids(vec![Term::from_field_text(text_field, "g")])?,
vec![DocAddress(0, 4)]
vec![DocAddress::new(0, 4)]
);
assert_eq!(
get_doc_ids(vec![Term::from_field_text(text_field, "b")])?,
vec![
DocAddress(0, 0),
DocAddress(0, 1),
DocAddress(0, 2),
DocAddress(0, 3),
DocAddress(0, 4)
DocAddress::new(0, 0),
DocAddress::new(0, 1),
DocAddress::new(0, 2),
DocAddress::new(0, 3),
DocAddress::new(0, 4)
]
);
assert_eq!(
get_doc_ids(vec![Term::from_field_date(date_field, &curr_time)])?,
vec![DocAddress(0, 0), DocAddress(0, 3)]
vec![DocAddress::new(0, 0), DocAddress::new(0, 3)]
);
}
{
let doc = searcher.doc(DocAddress(0, 0))?;
let doc = searcher.doc(DocAddress::new(0, 0))?;
assert_eq!(doc.get_first(text_field).unwrap().text(), Some("af b"));
}
{
let doc = searcher.doc(DocAddress(0, 1))?;
let doc = searcher.doc(DocAddress::new(0, 1))?;
assert_eq!(doc.get_first(text_field).unwrap().text(), Some("a b c"));
}
{
let doc = searcher.doc(DocAddress(0, 2))?;
let doc = searcher.doc(DocAddress::new(0, 2))?;
assert_eq!(doc.get_first(text_field).unwrap().text(), Some("a b c d"));
}
{
let doc = searcher.doc(DocAddress(0, 3))?;
let doc = searcher.doc(DocAddress::new(0, 3))?;
assert_eq!(doc.get_first(text_field).unwrap().text(), Some("af b"));
}
{
let doc = searcher.doc(DocAddress(0, 4))?;
let doc = searcher.doc(DocAddress::new(0, 4))?;
assert_eq!(doc.get_first(text_field).unwrap().text(), Some("a b c g"));
}
{


@@ -24,6 +24,7 @@ pub use self::prepared_commit::PreparedCommit;
pub use self::segment_entry::SegmentEntry;
pub use self::segment_manager::SegmentManager;
pub use self::segment_serializer::SegmentSerializer;
pub use self::segment_updater::merge_segments;
pub use self::segment_writer::SegmentWriter;
/// Alias for the default merge policy, which is the `LogMergePolicy`.


@@ -114,7 +114,7 @@ fn merge(
// first we need to apply deletes to our segment.
let merged_segment = index.new_segment();
// First we apply all of the delet to the merged segment, up to the target opstamp.
// First we apply all of the delete to the merged segment, up to the target opstamp.
for segment_entry in &mut segment_entries {
let segment = index.segment(segment_entry.meta().clone());
advance_deletes(segment, segment_entry, target_opstamp)?;
@@ -141,6 +141,81 @@ fn merge(
Ok(SegmentEntry::new(segment_meta, delete_cursor, None))
}
/// Advanced: Merges a list of segments from different indices into a new index.
///
/// Returns `TantivyError` if the indices list is empty or their
/// schemas don't match.
///
/// `output_directory` is assumed to be empty.
///
/// # Warning
/// This function does NOT check whether an `IndexWriter` is running. It is not
/// meant to be used while an `IndexWriter` is running for the origin indices or
/// the destination `Index`.
#[doc(hidden)]
pub fn merge_segments<Dir: Directory>(
indices: &[Index],
output_directory: Dir,
) -> crate::Result<Index> {
if indices.is_empty() {
// If there are no indices to merge, there is no need to do anything.
return Err(crate::TantivyError::InvalidArgument(
"No indices given to marge".to_string(),
));
}
let target_schema = indices[0].schema();
// let's check that all of the indices have the same schema
if indices
.iter()
.skip(1)
.any(|index| index.schema() != target_schema)
{
return Err(crate::TantivyError::InvalidArgument(
"Attempt to merge different schema indices".to_string(),
));
}
let mut segments: Vec<Segment> = Vec::new();
for index in indices {
segments.extend(index.searchable_segments()?);
}
let mut merged_index = Index::create(output_directory, target_schema.clone())?;
let merged_segment = merged_index.new_segment();
let merged_segment_id = merged_segment.id();
let merger: IndexMerger = IndexMerger::open(merged_index.schema(), &segments[..])?;
let segment_serializer = SegmentSerializer::for_segment(merged_segment)?;
let num_docs = merger.write(segment_serializer)?;
let segment_meta = merged_index.new_segment_meta(merged_segment_id, num_docs);
let stats = format!(
"Segments Merge: [{}]",
segments
.iter()
.fold(String::new(), |sum, current| format!(
"{}{} ",
sum,
current.meta().id().uuid_string()
))
.trim_end()
);
let index_meta = IndexMeta {
segments: vec![segment_meta],
schema: target_schema,
opstamp: 0u64,
payload: Some(stats),
};
// save the meta.json
save_metas(&index_meta, merged_index.directory_mut())?;
Ok(merged_index)
}
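A sketch of calling the new helper, mirroring the `test_merge_segments` test below; the indices are assumed to share a schema and to have no `IndexWriter` running:

```rust
use tantivy::directory::RAMDirectory;
use tantivy::{merge_segments, Index};

// Merge every searchable segment of `indices` into a brand-new index
// backed by a fresh (empty) RAMDirectory.
fn merge_all(indices: &[Index]) -> tantivy::Result<Index> {
    merge_segments(indices, RAMDirectory::default())
}
```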
pub(crate) struct InnerSegmentUpdater {
// we keep a copy of the current active IndexMeta to
// avoid loading the file every time we need it in the
@@ -479,7 +554,7 @@ impl SegmentUpdater {
if delete_operation.opstamp < committed_opstamp {
let index = &segment_updater.index;
let segment = index.segment(after_merge_segment_entry.meta().clone());
if let Err(e) = advance_deletes(
if let Err(advance_deletes_err) = advance_deletes(
segment,
&mut after_merge_segment_entry,
committed_opstamp,
@@ -487,7 +562,7 @@ impl SegmentUpdater {
error!(
"Merge of {:?} was cancelled (advancing deletes failed): {:?}",
merge_operation.segment_ids(),
e
advance_deletes_err
);
if cfg!(test) {
panic!("Merge failed.");
@@ -495,7 +570,7 @@ impl SegmentUpdater {
// ... cancel merge
// `merge_operations` are tracked. As it is dropped, the
// the segment_ids will be available again for merge.
return Err(e);
return Err(advance_deletes_err);
}
}
}
@@ -540,158 +615,201 @@ impl SegmentUpdater {
#[cfg(test)]
mod tests {
use super::merge_segments;
use crate::directory::RAMDirectory;
use crate::indexer::merge_policy::tests::MergeWheneverPossible;
use crate::schema::*;
use crate::Index;
#[test]
fn test_delete_during_merge() {
fn test_delete_during_merge() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let index = Index::create_in_ram(schema_builder.build());
// writing the segment
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
index_writer.set_merge_policy(Box::new(MergeWheneverPossible));
{
for _ in 0..100 {
index_writer.add_document(doc!(text_field=>"a"));
index_writer.add_document(doc!(text_field=>"b"));
}
assert!(index_writer.commit().is_ok());
for _ in 0..100 {
index_writer.add_document(doc!(text_field=>"a"));
index_writer.add_document(doc!(text_field=>"b"));
}
index_writer.commit()?;
{
for _ in 0..100 {
index_writer.add_document(doc!(text_field=>"c"));
index_writer.add_document(doc!(text_field=>"d"));
}
assert!(index_writer.commit().is_ok());
for _ in 0..100 {
index_writer.add_document(doc!(text_field=>"c"));
index_writer.add_document(doc!(text_field=>"d"));
}
index_writer.commit()?;
{
index_writer.add_document(doc!(text_field=>"e"));
index_writer.add_document(doc!(text_field=>"f"));
assert!(index_writer.commit().is_ok());
}
index_writer.add_document(doc!(text_field=>"e"));
index_writer.add_document(doc!(text_field=>"f"));
index_writer.commit()?;
{
let term = Term::from_field_text(text_field, "a");
index_writer.delete_term(term);
assert!(index_writer.commit().is_ok());
}
let reader = index.reader().unwrap();
let term = Term::from_field_text(text_field, "a");
index_writer.delete_term(term);
index_writer.commit()?;
let reader = index.reader()?;
assert_eq!(reader.searcher().num_docs(), 302);
{
index_writer
.wait_merging_threads()
.expect("waiting for merging threads");
}
index_writer.wait_merging_threads()?;
reader.reload().unwrap();
reader.reload()?;
assert_eq!(reader.searcher().segment_readers().len(), 1);
assert_eq!(reader.searcher().num_docs(), 302);
Ok(())
}
#[test]
fn delete_all_docs() {
fn delete_all_docs() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let index = Index::create_in_ram(schema_builder.build());
// writing the segment
let mut index_writer = index.writer_for_tests().unwrap();
let mut index_writer = index.writer_for_tests()?;
{
for _ in 0..100 {
index_writer.add_document(doc!(text_field=>"a"));
index_writer.add_document(doc!(text_field=>"b"));
}
assert!(index_writer.commit().is_ok());
for _ in 0..100 {
index_writer.add_document(doc!(text_field=>"a"));
index_writer.add_document(doc!(text_field=>"b"));
}
index_writer.commit()?;
for _ in 0..100 {
index_writer.add_document(doc!(text_field=>"c"));
index_writer.add_document(doc!(text_field=>"d"));
}
index_writer.commit()?;
index_writer.add_document(doc!(text_field=>"e"));
index_writer.add_document(doc!(text_field=>"f"));
index_writer.commit()?;
let seg_ids = index.searchable_segment_ids()?;
// docs exist, should have at least 1 segment
assert!(seg_ids.len() > 0);
let term_vals = vec!["a", "b", "c", "d", "e", "f"];
for term_val in term_vals {
let term = Term::from_field_text(text_field, term_val);
index_writer.delete_term(term);
index_writer.commit()?;
}
{
for _ in 0..100 {
index_writer.add_document(doc!(text_field=>"c"));
index_writer.add_document(doc!(text_field=>"d"));
}
assert!(index_writer.commit().is_ok());
}
index_writer.wait_merging_threads()?;
{
index_writer.add_document(doc!(text_field=>"e"));
index_writer.add_document(doc!(text_field=>"f"));
assert!(index_writer.commit().is_ok());
}
{
let seg_ids = index
.searchable_segment_ids()
.expect("Searchable segments failed.");
// docs exist, should have at least 1 segment
assert!(seg_ids.len() > 0);
}
{
let term_vals = vec!["a", "b", "c", "d", "e", "f"];
for term_val in term_vals {
let term = Term::from_field_text(text_field, term_val);
index_writer.delete_term(term);
assert!(index_writer.commit().is_ok());
}
}
{
index_writer
.wait_merging_threads()
.expect("waiting for merging threads");
}
let reader = index.reader().unwrap();
let reader = index.reader()?;
assert_eq!(reader.searcher().num_docs(), 0);
let seg_ids = index
.searchable_segment_ids()
.expect("Searchable segments failed.");
let seg_ids = index.searchable_segment_ids()?;
assert!(seg_ids.is_empty());
reader.reload().unwrap();
reader.reload()?;
assert_eq!(reader.searcher().num_docs(), 0);
// empty segments should be erased
assert!(index.searchable_segment_metas().unwrap().is_empty());
assert!(index.searchable_segment_metas()?.is_empty());
assert!(reader.searcher().segment_readers().is_empty());
Ok(())
}
#[test]
fn test_remove_all_segments() {
fn test_remove_all_segments() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let index = Index::create_in_ram(schema_builder.build());
// writing the segment
let mut index_writer = index.writer_for_tests().unwrap();
{
for _ in 0..100 {
index_writer.add_document(doc!(text_field=>"a"));
index_writer.add_document(doc!(text_field=>"b"));
}
assert!(index_writer.commit().is_ok());
let mut index_writer = index.writer_for_tests()?;
for _ in 0..100 {
index_writer.add_document(doc!(text_field=>"a"));
index_writer.add_document(doc!(text_field=>"b"));
}
index_writer.commit()?;
index_writer.segment_updater().remove_all_segments();
let seg_vec = index_writer
.segment_updater()
.segment_manager
.segment_entries();
assert!(seg_vec.is_empty());
Ok(())
}
#[test]
fn test_merge_segments() -> crate::Result<()> {
let mut indices = vec![];
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let schema = schema_builder.build();
for _ in 0..3 {
let index = Index::create_in_ram(schema.clone());
// writing two segments
let mut index_writer = index.writer_for_tests()?;
for _ in 0..100 {
index_writer.add_document(doc!(text_field=>"fizz"));
index_writer.add_document(doc!(text_field=>"buzz"));
}
index_writer.commit()?;
for _ in 0..1000 {
index_writer.add_document(doc!(text_field=>"foo"));
index_writer.add_document(doc!(text_field=>"bar"));
}
index_writer.commit()?;
indices.push(index);
}
assert_eq!(indices.len(), 3);
let output_directory = RAMDirectory::default();
let index = merge_segments(&indices, output_directory)?;
assert_eq!(index.schema(), schema);
let segments = index.searchable_segments()?;
assert_eq!(segments.len(), 1);
let segment_metas = segments[0].meta();
assert_eq!(segment_metas.num_deleted_docs(), 0);
assert_eq!(segment_metas.num_docs(), 6600);
Ok(())
}
#[test]
fn test_merge_empty_indices_array() {
let merge_result = merge_segments(&[], RAMDirectory::default());
assert!(merge_result.is_err());
}
#[test]
fn test_merge_mismatched_schema() -> crate::Result<()> {
let first_index = {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(text_field=>"some text"));
index_writer.commit()?;
index
};
let second_index = {
let mut schema_builder = Schema::builder();
let body_field = schema_builder.add_text_field("body", TEXT);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(body_field=>"some body"));
index_writer.commit()?;
index
};
// mismatched schema index list
let result = merge_segments(&[first_index, second_index], RAMDirectory::default());
assert!(result.is_err());
Ok(())
}
}


@@ -163,6 +163,7 @@ pub use crate::core::{Executor, SegmentComponent};
pub use crate::core::{Index, IndexMeta, Searcher, Segment, SegmentId, SegmentMeta};
pub use crate::core::{InvertedIndexReader, SegmentReader};
pub use crate::directory::Directory;
pub use crate::indexer::merge_segments;
pub use crate::indexer::operation::UserOperation;
pub use crate::indexer::IndexWriter;
pub use crate::postings::Postings;
@@ -254,20 +255,16 @@ pub type Opstamp = u64;
/// the document to the search query.
pub type Score = f32;
/// A `SegmentLocalId` identifies a segment.
/// It only makes sense for a given searcher.
pub type SegmentLocalId = u32;
/// A `SegmentOrdinal` identifies a segment, within a `Searcher`.
pub type SegmentOrdinal = u32;
impl DocAddress {
/// Return the segment ordinal id that identifies the segment
/// hosting the document in the `Searcher` it is called from.
pub fn segment_ord(self) -> SegmentLocalId {
self.0
}
/// Return the segment-local `DocId`
pub fn doc(self) -> DocId {
self.1
}
/// Creates a new DocAddress from the segment/docId pair.
pub fn new(segment_ord: SegmentOrdinal, doc_id: DocId) -> DocAddress {
DocAddress {
segment_ord,
doc_id,
}
}
}
@@ -280,7 +277,13 @@ impl DocAddress {
/// The id used for the segment is actually an ordinal
/// in the list of `Segment`s held by a `Searcher`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DocAddress(pub SegmentLocalId, pub DocId);
pub struct DocAddress {
/// The segment ordinal id that identifies the segment
/// hosting the document in the `Searcher` it is called from.
pub segment_ord: SegmentOrdinal,
/// The segment-local `DocId`.
pub doc_id: DocId,
}
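With the tuple struct gone, call sites move from `DocAddress(seg, doc)` plus `.0`/`.1` to the constructor and named fields, as the many mechanical replacements above show. A small before/after sketch:

```rust
use tantivy::{DocAddress, DocId, SegmentOrdinal};

fn main() {
    // 0.14: let addr = DocAddress(0, 1); let doc = addr.1;
    // 0.15: named fields with a constructor.
    let addr = DocAddress::new(0, 1);
    let segment: SegmentOrdinal = addr.segment_ord;
    let doc_id: DocId = addr.doc_id;
    assert_eq!((segment, doc_id), (0, 1));
}
```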
#[cfg(test)]
mod tests {
@@ -778,30 +781,38 @@ mod tests {
};
assert_eq!(
get_doc_ids(vec![Term::from_field_text(text_field, "a")])?,
vec![DocAddress(0, 1), DocAddress(0, 2)]
vec![DocAddress::new(0, 1), DocAddress::new(0, 2)]
);
assert_eq!(
get_doc_ids(vec![Term::from_field_text(text_field, "af")])?,
vec![DocAddress(0, 0)]
vec![DocAddress::new(0, 0)]
);
assert_eq!(
get_doc_ids(vec![Term::from_field_text(text_field, "b")])?,
vec![DocAddress(0, 0), DocAddress(0, 1), DocAddress(0, 2)]
vec![
DocAddress::new(0, 0),
DocAddress::new(0, 1),
DocAddress::new(0, 2)
]
);
assert_eq!(
get_doc_ids(vec![Term::from_field_text(text_field, "c")])?,
vec![DocAddress(0, 1), DocAddress(0, 2)]
vec![DocAddress::new(0, 1), DocAddress::new(0, 2)]
);
assert_eq!(
get_doc_ids(vec![Term::from_field_text(text_field, "d")])?,
vec![DocAddress(0, 2)]
vec![DocAddress::new(0, 2)]
);
assert_eq!(
get_doc_ids(vec![
Term::from_field_text(text_field, "b"),
Term::from_field_text(text_field, "a"),
])?,
vec![DocAddress(0, 0), DocAddress(0, 1), DocAddress(0, 2)]
vec![
DocAddress::new(0, 0),
DocAddress::new(0, 1),
DocAddress::new(0, 2)
]
);
Ok(())
}


@@ -238,9 +238,9 @@ mod tests {
assert_eq!(
docs,
vec![
DocAddress(0u32, 1u32),
DocAddress(0u32, 2u32),
DocAddress(0u32, 3u32)
DocAddress::new(0u32, 1u32),
DocAddress::new(0u32, 2u32),
DocAddress::new(0u32, 3u32)
]
.into_iter()
.collect()
@@ -264,15 +264,24 @@ mod tests {
BooleanQuery::intersection(vec![term_b.box_clone(), term_c.box_clone()]);
{
let docs = searcher.search(&intersection_ab, &DocSetCollector)?;
assert_eq!(docs, vec![DocAddress(0u32, 2u32)].into_iter().collect());
assert_eq!(
docs,
vec![DocAddress::new(0u32, 2u32)].into_iter().collect()
);
}
{
let docs = searcher.search(&intersection_ac, &DocSetCollector)?;
assert_eq!(docs, vec![DocAddress(0u32, 1u32)].into_iter().collect());
assert_eq!(
docs,
vec![DocAddress::new(0u32, 1u32)].into_iter().collect()
);
}
{
let docs = searcher.search(&intersection_bc, &DocSetCollector)?;
assert_eq!(docs, vec![DocAddress(0u32, 0u32)].into_iter().collect());
assert_eq!(
docs,
vec![DocAddress::new(0u32, 0u32)].into_iter().collect()
);
}
Ok(())
}


@@ -128,7 +128,7 @@ mod tests {
.docs()
.iter()
.cloned()
.map(|doc| doc.1)
.map(|doc| doc.doc_id)
.collect::<Vec<DocId>>()
};
{
@@ -196,8 +196,8 @@ mod tests {
let topdocs_no_excluded = matching_topdocs(&boolean_query_no_excluded);
assert_eq!(topdocs_no_excluded.len(), 2);
let (top_score, top_doc) = topdocs_no_excluded[0];
assert_eq!(top_doc, DocAddress(0, 4));
assert_eq!(topdocs_no_excluded[1].1, DocAddress(0, 3)); // ignore score of doc 3.
assert_eq!(top_doc, DocAddress::new(0, 4));
assert_eq!(topdocs_no_excluded[1].1, DocAddress::new(0, 3)); // ignore score of doc 3.
score_doc_4 = top_score;
}
@@ -210,7 +210,7 @@ mod tests {
let topdocs_excluded = matching_topdocs(&boolean_query_two_excluded);
assert_eq!(topdocs_excluded.len(), 1);
let (top_score, top_doc) = topdocs_excluded[0];
assert_eq!(top_doc, DocAddress(0, 4));
assert_eq!(top_doc, DocAddress::new(0, 4));
assert_eq!(top_score, score_doc_4);
}
}
@@ -309,7 +309,7 @@ mod tests {
IndexRecordOption::Basic,
));
let query = BooleanQuery::from(vec![(Occur::Should, term_a), (Occur::Should, term_b)]);
let explanation = query.explain(&searcher, DocAddress(0, 0u32))?;
let explanation = query.explain(&searcher, DocAddress::new(0, 0u32))?;
assert_nearly_equals!(explanation.value(), 0.6931472);
Ok(())
}


@@ -150,7 +150,7 @@ mod tests {
let reader = index.reader().unwrap();
let searcher = reader.searcher();
let query = BoostQuery::new(Box::new(AllQuery), 0.2);
let explanation = query.explain(&searcher, DocAddress(0, 0u32)).unwrap();
let explanation = query.explain(&searcher, DocAddress::new(0, 0u32)).unwrap();
assert_eq!(
explanation.to_pretty_json(),
"{\n \"value\": 0.2,\n \"description\": \"Boost x0.2 of ...\",\n \"details\": [\n {\n \"value\": 1.0,\n \"description\": \"AllQuery\",\n \"context\": []\n }\n ],\n \"context\": []\n}"


@@ -58,7 +58,7 @@ pub mod tests {
test_fruits
.docs()
.iter()
.map(|docaddr| docaddr.1)
.map(|docaddr| docaddr.doc_id)
.collect::<Vec<_>>()
};
assert_eq!(test_query(vec!["a", "b"]), vec![1, 2, 3, 4]);
@@ -109,7 +109,7 @@ pub mod tests {
test_fruits
.docs()
.iter()
.map(|docaddr| docaddr.1)
.map(|docaddr| docaddr.doc_id)
.collect::<Vec<_>>()
};
assert_eq!(test_query(vec!["a", "b", "c"]), vec![2, 4]);
@@ -206,8 +206,8 @@ pub mod tests {
.docs()
.to_vec()
};
assert_eq!(test_query(vec!["a", "b"]), vec![DocAddress(0, 1)]);
assert_eq!(test_query(vec!["b", "a"]), vec![DocAddress(0, 2)]);
assert_eq!(test_query(vec!["a", "b"]), vec![DocAddress::new(0, 1)]);
assert_eq!(test_query(vec!["b", "a"]), vec![DocAddress::new(0, 2)]);
}
#[test] // motivated by #234
@@ -233,7 +233,7 @@ pub mod tests {
.expect("search should succeed")
.docs()
.iter()
.map(|doc_address| doc_address.1)
.map(|doc_address| doc_address.doc_id)
.collect::<Vec<DocId>>()
};
assert_eq!(test_query(vec![(0, "a"), (1, "b")]), vec![0]);


@@ -51,9 +51,9 @@ pub trait Query: QueryClone + Send + Sync + downcast_rs::Downcast + fmt::Debug {
/// Returns an `Explanation` for the score of the document.
fn explain(&self, searcher: &Searcher, doc_address: DocAddress) -> crate::Result<Explanation> {
let reader = searcher.segment_reader(doc_address.segment_ord());
let reader = searcher.segment_reader(doc_address.segment_ord);
let weight = self.weight(searcher, true)?;
weight.explain(reader, doc_address.doc())
weight.explain(reader, doc_address.doc_id)
}
/// Returns the number of documents matching the query.


@@ -157,7 +157,8 @@ fn trim_ast(logical_ast: LogicalAST) -> Option<LogicalAST> {
/// a word lexicographically between `a` and `c` (inclusive lower bound, exclusive upper bound).
/// Inclusive bounds are `[]`, exclusive are `{}`.
///
/// * date values: The query parser supports rfc3339 formatted dates. For example "2002-10-02T15:00:00.05Z"
/// * date values: The query parser supports rfc3339 formatted dates. For example `"2002-10-02T15:00:00.05Z"`
/// or `some_date_field:[2002-10-02T15:00:00Z TO 2002-10-02T18:00:00Z}`
///
/// * all docs query: A plain `*` will match all documents in the index.
///
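A sketch of the date-range syntax the parser now accepts, assembled from the grammar and tests in this changeset; the schema and field name (`timestamp`) are made up for illustration:

```rust
use tantivy::query::QueryParser;
use tantivy::schema::{Schema, INDEXED};
use tantivy::Index;

fn main() {
    let mut schema_builder = Schema::builder();
    // Hypothetical indexed date field.
    let timestamp = schema_builder.add_date_field("timestamp", INDEXED);
    let index = Index::create_in_ram(schema_builder.build());

    let parser = QueryParser::for_index(&index, vec![timestamp]);
    // Inclusive lower bound ([), exclusive upper bound (}).
    let query = parser
        .parse_query("timestamp:[2002-10-02T15:00:00Z TO 2002-10-02T18:00:00Z}")
        .expect("could not parse query");
    println!("{:?}", query);
}
```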


@@ -196,18 +196,18 @@ mod tests {
let term_query = TermQuery::new(term_a, IndexRecordOption::Basic);
let searcher = index.reader()?.searcher();
{
let explanation = term_query.explain(&searcher, DocAddress(0u32, 1u32))?;
let explanation = term_query.explain(&searcher, DocAddress::new(0u32, 1u32))?;
assert_nearly_equals!(explanation.value(), 0.6931472);
}
{
let explanation_err = term_query.explain(&searcher, DocAddress(0u32, 0u32));
let explanation_err = term_query.explain(&searcher, DocAddress::new(0u32, 0u32));
assert!(matches!(
explanation_err,
Err(crate::TantivyError::InvalidArgument(_msg))
));
}
{
let explanation_err = term_query.explain(&searcher, DocAddress(0u32, 3u32));
let explanation_err = term_query.explain(&searcher, DocAddress::new(0u32, 3u32));
assert!(matches!(
explanation_err,
Err(crate::TantivyError::InvalidArgument(_msg))


@@ -192,7 +192,7 @@ impl<'de> Deserialize<'de> for FieldEntry {
Name,
Type,
Options,
};
}
const FIELDS: &[&str] = &["name", "type", "options"];


@@ -192,7 +192,7 @@ impl SchemaBuilder {
}))
}
}
#[derive(Debug)]
struct InnerSchema {
fields: Vec<FieldEntry>,
fields_map: HashMap<String, Field>, // transient
@@ -226,7 +226,7 @@ impl Eq for InnerSchema {}
/// let schema = schema_builder.build();
///
/// ```
#[derive(Clone, Eq, PartialEq)]
#[derive(Clone, Eq, PartialEq, Debug)]
pub struct Schema(Arc<InnerSchema>);
impl Schema {


@@ -154,7 +154,7 @@ mod tests {
let searcher = reader.searcher();
assert_eq!(searcher.num_docs(), 30);
for i in 0..searcher.num_docs() as u32 {
let _doc = searcher.doc(DocAddress(0u32, i))?;
let _doc = searcher.doc(DocAddress::new(0u32, i))?;
}
Ok(())
}


@@ -1,4 +1,3 @@
use crate::schema::Term;
use crate::termdict::TermOrdinal;
use crate::termdict::TermStreamer;
use std::cmp::Ordering;
@@ -114,14 +113,4 @@ impl<'a> TermMerger<'a> {
pub fn current_kvs(&self) -> &[HeapItem<'a>] {
&self.current_streamers[..]
}
/// Iterates through terms
#[cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))]
pub fn next(&mut self) -> Option<Term<&[u8]>> {
if self.advance() {
Some(Term::wrap(self.current_streamers[0].streamer.key()))
} else {
None
}
}
}