mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-01-04 08:12:54 +00:00
Compare commits: issue/997 ... rihardsk-d (27 commits)
| SHA1 |
|---|
| 4d08713c73 |
| 570c4fdbb1 |
| 00506594b9 |
| 3313fd39fc |
| 9dd8c268bd |
| 942cbfb383 |
| 5d1627e52d |
| 84cad5c1aa |
| f58345f0f0 |
| f518012656 |
| 12fb9a95cb |
| 55e79e34af |
| 1649f31258 |
| 7849736d80 |
| e58401be78 |
| be1d9e0db7 |
| 5743b46457 |
| e67e5ebd46 |
| a550c85369 |
| b185df2b22 |
| f82922b354 |
| 86b30d9d7f |
| f1499d5b3e |
| 30b6828d71 |
| e6b7b7da0a |
| 38a20ae269 |
| a0ec6e1e9d |
.github/ISSUE_TEMPLATE/actions.md (vendored, new file, 13 lines)
@@ -0,0 +1,13 @@
+---
+name: Actions
+about: Actions not directly related to producing code.
+
+---
+
+# Actions title
+
+Action description.
+e.g.
+- benchmark
+- investigate and report
+- etc.
@@ -3,8 +3,10 @@ Tantivy 0.15.0
 - API Changes. Using Range instead of (start, end) in the API and internals (`FileSlice`, `OwnedBytes`, `Snippets`, ...)
 This change is breaking but migration is trivial.
 - Added an Histogram collector. (@fulmicoton) #994
-- Added support for Option<TCollector>. (@fulmicoton)
-
+- Added support for Option<TCollector>. (@fulmicoton)
+- DocAddress is now a struct (@scampi) #987
+- Bugfix consistent tie break handling in facet's topk (@hardikpnsp) #357
+- Date field support for range queries (@rihardsk) #516
 
 Tantivy 0.14.0
 =========================
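The `DocAddress` change is mechanical to migrate. A minimal before/after sketch (hypothetical caller code, not part of the commit):

// Hypothetical caller code illustrating the 0.14 -> 0.15 migration.
use tantivy::DocAddress;

fn migration_sketch() {
    // 0.14: tuple struct with accessor methods.
    // let addr = DocAddress(0u32, 5u32);
    // let seg = addr.segment_ord();
    // let doc = addr.doc();

    // 0.15: plain struct, constructed via `new`, with public named fields.
    let addr = DocAddress::new(0u32, 5u32);
    assert_eq!(addr.segment_ord, 0u32);
    assert_eq!(addr.doc_id, 5u32);
}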
@@ -1,11 +1,11 @@
 use super::user_input_ast::{UserInputAST, UserInputBound, UserInputLeaf, UserInputLiteral};
 use crate::Occur;
-use combine::error::StringStreamError;
 use combine::parser::char::{char, digit, letter, space, spaces, string};
 use combine::parser::Parser;
 use combine::{
     attempt, choice, eof, many, many1, one_of, optional, parser, satisfy, skip_many1, value,
 };
+use combine::{error::StringStreamError, parser::combinator::recognize};
 
 fn field<'a>() -> impl Parser<&'a str, Output = String> {
     (
@@ -35,6 +35,62 @@ fn word<'a>() -> impl Parser<&'a str, Output = String> {
     })
 }
 
+/// Parses a date time according to rfc3339
+/// 2015-08-02T18:54:42+02
+/// 2021-04-13T19:46:26.266051969+00:00
+///
+/// NOTE: also accepts 999999-99-99T99:99:99.266051969+99:99
+/// We delegate rejecting such invalid dates to the logical AST computation code
+/// which invokes chrono::DateTime::parse_from_rfc3339 on the value to actually parse
+/// it (instead of merely extracting the datetime value as string as done here).
+fn date_time<'a>() -> impl Parser<&'a str, Output = String> {
+    let two_digits = || recognize::<String, _, _>((digit(), digit()));
+
+    // Parses a time zone
+    // -06:30
+    // Z
+    let time_zone = {
+        let utc = recognize::<String, _, _>(char('Z'));
+        let offset = recognize((
+            choice([char('-'), char('+')]),
+            two_digits(),
+            char(':'),
+            two_digits(),
+        ));
+
+        utc.or(offset)
+    };
+
+    // Parses a date
+    // 2010-01-30
+    let date = {
+        recognize::<String, _, _>((
+            many1::<String, _, _>(digit()),
+            char('-'),
+            two_digits(),
+            char('-'),
+            two_digits(),
+        ))
+    };
+
+    // Parses a time
+    // 12:30:02
+    // 19:46:26.266051969
+    let time = {
+        recognize::<String, _, _>((
+            two_digits(),
+            char(':'),
+            two_digits(),
+            char(':'),
+            two_digits(),
+            optional((char('.'), many1::<String, _, _>(digit()))),
+            time_zone,
+        ))
+    };
+
+    recognize((date, char('T'), time))
+}
+
 fn term_val<'a>() -> impl Parser<&'a str, Output = String> {
     let phrase = char('"').with(many1(satisfy(|c| c != '"'))).skip(char('"'));
     phrase.or(word())
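To make the NOTE in the doc comment concrete, here is a hedged sketch of the grammar-level behavior (assuming the `date_time` parser above and combine's `Parser` trait are in scope; illustrative only, not part of the commit):

use combine::Parser;

fn date_time_sketch() {
    // The grammar merely extracts the datetime as a string; it performs no range checks.
    let (raw, rest) = date_time()
        .parse("2021-04-13T19:46:26.266051969+00:00")
        .unwrap();
    assert_eq!(raw, "2021-04-13T19:46:26.266051969+00:00");
    assert_eq!(rest, "");
    // Out-of-range components are still accepted at this stage; they are rejected
    // later, when chrono::DateTime::parse_from_rfc3339 runs on the extracted string.
    assert!(date_time().parse("999999-99-99T99:99:99.266051969+99:99").is_ok());
}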
@@ -83,7 +139,8 @@ fn spaces1<'a>() -> impl Parser<&'a str, Output = ()> {
 /// [a TO *], [a TO c], [abc TO bcd}
 fn range<'a>() -> impl Parser<&'a str, Output = UserInputLeaf> {
     let range_term_val = || {
-        word()
+        attempt(date_time())
+            .or(word())
             .or(negative_number())
             .or(char('*').with(value("*".to_string())))
     };
@@ -324,6 +381,22 @@ mod test {
         error_parse("-1.");
     }
 
+    #[test]
+    fn test_date_time() {
+        let (val, remaining) = date_time()
+            .parse("2015-08-02T18:54:42+02:30")
+            .expect("cannot parse date");
+        assert_eq!(val, "2015-08-02T18:54:42+02:30");
+        assert_eq!(remaining, "");
+        assert!(date_time().parse("2015-08-02T18:54:42+02").is_err());
+
+        let (val, remaining) = date_time()
+            .parse("2021-04-13T19:46:26.266051969+00:00")
+            .expect("cannot parse fractional date");
+        assert_eq!(val, "2021-04-13T19:46:26.266051969+00:00");
+        assert_eq!(remaining, "");
+    }
+
     fn test_parse_query_to_ast_helper(query: &str, expected: &str) {
         let query = parse_to_ast().parse(query).unwrap().0;
         let query_str = format!("{:?}", query);
@@ -437,25 +510,60 @@ mod test {
     #[test]
     fn test_range_parser() {
         // testing the range() parser separately
-        let res = range().parse("title: <hello").unwrap().0;
+        let res = range()
+            .parse("title: <hello")
+            .expect("Cannot parse flexible bound word")
+            .0;
         let expected = UserInputLeaf::Range {
             field: Some("title".to_string()),
             lower: UserInputBound::Unbounded,
             upper: UserInputBound::Exclusive("hello".to_string()),
         };
-        let res2 = range().parse("title:{* TO hello}").unwrap().0;
+        let res2 = range()
+            .parse("title:{* TO hello}")
+            .expect("Cannot parse unbounded to word")
+            .0;
         assert_eq!(res, expected);
         assert_eq!(res2, expected);
 
         let expected_weight = UserInputLeaf::Range {
             field: Some("weight".to_string()),
             lower: UserInputBound::Inclusive("71.2".to_string()),
             upper: UserInputBound::Unbounded,
         };
 
-        let res3 = range().parse("weight: >=71.2").unwrap().0;
-        let res4 = range().parse("weight:[71.2 TO *}").unwrap().0;
+        let res3 = range()
+            .parse("weight: >=71.2")
+            .expect("Cannot parse flexible bound float")
+            .0;
+        let res4 = range()
+            .parse("weight:[71.2 TO *}")
+            .expect("Cannot parse float to unbounded")
+            .0;
         assert_eq!(res3, expected_weight);
         assert_eq!(res4, expected_weight);
 
+        let expected_dates = UserInputLeaf::Range {
+            field: Some("date_field".to_string()),
+            lower: UserInputBound::Exclusive("2015-08-02T18:54:42Z".to_string()),
+            upper: UserInputBound::Inclusive("2021-08-02T18:54:42+02:30".to_string()),
+        };
+        let res5 = range()
+            .parse("date_field:{2015-08-02T18:54:42Z TO 2021-08-02T18:54:42+02:30]")
+            .expect("Cannot parse date range")
+            .0;
+        assert_eq!(res5, expected_dates);
+
+        let expected_flexible_dates = UserInputLeaf::Range {
+            field: Some("date_field".to_string()),
+            lower: UserInputBound::Unbounded,
+            upper: UserInputBound::Inclusive("2021-08-02T18:54:42.12345+02:30".to_string()),
+        };
+
+        let res6 = range()
+            .parse("date_field: <=2021-08-02T18:54:42.12345+02:30")
+            .expect("Cannot parse date range")
+            .0;
+        assert_eq!(res6, expected_flexible_dates);
     }
 
     #[test]
@@ -2,7 +2,7 @@ use super::Collector;
 use crate::collector::SegmentCollector;
 use crate::DocId;
 use crate::Score;
-use crate::SegmentLocalId;
+use crate::SegmentOrdinal;
 use crate::SegmentReader;
 
 /// `CountCollector` collector only counts how many
@@ -45,7 +45,7 @@ impl Collector for Count {
 
     fn for_segment(
         &self,
-        _: SegmentLocalId,
+        _: SegmentOrdinal,
         _: &SegmentReader,
     ) -> crate::Result<SegmentCountCollector> {
         Ok(SegmentCountCollector::default())
@@ -15,7 +15,7 @@ impl Collector for DocSetCollector {
 
     fn for_segment(
         &self,
-        segment_local_id: crate::SegmentLocalId,
+        segment_local_id: crate::SegmentOrdinal,
         _segment: &crate::SegmentReader,
     ) -> crate::Result<Self::Child> {
         Ok(DocSetChildCollector {
@@ -36,7 +36,7 @@ impl Collector for DocSetCollector {
         let mut result = HashSet::with_capacity(len);
         for (segment_local_id, docs) in segment_fruits {
             for doc in docs {
-                result.insert(DocAddress(segment_local_id, doc));
+                result.insert(DocAddress::new(segment_local_id, doc));
             }
         }
         Ok(result)
@@ -5,7 +5,7 @@ use crate::schema::Facet;
 use crate::schema::Field;
 use crate::DocId;
 use crate::Score;
-use crate::SegmentLocalId;
+use crate::SegmentOrdinal;
 use crate::SegmentReader;
 use std::cmp::Ordering;
 use std::collections::btree_map;
@@ -37,7 +37,10 @@ impl<'a> PartialOrd<Hit<'a>> for Hit<'a> {
 
 impl<'a> Ord for Hit<'a> {
     fn cmp(&self, other: &Self) -> Ordering {
-        other.count.cmp(&self.count)
+        other
+            .count
+            .cmp(&self.count)
+            .then(self.facet.cmp(other.facet))
     }
 }
 
@@ -262,7 +265,7 @@ impl Collector for FacetCollector {
 
     fn for_segment(
        &self,
-        _: SegmentLocalId,
+        _: SegmentOrdinal,
         reader: &SegmentReader,
     ) -> crate::Result<FacetSegmentCollector> {
         let facet_reader = reader.facet_reader(self.field)?;
@@ -657,6 +660,41 @@ mod tests {
         );
         }
     }
 
+    #[test]
+    fn test_facet_collector_topk_tie_break() -> crate::Result<()> {
+        let mut schema_builder = Schema::builder();
+        let facet_field = schema_builder.add_facet_field("facet", INDEXED);
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);
+
+        let docs: Vec<Document> = vec![("b", 2), ("a", 2), ("c", 4)]
+            .into_iter()
+            .flat_map(|(c, count)| {
+                let facet = Facet::from(&format!("/facet/{}", c));
+                let doc = doc!(facet_field => facet);
+                iter::repeat(doc).take(count)
+            })
+            .collect();
+
+        let mut index_writer = index.writer_for_tests()?;
+        for doc in docs {
+            index_writer.add_document(doc);
+        }
+        index_writer.commit()?;
+
+        let searcher = index.reader()?.searcher();
+        let mut facet_collector = FacetCollector::for_field(facet_field);
+        facet_collector.add_facet("/facet");
+        let counts: FacetCounts = searcher.search(&AllQuery, &facet_collector)?;
+
+        let facets: Vec<(&Facet, u64)> = counts.top_k("/facet", 2);
+        assert_eq!(
+            facets,
+            vec![(&Facet::from("/facet/c"), 4), (&Facet::from("/facet/a"), 2)]
+        );
+        Ok(())
+    }
 }
 
 #[cfg(all(test, feature = "unstable"))]
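The new comparator orders hits by descending count and breaks equal counts by the facet's own ordering, which makes top-k results deterministic (the behavior #357 fixes and the test above asserts). A standalone sketch of the same ordering on plain tuples (not the actual `Hit` type):

fn tie_break_sketch() {
    // (count, facet) pairs: sort by count descending, then facet ascending.
    let mut hits = vec![(2u64, "/facet/b"), (4, "/facet/c"), (2, "/facet/a")];
    hits.sort_by(|l, r| r.0.cmp(&l.0).then(l.1.cmp(r.1)));
    assert_eq!(hits, vec![(4, "/facet/c"), (2, "/facet/a"), (2, "/facet/b")]);
}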
@@ -47,7 +47,7 @@ use crate::{Score, SegmentReader, TantivyError};
/// let top_docs = searcher.search(&query, &no_filter_collector).unwrap();
///
/// assert_eq!(top_docs.len(), 1);
-/// assert_eq!(top_docs[0].1, DocAddress(0, 1));
+/// assert_eq!(top_docs[0].1, DocAddress::new(0, 1));
///
/// let filter_all_collector: FilterCollector<_, _, u64> = FilterCollector::new(price, &|value| value < 5u64, TopDocs::with_limit(2));
/// let filtered_top_docs = searcher.search(&query, &filter_all_collector).unwrap();
@@ -106,7 +106,7 @@ impl Collector for HistogramCollector {
 
     fn for_segment(
         &self,
-        _segment_local_id: crate::SegmentLocalId,
+        _segment_local_id: crate::SegmentOrdinal,
         segment: &crate::SegmentReader,
     ) -> crate::Result<Self::Child> {
         let ff_reader = segment.fast_fields().u64_lenient(self.field)?;
@@ -86,7 +86,7 @@ See the `custom_collector` example.
 
 use crate::DocId;
 use crate::Score;
-use crate::SegmentLocalId;
+use crate::SegmentOrdinal;
 use crate::SegmentReader;
 use downcast_rs::impl_downcast;
@@ -155,7 +155,7 @@ pub trait Collector: Sync + Send {
     /// on this segment.
     fn for_segment(
         &self,
-        segment_local_id: SegmentLocalId,
+        segment_local_id: SegmentOrdinal,
         segment: &SegmentReader,
     ) -> crate::Result<Self::Child>;
@@ -214,7 +214,7 @@ impl<TCollector: Collector> Collector for Option<TCollector> {
 
     fn for_segment(
         &self,
-        segment_local_id: SegmentLocalId,
+        segment_local_id: SegmentOrdinal,
         segment: &SegmentReader,
     ) -> crate::Result<Self::Child> {
         Ok(if let Some(inner) = self {
@@ -3,7 +3,7 @@ use super::SegmentCollector;
 use crate::collector::Fruit;
 use crate::DocId;
 use crate::Score;
-use crate::SegmentLocalId;
+use crate::SegmentOrdinal;
 use crate::SegmentReader;
 use crate::TantivyError;
 use std::marker::PhantomData;
@@ -175,7 +175,7 @@ impl<'a> Collector for MultiCollector<'a> {
 
     fn for_segment(
         &self,
-        segment_local_id: SegmentLocalId,
+        segment_local_id: SegmentOrdinal,
         segment: &SegmentReader,
     ) -> crate::Result<MultiCollectorChild> {
         let children = self
@@ -5,7 +5,7 @@ use crate::fastfield::FastFieldReader;
 use crate::schema::Field;
 use crate::DocId;
 use crate::Score;
-use crate::SegmentLocalId;
+use crate::SegmentOrdinal;
 use crate::{DocAddress, Document, Searcher};
 
 use crate::collector::{Count, FilterCollector, TopDocs};
@@ -53,7 +53,7 @@ pub fn test_filter_collector() {
     let top_docs = searcher.search(&query, &filter_some_collector).unwrap();
 
     assert_eq!(top_docs.len(), 1);
-    assert_eq!(top_docs[0].1, DocAddress(0, 1));
+    assert_eq!(top_docs[0].1, DocAddress::new(0, 1));
 
     let filter_all_collector: FilterCollector<_, _, u64> =
         FilterCollector::new(price, &|value| value < 5u64, TopDocs::with_limit(2));
@@ -82,7 +82,7 @@ pub struct TestCollector {
 }
 
 pub struct TestSegmentCollector {
-    segment_id: SegmentLocalId,
+    segment_id: SegmentOrdinal,
     fruit: TestFruit,
 }
 
@@ -108,7 +108,7 @@ impl Collector for TestCollector {
 
     fn for_segment(
         &self,
-        segment_id: SegmentLocalId,
+        segment_id: SegmentOrdinal,
         _reader: &SegmentReader,
     ) -> crate::Result<TestSegmentCollector> {
         Ok(TestSegmentCollector {
@@ -126,7 +126,7 @@ impl Collector for TestCollector {
         if fruit.docs().is_empty() {
             0
         } else {
-            fruit.docs()[0].segment_ord()
+            fruit.docs()[0].segment_ord
         }
     });
     let mut docs = vec![];
@@ -143,7 +143,7 @@ impl SegmentCollector for TestSegmentCollector {
     type Fruit = TestFruit;
 
     fn collect(&mut self, doc: DocId, score: Score) {
-        self.fruit.docs.push(DocAddress(self.segment_id, doc));
+        self.fruit.docs.push(DocAddress::new(self.segment_id, doc));
         self.fruit.scores.push(score);
     }
 
@@ -177,7 +177,7 @@ impl Collector for FastFieldTestCollector {
 
     fn for_segment(
         &self,
-        _: SegmentLocalId,
+        _: SegmentOrdinal,
         segment_reader: &SegmentReader,
     ) -> crate::Result<FastFieldSegmentCollector> {
         let reader = segment_reader
@@ -1,6 +1,6 @@
 use crate::DocAddress;
 use crate::DocId;
-use crate::SegmentLocalId;
+use crate::SegmentOrdinal;
 use crate::SegmentReader;
 use std::cmp::Ordering;
 use std::collections::BinaryHeap;
@@ -118,7 +118,7 @@ where
 
     pub(crate) fn for_segment<F: PartialOrd>(
         &self,
-        segment_id: SegmentLocalId,
+        segment_id: SegmentOrdinal,
         _: &SegmentReader,
     ) -> TopSegmentCollector<F> {
         TopSegmentCollector::new(segment_id, self.limit + self.offset)
@@ -147,29 +147,32 @@ where
 pub(crate) struct TopSegmentCollector<T> {
     limit: usize,
     heap: BinaryHeap<ComparableDoc<T, DocId>>,
-    segment_id: u32,
+    segment_ord: u32,
 }
 
 impl<T: PartialOrd> TopSegmentCollector<T> {
-    fn new(segment_id: SegmentLocalId, limit: usize) -> TopSegmentCollector<T> {
+    fn new(segment_ord: SegmentOrdinal, limit: usize) -> TopSegmentCollector<T> {
         TopSegmentCollector {
             limit,
             heap: BinaryHeap::with_capacity(limit),
-            segment_id,
+            segment_ord,
         }
     }
 }
 
 impl<T: PartialOrd + Clone> TopSegmentCollector<T> {
     pub fn harvest(self) -> Vec<(T, DocAddress)> {
-        let segment_id = self.segment_id;
+        let segment_ord = self.segment_ord;
         self.heap
             .into_sorted_vec()
             .into_iter()
             .map(|comparable_doc| {
                 (
                     comparable_doc.feature,
-                    DocAddress(segment_id, comparable_doc.doc),
+                    DocAddress {
+                        segment_ord,
+                        doc_id: comparable_doc.doc,
+                    },
                 )
             })
             .collect()
@@ -220,9 +223,9 @@ mod tests {
         assert_eq!(
             top_collector.harvest(),
             vec![
-                (0.8, DocAddress(0, 1)),
-                (0.3, DocAddress(0, 5)),
-                (0.2, DocAddress(0, 3))
+                (0.8, DocAddress::new(0, 1)),
+                (0.3, DocAddress::new(0, 5)),
+                (0.2, DocAddress::new(0, 3))
             ]
         );
     }
@@ -238,10 +241,10 @@ mod tests {
         assert_eq!(
             top_collector.harvest(),
             vec![
-                (0.9, DocAddress(0, 7)),
-                (0.8, DocAddress(0, 1)),
-                (0.3, DocAddress(0, 5)),
-                (0.2, DocAddress(0, 3))
+                (0.9, DocAddress::new(0, 7)),
+                (0.8, DocAddress::new(0, 1)),
+                (0.3, DocAddress::new(0, 5)),
+                (0.2, DocAddress::new(0, 3))
             ]
         );
     }
@@ -276,17 +279,17 @@ mod tests {
 
         let results = collector
             .merge_fruits(vec![vec![
-                (0.9, DocAddress(0, 1)),
-                (0.8, DocAddress(0, 2)),
-                (0.7, DocAddress(0, 3)),
-                (0.6, DocAddress(0, 4)),
-                (0.5, DocAddress(0, 5)),
+                (0.9, DocAddress::new(0, 1)),
+                (0.8, DocAddress::new(0, 2)),
+                (0.7, DocAddress::new(0, 3)),
+                (0.6, DocAddress::new(0, 4)),
+                (0.5, DocAddress::new(0, 5)),
             ]])
             .unwrap();
 
         assert_eq!(
             results,
-            vec![(0.8, DocAddress(0, 2)), (0.7, DocAddress(0, 3)),]
+            vec![(0.8, DocAddress::new(0, 2)), (0.7, DocAddress::new(0, 3)),]
         );
     }
 
@@ -295,10 +298,13 @@ mod tests {
         let collector = TopCollector::with_limit(2).and_offset(1);
 
         let results = collector
-            .merge_fruits(vec![vec![(0.9, DocAddress(0, 1)), (0.8, DocAddress(0, 2))]])
+            .merge_fruits(vec![vec![
+                (0.9, DocAddress::new(0, 1)),
+                (0.8, DocAddress::new(0, 2)),
+            ]])
            .unwrap();
 
-        assert_eq!(results, vec![(0.8, DocAddress(0, 2)),]);
+        assert_eq!(results, vec![(0.8, DocAddress::new(0, 2)),]);
     }
 
     #[test]
@@ -306,7 +312,10 @@ mod tests {
         let collector = TopCollector::with_limit(2).and_offset(20);
 
        let results = collector
-            .merge_fruits(vec![vec![(0.9, DocAddress(0, 1)), (0.8, DocAddress(0, 2))]])
+            .merge_fruits(vec![vec![
+                (0.9, DocAddress::new(0, 1)),
+                (0.8, DocAddress::new(0, 2)),
+            ]])
            .unwrap();
 
         assert_eq!(results, vec![]);
@@ -10,7 +10,7 @@ use crate::schema::Field;
 use crate::DocAddress;
 use crate::DocId;
 use crate::Score;
-use crate::SegmentLocalId;
+use crate::SegmentOrdinal;
 use crate::SegmentReader;
 use crate::{collector::custom_score_top_collector::CustomScoreTopCollector, fastfield::FastValue};
 use crate::{collector::top_collector::TopSegmentCollector, TantivyError};
@@ -37,7 +37,7 @@ where
 
     fn for_segment(
         &self,
-        segment_local_id: crate::SegmentLocalId,
+        segment_local_id: crate::SegmentOrdinal,
         segment: &SegmentReader,
     ) -> crate::Result<Self::Child> {
         let schema = segment.schema();
@@ -113,8 +113,8 @@ where
/// let query = query_parser.parse_query("diary").unwrap();
/// let top_docs = searcher.search(&query, &TopDocs::with_limit(2)).unwrap();
///
-/// assert_eq!(top_docs[0].1, DocAddress(0, 1));
-/// assert_eq!(top_docs[1].1, DocAddress(0, 3));
+/// assert_eq!(top_docs[0].1, DocAddress::new(0, 1));
+/// assert_eq!(top_docs[1].1, DocAddress::new(0, 3));
/// ```
pub struct TopDocs(TopCollector<Score>);

@@ -201,8 +201,8 @@ impl TopDocs {
    /// let top_docs = searcher.search(&query, &TopDocs::with_limit(2).and_offset(1)).unwrap();
    ///
    /// assert_eq!(top_docs.len(), 2);
-    /// assert_eq!(top_docs[0].1, DocAddress(0, 4));
-    /// assert_eq!(top_docs[1].1, DocAddress(0, 3));
+    /// assert_eq!(top_docs[0].1, DocAddress::new(0, 4));
+    /// assert_eq!(top_docs[1].1, DocAddress::new(0, 3));
    /// ```
    pub fn and_offset(self, offset: usize) -> TopDocs {
        TopDocs(self.0.and_offset(offset))
@@ -243,8 +243,8 @@ impl TopDocs {
    /// # let query = QueryParser::for_index(&index, vec![title]).parse_query("diary")?;
    /// # let top_docs = docs_sorted_by_rating(&reader.searcher(), &query, rating)?;
    /// # assert_eq!(top_docs,
-    /// #            vec![(97u64, DocAddress(0u32, 1)),
-    /// #                 (80u64, DocAddress(0u32, 3))]);
+    /// #            vec![(97u64, DocAddress::new(0u32, 1)),
+    /// #                 (80u64, DocAddress::new(0u32, 3))]);
    /// # Ok(())
    /// # }
    /// /// Searches the document matching the given query, and
@@ -323,8 +323,8 @@ impl TopDocs {
    /// # let reader = index.reader()?;
    /// # let top_docs = docs_sorted_by_revenue(&reader.searcher(), &AllQuery, rating)?;
    /// # assert_eq!(top_docs,
-    /// #            vec![(119_000_000i64, DocAddress(0, 1)),
-    /// #                 (92_000_000i64, DocAddress(0, 0))]);
+    /// #            vec![(119_000_000i64, DocAddress::new(0, 1)),
+    /// #                 (92_000_000i64, DocAddress::new(0, 0))]);
    /// # Ok(())
    /// # }
    /// /// Searches the document matching the given query, and
@@ -600,7 +600,7 @@ impl Collector for TopDocs {
 
     fn for_segment(
         &self,
-        segment_local_id: SegmentLocalId,
+        segment_local_id: SegmentOrdinal,
         reader: &SegmentReader,
     ) -> crate::Result<Self::Child> {
         let collector = self.0.for_segment(segment_local_id, reader);
@@ -671,7 +671,15 @@ impl Collector for TopDocs {
         let fruit = heap
             .into_sorted_vec()
             .into_iter()
-            .map(|cid| (cid.feature, DocAddress(segment_ord, cid.doc)))
+            .map(|cid| {
+                (
+                    cid.feature,
+                    DocAddress {
+                        segment_ord,
+                        doc_id: cid.doc,
+                    },
+                )
+            })
             .collect();
         Ok(fruit)
     }
@@ -741,9 +749,9 @@ mod tests {
         assert_results_equals(
             &score_docs,
             &[
-                (0.81221175, DocAddress(0u32, 1)),
-                (0.5376842, DocAddress(0u32, 2)),
-                (0.48527452, DocAddress(0, 0)),
+                (0.81221175, DocAddress::new(0u32, 1)),
+                (0.5376842, DocAddress::new(0u32, 2)),
+                (0.48527452, DocAddress::new(0, 0)),
             ],
         );
     }
@@ -760,7 +768,7 @@ mod tests {
            .searcher()
            .search(&text_query, &TopDocs::with_limit(4).and_offset(2))
            .unwrap();
-        assert_results_equals(&score_docs[..], &[(0.48527452, DocAddress(0, 0))]);
+        assert_results_equals(&score_docs[..], &[(0.48527452, DocAddress::new(0, 0))]);
     }
 
     #[test]
@@ -778,8 +786,8 @@ mod tests {
         assert_results_equals(
             &score_docs,
             &[
-                (0.81221175, DocAddress(0u32, 1)),
-                (0.5376842, DocAddress(0u32, 2)),
+                (0.81221175, DocAddress::new(0u32, 1)),
+                (0.5376842, DocAddress::new(0u32, 2)),
             ],
         );
     }
@@ -799,8 +807,8 @@ mod tests {
         assert_results_equals(
             &score_docs[..],
             &[
-                (0.5376842, DocAddress(0u32, 2)),
-                (0.48527452, DocAddress(0, 0)),
+                (0.5376842, DocAddress::new(0u32, 2)),
+                (0.48527452, DocAddress::new(0, 0)),
             ],
         );
     }
@@ -864,9 +872,9 @@ mod tests {
         assert_eq!(
             &top_docs[..],
             &[
-                (64, DocAddress(0, 1)),
-                (16, DocAddress(0, 2)),
-                (12, DocAddress(0, 0))
+                (64, DocAddress::new(0, 1)),
+                (16, DocAddress::new(0, 2)),
+                (12, DocAddress::new(0, 0))
             ]
         );
     }
@@ -898,8 +906,8 @@ mod tests {
         assert_eq!(
             &top_docs[..],
             &[
-                (mr_birthday, DocAddress(0, 1)),
-                (pr_birthday, DocAddress(0, 0)),
+                (mr_birthday, DocAddress::new(0, 1)),
+                (pr_birthday, DocAddress::new(0, 0)),
             ]
         );
         Ok(())
@@ -927,7 +935,10 @@ mod tests {
         let top_docs: Vec<(i64, DocAddress)> = searcher.search(&AllQuery, &top_collector)?;
         assert_eq!(
             &top_docs[..],
-            &[(40i64, DocAddress(0, 1)), (-1i64, DocAddress(0, 0)),]
+            &[
+                (40i64, DocAddress::new(0, 1)),
+                (-1i64, DocAddress::new(0, 0)),
+            ]
         );
         Ok(())
     }
@@ -954,7 +965,10 @@ mod tests {
         let top_docs: Vec<(f64, DocAddress)> = searcher.search(&AllQuery, &top_collector)?;
         assert_eq!(
             &top_docs[..],
-            &[(40f64, DocAddress(0, 1)), (-1.0f64, DocAddress(0, 0)),]
+            &[
+                (40f64, DocAddress::new(0, 1)),
+                (-1.0f64, DocAddress::new(0, 0)),
+            ]
         );
         Ok(())
     }
@@ -1034,7 +1048,7 @@ mod tests {
 
         assert_eq!(
             score_docs,
-            vec![(1, DocAddress(0, 1)), (0, DocAddress(0, 0)),]
+            vec![(1, DocAddress::new(0, 1)), (0, DocAddress::new(0, 0)),]
         );
     }
 
@@ -1056,7 +1070,7 @@ mod tests {
 
         assert_eq!(
             score_docs,
-            vec![(1, DocAddress(0, 1)), (0, DocAddress(0, 0)),]
+            vec![(1, DocAddress::new(0, 1)), (0, DocAddress::new(0, 0)),]
         );
     }
 
@@ -239,7 +239,7 @@ impl Index {
     /// Such segments can of course be part of the index,
     /// but also they could be segments being currently built or in the middle of a merge
     /// operation.
-    pub fn list_all_segment_metas(&self) -> Vec<SegmentMeta> {
+    pub(crate) fn list_all_segment_metas(&self) -> Vec<SegmentMeta> {
         self.inventory.all()
     }
 
@@ -54,9 +54,8 @@ impl Searcher {
     /// The searcher uses the segment ordinal to route
     /// the request to the right `Segment`.
     pub fn doc(&self, doc_address: DocAddress) -> crate::Result<Document> {
-        let DocAddress(segment_local_id, doc_id) = doc_address;
-        let store_reader = &self.store_readers[segment_local_id as usize];
-        store_reader.get(doc_id)
+        let store_reader = &self.store_readers[doc_address.segment_ord as usize];
+        store_reader.get(doc_address.doc_id)
     }
 
     /// Access the schema associated to the index of this searcher.
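Call sites only change in how the address is built; a hedged sketch of fetching a stored document under the new API (hypothetical helper, not from the commit):

use tantivy::{DocAddress, Document, Searcher};

fn fetch_first_doc(searcher: &Searcher) -> tantivy::Result<Document> {
    // segment_ord selects the store reader; doc_id is the segment-local id.
    searcher.doc(DocAddress::new(0u32, 0u32))
}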
@@ -56,7 +56,7 @@ mod tests {
     fn test_stored_bytes() -> crate::Result<()> {
         let searcher = create_index_for_test(STORED)?;
         assert_eq!(searcher.num_docs(), 1);
-        let retrieved_doc = searcher.doc(DocAddress(0u32, 0u32))?;
+        let retrieved_doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
         let field = searcher.schema().get_field("string_bytes").unwrap();
         let values: Vec<&Value> = retrieved_doc.get_all(field).collect();
         assert_eq!(values.len(), 2);
@@ -72,7 +72,7 @@ mod tests {
     fn test_non_stored_bytes() -> crate::Result<()> {
         let searcher = create_index_for_test(INDEXED)?;
         assert_eq!(searcher.num_docs(), 1);
-        let retrieved_doc = searcher.doc(DocAddress(0u32, 0u32))?;
+        let retrieved_doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
         let field = searcher.schema().get_field("string_bytes").unwrap();
         assert!(retrieved_doc.get_first(field).is_none());
         Ok(())
@@ -105,7 +105,7 @@ mod tests {
         let mut facet_ords = Vec::new();
         facet_reader.facet_ords(0u32, &mut facet_ords);
         assert_eq!(&facet_ords, &[2u64]);
-        let doc = searcher.doc(DocAddress(0u32, 0u32))?;
+        let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
         let value = doc.get_first(facet_field).and_then(Value::path);
         assert_eq!(value, None);
         Ok(())
@@ -128,7 +128,7 @@ mod tests {
         let mut facet_ords = Vec::new();
         facet_reader.facet_ords(0u32, &mut facet_ords);
         assert!(facet_ords.is_empty());
-        let doc = searcher.doc(DocAddress(0u32, 0u32))?;
+        let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
         let value = doc.get_first(facet_field).and_then(Value::path);
         assert_eq!(value, Some("/a/b".to_string()));
         Ok(())
@@ -151,7 +151,7 @@ mod tests {
         let mut facet_ords = Vec::new();
         facet_reader.facet_ords(0u32, &mut facet_ords);
         assert_eq!(&facet_ords, &[2u64]);
-        let doc = searcher.doc(DocAddress(0u32, 0u32))?;
+        let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
         let value = doc.get_first(facet_field).and_then(Value::path);
         assert_eq!(value, Some("/a/b".to_string()));
         Ok(())
@@ -174,7 +174,7 @@ mod tests {
         let mut facet_ords = Vec::new();
         facet_reader.facet_ords(0u32, &mut facet_ords);
         assert!(facet_ords.is_empty());
-        let doc = searcher.doc(DocAddress(0u32, 0u32))?;
+        let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
         let value = doc.get_first(facet_field).and_then(Value::path);
         assert_eq!(value, None);
         Ok(())
@@ -147,37 +147,50 @@ mod tests {
             }
         }
 
-        // TODO: support Date range queries
-        // {
-        //     let parser = QueryParser::for_index(&index, vec![date_field]);
-        //     let range_q = format!("\"{}\"..\"{}\"",
-        //         (first_time_stamp + Duration::seconds(1)).to_rfc3339(),
-        //         (first_time_stamp + Duration::seconds(3)).to_rfc3339()
-        //     );
-        //     let query = parser.parse_query(&range_q)
-        //         .expect("could not parse query");
-        //     let results = searcher.search(&query, &TopDocs::with_limit(5))
-        //         .expect("could not query index");
-        //
-        //
-        //     assert_eq!(results.len(), 2);
-        //     for (i, doc_pair) in results.iter().enumerate() {
-        //         let retrieved_doc = searcher.doc(doc_pair.1).expect("cannot fetch doc");
-        //         let offset_sec = match i {
-        //             0 => 1,
-        //             1 => 3,
-        //             _ => panic!("should not have more than 2 docs")
-        //         };
-        //         let time_i_val = match i {
-        //             0 => 2,
-        //             1 => 3,
-        //             _ => panic!("should not have more than 2 docs")
-        //         };
-        //         assert_eq!(retrieved_doc.get_first(date_field).expect("cannot find value").date_value().timestamp(),
-        //             (first_time_stamp + Duration::seconds(offset_sec)).timestamp());
-        //         assert_eq!(retrieved_doc.get_first(time_i).expect("cannot find value").i64_value(), time_i_val);
-        //     }
-        // }
+        {
+            let parser = QueryParser::for_index(&index, vec![date_field]);
+            let range_q = format!(
+                "[{} TO {}]",
+                (first_time_stamp + Duration::seconds(1)).to_rfc3339(),
+                (first_time_stamp + Duration::seconds(3)).to_rfc3339()
+            );
+            let query = parser.parse_query(&range_q).expect("could not parse query");
+            let results = searcher
+                .search(&query, &TopDocs::with_limit(5))
+                .expect("could not query index");
+
+            assert_eq!(results.len(), 2);
+            for (i, doc_pair) in results.iter().enumerate() {
+                let retrieved_doc = searcher.doc(doc_pair.1).expect("cannot fetch doc");
+                let offset_sec = match i {
+                    0 => 1,
+                    1 => 2,
+                    _ => panic!("should not have more than 2 docs"),
+                };
+                let time_i_val = match i {
+                    0 => 2,
+                    1 => 3,
+                    _ => panic!("should not have more than 2 docs"),
+                };
+                assert_eq!(
+                    retrieved_doc
+                        .get_first(date_field)
+                        .expect("cannot find value")
+                        .date_value()
+                        .expect("value not of Date type")
+                        .timestamp(),
+                    (first_time_stamp + Duration::seconds(offset_sec)).timestamp()
+                );
+                assert_eq!(
+                    retrieved_doc
+                        .get_first(time_i)
+                        .expect("cannot find value")
+                        .i64_value()
+                        .expect("value not of i64 type"),
+                    time_i_val
+                );
+            }
+        }
     }
 
     #[test]
@@ -798,49 +798,53 @@ mod tests {
         {
             assert_eq!(
                 get_doc_ids(vec![Term::from_field_text(text_field, "a")])?,
-                vec![DocAddress(0, 1), DocAddress(0, 2), DocAddress(0, 4)]
+                vec![
+                    DocAddress::new(0, 1),
+                    DocAddress::new(0, 2),
+                    DocAddress::new(0, 4)
+                ]
             );
             assert_eq!(
                 get_doc_ids(vec![Term::from_field_text(text_field, "af")])?,
-                vec![DocAddress(0, 0), DocAddress(0, 3)]
+                vec![DocAddress::new(0, 0), DocAddress::new(0, 3)]
             );
             assert_eq!(
                 get_doc_ids(vec![Term::from_field_text(text_field, "g")])?,
-                vec![DocAddress(0, 4)]
+                vec![DocAddress::new(0, 4)]
             );
             assert_eq!(
                 get_doc_ids(vec![Term::from_field_text(text_field, "b")])?,
                 vec![
-                    DocAddress(0, 0),
-                    DocAddress(0, 1),
-                    DocAddress(0, 2),
-                    DocAddress(0, 3),
-                    DocAddress(0, 4)
+                    DocAddress::new(0, 0),
+                    DocAddress::new(0, 1),
+                    DocAddress::new(0, 2),
+                    DocAddress::new(0, 3),
+                    DocAddress::new(0, 4)
                 ]
             );
             assert_eq!(
                 get_doc_ids(vec![Term::from_field_date(date_field, &curr_time)])?,
-                vec![DocAddress(0, 0), DocAddress(0, 3)]
+                vec![DocAddress::new(0, 0), DocAddress::new(0, 3)]
             );
         }
         {
-            let doc = searcher.doc(DocAddress(0, 0))?;
+            let doc = searcher.doc(DocAddress::new(0, 0))?;
             assert_eq!(doc.get_first(text_field).unwrap().text(), Some("af b"));
         }
         {
-            let doc = searcher.doc(DocAddress(0, 1))?;
+            let doc = searcher.doc(DocAddress::new(0, 1))?;
             assert_eq!(doc.get_first(text_field).unwrap().text(), Some("a b c"));
         }
         {
-            let doc = searcher.doc(DocAddress(0, 2))?;
+            let doc = searcher.doc(DocAddress::new(0, 2))?;
             assert_eq!(doc.get_first(text_field).unwrap().text(), Some("a b c d"));
         }
         {
-            let doc = searcher.doc(DocAddress(0, 3))?;
+            let doc = searcher.doc(DocAddress::new(0, 3))?;
             assert_eq!(doc.get_first(text_field).unwrap().text(), Some("af b"));
         }
         {
-            let doc = searcher.doc(DocAddress(0, 4))?;
+            let doc = searcher.doc(DocAddress::new(0, 4))?;
             assert_eq!(doc.get_first(text_field).unwrap().text(), Some("a b c g"));
         }
         {
@@ -24,6 +24,7 @@ pub use self::prepared_commit::PreparedCommit;
 pub use self::segment_entry::SegmentEntry;
 pub use self::segment_manager::SegmentManager;
 pub use self::segment_serializer::SegmentSerializer;
+pub use self::segment_updater::merge_segments;
 pub use self::segment_writer::SegmentWriter;
 
 /// Alias for the default merge policy, which is the `LogMergePolicy`.
@@ -114,7 +114,7 @@ fn merge(
     // first we need to apply deletes to our segment.
     let merged_segment = index.new_segment();
 
-    // First we apply all of the delet to the merged segment, up to the target opstamp.
+    // First we apply all of the deletes to the merged segment, up to the target opstamp.
     for segment_entry in &mut segment_entries {
         let segment = index.segment(segment_entry.meta().clone());
         advance_deletes(segment, segment_entry, target_opstamp)?;
@@ -141,6 +141,81 @@ fn merge(
     Ok(SegmentEntry::new(segment_meta, delete_cursor, None))
 }
 
+/// Advanced: Merges a list of segments from different indices in a new index.
+///
+/// Returns `TantivyError` if the indices list is empty or their
+/// schemas don't match.
+///
+/// `output_directory`: is assumed to be empty.
+///
+/// # Warning
+/// This function does NOT check whether an `IndexWriter` is running. It is not
+/// meant to work if you have an IndexWriter running for the origin indices, or
+/// the destination Index.
+#[doc(hidden)]
+pub fn merge_segments<Dir: Directory>(
+    indices: &[Index],
+    output_directory: Dir,
+) -> crate::Result<Index> {
+    if indices.is_empty() {
+        // If there are no indices to merge, there is no need to do anything.
+        return Err(crate::TantivyError::InvalidArgument(
+            "No indices given to merge".to_string(),
+        ));
+    }
+
+    let target_schema = indices[0].schema();
+
+    // let's check that all of the indices have the same schema
+    if indices
+        .iter()
+        .skip(1)
+        .any(|index| index.schema() != target_schema)
+    {
+        return Err(crate::TantivyError::InvalidArgument(
+            "Attempt to merge different schema indices".to_string(),
+        ));
+    }
+
+    let mut segments: Vec<Segment> = Vec::new();
+    for index in indices {
+        segments.extend(index.searchable_segments()?);
+    }
+
+    let mut merged_index = Index::create(output_directory, target_schema.clone())?;
+    let merged_segment = merged_index.new_segment();
+    let merged_segment_id = merged_segment.id();
+    let merger: IndexMerger = IndexMerger::open(merged_index.schema(), &segments[..])?;
+    let segment_serializer = SegmentSerializer::for_segment(merged_segment)?;
+    let num_docs = merger.write(segment_serializer)?;
+
+    let segment_meta = merged_index.new_segment_meta(merged_segment_id, num_docs);
+
+    let stats = format!(
+        "Segments Merge: [{}]",
+        segments
+            .iter()
+            .fold(String::new(), |sum, current| format!(
+                "{}{} ",
+                sum,
+                current.meta().id().uuid_string()
+            ))
+            .trim_end()
+    );
+
+    let index_meta = IndexMeta {
+        segments: vec![segment_meta],
+        schema: target_schema,
+        opstamp: 0u64,
+        payload: Some(stats),
+    };
+
+    // save the meta.json
+    save_metas(&index_meta, merged_index.directory_mut())?;
+
+    Ok(merged_index)
+}
+
 pub(crate) struct InnerSegmentUpdater {
     // we keep a copy of the current active IndexMeta to
     // avoid loading the file everytime we need it in the
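A hedged usage sketch of the new helper, mirroring the tests below (it assumes, per the warning above, that no `IndexWriter` is running on any index involved):

use tantivy::directory::RAMDirectory;
use tantivy::{merge_segments, Index};

fn merge_into_ram(indices: &[Index]) -> tantivy::Result<Index> {
    // All indices must share one schema; the output directory is assumed empty.
    merge_segments(indices, RAMDirectory::default())
}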
@@ -479,7 +554,7 @@ impl SegmentUpdater {
                 if delete_operation.opstamp < committed_opstamp {
                     let index = &segment_updater.index;
                     let segment = index.segment(after_merge_segment_entry.meta().clone());
-                    if let Err(e) = advance_deletes(
+                    if let Err(advance_deletes_err) = advance_deletes(
                         segment,
                         &mut after_merge_segment_entry,
                         committed_opstamp,
@@ -487,7 +562,7 @@ impl SegmentUpdater {
                         error!(
                             "Merge of {:?} was cancelled (advancing deletes failed): {:?}",
                             merge_operation.segment_ids(),
-                            e
+                            advance_deletes_err
                         );
                         if cfg!(test) {
                             panic!("Merge failed.");
@@ -495,7 +570,7 @@ impl SegmentUpdater {
                         // ... cancel merge
                         // `merge_operations` are tracked. As it is dropped,
                         // the segment_ids will be available again for merge.
-                        return Err(e);
+                        return Err(advance_deletes_err);
                     }
                 }
             }
@@ -540,158 +615,201 @@ impl SegmentUpdater {
 
 #[cfg(test)]
 mod tests {
 
+    use super::merge_segments;
+    use crate::directory::RAMDirectory;
     use crate::indexer::merge_policy::tests::MergeWheneverPossible;
     use crate::schema::*;
     use crate::Index;
 
     #[test]
-    fn test_delete_during_merge() {
+    fn test_delete_during_merge() -> crate::Result<()> {
         let mut schema_builder = Schema::builder();
         let text_field = schema_builder.add_text_field("text", TEXT);
-        let schema = schema_builder.build();
 
-        let index = Index::create_in_ram(schema);
+        let index = Index::create_in_ram(schema_builder.build());
 
         // writing the segment
-        let mut index_writer = index.writer_for_tests().unwrap();
+        let mut index_writer = index.writer_for_tests()?;
         index_writer.set_merge_policy(Box::new(MergeWheneverPossible));
 
-        {
-            for _ in 0..100 {
-                index_writer.add_document(doc!(text_field=>"a"));
-                index_writer.add_document(doc!(text_field=>"b"));
-            }
-            assert!(index_writer.commit().is_ok());
+        for _ in 0..100 {
+            index_writer.add_document(doc!(text_field=>"a"));
+            index_writer.add_document(doc!(text_field=>"b"));
         }
+        index_writer.commit()?;
 
-        {
-            for _ in 0..100 {
-                index_writer.add_document(doc!(text_field=>"c"));
-                index_writer.add_document(doc!(text_field=>"d"));
-            }
-            assert!(index_writer.commit().is_ok());
+        for _ in 0..100 {
+            index_writer.add_document(doc!(text_field=>"c"));
+            index_writer.add_document(doc!(text_field=>"d"));
         }
+        index_writer.commit()?;
 
-        {
-            index_writer.add_document(doc!(text_field=>"e"));
-            index_writer.add_document(doc!(text_field=>"f"));
-            assert!(index_writer.commit().is_ok());
-        }
+        index_writer.add_document(doc!(text_field=>"e"));
+        index_writer.add_document(doc!(text_field=>"f"));
+        index_writer.commit()?;
 
-        {
-            let term = Term::from_field_text(text_field, "a");
-            index_writer.delete_term(term);
-            assert!(index_writer.commit().is_ok());
-        }
-        let reader = index.reader().unwrap();
+        let term = Term::from_field_text(text_field, "a");
+        index_writer.delete_term(term);
+        index_writer.commit()?;
+
+        let reader = index.reader()?;
         assert_eq!(reader.searcher().num_docs(), 302);
 
-        {
-            index_writer
-                .wait_merging_threads()
-                .expect("waiting for merging threads");
-        }
+        index_writer.wait_merging_threads()?;
 
-        reader.reload().unwrap();
+        reader.reload()?;
         assert_eq!(reader.searcher().segment_readers().len(), 1);
         assert_eq!(reader.searcher().num_docs(), 302);
+        Ok(())
     }
 
     #[test]
-    fn delete_all_docs() {
+    fn delete_all_docs() -> crate::Result<()> {
         let mut schema_builder = Schema::builder();
         let text_field = schema_builder.add_text_field("text", TEXT);
-        let schema = schema_builder.build();
 
-        let index = Index::create_in_ram(schema);
+        let index = Index::create_in_ram(schema_builder.build());
 
         // writing the segment
-        let mut index_writer = index.writer_for_tests().unwrap();
+        let mut index_writer = index.writer_for_tests()?;
 
-        {
-            for _ in 0..100 {
-                index_writer.add_document(doc!(text_field=>"a"));
-                index_writer.add_document(doc!(text_field=>"b"));
-            }
-            assert!(index_writer.commit().is_ok());
+        for _ in 0..100 {
+            index_writer.add_document(doc!(text_field=>"a"));
+            index_writer.add_document(doc!(text_field=>"b"));
         }
+        index_writer.commit()?;
+
+        for _ in 0..100 {
+            index_writer.add_document(doc!(text_field=>"c"));
+            index_writer.add_document(doc!(text_field=>"d"));
+        }
+        index_writer.commit()?;
+
+        index_writer.add_document(doc!(text_field=>"e"));
+        index_writer.add_document(doc!(text_field=>"f"));
+        index_writer.commit()?;
+
+        let seg_ids = index.searchable_segment_ids()?;
+        // docs exist, should have at least 1 segment
+        assert!(seg_ids.len() > 0);
+
+        let term_vals = vec!["a", "b", "c", "d", "e", "f"];
+        for term_val in term_vals {
+            let term = Term::from_field_text(text_field, term_val);
+            index_writer.delete_term(term);
+            index_writer.commit()?;
+        }
 
-        {
-            for _ in 0..100 {
-                index_writer.add_document(doc!(text_field=>"c"));
-                index_writer.add_document(doc!(text_field=>"d"));
-            }
-            assert!(index_writer.commit().is_ok());
-        }
+        index_writer.wait_merging_threads()?;
 
-        {
-            index_writer.add_document(doc!(text_field=>"e"));
-            index_writer.add_document(doc!(text_field=>"f"));
-            assert!(index_writer.commit().is_ok());
-        }
-
-        {
-            let seg_ids = index
-                .searchable_segment_ids()
-                .expect("Searchable segments failed.");
-            // docs exist, should have at least 1 segment
-            assert!(seg_ids.len() > 0);
-        }
-
-        {
-            let term_vals = vec!["a", "b", "c", "d", "e", "f"];
-            for term_val in term_vals {
-                let term = Term::from_field_text(text_field, term_val);
-                index_writer.delete_term(term);
-                assert!(index_writer.commit().is_ok());
-            }
-        }
-
-        {
-            index_writer
-                .wait_merging_threads()
-                .expect("waiting for merging threads");
-        }
-
-        let reader = index.reader().unwrap();
+        let reader = index.reader()?;
         assert_eq!(reader.searcher().num_docs(), 0);
 
-        let seg_ids = index
-            .searchable_segment_ids()
-            .expect("Searchable segments failed.");
+        let seg_ids = index.searchable_segment_ids()?;
         assert!(seg_ids.is_empty());
 
-        reader.reload().unwrap();
+        reader.reload()?;
         assert_eq!(reader.searcher().num_docs(), 0);
         // empty segments should be erased
-        assert!(index.searchable_segment_metas().unwrap().is_empty());
+        assert!(index.searchable_segment_metas()?.is_empty());
         assert!(reader.searcher().segment_readers().is_empty());
+
+        Ok(())
     }
 
     #[test]
-    fn test_remove_all_segments() {
+    fn test_remove_all_segments() -> crate::Result<()> {
         let mut schema_builder = Schema::builder();
         let text_field = schema_builder.add_text_field("text", TEXT);
-        let schema = schema_builder.build();
 
-        let index = Index::create_in_ram(schema);
+        let index = Index::create_in_ram(schema_builder.build());
 
         // writing the segment
-        let mut index_writer = index.writer_for_tests().unwrap();
-
-        {
-            for _ in 0..100 {
-                index_writer.add_document(doc!(text_field=>"a"));
-                index_writer.add_document(doc!(text_field=>"b"));
-            }
-            assert!(index_writer.commit().is_ok());
+        let mut index_writer = index.writer_for_tests()?;
+        for _ in 0..100 {
+            index_writer.add_document(doc!(text_field=>"a"));
+            index_writer.add_document(doc!(text_field=>"b"));
         }
+        index_writer.commit()?;
 
         index_writer.segment_updater().remove_all_segments();
         let seg_vec = index_writer
             .segment_updater()
             .segment_manager
             .segment_entries();
         assert!(seg_vec.is_empty());
+        Ok(())
     }
 
+    #[test]
+    fn test_merge_segments() -> crate::Result<()> {
+        let mut indices = vec![];
+        let mut schema_builder = Schema::builder();
+        let text_field = schema_builder.add_text_field("text", TEXT);
+        let schema = schema_builder.build();
+
+        for _ in 0..3 {
+            let index = Index::create_in_ram(schema.clone());
+
+            // writing two segments
+            let mut index_writer = index.writer_for_tests()?;
+            for _ in 0..100 {
+                index_writer.add_document(doc!(text_field=>"fizz"));
+                index_writer.add_document(doc!(text_field=>"buzz"));
+            }
+            index_writer.commit()?;
+
+            for _ in 0..1000 {
+                index_writer.add_document(doc!(text_field=>"foo"));
+                index_writer.add_document(doc!(text_field=>"bar"));
+            }
+            index_writer.commit()?;
+            indices.push(index);
+        }
+
+        assert_eq!(indices.len(), 3);
+        let output_directory = RAMDirectory::default();
+        let index = merge_segments(&indices, output_directory)?;
+        assert_eq!(index.schema(), schema);
+
+        let segments = index.searchable_segments()?;
+        assert_eq!(segments.len(), 1);
+
+        let segment_metas = segments[0].meta();
+        assert_eq!(segment_metas.num_deleted_docs(), 0);
+        assert_eq!(segment_metas.num_docs(), 6600);
+        Ok(())
+    }
+
+    #[test]
+    fn test_merge_empty_indices_array() {
+        let merge_result = merge_segments(&[], RAMDirectory::default());
+        assert!(merge_result.is_err());
+    }
+
+    #[test]
+    fn test_merge_mismatched_schema() -> crate::Result<()> {
+        let first_index = {
+            let mut schema_builder = Schema::builder();
+            let text_field = schema_builder.add_text_field("text", TEXT);
+            let index = Index::create_in_ram(schema_builder.build());
+            let mut index_writer = index.writer_for_tests()?;
+            index_writer.add_document(doc!(text_field=>"some text"));
+            index_writer.commit()?;
+            index
+        };
+
+        let second_index = {
+            let mut schema_builder = Schema::builder();
+            let body_field = schema_builder.add_text_field("body", TEXT);
+            let index = Index::create_in_ram(schema_builder.build());
+            let mut index_writer = index.writer_for_tests()?;
+            index_writer.add_document(doc!(body_field=>"some body"));
+            index_writer.commit()?;
+            index
+        };
+
+        // mismatched schema index list
+        let result = merge_segments(&[first_index, second_index], RAMDirectory::default());
+        assert!(result.is_err());
+
+        Ok(())
+    }
 }
 
src/lib.rs (49 lines)
@@ -163,6 +163,7 @@ pub use crate::core::{Executor, SegmentComponent};
 pub use crate::core::{Index, IndexMeta, Searcher, Segment, SegmentId, SegmentMeta};
 pub use crate::core::{InvertedIndexReader, SegmentReader};
 pub use crate::directory::Directory;
+pub use crate::indexer::merge_segments;
 pub use crate::indexer::operation::UserOperation;
 pub use crate::indexer::IndexWriter;
 pub use crate::postings::Postings;
@@ -254,20 +255,16 @@ pub type Opstamp = u64;
 /// the document to the search query.
 pub type Score = f32;
 
-/// A `SegmentLocalId` identifies a segment.
-/// It only makes sense for a given searcher.
-pub type SegmentLocalId = u32;
+/// A `SegmentOrdinal` identifies a segment, within a `Searcher`.
+pub type SegmentOrdinal = u32;
 
 impl DocAddress {
-    /// Return the segment ordinal id that identifies the segment
-    /// hosting the document in the `Searcher` it is called from.
-    pub fn segment_ord(self) -> SegmentLocalId {
-        self.0
-    }
-
-    /// Return the segment-local `DocId`
-    pub fn doc(self) -> DocId {
-        self.1
+    /// Creates a new DocAddress from the segment/docId pair.
+    pub fn new(segment_ord: SegmentOrdinal, doc_id: DocId) -> DocAddress {
+        DocAddress {
+            segment_ord,
+            doc_id,
+        }
     }
 }
 
@@ -280,7 +277,13 @@ impl DocAddress {
 /// The id used for the segment is actually an ordinal
 /// in the list of `Segment`s held by a `Searcher`.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub struct DocAddress(pub SegmentLocalId, pub DocId);
+pub struct DocAddress {
+    /// The segment ordinal id that identifies the segment
+    /// hosting the document in the `Searcher` it is called from.
+    pub segment_ord: SegmentOrdinal,
+    /// The segment-local `DocId`.
+    pub doc_id: DocId,
+}
 
 #[cfg(test)]
 mod tests {
@@ -778,30 +781,38 @@ mod tests {
         };
         assert_eq!(
             get_doc_ids(vec![Term::from_field_text(text_field, "a")])?,
-            vec![DocAddress(0, 1), DocAddress(0, 2)]
+            vec![DocAddress::new(0, 1), DocAddress::new(0, 2)]
         );
         assert_eq!(
             get_doc_ids(vec![Term::from_field_text(text_field, "af")])?,
-            vec![DocAddress(0, 0)]
+            vec![DocAddress::new(0, 0)]
         );
         assert_eq!(
             get_doc_ids(vec![Term::from_field_text(text_field, "b")])?,
-            vec![DocAddress(0, 0), DocAddress(0, 1), DocAddress(0, 2)]
+            vec![
+                DocAddress::new(0, 0),
+                DocAddress::new(0, 1),
+                DocAddress::new(0, 2)
+            ]
         );
         assert_eq!(
             get_doc_ids(vec![Term::from_field_text(text_field, "c")])?,
-            vec![DocAddress(0, 1), DocAddress(0, 2)]
+            vec![DocAddress::new(0, 1), DocAddress::new(0, 2)]
         );
         assert_eq!(
             get_doc_ids(vec![Term::from_field_text(text_field, "d")])?,
-            vec![DocAddress(0, 2)]
+            vec![DocAddress::new(0, 2)]
         );
         assert_eq!(
             get_doc_ids(vec![
                 Term::from_field_text(text_field, "b"),
                 Term::from_field_text(text_field, "a"),
             ])?,
-            vec![DocAddress(0, 0), DocAddress(0, 1), DocAddress(0, 2)]
+            vec![
+                DocAddress::new(0, 0),
+                DocAddress::new(0, 1),
+                DocAddress::new(0, 2)
+            ]
         );
         Ok(())
     }
 
@@ -238,9 +238,9 @@ mod tests {
         assert_eq!(
             docs,
             vec![
-                DocAddress(0u32, 1u32),
-                DocAddress(0u32, 2u32),
-                DocAddress(0u32, 3u32)
+                DocAddress::new(0u32, 1u32),
+                DocAddress::new(0u32, 2u32),
+                DocAddress::new(0u32, 3u32)
             ]
             .into_iter()
             .collect()
@@ -264,15 +264,24 @@ mod tests {
             BooleanQuery::intersection(vec![term_b.box_clone(), term_c.box_clone()]);
         {
             let docs = searcher.search(&intersection_ab, &DocSetCollector)?;
-            assert_eq!(docs, vec![DocAddress(0u32, 2u32)].into_iter().collect());
+            assert_eq!(
+                docs,
+                vec![DocAddress::new(0u32, 2u32)].into_iter().collect()
+            );
         }
         {
             let docs = searcher.search(&intersection_ac, &DocSetCollector)?;
-            assert_eq!(docs, vec![DocAddress(0u32, 1u32)].into_iter().collect());
+            assert_eq!(
+                docs,
+                vec![DocAddress::new(0u32, 1u32)].into_iter().collect()
+            );
         }
         {
             let docs = searcher.search(&intersection_bc, &DocSetCollector)?;
-            assert_eq!(docs, vec![DocAddress(0u32, 0u32)].into_iter().collect());
+            assert_eq!(
+                docs,
+                vec![DocAddress::new(0u32, 0u32)].into_iter().collect()
+            );
         }
         Ok(())
     }
 
@@ -128,7 +128,7 @@ mod tests {
             .docs()
             .iter()
             .cloned()
-            .map(|doc| doc.1)
+            .map(|doc| doc.doc_id)
             .collect::<Vec<DocId>>()
     };
     {
@@ -196,8 +196,8 @@ mod tests {
         let topdocs_no_excluded = matching_topdocs(&boolean_query_no_excluded);
         assert_eq!(topdocs_no_excluded.len(), 2);
         let (top_score, top_doc) = topdocs_no_excluded[0];
-        assert_eq!(top_doc, DocAddress(0, 4));
-        assert_eq!(topdocs_no_excluded[1].1, DocAddress(0, 3)); // ignore score of doc 3.
+        assert_eq!(top_doc, DocAddress::new(0, 4));
+        assert_eq!(topdocs_no_excluded[1].1, DocAddress::new(0, 3)); // ignore score of doc 3.
         score_doc_4 = top_score;
     }
 
@@ -210,7 +210,7 @@ mod tests {
         let topdocs_excluded = matching_topdocs(&boolean_query_two_excluded);
         assert_eq!(topdocs_excluded.len(), 1);
         let (top_score, top_doc) = topdocs_excluded[0];
-        assert_eq!(top_doc, DocAddress(0, 4));
+        assert_eq!(top_doc, DocAddress::new(0, 4));
         assert_eq!(top_score, score_doc_4);
     }
 }
@@ -309,7 +309,7 @@ mod tests {
             IndexRecordOption::Basic,
         ));
         let query = BooleanQuery::from(vec![(Occur::Should, term_a), (Occur::Should, term_b)]);
-        let explanation = query.explain(&searcher, DocAddress(0, 0u32))?;
+        let explanation = query.explain(&searcher, DocAddress::new(0, 0u32))?;
         assert_nearly_equals!(explanation.value(), 0.6931472);
         Ok(())
     }
@@ -150,7 +150,7 @@ mod tests {
         let reader = index.reader().unwrap();
         let searcher = reader.searcher();
         let query = BoostQuery::new(Box::new(AllQuery), 0.2);
-        let explanation = query.explain(&searcher, DocAddress(0, 0u32)).unwrap();
+        let explanation = query.explain(&searcher, DocAddress::new(0, 0u32)).unwrap();
         assert_eq!(
             explanation.to_pretty_json(),
             "{\n  \"value\": 0.2,\n  \"description\": \"Boost x0.2 of ...\",\n  \"details\": [\n    {\n      \"value\": 1.0,\n      \"description\": \"AllQuery\",\n      \"context\": []\n    }\n  ],\n  \"context\": []\n}"
@@ -58,7 +58,7 @@ pub mod tests {
         test_fruits
             .docs()
             .iter()
-            .map(|docaddr| docaddr.1)
+            .map(|docaddr| docaddr.doc_id)
             .collect::<Vec<_>>()
     };
     assert_eq!(test_query(vec!["a", "b"]), vec![1, 2, 3, 4]);
@@ -109,7 +109,7 @@ pub mod tests {
         test_fruits
            .docs()
            .iter()
-            .map(|docaddr| docaddr.1)
+            .map(|docaddr| docaddr.doc_id)
            .collect::<Vec<_>>()
     };
     assert_eq!(test_query(vec!["a", "b", "c"]), vec![2, 4]);
@@ -206,8 +206,8 @@ pub mod tests {
            .docs()
            .to_vec()
     };
-    assert_eq!(test_query(vec!["a", "b"]), vec![DocAddress(0, 1)]);
-    assert_eq!(test_query(vec!["b", "a"]), vec![DocAddress(0, 2)]);
+    assert_eq!(test_query(vec!["a", "b"]), vec![DocAddress::new(0, 1)]);
+    assert_eq!(test_query(vec!["b", "a"]), vec![DocAddress::new(0, 2)]);
 }
 
 #[test] // motivated by #234
@@ -233,7 +233,7 @@ pub mod tests {
            .expect("search should succeed")
            .docs()
            .iter()
-            .map(|doc_address| doc_address.1)
+            .map(|doc_address| doc_address.doc_id)
            .collect::<Vec<DocId>>()
     };
     assert_eq!(test_query(vec![(0, "a"), (1, "b")]), vec![0]);
 
@@ -51,9 +51,9 @@ pub trait Query: QueryClone + Send + Sync + downcast_rs::Downcast + fmt::Debug {
 
     /// Returns an `Explanation` for the score of the document.
     fn explain(&self, searcher: &Searcher, doc_address: DocAddress) -> crate::Result<Explanation> {
-        let reader = searcher.segment_reader(doc_address.segment_ord());
+        let reader = searcher.segment_reader(doc_address.segment_ord);
         let weight = self.weight(searcher, true)?;
-        weight.explain(reader, doc_address.doc())
+        weight.explain(reader, doc_address.doc_id)
     }
 
     /// Returns the number of documents matching the query.
@@ -157,7 +157,8 @@ fn trim_ast(logical_ast: LogicalAST) -> Option<LogicalAST> {
/// a word lexicographically between `a` and `c` (inclusive lower bound, exclusive upper bound).
/// Inclusive bounds are `[]`, exclusive are `{}`.
///
-/// * date values: The query parser supports rfc3339 formatted dates. For example "2002-10-02T15:00:00.05Z"
+/// * date values: The query parser supports rfc3339 formatted dates. For example `"2002-10-02T15:00:00.05Z"`
+///   or `some_date_field:[2002-10-02T15:00:00Z TO 2002-10-02T18:00:00Z}`
///
/// * all docs query: A plain `*` will match all documents in the index.
///
@@ -196,18 +196,18 @@ mod tests {
         let term_query = TermQuery::new(term_a, IndexRecordOption::Basic);
         let searcher = index.reader()?.searcher();
         {
-            let explanation = term_query.explain(&searcher, DocAddress(0u32, 1u32))?;
+            let explanation = term_query.explain(&searcher, DocAddress::new(0u32, 1u32))?;
             assert_nearly_equals!(explanation.value(), 0.6931472);
         }
         {
-            let explanation_err = term_query.explain(&searcher, DocAddress(0u32, 0u32));
+            let explanation_err = term_query.explain(&searcher, DocAddress::new(0u32, 0u32));
             assert!(matches!(
                 explanation_err,
                 Err(crate::TantivyError::InvalidArgument(_msg))
             ));
         }
         {
-            let explanation_err = term_query.explain(&searcher, DocAddress(0u32, 3u32));
+            let explanation_err = term_query.explain(&searcher, DocAddress::new(0u32, 3u32));
             assert!(matches!(
                 explanation_err,
                 Err(crate::TantivyError::InvalidArgument(_msg))
@@ -192,7 +192,7 @@ impl<'de> Deserialize<'de> for FieldEntry {
             Name,
             Type,
             Options,
-        };
+        }
 
         const FIELDS: &[&str] = &["name", "type", "options"];
 
@@ -192,7 +192,7 @@ impl SchemaBuilder {
         }))
     }
 }
 
+#[derive(Debug)]
 struct InnerSchema {
     fields: Vec<FieldEntry>,
     fields_map: HashMap<String, Field>, // transient
@@ -226,7 +226,7 @@ impl Eq for InnerSchema {}
/// let schema = schema_builder.build();
///
/// ```
-#[derive(Clone, Eq, PartialEq)]
+#[derive(Clone, Eq, PartialEq, Debug)]
pub struct Schema(Arc<InnerSchema>);

impl Schema {
@@ -154,7 +154,7 @@ mod tests {
         let searcher = reader.searcher();
         assert_eq!(searcher.num_docs(), 30);
         for i in 0..searcher.num_docs() as u32 {
-            let _doc = searcher.doc(DocAddress(0u32, i))?;
+            let _doc = searcher.doc(DocAddress::new(0u32, i))?;
         }
         Ok(())
     }
@@ -1,4 +1,3 @@
-use crate::schema::Term;
 use crate::termdict::TermOrdinal;
 use crate::termdict::TermStreamer;
 use std::cmp::Ordering;
@@ -114,14 +113,4 @@ impl<'a> TermMerger<'a> {
     pub fn current_kvs(&self) -> &[HeapItem<'a>] {
         &self.current_streamers[..]
     }
-
-    /// Iterates through terms
-    #[cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))]
-    pub fn next(&mut self) -> Option<Term<&[u8]>> {
-        if self.advance() {
-            Some(Term::wrap(self.current_streamers[0].streamer.key()))
-        } else {
-            None
-        }
-    }
 }
 