fixes order_by_u64_field and order_by_fast_field should allow sorting in ascending order #1676 (#2111)

* feat: order_by_fast_field allows sorting using parameter order

* chore: change the corresponding values to original one

* chore: fix formatting issues

* fix: first_or_default_col should also sort by order

* chore: add empty doc to testcase and fix doctests

* chore: fix failing tests

* core: add empty document without fastfield

* chore: fix fmt

* chore: change variable name
This commit is contained in:
Naveen Aiathurai
2023-07-06 08:40:10 +05:30
committed by GitHub
parent 3c300666ad
commit 67bdf3f5f6

View File

@@ -14,7 +14,7 @@ use crate::collector::{
};
use crate::fastfield::{FastFieldNotAvailableError, FastValue};
use crate::query::Weight;
use crate::{DocAddress, DocId, Score, SegmentOrdinal, SegmentReader, TantivyError};
use crate::{DocAddress, DocId, Order, Score, SegmentOrdinal, SegmentReader, TantivyError};
struct FastFieldConvertCollector<
TCollector: Collector<Fruit = Vec<(u64, DocAddress)>>,
@@ -23,6 +23,7 @@ struct FastFieldConvertCollector<
pub collector: TCollector,
pub field: String,
pub fast_value: std::marker::PhantomData<TFastValue>,
order: Order,
}
impl<TCollector, TFastValue> Collector for FastFieldConvertCollector<TCollector, TFastValue>
@@ -70,7 +71,13 @@ where
let raw_result = self.collector.merge_fruits(segment_fruits)?;
let transformed_result = raw_result
.into_iter()
.map(|(score, doc_address)| (TFastValue::from_u64(score), doc_address))
.map(|(score, doc_address)| {
if self.order.is_desc() {
(TFastValue::from_u64(score), doc_address)
} else {
(TFastValue::from_u64(u64::MAX - score), doc_address)
}
})
.collect::<Vec<_>>();
Ok(transformed_result)
}
@@ -131,16 +138,23 @@ impl fmt::Debug for TopDocs {
/// Scorer backed by a `u64` column; it implements `CustomSegmentScorer<u64>`.
struct ScorerByFastFieldReader {
/// Column whose value for a document is read through `get_val` when scoring.
sort_column: Arc<dyn ColumnValues<u64>>,
/// Requested sort order: the raw column value is used as the score when it is
/// descending, and `u64::MAX - value` otherwise.
order: Order,
}
impl CustomSegmentScorer<u64> for ScorerByFastFieldReader {
    /// Returns the score of `doc`, derived from its value in `sort_column`.
    ///
    /// For a descending order the column value is returned as is. For any other
    /// order the value is mirrored as `u64::MAX - value`, so that a smaller
    /// column value produces a larger score.
    fn score(&mut self, doc: DocId) -> u64 {
        let value = self.sort_column.get_val(doc);
        if self.order.is_desc() {
            value
        } else {
            // Mirrored around `u64::MAX`; the subtraction cannot underflow
            // because `value` is itself a `u64`.
            u64::MAX - value
        }
    }
}
/// Custom scorer configuration: the field to sort on and the requested order.
struct ScorerByField {
/// Name of the field to sort on; it is also reported in
/// `FastFieldNotAvailableError` when no sort column is available.
field: String,
/// Requested sort order; it selects the default value used for documents
/// without a value and is handed over to `ScorerByFastFieldReader`.
order: Order,
}
impl CustomScorer<u64> for ScorerByField {
@@ -157,8 +171,13 @@ impl CustomScorer<u64> for ScorerByField {
sort_column_opt.ok_or_else(|| FastFieldNotAvailableError {
field_name: self.field.clone(),
})?;
let mut default_value = 0u64;
if self.order.is_asc() {
default_value = u64::MAX;
}
Ok(ScorerByFastFieldReader {
sort_column: sort_column.first_or_default_col(0u64),
sort_column: sort_column.first_or_default_col(default_value),
order: self.order.clone(),
})
}
}
@@ -230,7 +249,7 @@ impl TopDocs {
///
/// ```rust
/// # use tantivy::schema::{Schema, FAST, TEXT};
/// # use tantivy::{doc, Index, DocAddress};
/// # use tantivy::{doc, Index, DocAddress, Order};
/// # use tantivy::query::{Query, QueryParser};
/// use tantivy::Searcher;
/// use tantivy::collector::TopDocs;
@@ -268,7 +287,7 @@ impl TopDocs {
/// // Note the `rating_field` needs to be a FAST field here.
/// let top_books_by_rating = TopDocs
/// ::with_limit(10)
/// .order_by_u64_field("rating");
/// .order_by_fast_field("rating", Order::Desc);
///
/// // ... and here are our documents. Note this is a simple vec.
/// // The `u64` in the pair is the value of our fast field for
@@ -288,13 +307,15 @@ impl TopDocs {
///
/// To comfortably work with `u64`s, `i64`s, `f64`s, or `date`s, please refer to
/// the [.order_by_fast_field(...)](TopDocs::order_by_fast_field) method.
pub fn order_by_u64_field(
fn order_by_u64_field(
self,
field: impl ToString,
order: Order,
) -> impl Collector<Fruit = Vec<(u64, DocAddress)>> {
CustomScoreTopCollector::new(
ScorerByField {
field: field.to_string(),
order,
},
self.0.into_tscore(),
)
@@ -316,7 +337,7 @@ impl TopDocs {
///
/// ```rust
/// # use tantivy::schema::{Schema, FAST, TEXT};
/// # use tantivy::{doc, Index, DocAddress};
/// # use tantivy::{doc, Index, DocAddress,Order};
/// # use tantivy::query::{Query, AllQuery};
/// use tantivy::Searcher;
/// use tantivy::collector::TopDocs;
@@ -354,7 +375,7 @@ impl TopDocs {
/// // type `sort_by_field`. revenue_field here is a FAST i64 field.
/// let top_company_by_revenue = TopDocs
/// ::with_limit(2)
/// .order_by_fast_field("revenue");
/// .order_by_fast_field("revenue", Order::Desc);
///
/// // ... and here are our documents. Note this is a simple vec.
/// // The `i64` in the pair is the value of our fast field for
@@ -372,15 +393,17 @@ impl TopDocs {
pub fn order_by_fast_field<TFastValue>(
    self,
    fast_field: impl ToString,
    order: Order,
) -> impl Collector<Fruit = Vec<(TFastValue, DocAddress)>>
where
    TFastValue: FastValue,
{
    // Resolve the field name once; it is needed both by the underlying u64
    // collector and by the converting wrapper.
    let field = fast_field.to_string();
    // `self` is consumed here, so the u64 collector must be built exactly once,
    // with the requested order.
    let u64_collector = self.order_by_u64_field(field.clone(), order.clone());
    // The wrapper converts the raw `u64` results back into `TFastValue`,
    // taking `order` into account.
    FastFieldConvertCollector {
        collector: u64_collector,
        field,
        fast_value: PhantomData,
        order,
    }
}
@@ -721,7 +744,7 @@ mod tests {
use crate::schema::{Field, Schema, FAST, STORED, TEXT};
use crate::time::format_description::well_known::Rfc3339;
use crate::time::OffsetDateTime;
use crate::{DateTime, DocAddress, DocId, Index, IndexWriter, Score, SegmentReader};
use crate::{DateTime, DocAddress, DocId, Index, IndexWriter, Order, Score, SegmentReader};
fn make_index() -> crate::Result<Index> {
let mut schema_builder = Schema::builder();
@@ -882,7 +905,7 @@ mod tests {
});
let searcher = index.reader()?.searcher();
let top_collector = TopDocs::with_limit(4).order_by_u64_field(SIZE);
let top_collector = TopDocs::with_limit(4).order_by_u64_field(SIZE, Order::Desc);
let top_docs: Vec<(u64, DocAddress)> = searcher.search(&query, &top_collector)?;
assert_eq!(
&top_docs[..],
@@ -921,7 +944,7 @@ mod tests {
))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let top_collector = TopDocs::with_limit(3).order_by_fast_field("birthday");
let top_collector = TopDocs::with_limit(3).order_by_fast_field("birthday", Order::Desc);
let top_docs: Vec<(DateTime, DocAddress)> = searcher.search(&AllQuery, &top_collector)?;
assert_eq!(
&top_docs[..],
@@ -951,7 +974,7 @@ mod tests {
))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let top_collector = TopDocs::with_limit(3).order_by_fast_field("altitude");
let top_collector = TopDocs::with_limit(3).order_by_fast_field("altitude", Order::Desc);
let top_docs: Vec<(i64, DocAddress)> = searcher.search(&AllQuery, &top_collector)?;
assert_eq!(
&top_docs[..],
@@ -981,7 +1004,7 @@ mod tests {
))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let top_collector = TopDocs::with_limit(3).order_by_fast_field("altitude");
let top_collector = TopDocs::with_limit(3).order_by_fast_field("altitude", Order::Desc);
let top_docs: Vec<(f64, DocAddress)> = searcher.search(&AllQuery, &top_collector)?;
assert_eq!(
&top_docs[..],
@@ -1009,7 +1032,7 @@ mod tests {
.unwrap();
});
let searcher = index.reader().unwrap().searcher();
let top_collector = TopDocs::with_limit(4).order_by_u64_field("missing_field");
let top_collector = TopDocs::with_limit(4).order_by_u64_field("missing_field", Order::Desc);
let segment_reader = searcher.segment_reader(0u32);
top_collector
.for_segment(0, segment_reader)
@@ -1027,7 +1050,7 @@ mod tests {
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let segment = searcher.segment_reader(0);
let top_collector = TopDocs::with_limit(4).order_by_u64_field(SIZE);
let top_collector = TopDocs::with_limit(4).order_by_u64_field(SIZE, Order::Desc);
let err = top_collector.for_segment(0, segment).err().unwrap();
assert!(matches!(err, crate::TantivyError::InvalidArgument(_)));
Ok(())
@@ -1044,7 +1067,7 @@ mod tests {
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let segment = searcher.segment_reader(0);
let top_collector = TopDocs::with_limit(4).order_by_fast_field::<i64>(SIZE);
let top_collector = TopDocs::with_limit(4).order_by_fast_field::<i64>(SIZE, Order::Desc);
let err = top_collector.for_segment(0, segment).err().unwrap();
assert!(
matches!(err, crate::TantivyError::SchemaError(msg) if msg == "Field \"size\" is not a fast field.")
@@ -1106,4 +1129,50 @@ mod tests {
let query = query_parser.parse_query(query).unwrap();
(index, query)
}
// Checks that `order_by_fast_field(.., Order::Asc)` returns documents by
// increasing `size`, including a document that has no `size` value.
#[test]
fn test_fast_field_ascending_order() -> crate::Result<()> {
// Schema: a text `title` field plus a FAST u64 `size` field.
let mut schema_builder = Schema::builder();
let title = schema_builder.add_text_field(TITLE, TEXT);
let size = schema_builder.add_u64_field(SIZE, FAST);
let schema = schema_builder.build();
// Four documents are added; the query string passed to the helper is "beer"
// (presumably parsed against `title` -- the helper body is not fully visible here).
let (index, query) = index("beer", title, schema, |index_writer| {
index_writer
.add_document(doc!(
title => "bottle of beer",
size => 12u64,
))
.unwrap();
index_writer
.add_document(doc!(
title => "growler of beer",
size => 64u64,
))
.unwrap();
index_writer
.add_document(doc!(
title => "pint of beer",
size => 16u64,
))
.unwrap();
// This document deliberately carries no `size` value.
index_writer
.add_document(doc!(
title => "empty beer",
))
.unwrap();
});
let searcher = index.reader()?.searcher();
let top_collector = TopDocs::with_limit(4).order_by_fast_field(SIZE, Order::Asc);
let top_docs: Vec<(u64, DocAddress)> = searcher.search(&query, &top_collector)?;
// Expected: sizes in increasing order, with the document lacking a `size`
// last and reported with 18446744073709551615 (`u64::MAX`).
assert_eq!(
&top_docs[..],
&[
(12, DocAddress::new(0, 0)),
(16, DocAddress::new(0, 2)),
(64, DocAddress::new(0, 1)),
(18446744073709551615, DocAddress::new(0, 3)),
]
);
Ok(())
}
}