mirror of
https://github.com/quickwit-oss/tantivy.git
synced 2026-06-05 01:50:42 +00:00
fixes order_by_u64_field and order_by_fast_field should allow sorting in ascending order #1676 (#2111)
* feat: order_by_fast_field allows sorting using parameter order * chore: change the corresponding values to original one * chore: fix formatting issues * fix: first_or_default_col should also sort by order * chore: empty doc to testcase and docstest fixes * chore: fix failure tests * core: add empty document without fastfield * chore: fix fmt * chore: change variable name
This commit is contained in:
@@ -14,7 +14,7 @@ use crate::collector::{
|
||||
};
|
||||
use crate::fastfield::{FastFieldNotAvailableError, FastValue};
|
||||
use crate::query::Weight;
|
||||
use crate::{DocAddress, DocId, Score, SegmentOrdinal, SegmentReader, TantivyError};
|
||||
use crate::{DocAddress, DocId, Order, Score, SegmentOrdinal, SegmentReader, TantivyError};
|
||||
|
||||
struct FastFieldConvertCollector<
|
||||
TCollector: Collector<Fruit = Vec<(u64, DocAddress)>>,
|
||||
@@ -23,6 +23,7 @@ struct FastFieldConvertCollector<
|
||||
pub collector: TCollector,
|
||||
pub field: String,
|
||||
pub fast_value: std::marker::PhantomData<TFastValue>,
|
||||
order: Order,
|
||||
}
|
||||
|
||||
impl<TCollector, TFastValue> Collector for FastFieldConvertCollector<TCollector, TFastValue>
|
||||
@@ -70,7 +71,13 @@ where
|
||||
let raw_result = self.collector.merge_fruits(segment_fruits)?;
|
||||
let transformed_result = raw_result
|
||||
.into_iter()
|
||||
.map(|(score, doc_address)| (TFastValue::from_u64(score), doc_address))
|
||||
.map(|(score, doc_address)| {
|
||||
if self.order.is_desc() {
|
||||
(TFastValue::from_u64(score), doc_address)
|
||||
} else {
|
||||
(TFastValue::from_u64(u64::MAX - score), doc_address)
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
Ok(transformed_result)
|
||||
}
|
||||
@@ -131,16 +138,23 @@ impl fmt::Debug for TopDocs {
|
||||
|
||||
struct ScorerByFastFieldReader {
|
||||
sort_column: Arc<dyn ColumnValues<u64>>,
|
||||
order: Order,
|
||||
}
|
||||
|
||||
impl CustomSegmentScorer<u64> for ScorerByFastFieldReader {
|
||||
fn score(&mut self, doc: DocId) -> u64 {
|
||||
self.sort_column.get_val(doc)
|
||||
let value = self.sort_column.get_val(doc);
|
||||
if self.order.is_desc() {
|
||||
value
|
||||
} else {
|
||||
u64::MAX - value
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct ScorerByField {
|
||||
field: String,
|
||||
order: Order,
|
||||
}
|
||||
|
||||
impl CustomScorer<u64> for ScorerByField {
|
||||
@@ -157,8 +171,13 @@ impl CustomScorer<u64> for ScorerByField {
|
||||
sort_column_opt.ok_or_else(|| FastFieldNotAvailableError {
|
||||
field_name: self.field.clone(),
|
||||
})?;
|
||||
let mut default_value = 0u64;
|
||||
if self.order.is_asc() {
|
||||
default_value = u64::MAX;
|
||||
}
|
||||
Ok(ScorerByFastFieldReader {
|
||||
sort_column: sort_column.first_or_default_col(0u64),
|
||||
sort_column: sort_column.first_or_default_col(default_value),
|
||||
order: self.order.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -230,7 +249,7 @@ impl TopDocs {
|
||||
///
|
||||
/// ```rust
|
||||
/// # use tantivy::schema::{Schema, FAST, TEXT};
|
||||
/// # use tantivy::{doc, Index, DocAddress};
|
||||
/// # use tantivy::{doc, Index, DocAddress, Order};
|
||||
/// # use tantivy::query::{Query, QueryParser};
|
||||
/// use tantivy::Searcher;
|
||||
/// use tantivy::collector::TopDocs;
|
||||
@@ -268,7 +287,7 @@ impl TopDocs {
|
||||
/// // Note the `rating_field` needs to be a FAST field here.
|
||||
/// let top_books_by_rating = TopDocs
|
||||
/// ::with_limit(10)
|
||||
/// .order_by_u64_field("rating");
|
||||
/// .order_by_fast_field("rating", Order::Desc);
|
||||
///
|
||||
/// // ... and here are our documents. Note this is a simple vec.
|
||||
/// // The `u64` in the pair is the value of our fast field for
|
||||
@@ -288,13 +307,15 @@ impl TopDocs {
|
||||
///
|
||||
/// To comfortably work with `u64`s, `i64`s, `f64`s, or `date`s, please refer to
|
||||
/// the [.order_by_fast_field(...)](TopDocs::order_by_fast_field) method.
|
||||
pub fn order_by_u64_field(
|
||||
fn order_by_u64_field(
|
||||
self,
|
||||
field: impl ToString,
|
||||
order: Order,
|
||||
) -> impl Collector<Fruit = Vec<(u64, DocAddress)>> {
|
||||
CustomScoreTopCollector::new(
|
||||
ScorerByField {
|
||||
field: field.to_string(),
|
||||
order,
|
||||
},
|
||||
self.0.into_tscore(),
|
||||
)
|
||||
@@ -316,7 +337,7 @@ impl TopDocs {
|
||||
///
|
||||
/// ```rust
|
||||
/// # use tantivy::schema::{Schema, FAST, TEXT};
|
||||
/// # use tantivy::{doc, Index, DocAddress};
|
||||
/// # use tantivy::{doc, Index, DocAddress,Order};
|
||||
/// # use tantivy::query::{Query, AllQuery};
|
||||
/// use tantivy::Searcher;
|
||||
/// use tantivy::collector::TopDocs;
|
||||
@@ -354,7 +375,7 @@ impl TopDocs {
|
||||
/// // type `sort_by_field`. revenue_field here is a FAST i64 field.
|
||||
/// let top_company_by_revenue = TopDocs
|
||||
/// ::with_limit(2)
|
||||
/// .order_by_fast_field("revenue");
|
||||
/// .order_by_fast_field("revenue", Order::Desc);
|
||||
///
|
||||
/// // ... and here are our documents. Note this is a simple vec.
|
||||
/// // The `i64` in the pair is the value of our fast field for
|
||||
@@ -372,15 +393,17 @@ impl TopDocs {
|
||||
pub fn order_by_fast_field<TFastValue>(
|
||||
self,
|
||||
fast_field: impl ToString,
|
||||
order: Order,
|
||||
) -> impl Collector<Fruit = Vec<(TFastValue, DocAddress)>>
|
||||
where
|
||||
TFastValue: FastValue,
|
||||
{
|
||||
let u64_collector = self.order_by_u64_field(fast_field.to_string());
|
||||
let u64_collector = self.order_by_u64_field(fast_field.to_string(), order.clone());
|
||||
FastFieldConvertCollector {
|
||||
collector: u64_collector,
|
||||
field: fast_field.to_string(),
|
||||
fast_value: PhantomData,
|
||||
order,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -721,7 +744,7 @@ mod tests {
|
||||
use crate::schema::{Field, Schema, FAST, STORED, TEXT};
|
||||
use crate::time::format_description::well_known::Rfc3339;
|
||||
use crate::time::OffsetDateTime;
|
||||
use crate::{DateTime, DocAddress, DocId, Index, IndexWriter, Score, SegmentReader};
|
||||
use crate::{DateTime, DocAddress, DocId, Index, IndexWriter, Order, Score, SegmentReader};
|
||||
|
||||
fn make_index() -> crate::Result<Index> {
|
||||
let mut schema_builder = Schema::builder();
|
||||
@@ -882,7 +905,7 @@ mod tests {
|
||||
});
|
||||
let searcher = index.reader()?.searcher();
|
||||
|
||||
let top_collector = TopDocs::with_limit(4).order_by_u64_field(SIZE);
|
||||
let top_collector = TopDocs::with_limit(4).order_by_u64_field(SIZE, Order::Desc);
|
||||
let top_docs: Vec<(u64, DocAddress)> = searcher.search(&query, &top_collector)?;
|
||||
assert_eq!(
|
||||
&top_docs[..],
|
||||
@@ -921,7 +944,7 @@ mod tests {
|
||||
))?;
|
||||
index_writer.commit()?;
|
||||
let searcher = index.reader()?.searcher();
|
||||
let top_collector = TopDocs::with_limit(3).order_by_fast_field("birthday");
|
||||
let top_collector = TopDocs::with_limit(3).order_by_fast_field("birthday", Order::Desc);
|
||||
let top_docs: Vec<(DateTime, DocAddress)> = searcher.search(&AllQuery, &top_collector)?;
|
||||
assert_eq!(
|
||||
&top_docs[..],
|
||||
@@ -951,7 +974,7 @@ mod tests {
|
||||
))?;
|
||||
index_writer.commit()?;
|
||||
let searcher = index.reader()?.searcher();
|
||||
let top_collector = TopDocs::with_limit(3).order_by_fast_field("altitude");
|
||||
let top_collector = TopDocs::with_limit(3).order_by_fast_field("altitude", Order::Desc);
|
||||
let top_docs: Vec<(i64, DocAddress)> = searcher.search(&AllQuery, &top_collector)?;
|
||||
assert_eq!(
|
||||
&top_docs[..],
|
||||
@@ -981,7 +1004,7 @@ mod tests {
|
||||
))?;
|
||||
index_writer.commit()?;
|
||||
let searcher = index.reader()?.searcher();
|
||||
let top_collector = TopDocs::with_limit(3).order_by_fast_field("altitude");
|
||||
let top_collector = TopDocs::with_limit(3).order_by_fast_field("altitude", Order::Desc);
|
||||
let top_docs: Vec<(f64, DocAddress)> = searcher.search(&AllQuery, &top_collector)?;
|
||||
assert_eq!(
|
||||
&top_docs[..],
|
||||
@@ -1009,7 +1032,7 @@ mod tests {
|
||||
.unwrap();
|
||||
});
|
||||
let searcher = index.reader().unwrap().searcher();
|
||||
let top_collector = TopDocs::with_limit(4).order_by_u64_field("missing_field");
|
||||
let top_collector = TopDocs::with_limit(4).order_by_u64_field("missing_field", Order::Desc);
|
||||
let segment_reader = searcher.segment_reader(0u32);
|
||||
top_collector
|
||||
.for_segment(0, segment_reader)
|
||||
@@ -1027,7 +1050,7 @@ mod tests {
|
||||
index_writer.commit()?;
|
||||
let searcher = index.reader()?.searcher();
|
||||
let segment = searcher.segment_reader(0);
|
||||
let top_collector = TopDocs::with_limit(4).order_by_u64_field(SIZE);
|
||||
let top_collector = TopDocs::with_limit(4).order_by_u64_field(SIZE, Order::Desc);
|
||||
let err = top_collector.for_segment(0, segment).err().unwrap();
|
||||
assert!(matches!(err, crate::TantivyError::InvalidArgument(_)));
|
||||
Ok(())
|
||||
@@ -1044,7 +1067,7 @@ mod tests {
|
||||
index_writer.commit()?;
|
||||
let searcher = index.reader()?.searcher();
|
||||
let segment = searcher.segment_reader(0);
|
||||
let top_collector = TopDocs::with_limit(4).order_by_fast_field::<i64>(SIZE);
|
||||
let top_collector = TopDocs::with_limit(4).order_by_fast_field::<i64>(SIZE, Order::Desc);
|
||||
let err = top_collector.for_segment(0, segment).err().unwrap();
|
||||
assert!(
|
||||
matches!(err, crate::TantivyError::SchemaError(msg) if msg == "Field \"size\" is not a fast field.")
|
||||
@@ -1106,4 +1129,50 @@ mod tests {
|
||||
let query = query_parser.parse_query(query).unwrap();
|
||||
(index, query)
|
||||
}
|
||||
#[test]
|
||||
fn test_fast_field_ascending_order() -> crate::Result<()> {
|
||||
let mut schema_builder = Schema::builder();
|
||||
let title = schema_builder.add_text_field(TITLE, TEXT);
|
||||
let size = schema_builder.add_u64_field(SIZE, FAST);
|
||||
let schema = schema_builder.build();
|
||||
let (index, query) = index("beer", title, schema, |index_writer| {
|
||||
index_writer
|
||||
.add_document(doc!(
|
||||
title => "bottle of beer",
|
||||
size => 12u64,
|
||||
))
|
||||
.unwrap();
|
||||
index_writer
|
||||
.add_document(doc!(
|
||||
title => "growler of beer",
|
||||
size => 64u64,
|
||||
))
|
||||
.unwrap();
|
||||
index_writer
|
||||
.add_document(doc!(
|
||||
title => "pint of beer",
|
||||
size => 16u64,
|
||||
))
|
||||
.unwrap();
|
||||
index_writer
|
||||
.add_document(doc!(
|
||||
title => "empty beer",
|
||||
))
|
||||
.unwrap();
|
||||
});
|
||||
let searcher = index.reader()?.searcher();
|
||||
|
||||
let top_collector = TopDocs::with_limit(4).order_by_fast_field(SIZE, Order::Asc);
|
||||
let top_docs: Vec<(u64, DocAddress)> = searcher.search(&query, &top_collector)?;
|
||||
assert_eq!(
|
||||
&top_docs[..],
|
||||
&[
|
||||
(12, DocAddress::new(0, 0)),
|
||||
(16, DocAddress::new(0, 2)),
|
||||
(64, DocAddress::new(0, 1)),
|
||||
(18446744073709551615, DocAddress::new(0, 3)),
|
||||
]
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user