document 1 unused bit in skiplist

add small doc on some queries using fast field when not indexed
2025-12-29 05:22:55 +00:00 · 2025-12-19 10:34:20 +01:00 · 2025-12-19 10:34:20 +01:00
3 changed files with 22 additions and 123 deletions
--- a/src/collector/sort_key/order.rs
+++ b/src/collector/sort_key/order.rs
@@ -12,13 +12,8 @@ pub trait Comparator<T>: Send + Sync + std::fmt::Debug + Default {
    fn compare(&self, lhs: &T, rhs: &T) -> Ordering;
 }

-/// Compare values naturally (e.g. 1 < 2).
-///
-/// When used with `TopDocs`, which reverses the order, this results in a
-/// "Descending" sort (Greatest values first).
-///
-/// `None` (or Null for `OwnedValue`) values are considered to be smaller than any other value,
-/// and will therefore appear last in a descending sort (e.g. `[Some(20), Some(10), None]`).
+/// With the natural comparator, the top k collector will return
+/// the top documents in decreasing order.
 #[derive(Debug, Copy, Clone, Default, Serialize, Deserialize)]
 pub struct NaturalComparator;

@@ -29,18 +24,14 @@ impl<T: PartialOrd> Comparator<T> for NaturalComparator {
    }
 }

-/// Compare values in reverse (e.g. 2 < 1).
+/// Sorts documents in reverse order.
 ///
-/// When used with `TopDocs`, which reverses the order, this results in an
-/// "Ascending" sort (Smallest values first).
-///
-/// `None` is considered smaller than `Some` in the underlying comparator, but because the
-/// comparison is reversed, `None` is effectively treated as the lowest value in the resulting
-/// Ascending sort (e.g. `[None, Some(10), Some(20)]`).
+/// If the sort key is None, it will be considered the lowest value, and will therefore appear
+/// first.
 ///
 /// The ReverseComparator does not necessarily imply that the sort order is reversed compared
 /// to the NaturalComparator. In presence of a tie on the sort key, documents will always be
-/// sorted by ascending `DocId`/`DocAddress` in TopN results, regardless of the sort key's order.
+/// sorted by ascending `DocId`/`DocAddress` in TopN results, regardless of the comparator.
 #[derive(Debug, Copy, Clone, Default, Serialize, Deserialize)]
 pub struct ReverseComparator;

@@ -53,15 +44,11 @@ where NaturalComparator: Comparator<T>
    }
 }

-/// Compare values in reverse, but treating `None` as lower than `Some`.
-///
-/// When used with `TopDocs`, which reverses the order, this results in an
-/// "Ascending" sort (Smallest values first), but with `None` values appearing last
-/// (e.g. `[Some(10), Some(20), None]`).
+/// Sorts documents in reverse order, but considers None as having the lowest value.
 ///
 /// This is usually what is wanted when sorting by a field in an ascending order.
-/// For instance, in an e-commerce website, if sorting by price ascending,
-/// the cheapest items would appear first, and items without a price would appear last.
+/// For instance, in an e-commerce website, if I sort by price ascending, I most likely want the
+/// cheapest items first, and the items without a price last.
 #[derive(Debug, Copy, Clone, Default)]
 pub struct ReverseNoneIsLowerComparator;

@@ -121,70 +108,6 @@ impl Comparator<String> for ReverseNoneIsLowerComparator {
    }
 }

-/// Compare values naturally, but treating `None` as higher than `Some`.
-///
-/// When used with `TopDocs`, which reverses the order, this results in a
-/// "Descending" sort (Greatest values first), but with `None` values appearing first
-/// (e.g. `[None, Some(20), Some(10)]`).
-#[derive(Debug, Copy, Clone, Default, Serialize, Deserialize)]
-pub struct NaturalNoneIsHigherComparator;
-
-impl<T> Comparator<Option<T>> for NaturalNoneIsHigherComparator
-where NaturalComparator: Comparator<T>
-{
-    #[inline(always)]
-    fn compare(&self, lhs_opt: &Option<T>, rhs_opt: &Option<T>) -> Ordering {
-        match (lhs_opt, rhs_opt) {
-            (None, None) => Ordering::Equal,
-            (None, Some(_)) => Ordering::Greater,
-            (Some(_), None) => Ordering::Less,
-            (Some(lhs), Some(rhs)) => NaturalComparator.compare(lhs, rhs),
-        }
-    }
-}
-
-impl Comparator<u32> for NaturalNoneIsHigherComparator {
-    #[inline(always)]
-    fn compare(&self, lhs: &u32, rhs: &u32) -> Ordering {
-        NaturalComparator.compare(lhs, rhs)
-    }
-}
-
-impl Comparator<u64> for NaturalNoneIsHigherComparator {
-    #[inline(always)]
-    fn compare(&self, lhs: &u64, rhs: &u64) -> Ordering {
-        NaturalComparator.compare(lhs, rhs)
-    }
-}
-
-impl Comparator<f64> for NaturalNoneIsHigherComparator {
-    #[inline(always)]
-    fn compare(&self, lhs: &f64, rhs: &f64) -> Ordering {
-        NaturalComparator.compare(lhs, rhs)
-    }
-}
-
-impl Comparator<f32> for NaturalNoneIsHigherComparator {
-    #[inline(always)]
-    fn compare(&self, lhs: &f32, rhs: &f32) -> Ordering {
-        NaturalComparator.compare(lhs, rhs)
-    }
-}
-
-impl Comparator<i64> for NaturalNoneIsHigherComparator {
-    #[inline(always)]
-    fn compare(&self, lhs: &i64, rhs: &i64) -> Ordering {
-        NaturalComparator.compare(lhs, rhs)
-    }
-}
-
-impl Comparator<String> for NaturalNoneIsHigherComparator {
-    #[inline(always)]
-    fn compare(&self, lhs: &String, rhs: &String) -> Ordering {
-        NaturalComparator.compare(lhs, rhs)
-    }
-}
-
 /// An enum representing the different sort orders.
 #[derive(Debug, Clone, Copy, Eq, PartialEq, Default)]
 pub enum ComparatorEnum {
@@ -193,10 +116,8 @@ pub enum ComparatorEnum {
    Natural,
    /// Reverse order (See [ReverseComparator])
    Reverse,
-    /// Reverse order by treating None as the lowest value. (See [ReverseNoneLowerComparator])
+    /// Reverse order by treating None as the lowest value. (See [ReverseNoneIsLowerComparator])
    ReverseNoneLower,
-    /// Natural order but treating None as the highest value. (See [NaturalNoneIsHigherComparator])
-    NaturalNoneHigher,
 }

 impl From<Order> for ComparatorEnum {
@@ -213,7 +134,6 @@ where
    ReverseNoneIsLowerComparator: Comparator<T>,
    NaturalComparator: Comparator<T>,
    ReverseComparator: Comparator<T>,
-    NaturalNoneIsHigherComparator: Comparator<T>,
 {
    #[inline(always)]
    fn compare(&self, lhs: &T, rhs: &T) -> Ordering {
@@ -221,7 +141,6 @@ where
            ComparatorEnum::Natural => NaturalComparator.compare(lhs, rhs),
            ComparatorEnum::Reverse => ReverseComparator.compare(lhs, rhs),
            ComparatorEnum::ReverseNoneLower => ReverseNoneIsLowerComparator.compare(lhs, rhs),
-            ComparatorEnum::NaturalNoneHigher => NaturalNoneIsHigherComparator.compare(lhs, rhs),
        }
    }
 }
@@ -428,31 +347,3 @@ where
            .convert_segment_sort_key(sort_key)
    }
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_natural_none_is_higher() {
-        let comp = NaturalNoneIsHigherComparator;
-        let null = None;
-        let v1 = Some(1_u64);
-        let v2 = Some(2_u64);
-
-        // NaturalNoneIsGreaterComparator logic:
-        // 1. Delegates to NaturalComparator for non-nulls.
-        // NaturalComparator compare(2, 1) -> 2.cmp(1) -> Greater.
-        assert_eq!(comp.compare(&v2, &v1), Ordering::Greater);
-
-        // 2. Treats None (Null) as Greater than any value.
-        // compare(None, Some(2)) should be Greater.
-        assert_eq!(comp.compare(&null, &v2), Ordering::Greater);
-
-        // compare(Some(1), None) should be Less.
-        assert_eq!(comp.compare(&v1, &null), Ordering::Less);
-
-        // compare(None, None) should be Equal.
-        assert_eq!(comp.compare(&null, &null), Ordering::Equal);
-    }
-}
--- a/src/postings/skip.rs
+++ b/src/postings/skip.rs
@@ -6,17 +6,21 @@ use crate::{DocId, Score, TERMINATED};

 // doc num bits uses the following encoding:
 // given 0b a b cdefgh
-//         |1|2|   3  |
+//         |1|2|3|  4  |
 // - 1: unused
 // - 2: is delta-1 encoded. 0 if not, 1, if yes
-// - 3: a 6 bit number in 0..=32, the actual bitwidth
+// - 3: unused
+// - 4: a 5 bit number in 0..32, the actual bitwidth. Bitpacking could in theory say this is 32
+//   (requiring a 6th bit), but the largest doc_id we ever need to encode is TERMINATED-1, which
+//   can be represented on 31 bits without delta encoding.
 fn encode_bitwidth(bitwidth: u8, delta_1: bool) -> u8 {
+    assert!(bitwidth < 32);
    bitwidth | ((delta_1 as u8) << 6)
 }

 fn decode_bitwidth(raw_bitwidth: u8) -> (u8, bool) {
    let delta_1 = ((raw_bitwidth >> 6) & 1) != 0;
-    let bitwidth = raw_bitwidth & 0x3f;
+    let bitwidth = raw_bitwidth & 0x1f;
    (bitwidth, delta_1)
 }

@@ -430,7 +434,7 @@ mod tests {

    #[test]
    fn test_encode_decode_bitwidth() {
-        for bitwidth in 0..=32 {
+        for bitwidth in 0..32 {
            for delta_1 in [false, true] {
                assert_eq!(
                    (bitwidth, delta_1),
--- a/src/schema/mod.rs
+++ b/src/schema/mod.rs
@@ -98,6 +98,10 @@
 //! make it possible to access the value given the doc id rapidly. This is useful if the value
 //! of the field is required during scoring or collection for instance.
 //!
+//! Some queries may leverage Fast fields when run on a field that is not indexed. This can be
+//! handy if that kind of request is infrequent; note, however, that searching on a Fast field
+//! is generally much slower than searching in an index.
+//!
 //! ```
 //! use tantivy::schema::*;
 //! let mut schema_builder = Schema::builder();
Author	SHA1	Message	Date
trinity Pointard	32a8f8646f	document 1 unused bit in skiplist	2025-12-19 10:34:20 +01:00
trinity Pointard	53c4b8346c	add small doc on some queries using fast field when not indexed	2025-12-19 10:34:20 +01:00