Test.

2026-01-08 18:12:55 +00:00 · 2023-07-05 21:58:52 +02:00
29 changed files with 367 additions and 425 deletions
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -53,7 +53,7 @@ jobs:
    strategy:
      matrix:
        features: [
-            { label: "all", flags: "mmap,stopwords,lz4-compression,zstd-compression,failpoints" },
+            { label: "all", flags: "mmap,stopwords,brotli-compression,lz4-compression,snappy-compression,zstd-compression,failpoints" },
            { label: "quickwit", flags: "mmap,quickwit,failpoints" }
        ]

--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,14 +1,5 @@

-Tantivy 0.20.2
-================================
- Align numerical type priority order on the search side.  [#2088](https://github.com/quickwit-oss/tantivy/issues/2088) (@fmassot)
- Fix is_child_of function not considering the root facet. [#2086](https://github.com/quickwit-oss/tantivy/issues/2086) (@adamreichhold)
-
-Tantivy 0.20.1
-================================
- Fix building on windows with mmap [#2070](https://github.com/quickwit-oss/tantivy/issues/2070) (@ChillFish8)
-
-Tantivy 0.20
+Tantivy 0.20 [Unreleased]
 ================================
 #### Bugfixes
 - Fix phrase queries with slop (slop supports now transpositions, algorithm that carries slop so far for num terms > 2) [#2031](https://github.com/quickwit-oss/tantivy/issues/2031)[#2020](https://github.com/quickwit-oss/tantivy/issues/2020)(@PSeitz)
@@ -47,14 +38,12 @@ Tantivy 0.20
    - Add aggregation support for JSON type [#1888](https://github.com/quickwit-oss/tantivy/issues/1888) (@PSeitz)
    - Mixed types support on JSON fields in aggs [#1971](https://github.com/quickwit-oss/tantivy/issues/1971) (@PSeitz)
  - Perf: Fetch blocks of vals in aggregation for all cardinality [#1950](https://github.com/quickwit-oss/tantivy/issues/1950) (@PSeitz)
-  - Allow histogram bounds to be passed as Rfc3339 [#2076](https://github.com/quickwit-oss/tantivy/issues/2076) (@PSeitz)
 - `Searcher` with disabled scoring via `EnableScoring::Disabled` [#1780](https://github.com/quickwit-oss/tantivy/issues/1780) (@shikhar)
 - Enable tokenizer on json fields [#2053](https://github.com/quickwit-oss/tantivy/issues/2053) (@PSeitz)
 - Enforcing "NOT" and "-" queries consistency in UserInputAst [#1609](https://github.com/quickwit-oss/tantivy/issues/1609) (@bazhenov)
 - Faster indexing
  - Refactor tokenization pipeline to use GATs [#1924](https://github.com/quickwit-oss/tantivy/issues/1924) (@trinity-1686a)
  - Faster term hash map [#2058](https://github.com/quickwit-oss/tantivy/issues/2058)[#1940](https://github.com/quickwit-oss/tantivy/issues/1940) (@PSeitz)
-  - tokenizer-api: reduce Tokenizer allocation overhead [#2062](https://github.com/quickwit-oss/tantivy/issues/2062) (@PSeitz)
  - Refactor vint [#2010](https://github.com/quickwit-oss/tantivy/issues/2010) (@PSeitz)
 - Faster search
  - Work in batches of docs on the SegmentCollector (Only for cases without score for now) [#1937](https://github.com/quickwit-oss/tantivy/issues/1937) (@PSeitz)
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -25,7 +25,9 @@ aho-corasick = "1.0"
 tantivy-fst = "0.4.0"
 memmap2 = { version = "0.7.1", optional = true }
 lz4_flex = { version = "0.11", default-features = false, optional = true }
+brotli = { version = "3.3.4", optional = true }
 zstd = { version = "0.12", optional = true, default-features = false }
+snap = { version = "1.0.5", optional = true }
 tempfile = { version = "3.3.0", optional = true }
 log = "0.4.16"
 serde = { version = "1.0.136", features = ["derive"] }
@@ -47,9 +49,9 @@ murmurhash32 = "0.3.0"
 time = { version = "0.3.10", features = ["serde-well-known"] }
 smallvec = "1.8.0"
 rayon = "1.5.2"
-lru = "0.11.0"
+lru = "0.10.0"
 fastdivide = "0.4.0"
-itertools = "0.11.0"
+itertools = "0.10.3"
 measure_time = "0.8.2"
 async-trait = "0.1.53"
 arc-swap = "1.5.0"
@@ -105,7 +107,9 @@ default = ["mmap", "stopwords", "lz4-compression"]
 mmap = ["fs4", "tempfile", "memmap2"]
 stopwords = []

+brotli-compression = ["brotli"]
 lz4-compression = ["lz4_flex"]
+snappy-compression = ["snap"]
 zstd-compression = ["zstd"]

 failpoints = ["fail", "fail/failpoints"]
--- a/README.md
+++ b/README.md
@@ -44,7 +44,7 @@ Details about the benchmark can be found at this [repository](https://github.com
 - Single valued and multivalued u64, i64, and f64 fast fields (equivalent of doc values in Lucene)
 - `&[u8]` fast fields
 - Text, i64, u64, f64, dates, ip, bool, and hierarchical facet fields
- Compressed document store (LZ4, Zstd, None)
+- Compressed document store (LZ4, Zstd, None, Brotli, Snap)
 - Range queries
 - Faceted search
 - Configurable indexing (optional term frequency and position indexing)
--- a/columnar/Cargo.toml
+++ b/columnar/Cargo.toml
@@ -9,7 +9,7 @@ description = "column oriented storage for tantivy"
 categories = ["database-implementations", "data-structures", "compression"]

 [dependencies]
-itertools = "0.11.0"
+itertools = "0.10.5"
 fnv = "1.0.7"
 fastdivide = "0.4.0"

--- a/examples/aggregation.rs
+++ b/examples/aggregation.rs
@@ -37,7 +37,7 @@ fn main() -> tantivy::Result<()> {
                .set_index_option(IndexRecordOption::WithFreqs)
                .set_tokenizer("raw"),
        )
-        .set_fast("default")
+        .set_fast(None)
        .set_stored();
    schema_builder.add_text_field("category", text_fieldtype);
    schema_builder.add_f64_field("stock", FAST);
--- a/src/aggregation/bucket/term_agg.rs
+++ b/src/aggregation/bucket/term_agg.rs
@@ -1293,13 +1293,13 @@ mod tests {
        // searching for terma, but min_doc_count will return all terms
        let res = exec_request_with_query(agg_req, &index, Some(("string2", "hit")))?;

-        assert_eq!(res["my_texts"]["buckets"][0]["key"], "a");
+        assert_eq!(res["my_texts"]["buckets"][0]["key"], "A");
        assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 2);
        assert_eq!(
            res["my_texts"]["buckets"][0]["elhistogram"]["buckets"],
            json!([{ "doc_count": 1, "key": 1.0 }, { "doc_count": 1, "key": 2.0 } ])
        );
-        assert_eq!(res["my_texts"]["buckets"][1]["key"], "b");
+        assert_eq!(res["my_texts"]["buckets"][1]["key"], "B");
        assert_eq!(res["my_texts"]["buckets"][1]["doc_count"], 1);
        assert_eq!(
            res["my_texts"]["buckets"][1]["elhistogram"]["buckets"],
@@ -1421,10 +1421,10 @@ mod tests {
        let res = exec_request_with_query(agg_req, &index, None).unwrap();
        println!("{}", serde_json::to_string_pretty(&res).unwrap());

-        assert_eq!(res["my_texts"]["buckets"][0]["key"], "hallo hallo");
+        assert_eq!(res["my_texts"]["buckets"][0]["key"], "Hallo Hallo");
        assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 1);

-        assert_eq!(res["my_texts"]["buckets"][1]["key"], "hello hello");
+        assert_eq!(res["my_texts"]["buckets"][1]["key"], "Hello Hello");
        assert_eq!(res["my_texts"]["buckets"][1]["doc_count"], 1);

        Ok(())
--- a/src/aggregation/mod.rs
+++ b/src/aggregation/mod.rs
@@ -411,7 +411,7 @@ mod tests {
                    .set_index_option(IndexRecordOption::Basic)
                    .set_fieldnorms(false),
            )
-            .set_fast("default")
+            .set_fast(None)
            .set_stored();
        let text_field = schema_builder.add_text_field("text", text_fieldtype.clone());
        let text_field_id = schema_builder.add_text_field("text_id", text_fieldtype);
@@ -466,7 +466,7 @@ mod tests {
            .set_indexing_options(
                TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
            )
-            .set_fast("default")
+            .set_fast(None)
            .set_stored();
        let text_field = schema_builder.add_text_field("text", text_fieldtype);
        let date_field = schema_builder.add_date_field("date", FAST);
--- a/src/collector/top_score_collector.rs
+++ b/src/collector/top_score_collector.rs
@@ -14,7 +14,7 @@ use crate::collector::{
 };
 use crate::fastfield::{FastFieldNotAvailableError, FastValue};
 use crate::query::Weight;
-use crate::{DocAddress, DocId, Order, Score, SegmentOrdinal, SegmentReader, TantivyError};
+use crate::{DocAddress, DocId, Score, SegmentOrdinal, SegmentReader, TantivyError};

 struct FastFieldConvertCollector<
    TCollector: Collector<Fruit = Vec<(u64, DocAddress)>>,
@@ -23,7 +23,6 @@ struct FastFieldConvertCollector<
    pub collector: TCollector,
    pub field: String,
    pub fast_value: std::marker::PhantomData<TFastValue>,
-    order: Order,
 }

 impl<TCollector, TFastValue> Collector for FastFieldConvertCollector<TCollector, TFastValue>
@@ -71,13 +70,7 @@ where
        let raw_result = self.collector.merge_fruits(segment_fruits)?;
        let transformed_result = raw_result
            .into_iter()
-            .map(|(score, doc_address)| {
-                if self.order.is_desc() {
-                    (TFastValue::from_u64(score), doc_address)
-                } else {
-                    (TFastValue::from_u64(u64::MAX - score), doc_address)
-                }
-            })
+            .map(|(score, doc_address)| (TFastValue::from_u64(score), doc_address))
            .collect::<Vec<_>>();
        Ok(transformed_result)
    }
@@ -138,23 +131,16 @@ impl fmt::Debug for TopDocs {

 struct ScorerByFastFieldReader {
    sort_column: Arc<dyn ColumnValues<u64>>,
-    order: Order,
 }

 impl CustomSegmentScorer<u64> for ScorerByFastFieldReader {
    fn score(&mut self, doc: DocId) -> u64 {
-        let value = self.sort_column.get_val(doc);
-        if self.order.is_desc() {
-            value
-        } else {
-            u64::MAX - value
-        }
+        self.sort_column.get_val(doc)
    }
 }

 struct ScorerByField {
    field: String,
-    order: Order,
 }

 impl CustomScorer<u64> for ScorerByField {
@@ -171,13 +157,8 @@ impl CustomScorer<u64> for ScorerByField {
            sort_column_opt.ok_or_else(|| FastFieldNotAvailableError {
                field_name: self.field.clone(),
            })?;
-        let mut default_value = 0u64;
-        if self.order.is_asc() {
-            default_value = u64::MAX;
-        }
        Ok(ScorerByFastFieldReader {
-            sort_column: sort_column.first_or_default_col(default_value),
-            order: self.order.clone(),
+            sort_column: sort_column.first_or_default_col(0u64),
        })
    }
 }
@@ -249,7 +230,7 @@ impl TopDocs {
    ///
    /// ```rust
    /// # use tantivy::schema::{Schema, FAST, TEXT};
-    /// # use tantivy::{doc, Index, DocAddress, Order};
+    /// # use tantivy::{doc, Index, DocAddress};
    /// # use tantivy::query::{Query, QueryParser};
    /// use tantivy::Searcher;
    /// use tantivy::collector::TopDocs;
@@ -287,7 +268,7 @@ impl TopDocs {
    ///     // Note the `rating_field` needs to be a FAST field here.
    ///     let top_books_by_rating = TopDocs
    ///                 ::with_limit(10)
-    ///                  .order_by_fast_field("rating", Order::Desc);
+    ///                  .order_by_u64_field("rating");
    ///
    ///     // ... and here are our documents. Note this is a simple vec.
    ///     // The `u64` in the pair is the value of our fast field for
@@ -307,15 +288,13 @@ impl TopDocs {
    ///
    /// To comfortably work with `u64`s, `i64`s, `f64`s, or `date`s, please refer to
    /// the [.order_by_fast_field(...)](TopDocs::order_by_fast_field) method.
-    fn order_by_u64_field(
+    pub fn order_by_u64_field(
        self,
        field: impl ToString,
-        order: Order,
    ) -> impl Collector<Fruit = Vec<(u64, DocAddress)>> {
        CustomScoreTopCollector::new(
            ScorerByField {
                field: field.to_string(),
-                order,
            },
            self.0.into_tscore(),
        )
@@ -337,7 +316,7 @@ impl TopDocs {
    ///
    /// ```rust
    /// # use tantivy::schema::{Schema, FAST, TEXT};
-    /// # use tantivy::{doc, Index, DocAddress,Order};
+    /// # use tantivy::{doc, Index, DocAddress};
    /// # use tantivy::query::{Query, AllQuery};
    /// use tantivy::Searcher;
    /// use tantivy::collector::TopDocs;
@@ -375,7 +354,7 @@ impl TopDocs {
    ///     // type `sort_by_field`. revenue_field here is a FAST i64 field.
    ///     let top_company_by_revenue = TopDocs
    ///                 ::with_limit(2)
-    ///                  .order_by_fast_field("revenue", Order::Desc);
+    ///                  .order_by_fast_field("revenue");
    ///
    ///     // ... and here are our documents. Note this is a simple vec.
    ///     // The `i64` in the pair is the value of our fast field for
@@ -393,17 +372,15 @@ impl TopDocs {
    pub fn order_by_fast_field<TFastValue>(
        self,
        fast_field: impl ToString,
-        order: Order,
    ) -> impl Collector<Fruit = Vec<(TFastValue, DocAddress)>>
    where
        TFastValue: FastValue,
    {
-        let u64_collector = self.order_by_u64_field(fast_field.to_string(), order.clone());
+        let u64_collector = self.order_by_u64_field(fast_field.to_string());
        FastFieldConvertCollector {
            collector: u64_collector,
            field: fast_field.to_string(),
            fast_value: PhantomData,
-            order,
        }
    }

@@ -744,7 +721,7 @@ mod tests {
    use crate::schema::{Field, Schema, FAST, STORED, TEXT};
    use crate::time::format_description::well_known::Rfc3339;
    use crate::time::OffsetDateTime;
-    use crate::{DateTime, DocAddress, DocId, Index, IndexWriter, Order, Score, SegmentReader};
+    use crate::{DateTime, DocAddress, DocId, Index, IndexWriter, Score, SegmentReader};

    fn make_index() -> crate::Result<Index> {
        let mut schema_builder = Schema::builder();
@@ -905,7 +882,7 @@ mod tests {
        });
        let searcher = index.reader()?.searcher();

-        let top_collector = TopDocs::with_limit(4).order_by_u64_field(SIZE, Order::Desc);
+        let top_collector = TopDocs::with_limit(4).order_by_u64_field(SIZE);
        let top_docs: Vec<(u64, DocAddress)> = searcher.search(&query, &top_collector)?;
        assert_eq!(
            &top_docs[..],
@@ -944,7 +921,7 @@ mod tests {
        ))?;
        index_writer.commit()?;
        let searcher = index.reader()?.searcher();
-        let top_collector = TopDocs::with_limit(3).order_by_fast_field("birthday", Order::Desc);
+        let top_collector = TopDocs::with_limit(3).order_by_fast_field("birthday");
        let top_docs: Vec<(DateTime, DocAddress)> = searcher.search(&AllQuery, &top_collector)?;
        assert_eq!(
            &top_docs[..],
@@ -974,7 +951,7 @@ mod tests {
        ))?;
        index_writer.commit()?;
        let searcher = index.reader()?.searcher();
-        let top_collector = TopDocs::with_limit(3).order_by_fast_field("altitude", Order::Desc);
+        let top_collector = TopDocs::with_limit(3).order_by_fast_field("altitude");
        let top_docs: Vec<(i64, DocAddress)> = searcher.search(&AllQuery, &top_collector)?;
        assert_eq!(
            &top_docs[..],
@@ -1004,7 +981,7 @@ mod tests {
        ))?;
        index_writer.commit()?;
        let searcher = index.reader()?.searcher();
-        let top_collector = TopDocs::with_limit(3).order_by_fast_field("altitude", Order::Desc);
+        let top_collector = TopDocs::with_limit(3).order_by_fast_field("altitude");
        let top_docs: Vec<(f64, DocAddress)> = searcher.search(&AllQuery, &top_collector)?;
        assert_eq!(
            &top_docs[..],
@@ -1032,7 +1009,7 @@ mod tests {
                .unwrap();
        });
        let searcher = index.reader().unwrap().searcher();
-        let top_collector = TopDocs::with_limit(4).order_by_u64_field("missing_field", Order::Desc);
+        let top_collector = TopDocs::with_limit(4).order_by_u64_field("missing_field");
        let segment_reader = searcher.segment_reader(0u32);
        top_collector
            .for_segment(0, segment_reader)
@@ -1050,7 +1027,7 @@ mod tests {
        index_writer.commit()?;
        let searcher = index.reader()?.searcher();
        let segment = searcher.segment_reader(0);
-        let top_collector = TopDocs::with_limit(4).order_by_u64_field(SIZE, Order::Desc);
+        let top_collector = TopDocs::with_limit(4).order_by_u64_field(SIZE);
        let err = top_collector.for_segment(0, segment).err().unwrap();
        assert!(matches!(err, crate::TantivyError::InvalidArgument(_)));
        Ok(())
@@ -1067,7 +1044,7 @@ mod tests {
        index_writer.commit()?;
        let searcher = index.reader()?.searcher();
        let segment = searcher.segment_reader(0);
-        let top_collector = TopDocs::with_limit(4).order_by_fast_field::<i64>(SIZE, Order::Desc);
+        let top_collector = TopDocs::with_limit(4).order_by_fast_field::<i64>(SIZE);
        let err = top_collector.for_segment(0, segment).err().unwrap();
        assert!(
            matches!(err, crate::TantivyError::SchemaError(msg) if msg == "Field \"size\" is not a fast field.")
@@ -1129,50 +1106,4 @@ mod tests {
        let query = query_parser.parse_query(query).unwrap();
        (index, query)
    }
-    #[test]
-    fn test_fast_field_ascending_order() -> crate::Result<()> {
-        let mut schema_builder = Schema::builder();
-        let title = schema_builder.add_text_field(TITLE, TEXT);
-        let size = schema_builder.add_u64_field(SIZE, FAST);
-        let schema = schema_builder.build();
-        let (index, query) = index("beer", title, schema, |index_writer| {
-            index_writer
-                .add_document(doc!(
-                    title => "bottle of beer",
-                    size => 12u64,
-                ))
-                .unwrap();
-            index_writer
-                .add_document(doc!(
-                    title => "growler of beer",
-                    size => 64u64,
-                ))
-                .unwrap();
-            index_writer
-                .add_document(doc!(
-                    title => "pint of beer",
-                    size => 16u64,
-                ))
-                .unwrap();
-            index_writer
-                .add_document(doc!(
-                    title => "empty beer",
-                ))
-                .unwrap();
-        });
-        let searcher = index.reader()?.searcher();
-
-        let top_collector = TopDocs::with_limit(4).order_by_fast_field(SIZE, Order::Asc);
-        let top_docs: Vec<(u64, DocAddress)> = searcher.search(&query, &top_collector)?;
-        assert_eq!(
-            &top_docs[..],
-            &[
-                (12, DocAddress::new(0, 0)),
-                (16, DocAddress::new(0, 2)),
-                (64, DocAddress::new(0, 1)),
-                (18446744073709551615, DocAddress::new(0, 3)),
-            ]
-        );
-        Ok(())
-    }
 }
--- a/src/core/index.rs
+++ b/src/core/index.rs
@@ -120,8 +120,8 @@ impl IndexBuilder {
        Self {
            schema: None,
            index_settings: IndexSettings::default(),
-            tokenizer_manager: TokenizerManager::default_for_indexing(),
-            fast_field_tokenizer_manager: TokenizerManager::default_for_fast_fields(),
+            tokenizer_manager: TokenizerManager::default(),
+            fast_field_tokenizer_manager: TokenizerManager::default(),
        }
    }

@@ -400,8 +400,8 @@ impl Index {
            settings: metas.index_settings.clone(),
            directory,
            schema,
-            tokenizers: TokenizerManager::default_for_indexing(),
-            fast_field_tokenizers: TokenizerManager::default_for_fast_fields(),
+            tokenizers: TokenizerManager::default(),
+            fast_field_tokenizers: TokenizerManager::default(),
            executor: Arc::new(Executor::single_thread()),
            inventory,
        }
--- a/src/core/index_meta.rs
+++ b/src/core/index_meta.rs
@@ -410,9 +410,7 @@ mod tests {
    use super::IndexMeta;
    use crate::core::index_meta::UntrackedIndexMeta;
    use crate::schema::{Schema, TEXT};
-    use crate::store::Compressor;
-    #[cfg(feature = "zstd-compression")]
-    use crate::store::ZstdCompressor;
+    use crate::store::{Compressor, ZstdCompressor};
    use crate::{IndexSettings, IndexSortByField, Order};

    #[test]
@@ -448,7 +446,6 @@ mod tests {
    }

    #[test]
-    #[cfg(feature = "zstd-compression")]
    fn test_serialize_metas_zstd_compressor() {
        let schema = {
            let mut schema_builder = Schema::builder();
@@ -485,14 +482,13 @@ mod tests {
    }

    #[test]
-    #[cfg(all(feature = "lz4-compression", feature = "zstd-compression"))]
    fn test_serialize_metas_invalid_comp() {
        let json = r#"{"index_settings":{"sort_by_field":{"field":"text","order":"Asc"},"docstore_compression":"zsstd","docstore_blocksize":1000000},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#;

        let err = serde_json::from_str::<UntrackedIndexMeta>(json).unwrap_err();
        assert_eq!(
            err.to_string(),
-            "unknown variant `zsstd`, expected one of `none`, `lz4`, `zstd`, \
+            "unknown variant `zsstd`, expected one of `none`, `lz4`, `brotli`, `snappy`, `zstd`, \
             `zstd(compression_level=5)` at line 1 column 96"
                .to_string()
        );
@@ -506,20 +502,6 @@ mod tests {
        );
    }

-    #[test]
-    #[cfg(not(feature = "zstd-compression"))]
-    fn test_serialize_metas_unsupported_comp() {
-        let json = r#"{"index_settings":{"sort_by_field":{"field":"text","order":"Asc"},"docstore_compression":"zstd","docstore_blocksize":1000000},"segments":[],"schema":[{"name":"text","type":"text","options":{"indexing":{"record":"position","fieldnorms":true,"tokenizer":"default"},"stored":false,"fast":false}}],"opstamp":0}"#;
-
-        let err = serde_json::from_str::<UntrackedIndexMeta>(json).unwrap_err();
-        assert_eq!(
-            err.to_string(),
-            "unsupported variant `zstd`, please enable Tantivy's `zstd-compression` feature at \
-             line 1 column 95"
-                .to_string()
-        );
-    }
-
    #[test]
    #[cfg(feature = "lz4-compression")]
    fn test_index_settings_default() {
--- a/src/fastfield/mod.rs
+++ b/src/fastfield/mod.rs
@@ -446,8 +446,7 @@ mod tests {
    #[test]
    fn test_text_fastfield() {
        let mut schema_builder = Schema::builder();
-        let text_options: TextOptions = TextOptions::from(TEXT).set_fast("raw");
-        let text_field = schema_builder.add_text_field("text", text_options);
+        let text_field = schema_builder.add_text_field("text", TEXT | FAST);
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema);

@@ -1083,7 +1082,7 @@ mod tests {
    #[test]
    fn test_fast_field_in_json_field_expand_dots_disabled() {
        let mut schema_builder = Schema::builder();
-        let json_option = JsonObjectOptions::default().set_fast("default");
+        let json_option = JsonObjectOptions::default().set_fast(None);
        let json = schema_builder.add_json_field("json", json_option);
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema);
@@ -1109,7 +1108,7 @@ mod tests {
    #[test]
    fn test_fast_field_in_json_field_with_tokenizer() {
        let mut schema_builder = Schema::builder();
-        let json_option = JsonObjectOptions::default().set_fast("default");
+        let json_option = JsonObjectOptions::default().set_fast(Some("default"));
        let json = schema_builder.add_json_field("json", json_option);
        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema);
@@ -1135,7 +1134,7 @@ mod tests {
    fn test_fast_field_in_json_field_expand_dots_enabled() {
        let mut schema_builder = Schema::builder();
        let json_option = JsonObjectOptions::default()
-            .set_fast("default")
+            .set_fast(None)
            .set_expand_dots_enabled();
        let json = schema_builder.add_json_field("json", json_option);
        let schema = schema_builder.build();
@@ -1203,10 +1202,10 @@ mod tests {
    #[test]
    fn test_fast_field_tokenizer() {
        let mut schema_builder = Schema::builder();
-        let opt = TextOptions::default().set_fast("custom_lowercase");
+        let opt = TextOptions::default().set_fast(Some("custom_lowercase"));
        let text_field = schema_builder.add_text_field("text", opt);
        let schema = schema_builder.build();
-        let ff_tokenizer_manager = TokenizerManager::default_for_fast_fields();
+        let ff_tokenizer_manager = TokenizerManager::default();
        ff_tokenizer_manager.register(
            "custom_lowercase",
            TextAnalyzer::builder(RawTokenizer::default())
@@ -1239,7 +1238,7 @@ mod tests {
                    .set_index_option(crate::schema::IndexRecordOption::WithFreqs)
                    .set_tokenizer("raw"),
            )
-            .set_fast("default")
+            .set_fast(Some("default"))
            .set_stored();

        let log_field = schema_builder.add_text_field("log_level", text_fieldtype);
@@ -1272,7 +1271,7 @@ mod tests {
    fn test_shadowing_fast_field_with_expand_dots() {
        let mut schema_builder = Schema::builder();
        let json_option = JsonObjectOptions::default()
-            .set_fast("default")
+            .set_fast(None)
            .set_expand_dots_enabled();
        let json_field = schema_builder.add_json_field("jsonfield", json_option.clone());
        let shadowing_json_field = schema_builder.add_json_field("jsonfield.attr", json_option);
--- a/src/fastfield/readers.rs
+++ b/src/fastfield/readers.rs
@@ -349,7 +349,7 @@ mod tests {
        schema_builder.add_json_field(
            "json_expand_dots_enabled",
            JsonObjectOptions::default()
-                .set_fast("default")
+                .set_fast(None)
                .set_expand_dots_enabled(),
        );
        let dynamic_field = schema_builder.add_json_field("_dyna", FAST);
--- a/src/fastfield/writer.rs
+++ b/src/fastfield/writer.rs
@@ -18,8 +18,6 @@ const JSON_DEPTH_LIMIT: usize = 20;
 pub struct FastFieldsWriter {
    columnar_writer: ColumnarWriter,
    fast_field_names: Vec<Option<String>>, //< TODO see if we can hash the field name hash too.
-    // Field -> Fast field tokenizer mapping.
-    // All text fast fields should have a tokenizer.
    per_field_tokenizer: Vec<Option<TextAnalyzer>>,
    date_precisions: Vec<DateTimePrecision>,
    expand_dots: Vec<bool>,
@@ -63,7 +61,7 @@ impl FastFieldsWriter {
                if let Some(tokenizer_name) = json_object_options.get_fast_field_tokenizer_name() {
                    let text_analyzer = tokenizer_manager.get(tokenizer_name).ok_or_else(|| {
                        TantivyError::InvalidArgument(format!(
-                            "Tokenizer `{tokenizer_name}` not found"
+                            "Tokenizer {tokenizer_name:?} not found"
                        ))
                    })?;
                    per_field_tokenizer[field_id.field_id() as usize] = Some(text_analyzer);
@@ -159,6 +157,9 @@ impl FastFieldsWriter {
                                    &token.text,
                                );
                            })
+                        } else {
+                            self.columnar_writer
+                                .record_str(doc_id, field_name.as_str(), text_val);
                        }
                    }
                    Value::Bytes(bytes_val) => {
@@ -200,20 +201,18 @@ impl FastFieldsWriter {
                        self.json_path_buffer.clear();
                        self.json_path_buffer.push_str(field_name);

-                        let text_analyzer_opt =
+                        let text_analyzer =
                            &mut self.per_field_tokenizer[field_value.field().field_id() as usize];

-                        if let Some(text_analyzer) = text_analyzer_opt {
-                            record_json_obj_to_columnar_writer(
-                                doc_id,
-                                json_obj,
-                                expand_dots,
-                                JSON_DEPTH_LIMIT,
-                                &mut self.json_path_buffer,
-                                &mut self.columnar_writer,
-                                text_analyzer,
-                            );
-                        }
+                        record_json_obj_to_columnar_writer(
+                            doc_id,
+                            json_obj,
+                            expand_dots,
+                            JSON_DEPTH_LIMIT,
+                            &mut self.json_path_buffer,
+                            &mut self.columnar_writer,
+                            text_analyzer,
+                        );
                    }
                    Value::IpAddr(ip_addr) => {
                        self.columnar_writer
@@ -264,7 +263,7 @@ fn record_json_obj_to_columnar_writer(
    remaining_depth_limit: usize,
    json_path_buffer: &mut String,
    columnar_writer: &mut columnar::ColumnarWriter,
-    text_analyzer: &mut TextAnalyzer,
+    tokenizer: &mut Option<TextAnalyzer>,
 ) {
    for (key, child) in json_obj {
        let len_path = json_path_buffer.len();
@@ -289,7 +288,7 @@ fn record_json_obj_to_columnar_writer(
            remaining_depth_limit,
            json_path_buffer,
            columnar_writer,
-            text_analyzer,
+            tokenizer,
        );
        // popping our sub path.
        json_path_buffer.truncate(len_path);
@@ -303,7 +302,7 @@ fn record_json_value_to_columnar_writer(
    mut remaining_depth_limit: usize,
    json_path_writer: &mut String,
    columnar_writer: &mut columnar::ColumnarWriter,
-    text_analyzer: &mut TextAnalyzer,
+    tokenizer: &mut Option<TextAnalyzer>,
 ) {
    if remaining_depth_limit == 0 {
        return;
@@ -322,10 +321,14 @@ fn record_json_value_to_columnar_writer(
            }
        }
        serde_json::Value::String(text) => {
-            let mut token_stream = text_analyzer.token_stream(text);
-            token_stream.process(&mut |token| {
-                columnar_writer.record_str(doc, json_path_writer.as_str(), &token.text);
-            });
+            if let Some(text_analyzer) = tokenizer.as_mut() {
+                let mut token_stream = text_analyzer.token_stream(text);
+                token_stream.process(&mut |token| {
+                    columnar_writer.record_str(doc, json_path_writer.as_str(), &token.text);
+                })
+            } else {
+                columnar_writer.record_str(doc, json_path_writer.as_str(), text);
+            }
        }
        serde_json::Value::Array(arr) => {
            for el in arr {
@@ -336,7 +339,7 @@ fn record_json_value_to_columnar_writer(
                    remaining_depth_limit,
                    json_path_writer,
                    columnar_writer,
-                    text_analyzer,
+                    tokenizer,
                );
            }
        }
@@ -348,7 +351,7 @@ fn record_json_value_to_columnar_writer(
                remaining_depth_limit,
                json_path_writer,
                columnar_writer,
-                text_analyzer,
+                tokenizer,
            );
        }
    }
@@ -368,9 +371,6 @@ mod tests {
    ) -> ColumnarReader {
        let mut columnar_writer = ColumnarWriter::default();
        let mut json_path = String::new();
-        let mut text_analyzer = crate::tokenizer::TokenizerManager::default_for_fast_fields()
-            .get(crate::schema::DEFAULT_FAST_FIELD_TOKENIZER)
-            .unwrap();
        for (doc, json_doc) in json_docs.iter().enumerate() {
            record_json_value_to_columnar_writer(
                doc as u32,
@@ -379,7 +379,7 @@ mod tests {
                JSON_DEPTH_LIMIT,
                &mut json_path,
                &mut columnar_writer,
-                &mut text_analyzer,
+                &mut None,
            );
        }
        let mut buffer = Vec::new();
@@ -399,7 +399,6 @@ mod tests {
        });
        let columnar_reader = test_columnar_from_jsons_aux(&[json_doc], false);
        let columns = columnar_reader.list_columns().unwrap();
-        assert_eq!(columns.len(), 5);
        {
            assert_eq!(columns[0].0, "arr");
            let column_arr_opt: Option<StrColumn> = columns[0].1.open().unwrap().into();
@@ -435,9 +434,7 @@ mod tests {
        {
            assert_eq!(columns[4].0, "text");
            let column_text_opt: Option<StrColumn> = columns[4].1.open().unwrap().into();
-            let column_text = column_text_opt.unwrap();
-            let term_ords: Vec<u64> = column_text.term_ords(0).collect();
-            assert_eq!(&term_ords[..], &[0]);
+            assert!(column_text_opt.unwrap().term_ords(0).eq([0].into_iter()));
        }
    }

--- a/src/lib.rs
+++ b/src/lib.rs
@@ -191,7 +191,7 @@ pub use crate::schema::{DateOptions, DateTimePrecision, Document, Term};
 /// Index format version.
 const INDEX_FORMAT_VERSION: u32 = 5;

-#[cfg(all(feature = "mmap", unix))]
+#[cfg(unix)]
 pub use memmap2::Advice;

 /// Structure version for the index.
--- a/src/query/query_parser/query_parser.rs
+++ b/src/query/query_parser/query_parser.rs
@@ -956,7 +956,7 @@ mod test {
            .iter()
            .flat_map(|field_name| schema.get_field(field_name))
            .collect();
-        let tokenizer_manager = TokenizerManager::default_for_indexing();
+        let tokenizer_manager = TokenizerManager::default();
        tokenizer_manager.register(
            "en_with_stop_words",
            TextAnalyzer::builder(SimpleTokenizer::default())
@@ -1447,7 +1447,7 @@ mod test {
        let title = schema_builder.add_text_field("title", text_options);
        let schema = schema_builder.build();
        let default_fields = vec![title];
-        let tokenizer_manager = TokenizerManager::default_for_indexing();
+        let tokenizer_manager = TokenizerManager::default();
        let query_parser = QueryParser::new(schema, default_fields, tokenizer_manager);

        assert_matches!(
@@ -1622,8 +1622,7 @@ mod test {
        let mut schema_builder = Schema::builder();
        schema_builder.add_text_field(r#"a\.b"#, STRING);
        let schema = schema_builder.build();
-        let query_parser =
-            QueryParser::new(schema, Vec::new(), TokenizerManager::default_for_indexing());
+        let query_parser = QueryParser::new(schema, Vec::new(), TokenizerManager::default());
        let query = query_parser.parse_query(r#"a\.b:hello"#).unwrap();
        assert_eq!(
            format!("{query:?}"),
@@ -1640,11 +1639,8 @@ mod test {
        schema_builder.add_text_field("first.toto.titi", STRING);
        schema_builder.add_text_field("third.a.b.c", STRING);
        let schema = schema_builder.build();
-        let query_parser = QueryParser::new(
-            schema.clone(),
-            Vec::new(),
-            TokenizerManager::default_for_indexing(),
-        );
+        let query_parser =
+            QueryParser::new(schema.clone(), Vec::new(), TokenizerManager::default());
        assert_eq!(
            query_parser.split_full_path("first.toto"),
            Some((schema.get_field("first.toto").unwrap(), ""))
--- a/src/query/set_query.rs
+++ b/src/query/set_query.rs
@@ -72,14 +72,6 @@ impl Query for TermSetQuery {
    fn weight(&self, enable_scoring: EnableScoring<'_>) -> crate::Result<Box<dyn Weight>> {
        Ok(Box::new(self.specialized_weight(enable_scoring.schema())?))
    }
-
-    fn query_terms<'a>(&'a self, visitor: &mut dyn FnMut(&'a Term, bool)) {
-        for terms in self.terms_map.values() {
-            for term in terms {
-                visitor(term, false);
-            }
-        }
-    }
 }

 struct SetDfaWrapper(Map<Vec<u8>>);
--- a/src/schema/json_object_options.rs
+++ b/src/schema/json_object_options.rs
@@ -4,7 +4,7 @@ use serde::{Deserialize, Serialize};

 use super::text_options::{FastFieldTextOptions, TokenizerName};
 use crate::schema::flags::{FastFlag, SchemaFlagList, StoredFlag};
-use crate::schema::{TextFieldIndexing, TextOptions, DEFAULT_FAST_FIELD_TOKENIZER};
+use crate::schema::{TextFieldIndexing, TextOptions};

 /// The `JsonObjectOptions` make it possible to
 /// configure how a json object field should be indexed and stored.
@@ -58,19 +58,20 @@ impl JsonObjectOptions {
    /// Returns true if and only if the json object fields are
    /// to be treated as fast fields.
    pub fn is_fast(&self) -> bool {
-        match self.fast {
-            FastFieldTextOptions::Disabled => false,
-            FastFieldTextOptions::Enabled { .. } => true,
-        }
+        matches!(self.fast, FastFieldTextOptions::IsEnabled(true))
+            || matches!(
+                &self.fast,
+                FastFieldTextOptions::EnabledWithTokenizer { with_tokenizer: _ }
+            )
    }

    /// Returns true if and only if the value is a fast field.
    pub fn get_fast_field_tokenizer_name(&self) -> Option<&str> {
        match &self.fast {
-            FastFieldTextOptions::Disabled => None,
-            FastFieldTextOptions::Enabled {
-                tokenizer: with_tokenizer,
-            } => Some(with_tokenizer.name()),
+            FastFieldTextOptions::IsEnabled(true) | FastFieldTextOptions::IsEnabled(false) => None,
+            FastFieldTextOptions::EnabledWithTokenizer {
+                with_tokenizer: tokenizer,
+            } => Some(tokenizer.name()),
        }
    }

@@ -129,11 +130,15 @@ impl JsonObjectOptions {
    /// [`TermDictionary::ord_to_term()`](crate::termdict::TermDictionary::ord_to_term)
    /// from the dictionary.
    #[must_use]
-    pub fn set_fast(mut self, tokenizer_name: &str) -> Self {
-        let with_tokenizer = TokenizerName::from_name(tokenizer_name);
-        self.fast = FastFieldTextOptions::Enabled {
-            tokenizer: with_tokenizer,
-        };
+    pub fn set_fast(mut self, tokenizer_name: Option<&str>) -> Self {
+        if let Some(tokenizer) = tokenizer_name {
+            let tokenizer = TokenizerName::from_name(tokenizer);
+            self.fast = FastFieldTextOptions::EnabledWithTokenizer {
+                with_tokenizer: tokenizer,
+            }
+        } else {
+            self.fast = FastFieldTextOptions::IsEnabled(true);
+        }
        self
    }

@@ -161,9 +166,7 @@ impl From<FastFlag> for JsonObjectOptions {
        JsonObjectOptions {
            stored: false,
            indexing: None,
-            fast: FastFieldTextOptions::Enabled {
-                tokenizer: TokenizerName::from_static(DEFAULT_FAST_FIELD_TOKENIZER),
-            },
+            fast: FastFieldTextOptions::IsEnabled(true),
            expand_dots_enabled: false,
        }
    }
--- a/src/schema/mod.rs
+++ b/src/schema/mod.rs
@@ -1,6 +1,6 @@
 //! Schema definition for tantivy's indices.
-//! # Setting your schema in Tantivy
 //!
+//! # Setting your schema in Tantivy
 //!
 //! Tantivy has a very strict schema.
 //! The schema defines information about the fields your index contains, that is, for each field:
@@ -153,8 +153,6 @@ pub use self::term::{Term, ValueBytes, JSON_END_OF_PATH};
 pub use self::text_options::{TextFieldIndexing, TextOptions, STRING, TEXT};
 pub use self::value::Value;

-pub(crate) const DEFAULT_FAST_FIELD_TOKENIZER: &str = "default";
-
 /// Validator for a potential `field_name`.
 /// Returns true if the name can be use for a field name.
 ///
--- a/src/schema/text_options.rs
+++ b/src/schema/text_options.rs
@@ -24,68 +24,19 @@ pub struct TextOptions {
 }

 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
-#[serde(
-    into = "FastFieldTextOptionsForSerialization",
-    from = "FastFieldTextOptionsForSerialization"
-)]
+#[serde(untagged)]
 /// Enum to control how the fast field setting of a text field.
-#[derive(Default)]
 pub(crate) enum FastFieldTextOptions {
-    /// Fastfield disabled
-    #[default]
-    Disabled,
+    /// Flag to enable/disable the fast field; no tokenizer is configured in this form.
+    IsEnabled(bool),
    /// Enable with tokenizer. The tokenizer must be available on the fast field tokenizer manager.
    /// `Index::fast_field_tokenizer`.
-    Enabled { tokenizer: TokenizerName },
+    EnabledWithTokenizer { with_tokenizer: TokenizerName },
 }

-/// Enum used to control the way we serialize fast field text options.
-///
-/// For backward compatiblity reasons, we folow the format introduce in tantivy 0.19.
-/// `false` -> Disabled
-/// `true` -> Enabled with default tokenizer
-/// `{ tokenizer: "something" }` -> Enabled with a specific tokenizer.
-#[derive(Serialize, Deserialize)]
-#[serde(untagged)]
-enum FastFieldTextOptionsForSerialization {
-    IsEnabled(bool),
-    EnabledWithTokenizer {
-        #[serde(alias = "with_tokenizer")]
-        tokenizer: TokenizerName,
-    },
-}
-
-impl From<FastFieldTextOptionsForSerialization> for FastFieldTextOptions {
-    fn from(value: FastFieldTextOptionsForSerialization) -> Self {
-        match value {
-            FastFieldTextOptionsForSerialization::IsEnabled(enabled) => {
-                if enabled {
-                    FastFieldTextOptions::Enabled {
-                        tokenizer: TokenizerName::from_static(
-                            crate::schema::DEFAULT_FAST_FIELD_TOKENIZER,
-                        ),
-                    }
-                } else {
-                    FastFieldTextOptions::Disabled
-                }
-            }
-            FastFieldTextOptionsForSerialization::EnabledWithTokenizer { tokenizer } => {
-                FastFieldTextOptions::Enabled { tokenizer }
-            }
-        }
-    }
-}
-
-impl From<FastFieldTextOptions> for FastFieldTextOptionsForSerialization {
-    fn from(value: FastFieldTextOptions) -> Self {
-        match value {
-            FastFieldTextOptions::Disabled => {
-                FastFieldTextOptionsForSerialization::IsEnabled(false)
-            }
-            FastFieldTextOptions::Enabled { tokenizer } => {
-                FastFieldTextOptionsForSerialization::EnabledWithTokenizer { tokenizer }
-            }
-        }
+impl Default for FastFieldTextOptions {
+    fn default() -> Self {
+        FastFieldTextOptions::IsEnabled(false)
    }
 }

@@ -94,13 +45,23 @@ impl BitOr<FastFieldTextOptions> for FastFieldTextOptions {

    fn bitor(self, other: FastFieldTextOptions) -> FastFieldTextOptions {
        match (self, other) {
-            (FastFieldTextOptions::Enabled { tokenizer }, _)
-            | (_, FastFieldTextOptions::Enabled { tokenizer }) => {
-                FastFieldTextOptions::Enabled { tokenizer }
-            }
-            (FastFieldTextOptions::Disabled, FastFieldTextOptions::Disabled) => {
-                FastFieldTextOptions::Disabled
-            }
+            (
+                FastFieldTextOptions::EnabledWithTokenizer {
+                    with_tokenizer: tokenizer,
+                },
+                _,
+            )
+            | (
+                _,
+                FastFieldTextOptions::EnabledWithTokenizer {
+                    with_tokenizer: tokenizer,
+                },
+            ) => FastFieldTextOptions::EnabledWithTokenizer {
+                with_tokenizer: tokenizer,
+            },
+            (FastFieldTextOptions::IsEnabled(true), _)
+            | (_, FastFieldTextOptions::IsEnabled(true)) => FastFieldTextOptions::IsEnabled(true),
+            (_, FastFieldTextOptions::IsEnabled(false)) => FastFieldTextOptions::IsEnabled(false),
        }
    }
 }
@@ -122,17 +83,20 @@ impl TextOptions {

    /// Returns true if and only if the value is a fast field.
    pub fn is_fast(&self) -> bool {
-        match &self.fast {
-            FastFieldTextOptions::Disabled => false,
-            FastFieldTextOptions::Enabled { .. } => true,
-        }
+        matches!(self.fast, FastFieldTextOptions::IsEnabled(true))
+            || matches!(
+                &self.fast,
+                FastFieldTextOptions::EnabledWithTokenizer { with_tokenizer: _ }
+            )
    }

    /// Returns true if and only if the value is a fast field.
    pub fn get_fast_field_tokenizer_name(&self) -> Option<&str> {
        match &self.fast {
-            FastFieldTextOptions::Disabled => None,
-            FastFieldTextOptions::Enabled { tokenizer } => Some(tokenizer.name()),
+            FastFieldTextOptions::IsEnabled(true) | FastFieldTextOptions::IsEnabled(false) => None,
+            FastFieldTextOptions::EnabledWithTokenizer {
+                with_tokenizer: tokenizer,
+            } => Some(tokenizer.name()),
        }
    }

@@ -157,9 +121,15 @@ impl TextOptions {
    /// [`TermDictionary::ord_to_term()`](crate::termdict::TermDictionary::ord_to_term)
    /// from the dictionary.
    #[must_use]
-    pub fn set_fast(mut self, tokenizer_name: &str) -> TextOptions {
-        let tokenizer = TokenizerName::from_name(tokenizer_name);
-        self.fast = FastFieldTextOptions::Enabled { tokenizer };
+    pub fn set_fast(mut self, tokenizer_name: Option<&str>) -> TextOptions {
+        if let Some(tokenizer) = tokenizer_name {
+            let tokenizer = TokenizerName::from_name(tokenizer);
+            self.fast = FastFieldTextOptions::EnabledWithTokenizer {
+                with_tokenizer: tokenizer,
+            }
+        } else {
+            self.fast = FastFieldTextOptions::IsEnabled(true);
+        }
        self
    }

@@ -293,7 +263,7 @@ pub const STRING: TextOptions = TextOptions {
        record: IndexRecordOption::Basic,
    }),
    stored: false,
-    fast: FastFieldTextOptions::Disabled,
+    fast: FastFieldTextOptions::IsEnabled(false),
    coerce: false,
 };

@@ -306,7 +276,7 @@ pub const TEXT: TextOptions = TextOptions {
    }),
    stored: false,
    coerce: false,
-    fast: FastFieldTextOptions::Disabled,
+    fast: FastFieldTextOptions::IsEnabled(false),
 };

 impl<T: Into<TextOptions>> BitOr<T> for TextOptions {
@@ -356,9 +326,7 @@ impl From<FastFlag> for TextOptions {
        TextOptions {
            indexing: None,
            stored: false,
-            fast: FastFieldTextOptions::Enabled {
-                tokenizer: TokenizerName::from_static(crate::schema::DEFAULT_FAST_FIELD_TOKENIZER),
-            },
+            fast: FastFieldTextOptions::IsEnabled(true),
            coerce: false,
        }
    }
@@ -424,21 +392,21 @@ mod tests {
    #[test]
    fn serde_fast_field_tokenizer() {
        let json = r#" {
-            "fast": { "tokenizer": "default" }
+            "fast": { "with_tokenizer": "default" }
        } "#;
        let options: TextOptions = serde_json::from_str(json).unwrap();
        assert_eq!(
            options.fast,
-            FastFieldTextOptions::Enabled {
-                tokenizer: TokenizerName::from_static("default")
+            FastFieldTextOptions::EnabledWithTokenizer {
+                with_tokenizer: TokenizerName::from_static("default")
            }
        );
        let options: TextOptions =
            serde_json::from_str(&serde_json::to_string(&options).unwrap()).unwrap();
        assert_eq!(
            options.fast,
-            FastFieldTextOptions::Enabled {
-                tokenizer: TokenizerName::from_static("default")
+            FastFieldTextOptions::EnabledWithTokenizer {
+                with_tokenizer: TokenizerName::from_static("default")
            }
        );

@@ -446,28 +414,18 @@ mod tests {
            "fast": true
        } "#;
        let options: TextOptions = serde_json::from_str(json).unwrap();
-        assert_eq!(
-            options.fast,
-            FastFieldTextOptions::Enabled {
-                tokenizer: TokenizerName::from_static(DEFAULT_FAST_FIELD_TOKENIZER)
-            }
-        );
+        assert_eq!(options.fast, FastFieldTextOptions::IsEnabled(true));
        let options: TextOptions =
            serde_json::from_str(&serde_json::to_string(&options).unwrap()).unwrap();
-        assert_eq!(
-            options.fast,
-            FastFieldTextOptions::Enabled {
-                tokenizer: TokenizerName::from_static(DEFAULT_FAST_FIELD_TOKENIZER)
-            }
-        );
+        assert_eq!(options.fast, FastFieldTextOptions::IsEnabled(true));

        let json = r#" {
            "fast": false
        } "#;
        let options: TextOptions = serde_json::from_str(json).unwrap();
-        assert_eq!(options.fast, FastFieldTextOptions::Disabled);
+        assert_eq!(options.fast, FastFieldTextOptions::IsEnabled(false));
        let options: TextOptions =
            serde_json::from_str(&serde_json::to_string(&options).unwrap()).unwrap();
-        assert_eq!(options.fast, FastFieldTextOptions::Disabled);
+        assert_eq!(options.fast, FastFieldTextOptions::IsEnabled(false));
    }
 }
--- a/src/store/compression_brotli.rs
+++ b/src/store/compression_brotli.rs
@@ -0,0 +1,19 @@
+use std::io;
+
+#[inline]
+pub fn compress(mut uncompressed: &[u8], compressed: &mut Vec<u8>) -> io::Result<()> {
+    let params = brotli::enc::BrotliEncoderParams {
+        quality: 5,
+        ..Default::default()
+    };
+    compressed.clear();
+    brotli::BrotliCompress(&mut uncompressed, compressed, &params)?;
+    Ok(())
+}
+
+#[inline]
+pub fn decompress(mut compressed: &[u8], decompressed: &mut Vec<u8>) -> io::Result<()> {
+    decompressed.clear();
+    brotli::BrotliDecompress(&mut compressed, decompressed)?;
+    Ok(())
+}
--- a/src/store/compression_snap.rs
+++ b/src/store/compression_snap.rs
@@ -0,0 +1,17 @@
+use std::io::{self, Read, Write};
+
+#[inline]
+pub fn compress(uncompressed: &[u8], compressed: &mut Vec<u8>) -> io::Result<()> {
+    compressed.clear();
+    let mut encoder = snap::write::FrameEncoder::new(compressed);
+    encoder.write_all(uncompressed)?;
+    encoder.flush()?;
+    Ok(())
+}
+
+#[inline]
+pub fn decompress(compressed: &[u8], decompressed: &mut Vec<u8>) -> io::Result<()> {
+    decompressed.clear();
+    snap::read::FrameDecoder::new(compressed).read_to_end(decompressed)?;
+    Ok(())
+}
--- a/src/store/compressors.rs
+++ b/src/store/compressors.rs
@@ -17,10 +17,12 @@ pub enum Compressor {
    /// No compression
    None,
    /// Use the lz4 compressor (block format)
-    #[cfg(feature = "lz4-compression")]
    Lz4,
+    /// Use the brotli compressor
+    Brotli,
+    /// Use the snap compressor
+    Snappy,
    /// Use the zstd compressor
-    #[cfg(feature = "zstd-compression")]
    Zstd(ZstdCompressor),
 }

@@ -29,9 +31,9 @@ impl Serialize for Compressor {
    where S: serde::Serializer {
        match *self {
            Compressor::None => serializer.serialize_str("none"),
-            #[cfg(feature = "lz4-compression")]
            Compressor::Lz4 => serializer.serialize_str("lz4"),
-            #[cfg(feature = "zstd-compression")]
+            Compressor::Brotli => serializer.serialize_str("brotli"),
+            Compressor::Snappy => serializer.serialize_str("snappy"),
            Compressor::Zstd(zstd) => serializer.serialize_str(&zstd.ser_to_string()),
        }
    }
@@ -43,38 +45,27 @@ impl<'de> Deserialize<'de> for Compressor {
        let buf = String::deserialize(deserializer)?;
        let compressor = match buf.as_str() {
            "none" => Compressor::None,
-            #[cfg(feature = "lz4-compression")]
            "lz4" => Compressor::Lz4,
-            #[cfg(not(feature = "lz4-compression"))]
-            "lz4" => {
-                return Err(serde::de::Error::custom(
-                    "unsupported variant `lz4`, please enable Tantivy's `lz4-compression` feature",
-                ))
-            }
-            #[cfg(feature = "zstd-compression")]
-            _ if buf.starts_with("zstd") => Compressor::Zstd(
-                ZstdCompressor::deser_from_str(&buf).map_err(serde::de::Error::custom)?,
-            ),
-            #[cfg(not(feature = "zstd-compression"))]
-            _ if buf.starts_with("zstd") => {
-                return Err(serde::de::Error::custom(
-                    "unsupported variant `zstd`, please enable Tantivy's `zstd-compression` \
-                     feature",
-                ))
-            }
+            "brotli" => Compressor::Brotli,
+            "snappy" => Compressor::Snappy,
            _ => {
-                return Err(serde::de::Error::unknown_variant(
-                    &buf,
-                    &[
-                        "none",
-                        #[cfg(feature = "lz4-compression")]
-                        "lz4",
-                        #[cfg(feature = "zstd-compression")]
-                        "zstd",
-                        #[cfg(feature = "zstd-compression")]
-                        "zstd(compression_level=5)",
-                    ],
-                ));
+                if buf.starts_with("zstd") {
+                    Compressor::Zstd(
+                        ZstdCompressor::deser_from_str(&buf).map_err(serde::de::Error::custom)?,
+                    )
+                } else {
+                    return Err(serde::de::Error::unknown_variant(
+                        &buf,
+                        &[
+                            "none",
+                            "lz4",
+                            "brotli",
+                            "snappy",
+                            "zstd",
+                            "zstd(compression_level=5)",
+                        ],
+                    ));
+                }
            }
        };

@@ -136,15 +127,18 @@ impl ZstdCompressor {
 }

 impl Default for Compressor {
-    #[allow(unreachable_code)]
    fn default() -> Self {
-        #[cfg(feature = "lz4-compression")]
-        return Compressor::Lz4;
-
-        #[cfg(feature = "zstd-compression")]
-        return Compressor::Zstd(ZstdCompressor::default());
-
-        Compressor::None
+        if cfg!(feature = "lz4-compression") {
+            Compressor::Lz4
+        } else if cfg!(feature = "brotli-compression") {
+            Compressor::Brotli
+        } else if cfg!(feature = "snappy-compression") {
+            Compressor::Snappy
+        } else if cfg!(feature = "zstd-compression") {
+            Compressor::Zstd(ZstdCompressor::default())
+        } else {
+            Compressor::None
+        }
    }
 }

@@ -161,14 +155,50 @@ impl Compressor {
                compressed.extend_from_slice(uncompressed);
                Ok(())
            }
-            #[cfg(feature = "lz4-compression")]
-            Self::Lz4 => super::compression_lz4_block::compress(uncompressed, compressed),
-            #[cfg(feature = "zstd-compression")]
-            Self::Zstd(_zstd_compressor) => super::compression_zstd_block::compress(
-                uncompressed,
-                compressed,
-                _zstd_compressor.compression_level,
-            ),
+            Self::Lz4 => {
+                #[cfg(feature = "lz4-compression")]
+                {
+                    super::compression_lz4_block::compress(uncompressed, compressed)
+                }
+                #[cfg(not(feature = "lz4-compression"))]
+                {
+                    panic!("lz4-compression feature flag not activated");
+                }
+            }
+            Self::Brotli => {
+                #[cfg(feature = "brotli-compression")]
+                {
+                    super::compression_brotli::compress(uncompressed, compressed)
+                }
+                #[cfg(not(feature = "brotli-compression"))]
+                {
+                    panic!("brotli-compression feature flag not activated");
+                }
+            }
+            Self::Snappy => {
+                #[cfg(feature = "snappy-compression")]
+                {
+                    super::compression_snap::compress(uncompressed, compressed)
+                }
+                #[cfg(not(feature = "snappy-compression"))]
+                {
+                    panic!("snappy-compression feature flag not activated");
+                }
+            }
+            Self::Zstd(_zstd_compressor) => {
+                #[cfg(feature = "zstd-compression")]
+                {
+                    super::compression_zstd_block::compress(
+                        uncompressed,
+                        compressed,
+                        _zstd_compressor.compression_level,
+                    )
+                }
+                #[cfg(not(feature = "zstd-compression"))]
+                {
+                    panic!("zstd-compression feature flag not activated");
+                }
+            }
        }
    }
 }
--- a/src/store/decompressors.rs
+++ b/src/store/decompressors.rs
@@ -16,10 +16,12 @@ pub enum Decompressor {
    /// No compression
    None,
    /// Use the lz4 decompressor (block format)
-    #[cfg(feature = "lz4-compression")]
    Lz4,
+    /// Use the brotli decompressor
+    Brotli,
+    /// Use the snap decompressor
+    Snappy,
    /// Use the zstd decompressor
-    #[cfg(feature = "zstd-compression")]
    Zstd,
 }

@@ -27,9 +29,9 @@ impl From<Compressor> for Decompressor {
    fn from(compressor: Compressor) -> Self {
        match compressor {
            Compressor::None => Decompressor::None,
-            #[cfg(feature = "lz4-compression")]
            Compressor::Lz4 => Decompressor::Lz4,
-            #[cfg(feature = "zstd-compression")]
+            Compressor::Brotli => Decompressor::Brotli,
+            Compressor::Snappy => Decompressor::Snappy,
            Compressor::Zstd(_) => Decompressor::Zstd,
        }
    }
@@ -39,9 +41,9 @@ impl Decompressor {
    pub(crate) fn from_id(id: u8) -> Decompressor {
        match id {
            0 => Decompressor::None,
-            #[cfg(feature = "lz4-compression")]
            1 => Decompressor::Lz4,
-            #[cfg(feature = "zstd-compression")]
+            2 => Decompressor::Brotli,
+            3 => Decompressor::Snappy,
            4 => Decompressor::Zstd,
            _ => panic!("unknown compressor id {id:?}"),
        }
@@ -50,9 +52,9 @@ impl Decompressor {
    pub(crate) fn get_id(&self) -> u8 {
        match self {
            Self::None => 0,
-            #[cfg(feature = "lz4-compression")]
            Self::Lz4 => 1,
-            #[cfg(feature = "zstd-compression")]
+            Self::Brotli => 2,
+            Self::Snappy => 3,
            Self::Zstd => 4,
        }
    }
@@ -75,10 +77,46 @@ impl Decompressor {
                decompressed.extend_from_slice(compressed);
                Ok(())
            }
-            #[cfg(feature = "lz4-compression")]
-            Self::Lz4 => super::compression_lz4_block::decompress(compressed, decompressed),
-            #[cfg(feature = "zstd-compression")]
-            Self::Zstd => super::compression_zstd_block::decompress(compressed, decompressed),
+            Self::Lz4 => {
+                #[cfg(feature = "lz4-compression")]
+                {
+                    super::compression_lz4_block::decompress(compressed, decompressed)
+                }
+                #[cfg(not(feature = "lz4-compression"))]
+                {
+                    panic!("lz4-compression feature flag not activated");
+                }
+            }
+            Self::Brotli => {
+                #[cfg(feature = "brotli-compression")]
+                {
+                    super::compression_brotli::decompress(compressed, decompressed)
+                }
+                #[cfg(not(feature = "brotli-compression"))]
+                {
+                    panic!("brotli-compression feature flag not activated");
+                }
+            }
+            Self::Snappy => {
+                #[cfg(feature = "snappy-compression")]
+                {
+                    super::compression_snap::decompress(compressed, decompressed)
+                }
+                #[cfg(not(feature = "snappy-compression"))]
+                {
+                    panic!("snappy-compression feature flag not activated");
+                }
+            }
+            Self::Zstd => {
+                #[cfg(feature = "zstd-compression")]
+                {
+                    super::compression_zstd_block::decompress(compressed, decompressed)
+                }
+                #[cfg(not(feature = "zstd-compression"))]
+                {
+                    panic!("zstd-compression feature flag not activated");
+                }
+            }
        }
    }
 }
@@ -91,9 +129,9 @@ mod tests {
    #[test]
    fn compressor_decompressor_id_test() {
        assert_eq!(Decompressor::from(Compressor::None), Decompressor::None);
-        #[cfg(feature = "lz4-compression")]
        assert_eq!(Decompressor::from(Compressor::Lz4), Decompressor::Lz4);
-        #[cfg(feature = "zstd-compression")]
+        assert_eq!(Decompressor::from(Compressor::Brotli), Decompressor::Brotli);
+        assert_eq!(Decompressor::from(Compressor::Snappy), Decompressor::Snappy);
        assert_eq!(
            Decompressor::from(Compressor::Zstd(Default::default())),
            Decompressor::Zstd
--- a/src/store/mod.rs
+++ b/src/store/mod.rs
@@ -4,8 +4,8 @@
 //! order to be handled in the `Store`.
 //!
 //! Internally, documents (or rather their stored fields) are serialized to a buffer.
-//! When the buffer exceeds `block_size` (defaults to 16K), the buffer is compressed
-//! using LZ4 or Zstd and the resulting block is written to disk.
+//! When the buffer exceeds `block_size` (defaults to 16K), the buffer is compressed using `brotli`,
+//! `LZ4`, `snappy` or `zstd` and the resulting block is written to disk.
 //!
 //! One can then request for a specific `DocId`.
 //! A skip list helps navigating to the right block,
@@ -48,6 +48,12 @@ pub(crate) const DOC_STORE_VERSION: u32 = 1;
 #[cfg(feature = "lz4-compression")]
 mod compression_lz4_block;

+#[cfg(feature = "brotli-compression")]
+mod compression_brotli;
+
+#[cfg(feature = "snappy-compression")]
+mod compression_snap;
+
 #[cfg(feature = "zstd-compression")]
 mod compression_zstd_block;

@@ -194,6 +200,16 @@ pub mod tests {
    fn test_store_lz4_block() -> crate::Result<()> {
        test_store(Compressor::Lz4, BLOCK_SIZE, true)
    }
+    #[cfg(feature = "snappy-compression")]
+    #[test]
+    fn test_store_snap() -> crate::Result<()> {
+        test_store(Compressor::Snappy, BLOCK_SIZE, true)
+    }
+    #[cfg(feature = "brotli-compression")]
+    #[test]
+    fn test_store_brotli() -> crate::Result<()> {
+        test_store(Compressor::Brotli, BLOCK_SIZE, true)
+    }

    #[cfg(feature = "zstd-compression")]
    #[test]
@@ -245,8 +261,8 @@ pub mod tests {
        Ok(())
    }

+    #[cfg(feature = "snappy-compression")]
    #[cfg(feature = "lz4-compression")]
-    #[cfg(feature = "zstd-compression")]
    #[test]
    fn test_merge_with_changed_compressor() -> crate::Result<()> {
        let mut schema_builder = schema::Schema::builder();
@@ -278,7 +294,7 @@ pub mod tests {
        );
        // Change compressor, this disables stacking on merging
        let index_settings = index.settings_mut();
-        index_settings.docstore_compression = Compressor::Zstd(Default::default());
+        index_settings.docstore_compression = Compressor::Snappy;
        // Merging the segments
        {
            let segment_ids = index
@@ -300,7 +316,7 @@ pub mod tests {
                LOREM.to_string()
            );
        }
-        assert_eq!(store.decompressor(), Decompressor::Zstd);
+        assert_eq!(store.decompressor(), Decompressor::Snappy);

        Ok(())
    }
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -189,7 +189,7 @@ pub mod tests {

    #[test]
    fn test_raw_tokenizer2() {
-        let tokenizer_manager = TokenizerManager::default_for_indexing();
+        let tokenizer_manager = TokenizerManager::default();
        let mut en_tokenizer = tokenizer_manager.get("raw").unwrap();
        let mut tokens: Vec<Token> = vec![];
        {
@@ -206,7 +206,7 @@ pub mod tests {

    #[test]
    fn test_en_tokenizer() {
-        let tokenizer_manager = TokenizerManager::default_for_indexing();
+        let tokenizer_manager = TokenizerManager::default();
        assert!(tokenizer_manager.get("en_doesnotexist").is_none());
        let mut en_tokenizer = tokenizer_manager.get("en_stem").unwrap();
        let mut tokens: Vec<Token> = vec![];
@@ -228,7 +228,7 @@ pub mod tests {

    #[test]
    fn test_non_en_tokenizer() {
-        let tokenizer_manager = TokenizerManager::default_for_indexing();
+        let tokenizer_manager = TokenizerManager::default();
        tokenizer_manager.register(
            "el_stem",
            TextAnalyzer::builder(SimpleTokenizer::default())
@@ -256,7 +256,7 @@ pub mod tests {

    #[test]
    fn test_tokenizer_empty() {
-        let tokenizer_manager = TokenizerManager::default_for_indexing();
+        let tokenizer_manager = TokenizerManager::default();
        let mut en_tokenizer = tokenizer_manager.get("en_stem").unwrap();
        {
            let mut tokens: Vec<Token> = vec![];
@@ -282,7 +282,7 @@ pub mod tests {

    #[test]
    fn test_whitespace_tokenizer() {
-        let tokenizer_manager = TokenizerManager::default_for_indexing();
+        let tokenizer_manager = TokenizerManager::default();
        let mut ws_tokenizer = tokenizer_manager.get("whitespace").unwrap();
        let mut tokens: Vec<Token> = vec![];
        {
--- a/src/tokenizer/regex_tokenizer.rs
+++ b/src/tokenizer/regex_tokenizer.rs
@@ -49,6 +49,7 @@ use crate::TantivyError;
 pub struct RegexTokenizer {
    regex: Regex,
    token: Token,
+    group: usize,
 }

 impl RegexTokenizer {
@@ -59,6 +60,7 @@ impl RegexTokenizer {
            .map(|regex| Self {
                regex,
                token: Token::default(),
+                group: 0,
            })
    }
 }
--- a/src/tokenizer/split_compound_words.rs
+++ b/src/tokenizer/split_compound_words.rs
@@ -86,8 +86,6 @@ impl TokenFilter for SplitCompoundWords {
        SplitCompoundWordsFilter {
            dict: self.dict,
            inner: tokenizer,
-            cuts: Vec::new(),
-            parts: Vec::new(),
        }
    }
 }
@@ -96,33 +94,29 @@ impl TokenFilter for SplitCompoundWords {
 pub struct SplitCompoundWordsFilter<T> {
    dict: AhoCorasick,
    inner: T,
-    cuts: Vec<usize>,
-    parts: Vec<Token>,
 }

 impl<T: Tokenizer> Tokenizer for SplitCompoundWordsFilter<T> {
-    type TokenStream<'a> = SplitCompoundWordsTokenStream<'a, T::TokenStream<'a>>;
+    type TokenStream<'a> = SplitCompoundWordsTokenStream<T::TokenStream<'a>>;

    fn token_stream<'a>(&'a mut self, text: &'a str) -> Self::TokenStream<'a> {
-        self.cuts.clear();
-        self.parts.clear();
        SplitCompoundWordsTokenStream {
            dict: self.dict.clone(),
            tail: self.inner.token_stream(text),
-            cuts: &mut self.cuts,
-            parts: &mut self.parts,
+            cuts: Vec::new(),
+            parts: Vec::new(),
        }
    }
 }

-pub struct SplitCompoundWordsTokenStream<'a, T> {
+pub struct SplitCompoundWordsTokenStream<T> {
    dict: AhoCorasick,
    tail: T,
-    cuts: &'a mut Vec<usize>,
-    parts: &'a mut Vec<Token>,
+    cuts: Vec<usize>,
+    parts: Vec<Token>,
 }

-impl<'a, T: TokenStream> SplitCompoundWordsTokenStream<'a, T> {
+impl<T: TokenStream> SplitCompoundWordsTokenStream<T> {
    // Will use `self.cuts` to fill `self.parts` if `self.tail.token()`
    // can fully be split into consecutive matches against `self.dict`.
    fn split(&mut self) {
@@ -158,7 +152,7 @@ impl<'a, T: TokenStream> SplitCompoundWordsTokenStream<'a, T> {
    }
 }

-impl<'a, T: TokenStream> TokenStream for SplitCompoundWordsTokenStream<'a, T> {
+impl<T: TokenStream> TokenStream for SplitCompoundWordsTokenStream<T> {
    fn advance(&mut self) -> bool {
        self.parts.pop();

--- a/src/tokenizer/tokenizer_manager.rs
+++ b/src/tokenizer/tokenizer_manager.rs
@@ -27,7 +27,6 @@ pub struct TokenizerManager {

 impl TokenizerManager {
    /// Creates an empty tokenizer manager.
-    #[allow(clippy::new_without_default)]
    pub fn new() -> Self {
        Self {
            tokenizers: Arc::new(RwLock::new(HashMap::new())),
@@ -52,10 +51,12 @@ impl TokenizerManager {
            .get(tokenizer_name)
            .cloned()
    }
+}

+impl Default for TokenizerManager {
-    /// Creates an `TokenizerManager` prepopulated with
+    /// Creates a `TokenizerManager` prepopulated with
    /// the default pre-configured tokenizers of `tantivy`.
-    pub fn default_for_indexing() -> TokenizerManager {
+    fn default() -> TokenizerManager {
        let manager = TokenizerManager::new();
        manager.register("raw", RawTokenizer::default());
        manager.register(
@@ -76,28 +77,4 @@ impl TokenizerManager {
        manager.register("whitespace", WhitespaceTokenizer::default());
        manager
    }
-
-    /// Creates an `TokenizerManager` prepopulated with
-    /// the default pre-configured tokenizers of `tantivy`
-    /// for fast fields.
-    ///
-    /// Fast fields usually do not really tokenize the text.
-    /// It is however very useful to filter / normalize the text.
-    pub fn default_for_fast_fields() -> TokenizerManager {
-        let manager = TokenizerManager::new();
-        let raw_tokenizer = TextAnalyzer::builder(RawTokenizer::default())
-            .filter(RemoveLongFilter::limit(255))
-            .build();
-        let lower_tokenizer = TextAnalyzer::builder(RawTokenizer::default())
-            .filter(RemoveLongFilter::limit(255))
-            .filter(LowerCaser)
-            .build();
-        manager.register(
-            crate::schema::DEFAULT_FAST_FIELD_TOKENIZER,
-            lower_tokenizer.clone(),
-        );
-        manager.register("raw", raw_tokenizer);
-        manager.register("lower", lower_tokenizer);
-        manager
-    }
 }