Support for columnar

Support for NotNaN in fast fields
Minor refactoring
2026-06-15 06:50:41 +00:00 · 2022-12-21 12:21:30 +09:00 · 2022-12-21 12:20:48 +09:00 · 2022-12-21 12:18:33 +09:00 · 2022-12-21 12:16:00 +09:00 · 2022-12-20 15:30:33 +01:00
276 changed files with 19408 additions and 5872 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +0,0 @@
-cpp/* linguist-vendored
--- a/.github/workflows/coverage.yml
+++ b/.github/workflows/coverage.yml
@@ -12,12 +12,14 @@ jobs:
    steps:
      - uses: actions/checkout@v3
      - name: Install Rust
-        run: rustup toolchain install nightly --component llvm-tools-preview
+        run: rustup toolchain install nightly --profile minimal --component llvm-tools-preview
+      - uses: Swatinem/rust-cache@v2
      - uses: taiki-e/install-action@cargo-llvm-cov
      - name: Generate code coverage
        run: cargo +nightly llvm-cov --all-features --workspace --lcov --output-path lcov.info
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
+        continue-on-error: true
        with:
          token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos
          files: lcov.info
--- a/.github/workflows/long_running.yml
+++ b/.github/workflows/long_running.yml
@@ -19,11 +19,10 @@ jobs:
      uses: actions-rs/toolchain@v1
      with:
          toolchain: stable
+          profile: minimal
          override: true
-          components: rustfmt, clippy

    - name: Run indexing_unsorted
      run: cargo test indexing_unsorted -- --ignored
    - name: Run indexing_sorted
      run: cargo test indexing_sorted -- --ignored
-
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -10,34 +10,27 @@ env:
  CARGO_TERM_COLOR: always

 jobs:
-  test:
+  check:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v3
-    - name: Install latest nightly to test also against unstable feature flag
+
+    - name: Install nightly
      uses: actions-rs/toolchain@v1
      with:
            toolchain: nightly
-            override: true
+            profile: minimal
            components: rustfmt
-
    - name: Install stable
      uses: actions-rs/toolchain@v1
      with:
            toolchain: stable
-            override: true
-            components: rustfmt, clippy
+            profile: minimal
+            components: clippy

-    - name: Build
-      run: cargo build --verbose --workspace
-
-    - name: Run tests
-      run: cargo +stable test --features mmap,brotli-compression,lz4-compression,snappy-compression,zstd-compression,failpoints --verbose --workspace
-
-    - name: Run tests quickwit feature
-      run: cargo +stable test --features mmap,quickwit,failpoints --verbose --workspace
+    - uses: Swatinem/rust-cache@v2

    - name: Check Formatting
      run: cargo +nightly fmt --all -- --check
@@ -48,3 +41,34 @@ jobs:
        token: ${{ secrets.GITHUB_TOKEN }}
        args: --tests

+  test:
+
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        features: [
+            { label: "all", flags: "mmap,stopwords,brotli-compression,lz4-compression,snappy-compression,zstd-compression,failpoints" },
+            { label: "quickwit", flags: "mmap,quickwit,failpoints" }
+        ]
+
+    name: test-${{ matrix.features.label}}
+
+    steps:
+    - uses: actions/checkout@v3
+
+    - name: Install stable
+      uses: actions-rs/toolchain@v1
+      with:
+            toolchain: stable
+            profile: minimal
+            override: true
+
+    - uses: taiki-e/install-action@nextest
+    - uses: Swatinem/rust-cache@v2
+
+    - name: Run tests
+      run: cargo +stable nextest run --features ${{ matrix.features.flags }} --verbose --workspace
+
+    - name: Run doctests
+      run: cargo +stable test --doc --features ${{ matrix.features.flags }} --verbose --workspace
--- a/.gitignore
+++ b/.gitignore
@@ -9,7 +9,6 @@ target/release
 Cargo.lock
 benchmark
 .DS_Store
-cpp/simdcomp/bitpackingbenchmark
 *.bk
 .idea
 trace.dat
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@@ -10,6 +10,7 @@ Tantivy's bread and butter is to address the problem of full-text search :
 Given a large set of textual documents, and a text query, return the K-most relevant documents in a very efficient way. To execute these queries rapidly, the tantivy needs to build an index beforehand. The relevance score implemented in the tantivy is not configurable. Tantivy uses the same score as the default similarity used in Lucene / Elasticsearch, called [BM25](https://en.wikipedia.org/wiki/Okapi_BM25).

 But tantivy's scope does not stop there. Numerous features are required to power rich-search applications. For instance, one may want to:
+
 - compute the count of documents matching a query in the different section of an e-commerce website,
 - display an average price per meter square for a real estate search engine,
 - take into account historical user data to rank documents in a specific way,
@@ -22,27 +23,28 @@ rapidly select all documents matching a given predicate (also known as a query)
 collect some information about them ([See collector](#collector-define-what-to-do-with-matched-documents)).

 Roughly speaking the design is following these guiding principles:
+
 - Search should be O(1) in memory.
 - Indexing should be O(1) in memory. (In practice it is just sublinear)
 - Search should be as fast as possible

 This comes at the cost of the dynamicity of the index: while it is possible to add, and delete documents from our corpus, the tantivy is designed to handle these updates in large batches.

-## [core/](src/core): Index, segments, searchers.
+## [core/](src/core): Index, segments, searchers

 Core contains all of the high-level code to make it possible to create an index, add documents, delete documents and commit.

 This is both the most high-level part of tantivy, the least performance-sensitive one, the seemingly most mundane code... And paradoxically the most complicated part.

-### Index and Segments...
+### Index and Segments

-A tantivy index is a collection of smaller independent immutable segments. 
+A tantivy index is a collection of smaller independent immutable segments.
 Each segment contains its own independent set of data structures.

 A segment is identified by a segment id that is in fact a UUID.
 The file of a segment has the format

- ```segment-id . ext ```
+ ```segment-id . ext```

 The extension signals which data structure (or [`SegmentComponent`](src/core/segment_component.rs)) is stored in the file.

@@ -52,17 +54,15 @@ On commit, one segment per indexing thread is written to disk, and the `meta.jso

 For a better idea of how indexing works, you may read the [following blog post](https://fulmicoton.com/posts/behold-tantivy-part2/).

-
 ### Deletes

 Deletes happen by deleting a "term". Tantivy does not offer any notion of primary id, so it is up to the user to use a field in their schema as if it was a primary id, and delete the associated term if they want to delete only one specific document.

 On commit, tantivy will find all of the segments with documents matching this existing term and remove from [alive bitset file](src/fastfield/alive_bitset.rs) that represents the bitset of the alive document ids.
-Like all segment files, this file is immutable. Because it is possible to have more than one alive bitset file at a given instant, the alive bitset filename has the format ``` segment_id . commit_opstamp . del```.
+Like all segment files, this file is immutable. Because it is possible to have more than one alive bitset file at a given instant, the alive bitset filename has the format ```segment_id . commit_opstamp . del```.

 An opstamp is simply an incremental id that identifies any operation applied to the index. For instance, performing a commit or adding a document.

-
 ### DocId

 Within a segment, all documents are identified by a DocId that ranges within `[0, max_doc)`.
@@ -74,6 +74,7 @@ The DocIds are simply allocated in the order documents are added to the index.

 In separate threads, tantivy's index writer search for opportunities to merge segments.
 The point of segment merge is to:
+
 - eventually get rid of tombstoned documents
 - reduce the otherwise ever-growing number of segments.

@@ -94,7 +95,7 @@ called [`Directory`](src/directory/directory.rs).
 Contrary to Lucene however, "files" are quite different from some kind of `io::Read` object.
 Check out [`src/directory/directory.rs`](src/directory/directory.rs) trait for more details.

-Tantivy ships two main directory implementation: the `MMapDirectory` and the `RAMDirectory`,
+Tantivy ships two main directory implementation: the `MmapDirectory` and the `RamDirectory`,
 but users can extend tantivy with their own implementation.

 ## [schema/](src/schema): What are documents?
@@ -104,6 +105,7 @@ Tantivy's document follows a very strict schema, decided before building any ind
 The schema defines all of the fields that the indexes [`Document`](src/schema/document.rs) may and should contain, their types (`text`, `i64`, `u64`, `Date`, ...) as well as how it should be indexed / represented in tantivy.

 Depending on the type of the field, you can decide to
+
 - put it in the docstore
 - store it as a fast field
 - index it
@@ -117,9 +119,10 @@ As of today, tantivy's schema imposes a 1:1 relationship between a field that is

 This is not something tantivy supports, and it is up to the user to duplicate field / concatenate fields before feeding them to tantivy.

-## General information about these data structures.
+## General information about these data structures

 All data structures in tantivy, have:
+
 - a writer
 - a serializer
 - a reader
@@ -132,7 +135,7 @@ This conversion is done by the serializer.
 Finally, the reader is in charge of offering an API to read on this on-disk read-only representation.
 In tantivy, readers are designed to require very little anonymous memory. The data is read straight from an mmapped file, and loading an index is as fast as mmapping its files.

-## [store/](src/store): Here is my DocId, Gimme my document!
+## [store/](src/store): Here is my DocId, Gimme my document

 The docstore is a row-oriented storage that, for each document, stores a subset of the fields
 that are marked as stored in the schema. The docstore is compressed using a general-purpose algorithm
@@ -146,6 +149,7 @@ Once the top 10 documents have been identified, we fetch them from the store, an
 **Not useful for**

 Fetching a document from the store is typically a "slow" operation. It usually consists in
+
 - searching into a compact tree-like data structure to find the position of the right block.
 - decompressing a small block
 - returning the document from this block.
@@ -154,8 +158,7 @@ It is NOT meant to be called for every document matching a query.

 As a rule of thumb, if you hit the docstore more than 100 times per search query, you are probably misusing tantivy.

-
-## [fastfield/](src/fastfield): Here is my DocId, Gimme my value!
+## [fastfield/](src/fastfield): Here is my DocId, Gimme my value

 Fast fields are stored in a column-oriented storage that allows for random access.
 The only compression applied is bitpacking. The column comes with two meta data.
@@ -163,7 +166,7 @@ The minimum value in the column and the number of bits per doc.

 Fetching a value for a `DocId` is then as simple as computing

-```
+```rust
 min_value + fetch_bits(num_bits * doc_id..num_bits * (doc_id+1))
 ```

@@ -190,7 +193,7 @@ For advanced search engine, it is possible to store all of the features required

 Finally facets are a specific kind of fast field, and the associated source code is in [`fastfield/facet_reader.rs`](src/fastfield/facet_reader.rs).

-# The inverted search index.
+# The inverted search index

 The inverted index is the core part of full-text search.
 When presented a new document with the text field "Hello, happy tax payer!", tantivy breaks it into a list of so-called tokens. In addition to just splitting these strings into tokens, it might also do different kinds of operations like dropping the punctuation, converting the character to lowercase, apply stemming, etc. Tantivy makes it possible to configure the operations to be applied in the schema (tokenizer/ is the place where these operations are implemented).
@@ -215,19 +218,18 @@ The inverted index actually consists of two data structures chained together.

 Where [TermInfo](src/postings/term_info.rs) is an object containing some meta data about a term.

-
-## [termdict/](src/termdict): Here is a term, give me the [TermInfo](src/postings/term_info.rs)!
+## [termdict/](src/termdict): Here is a term, give me the [TermInfo](src/postings/term_info.rs)

 Tantivy's term dictionary is mainly in charge of supplying the function

 [Term](src/schema/term.rs) ⟶ [TermInfo](src/postings/term_info.rs)

 It is itself broken into two parts.
+
 - [Term](src/schema/term.rs) ⟶ [TermOrdinal](src/termdict/mod.rs) is addressed by a finite state transducer, implemented by the fst crate.
 - [TermOrdinal](src/termdict/mod.rs) ⟶ [TermInfo](src/postings/term_info.rs) is addressed by the term info store.

-
-## [postings/](src/postings): Iterate over documents... very fast!
+## [postings/](src/postings): Iterate over documents... very fast

 A posting list makes it possible to store a sorted list of doc ids and for each doc store
 a term frequency as well.
@@ -257,7 +259,6 @@ we advance the position reader by the number of term frequencies of the current
 The [BM25](https://en.wikipedia.org/wiki/Okapi_BM25) formula also requires to know the number of tokens stored in a specific field for a given document. We store this information on one byte per document in the fieldnorm.
 The fieldnorm is therefore compressed. Values up to 40 are encoded unchanged.

-
 ## [tokenizer/](src/tokenizer): How should we process text?

 Text processing is key to a good search experience.
@@ -268,7 +269,6 @@ Text processing can be configured by selecting an off-the-shelf [`Tokenizer`](./

 Tantivy's comes with few tokenizers, but external crates are offering advanced tokenizers, such as [Lindera](https://crates.io/crates/lindera) for Japanese.

-
 ## [query/](src/query): Define and compose queries

 The [Query](src/query/query.rs) trait defines what a query is.
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,12 +1,41 @@
 Tantivy 0.19
 ================================
- Updated [Date Field Type](https://github.com/quickwit-oss/tantivy/pull/1396)
-  The `DateTime` type has been updated to hold timestamps with microseconds precision.
-  `DateOptions` and `DatePrecision` have been added to configure Date fields. The precision is used to hint on fast values compression. Otherwise, seconds precision is used everywhere else (i.e terms, indexing).
- Remove Searcher pool and make `Searcher` cloneable.
+#### Bugfixes
+- Fix missing fieldnorms for u64, i64, f64, bool, bytes and date [#1620](https://github.com/quickwit-oss/tantivy/pull/1620) (@PSeitz)
+- Fix interpolation overflow in linear interpolation fastfield codec [#1480](https://github.com/quickwit-oss/tantivy/pull/1480) (@PSeitz @fulmicoton)
+
+#### Features/Improvements
+- Add support for `IN` in queryparser , e.g. `field: IN [val1 val2 val3]` [#1683](https://github.com/quickwit-oss/tantivy/pull/1683) (@trinity-1686a)
+- Skip score calculation, when no scoring is required [#1646](https://github.com/quickwit-oss/tantivy/pull/1646) (@PSeitz)
+- Limit fast fields to u32 (`get_val(u32)`) [#1644](https://github.com/quickwit-oss/tantivy/pull/1644) (@PSeitz)
+- The `DateTime` type has been updated to hold timestamps with microseconds precision.
+  `DateOptions` and `DatePrecision` have been added to configure Date fields. The precision is used to hint on fast values compression. Otherwise, seconds precision is used everywhere else (i.e terms, indexing) [#1396](https://github.com/quickwit-oss/tantivy/pull/1396) (@evanxg852000)
+- Add IP address field type [#1553](https://github.com/quickwit-oss/tantivy/pull/1553) (@PSeitz)
+- Add boolean field type [#1382](https://github.com/quickwit-oss/tantivy/pull/1382) (@boraarslan)
+- Remove Searcher pool and make `Searcher` cloneable. (@PSeitz)
+- Validate settings on create [#1570](https://github.com/quickwit-oss/tantivy/pull/1570) (@PSeitz)
+- Detect and apply gcd on fastfield codecs [#1418](https://github.com/quickwit-oss/tantivy/pull/1418) (@PSeitz)
+- Doc store
+  - use separate thread to compress block store [#1389](https://github.com/quickwit-oss/tantivy/pull/1389) [#1510](https://github.com/quickwit-oss/tantivy/pull/1510) (@PSeitz @fulmicoton)
+  - Expose doc store cache size [#1403](https://github.com/quickwit-oss/tantivy/pull/1403) (@PSeitz)
+  - Enable compression levels for doc store [#1378](https://github.com/quickwit-oss/tantivy/pull/1378) (@PSeitz)
+  - Make block size configurable [#1374](https://github.com/quickwit-oss/tantivy/pull/1374) (@kryesh)
+- Make `tantivy::TantivyError` cloneable [#1402](https://github.com/quickwit-oss/tantivy/pull/1402) (@PSeitz)
+- Add support for phrase slop in query language [#1393](https://github.com/quickwit-oss/tantivy/pull/1393) (@saroh)
+- Aggregation
+  - Add aggregation support for date type [#1693](https://github.com/quickwit-oss/tantivy/pull/1693)(@PSeitz)
+  - Add support for keyed parameter in range and histgram aggregations [#1424](https://github.com/quickwit-oss/tantivy/pull/1424) (@k-yomo)
+  - Add aggregation bucket limit [#1363](https://github.com/quickwit-oss/tantivy/pull/1363) (@PSeitz)
+- Faster indexing
+  - [#1610](https://github.com/quickwit-oss/tantivy/pull/1610) (@PSeitz)
+  - [#1594](https://github.com/quickwit-oss/tantivy/pull/1594) (@PSeitz)
+  - [#1582](https://github.com/quickwit-oss/tantivy/pull/1582) (@PSeitz)
+  - [#1611](https://github.com/quickwit-oss/tantivy/pull/1611) (@PSeitz)
+  - Added a pre-configured stop word filter for various language [#1666](https://github.com/quickwit-oss/tantivy/pull/1666) (@adamreichold)

 Tantivy 0.18
 ================================
+
 - For date values `chrono` has been replaced with `time` (@uklotzde) #1304 :
  - The `time` crate is re-exported as `tantivy::time` instead of `tantivy::chrono`.
  - The type alias `tantivy::DateTime` has been removed.
@@ -20,8 +49,13 @@ Tantivy 0.18
 - Add terms aggregation (@PSeitz)
 - Add support for zstd compression (@kryesh)

+Tantivy 0.18.1
+================================
+- Hotfix: positions computation.  #1629 (@fmassot, @fulmicoton, @PSeitz)
+
 Tantivy 0.17
 ================================
+
 - LogMergePolicy now triggers merges if the ratio of deleted documents reaches a threshold (@shikhar @fulmicoton) [#115](https://github.com/quickwit-oss/tantivy/issues/115)
 - Adds a searcher Warmer API (@shikhar @fulmicoton)
 - Change to non-strict schema. Ignore fields in data which are not defined in schema. Previously this returned an error. #1211
@@ -36,33 +70,39 @@ Tantivy 0.17

 Tantivy 0.16.2
 ================================
+
 - Bugfix in FuzzyTermQuery. (transposition_cost_one was not doing anything)

 Tantivy 0.16.1
 ========================
+
 - Major Bugfix on multivalued fastfield.  #1151
 - Demux operation (@PSeitz)

 Tantivy 0.16.0
 =========================
+
 - Bugfix in the filesum check. (@evanxg852000) #1127
 - Bugfix in positions when the index is sorted by a field. (@appaquet) #1125

 Tantivy 0.15.3
 =========================
- Major bugfix. Deleting documents was broken when the index was sorted by a field. (@appaquet, @fulmicoton) #1101

+- Major bugfix. Deleting documents was broken when the index was sorted by a field. (@appaquet, @fulmicoton) #1101

 Tantivy 0.15.2
 ========================
+
 - Major bugfix. DocStore still panics when a deleted doc is at the beginning of a block. (@appaquet) #1088

 Tantivy 0.15.1
 =========================
+
 - Major bugfix. DocStore panics when first block is deleted. (@appaquet) #1077

 Tantivy 0.15.0
 =========================
+
 - API Changes. Using Range instead of (start, end) in the API and internals (`FileSlice`, `OwnedBytes`, `Snippets`, ...)
  This change is breaking but migration is trivial.
 - Added an Histogram collector. (@fulmicoton) #994
@@ -84,9 +124,9 @@ Tantivy 0.15.0
 - Updated TermMerger implementation to rely on the union feature of the FST (@scampi) #469
 - Add boolean marking whether position is required in the query_terms API call (@fulmicoton). #1070

-
 Tantivy 0.14.0
 =========================
+
 - Remove dependency to atomicwrites #833 .Implemented by @fulmicoton upon suggestion and research from @asafigan).
 - Migrated tantivy error from the now deprecated `failure` crate to `thiserror` #760. (@hirevo)
 - API Change. Accessing the typed value off a `Schema::Value` now returns an Option instead of panicking if the type does not match.
@@ -105,16 +145,19 @@ This version breaks compatibility and requires users to reindex everything.

 Tantivy 0.13.2
 ===================
+
 Bugfix. Acquiring a facet reader on a segment that does not contain any
 doc with this facet returns `None`. (#896)

 Tantivy 0.13.1
 ===================
+
 Made `Query` and `Collector` `Send + Sync`.
 Updated misc dependency versions.

 Tantivy 0.13.0
 ======================
+
 Tantivy 0.13 introduce a change in the index format that will require
 you to reindex your index (BlockWAND information are added in the skiplist).
 The index size increase is minor as this information is only added for
@@ -129,6 +172,7 @@ so that we can discuss possible solutions.
 A freshly created DocSet point directly to their first doc. A sentinel value called TERMINATED marks the end of a DocSet.
 `.advance()` returns the new DocId. `Scorer::skip(target)` has been replaced by `Scorer::seek(target)` and returns the resulting DocId.
 As a result, iterating through DocSet now looks as follows
+
 ```rust
 let mut doc = docset.doc();
 while doc != TERMINATED {
@@ -136,7 +180,9 @@ while doc != TERMINATED {
   doc = docset.advance();
 }
 ```
+
 The change made it possible to greatly simplify a lot of the docset's code.
+
 - Misc internal optimization and introduction of the `Scorer::for_each_pruning` function. (@fulmicoton)
 - Added an offset option to the Top(.*)Collectors. (@robyoung)
 - Added Block WAND. Performance on TOP-K on term-unions should be greatly increased. (@fulmicoton, and special thanks
@@ -144,6 +190,7 @@ to the PISA team for answering all my questions!)

 Tantivy 0.12.0
 ======================
+
 - Removing static dispatch in tokenizers for simplicity. (#762)
 - Added backward iteration for `TermDictionary` stream. (@halvorboe)
 - Fixed a performance issue when searching for the posting lists of a missing term (@audunhalland)
@@ -154,30 +201,32 @@ Tantivy 0.12.0
 ## How to update?

 Crates relying on custom tokenizer, or registering tokenizer in the manager will require some
-minor changes. Check https://github.com/quickwit-oss/tantivy/blob/main/examples/custom_tokenizer.rs
+minor changes. Check <https://github.com/quickwit-oss/tantivy/blob/main/examples/custom_tokenizer.rs>
 to check for some code sample.

 Tantivy 0.11.3
 =======================
+
 - Fixed DateTime as a fast field (#735)

 Tantivy 0.11.2
 =======================
+
 - The future returned by `IndexWriter::merge` does not borrow `self` mutably anymore (#732)
 - Exposing a constructor for `WatchHandle` (#731)

 Tantivy 0.11.1
 =====================
- Bug fix #729

+- Bug fix #729

 Tantivy 0.11.0
 =====================

 - Added f64 field. Internally reuse u64 code the same way i64 does (@fdb-hiroshima)
 - Various bugfixes in the query parser.
-    - Better handling of hyphens in query parser. (#609)
-    - Better handling of whitespaces.
+  - Better handling of hyphens in query parser. (#609)
+  - Better handling of whitespaces.
 - Closes #498 - add support for Elastic-style unbounded range queries for alphanumeric types eg. "title:>hello", "weight:>=70.5", "height:<200" (@petr-tik)
 - API change around `Box<BoxableTokenizer>`. See detail in #629
 - Avoid rebuilding Regex automaton whenever a regex query is reused. #639 (@brainlock)
@@ -208,7 +257,6 @@ Tantivy 0.10.1
 Avoid watching the mmap directory until someone effectively creates a reader that uses
 this functionality.

-
 Tantivy 0.10.0
 =====================

@@ -224,6 +272,7 @@ Tantivy 0.10.0

 Minor
 ---------
+
 - Switched to Rust 2018 (@uvd)
 - Small simplification of the code.
 Calling .freq() or .doc() when .advance() has never been called
@@ -231,8 +280,7 @@ on segment postings should panic from now on.
 - Tokens exceeding `u16::max_value() - 4` chars are discarded silently instead of panicking.
 - Fast fields are now preloaded when the `SegmentReader` is created.
 - `IndexMeta` is now public.  (@hntd187)
- `IndexWriter` `add_document`, `delete_term`. `IndexWriter` is `Sync`, making it possible to use it with a `
-Arc<RwLock<IndexWriter>>`. `add_document` and `delete_term` can
+- `IndexWriter` `add_document`, `delete_term`. `IndexWriter` is `Sync`, making it possible to use it with a `Arc<RwLock<IndexWriter>>`. `add_document` and `delete_term` can
 only require a read lock. (@fulmicoton)
 - Introducing `Opstamp` as an expressive type alias for `u64`. (@petr-tik)
 - Stamper now relies on `AtomicU64` on all platforms (@petr-tik)
@@ -248,16 +296,17 @@ Your program should be usable as is.
 Fast fields used to be accessed directly from the `SegmentReader`.
 The API changed, you are now required to acquire your fast field reader via the
 `segment_reader.fast_fields()`, and use one of the typed method:
+
 - `.u64()`, `.i64()` if your field is single-valued ;
 - `.u64s()`, `.i64s()` if your field is multi-valued ;
 - `.bytes()` if your field is bytes fast field.

-
-
 Tantivy 0.9.0
 =====================
+
 *0.9.0 index format is not compatible with the
 previous index format.*
+
 - MAJOR BUGFIX :
  Some `Mmap` objects were being leaked, and would never get released. (@fulmicoton)
 - Removed most unsafe (@fulmicoton)
@@ -301,37 +350,40 @@ To update from tantivy 0.8, you will need to go through the following steps.

    ```

-
 Tantivy 0.8.2
 =====================
+
 Fixing build for x86_64 platforms. (#496)
 No need to update from 0.8.1 if tantivy
 is building on your platform.

-
 Tantivy 0.8.1
 =====================
+
 Hotfix of #476.

 Merge was reflecting deletes before commit was passed.
 Thanks @barrotsteindev  for reporting the bug.

-
 Tantivy 0.8.0
 =====================
+
 *No change in the index format*
+
 - API Breaking change in the collector API. (@jwolfe, @fulmicoton)
 - Multithreaded search (@jwolfe, @fulmicoton)

-
 Tantivy 0.7.1
 =====================
+
 *No change in the index format*
+
 - Bugfix: NGramTokenizer panics on non ascii chars
 - Added a space usage API

 Tantivy 0.7
 =====================
+
 - Skip data for doc ids and positions (@fulmicoton),
  greatly improving performance
 - Tantivy error now rely on the failure crate (@drusellers)
@@ -341,15 +393,15 @@ Tantivy 0.7

 Tantivy 0.6.1
 =========================
+
 - Bugfix #324. GC removing was removing file that were still in useful
 - Added support for parsing AllQuery and RangeQuery via QueryParser
-    - AllQuery: `*`
-    - RangeQuery:
-        - Inclusive `field:[startIncl to endIncl]`
-        - Exclusive `field:{startExcl to endExcl}`
-        - Mixed `field:[startIncl to endExcl}` and vice versa
-        - Unbounded `field:[start to *]`, `field:[* to end]`
-
+  - AllQuery: `*`
+  - RangeQuery:
+    - Inclusive `field:[startIncl to endIncl]`
+    - Exclusive `field:{startExcl to endExcl}`
+    - Mixed `field:[startIncl to endExcl}` and vice versa
+    - Unbounded `field:[start to *]`, `field:[* to end]`

 Tantivy 0.6
 ==========================
@@ -362,58 +414,53 @@ to this release!
 - Approximate field norms encoded over 1 byte. (@fulmicoton)
 - Compiles on stable rust (@fulmicoton)
 - Add &[u8] fastfield for associating arbitrary bytes to each document (@jason-wolfe) (#270)
-    - Completely uncompressed
-    - Internally: One u64 fast field for indexes, one fast field for the bytes themselves.
+  - Completely uncompressed
+  - Internally: One u64 fast field for indexes, one fast field for the bytes themselves.
 - Add NGram token support (@drusellers)
 - Add Stopword Filter support (@drusellers)
 - Add a FuzzyTermQuery (@drusellers)
 - Add a RegexQuery (@drusellers)
 - Various performance improvements (@fulmicoton)_

-
 Tantivy 0.5.2
 ===========================
+
 - bugfix #274
 - bugfix #280
 - bugfix #289

-
 Tantivy 0.5.1
 ==========================
- bugfix #254 : tantivy failed if no documents in a segment contained a specific field.

+- bugfix #254 : tantivy failed if no documents in a segment contained a specific field.

 Tantivy 0.5
 ==========================
+
 - Faceting
 - RangeQuery
 - Configurable tokenization pipeline
 - Bugfix in PhraseQuery
 - Various query optimisation
 - Allowing very large indexes
-    - 64 bits file address
-    - Smarter encoding of the `TermInfo` objects
-
-
+  - 64 bits file address
+  - Smarter encoding of the `TermInfo` objects

 Tantivy 0.4.3
 ==========================

 - Bugfix race condition when deleting files. (#198)

-
 Tantivy 0.4.2
 ==========================

 - Prevent usage of AVX2 instructions (#201)

-
 Tantivy 0.4.1
 ==========================

 - Bugfix for non-indexed fields. (#199)

-
 Tantivy 0.4.0
 ==========================

@@ -428,37 +475,31 @@ Tantivy 0.4.0
  - Searching for a non-indexed field returns an explicit Error
  - Phrase query for non-tokenized field are not tokenized by the query parser.
 - Faster/Better indexing (@fulmicoton)
-    - using murmurhash2
-    - faster merging
-    - more memory efficient fast field writer (@lnicola )
-    - better handling of collisions
-    - lesser memory usage
+  - using murmurhash2
+  - faster merging
+  - more memory efficient fast field writer (@lnicola )
+  - better handling of collisions
+  - lesser memory usage
 - Added API, most notably to iterate over ranges of terms (@fulmicoton)
 - Bugfix that was preventing to unmap segment files, on index drop (@fulmicoton)
 - Made the doc! macro public (@fulmicoton)
 - Added an alternative implementation of the streaming dictionary (@fulmicoton)

-
-
 Tantivy 0.3.1
 ==========================

 - Expose a method to trigger files garbage collection

-
-
 Tantivy 0.3
 ==========================

-
 Special thanks to @Kodraus @lnicola @Ameobea @manuel-woelker @celaus
 for their contribution to this release.

 Thanks also to everyone in tantivy gitter chat
 for their advise and company :)

-https://gitter.im/tantivy-search/tantivy
-
+<https://gitter.im/tantivy-search/tantivy>

 Warning:

@@ -467,19 +508,16 @@ code and index format.
 You should not expect backward compatibility before
 tantivy 1.0.

-
-
 New Features
 ------------

 - Delete. You can now delete documents from an index.
 - Support for windows (Thanks to @lnicola)

-
 Various Bugfixes & small improvements
 ----------------------------------------

- Added CI for Windows (https://ci.appveyor.com/project/fulmicoton/tantivy)
+- Added CI for Windows (<https://ci.appveyor.com/project/fulmicoton/tantivy>)
 Thanks to @KodrAus ! (#108)
 - Various dependy version update (Thanks to @Ameobea) #76
 - Fixed several race conditions in `Index.wait_merge_threads`
@@ -491,7 +529,3 @@ Thanks to @KodrAus ! (#108)
 - Building binary targets for tantivy-cli (Thanks to @KodrAus)
 - Misc invisible bug fixes, and code cleanup.
 - Use
-
-
-
-
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.18.0"
+version = "0.19.0"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
@@ -11,40 +11,37 @@ repository = "https://github.com/quickwit-oss/tantivy"
 readme = "README.md"
 keywords = ["search", "information", "retrieval"]
 edition = "2021"
+rust-version = "1.62"

 [dependencies]
-oneshot = "0.1.3"
-base64 = "0.13.0"
+oneshot = "0.1.5"
+base64 = "0.20.0"
 byteorder = "1.4.3"
 crc32fast = "1.3.2"
 once_cell = "1.10.0"
 regex = { version = "1.5.5", default-features = false, features = ["std", "unicode"] }
-tantivy-fst = "0.3.0"
+aho-corasick = "0.7"
+tantivy-fst = "0.4.0"
 memmap2 = { version = "0.5.3", optional = true }
 lz4_flex = { version = "0.9.2", default-features = false, features = ["checked-decode"], optional = true }
 brotli = { version = "3.3.4", optional = true }
-zstd = { version = "0.11", optional = true }
+zstd = { version = "0.12", optional = true, default-features = false }
 snap = { version = "1.0.5", optional = true }
 tempfile = { version = "3.3.0", optional = true }
 log = "0.4.16"
 serde = { version = "1.0.136", features = ["derive"] }
 serde_json = "1.0.79"
 num_cpus = "1.13.1"
-fs2={ version = "0.4.3", optional = true }
+fs2 = { version = "0.4.3", optional = true }
 levenshtein_automata = "0.2.1"
 uuid = { version = "1.0.0", features = ["v4", "serde"] }
 crossbeam-channel = "0.5.4"
-tantivy-query-grammar = { version="0.18.0", path="./query-grammar" }
-tantivy-bitpacker = { version="0.2", path="./bitpacker" }
-common = { version = "0.3", path = "./common/", package = "tantivy-common" }
-fastfield_codecs = { version="0.2", path="./fastfield_codecs", default-features = false }
-ownedbytes = { version="0.3", path="./ownedbytes" }
 stable_deref_trait = "1.2.0"
 rust-stemmers = "1.2.0"
 downcast-rs = "1.2.0"
 bitpacking = { version = "0.8.4", default-features = false, features = ["bitpacker4x"] }
 census = "0.4.0"
-fnv = "1.0.7"
+rustc-hash = "1.1.0"
 thiserror = "1.0.30"
 htmlescape = "0.3.1"
 fail = "0.5.0"
@@ -56,11 +53,16 @@ lru = "0.7.5"
 fastdivide = "0.4.0"
 itertools = "0.10.3"
 measure_time = "0.8.2"
-pretty_assertions = "1.2.1"
-serde_cbor = { version = "0.11.2", optional = true }
 async-trait = "0.1.53"
 arc-swap = "1.5.0"
-gcd = "2.1.0"
+
+sstable = { version="0.1", path="./sstable", package ="tantivy-sstable", optional = true }
+stacker = { version="0.1", path="./stacker", package ="tantivy-stacker" }
+tantivy-query-grammar = { version= "0.19.0", path="./query-grammar" }
+tantivy-bitpacker = 		{ version= "0.3", path="./bitpacker" }
+common = 								{ version= "0.5", path = "./common/", package = "tantivy-common" }
+fastfield_codecs = 			{ version= "0.3", path="./fastfield_codecs", default-features = false }
+ownedbytes = 						{ version= "0.5", path="./ownedbytes" }

 [target.'cfg(windows)'.dependencies]
 winapi = "0.3.9"
@@ -69,11 +71,12 @@ winapi = "0.3.9"
 rand = "0.8.5"
 maplit = "1.0.2"
 matches = "0.1.9"
+pretty_assertions = "1.2.1"
 proptest = "1.0.0"
-criterion = "0.3.5"
+criterion = "0.4"
 test-log = "0.2.10"
-env_logger = "0.9.0"
-pprof = { version = "0.10.0", features = ["flamegraph", "criterion"] }
+env_logger = "0.10.0"
+pprof = { version = "0.11.0", features = ["flamegraph", "criterion"] }
 futures = "0.3.21"

 [dev-dependencies.fail]
@@ -90,8 +93,9 @@ debug-assertions = true
 overflow-checks = true

 [features]
-default = ["mmap", "lz4-compression" ]
+default = ["mmap", "stopwords", "lz4-compression"]
 mmap = ["fs2", "tempfile", "memmap2"]
+stopwords = []

 brotli-compression = ["brotli"]
 lz4-compression = ["lz4_flex"]
@@ -101,10 +105,10 @@ zstd-compression = ["zstd"]
 failpoints = ["fail/failpoints"]
 unstable = [] # useful for benches.

-quickwit = ["serde_cbor"]
+quickwit = ["sstable"]

 [workspace]
-members = ["query-grammar", "bitpacker", "common", "fastfield_codecs", "ownedbytes"]
+members = ["query-grammar", "bitpacker", "common", "fastfield_codecs", "ownedbytes", "stacker", "sstable", "columnar"]

 # Following the "fail" crate best practises, we isolate
 # tests that define specific behavior in fail check points
--- a/README.md
+++ b/README.md
@@ -5,7 +5,6 @@
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 [![Crates.io](https://img.shields.io/crates/v/tantivy.svg)](https://crates.io/crates/tantivy)

-
 ![Tantivy](https://tantivy-search.github.io/logo/tantivy-logo.png)

 **Tantivy** is a **full-text search engine library** written in Rust.
@@ -16,7 +15,7 @@ to build such a search engine.

 Tantivy is, in fact, strongly inspired by Lucene's design.

-If you are looking for an alternative to Elasticsearch or Apache Solr, check out [Quickwit](https://github.com/quickwit-oss/quickwit), our search engine built on top of Tantivy. 
+If you are looking for an alternative to Elasticsearch or Apache Solr, check out [Quickwit](https://github.com/quickwit-oss/quickwit), our search engine built on top of Tantivy.

 # Benchmark

@@ -57,10 +56,9 @@ Your mileage WILL vary depending on the nature of queries and their load.

 Distributed search is out of the scope of Tantivy, but if you are looking for this feature, check out [Quickwit](https://github.com/quickwit-oss/quickwit/).

-
 # Getting started

-Tantivy works on stable Rust (>= 1.27) and supports Linux, macOS, and Windows.
+Tantivy works on stable Rust and supports Linux, macOS, and Windows.

 - [Tantivy's simple search example](https://tantivy-search.github.io/examples/basic_search.html)
 - [tantivy-cli and its tutorial](https://github.com/quickwit-oss/tantivy-cli) - `tantivy-cli` is an actual command-line interface that makes it easy for you to create a search engine,
@@ -83,9 +81,13 @@ There are many ways to support this project.

 We use the GitHub Pull Request workflow: reference a GitHub ticket and/or include a comprehensive commit message when opening a PR.

+## Minimum supported Rust version
+
+Tantivy currently requires at least Rust 1.62 or later to compile.
+
 ## Clone and build locally

-Tantivy compiles on stable Rust but requires `Rust >= 1.27`.
+Tantivy compiles on stable Rust.
 To check out and run tests, you can simply run:

 ```bash
@@ -125,20 +127,23 @@ By default, `rustc` compiles everything in the `examples/` directory in debug mo
 rust-gdb target/debug/examples/$EXAMPLE_NAME
 $ gdb run
 ```
-# Companies Using Tantivy 
+
+# Companies Using Tantivy

 <p align="left">
+<img align="center" src="doc/assets/images/etsy.png" alt="Etsy" height="25" width="auto" />&nbsp;
 <img align="center" src="doc/assets/images/Nuclia.png#gh-light-mode-only" alt="Nuclia" height="25" width="auto" /> &nbsp;
 <img align="center" src="doc/assets/images/humanfirst.png#gh-light-mode-only" alt="Humanfirst.ai" height="30" width="auto" />
 <img align="center" src="doc/assets/images/element.io.svg#gh-light-mode-only" alt="Element.io" height="25" width="auto" />
 <img align="center" src="doc/assets/images/nuclia-dark-theme.png#gh-dark-mode-only" alt="Nuclia" height="35" width="auto" /> &nbsp;
 <img align="center" src="doc/assets/images/humanfirst.ai-dark-theme.png#gh-dark-mode-only" alt="Humanfirst.ai" height="25" width="auto" />&nbsp; &nbsp;
 <img align="center" src="doc/assets/images/element-dark-theme.png#gh-dark-mode-only" alt="Element.io" height="25" width="auto" />
-</p> 
-
+</p>

 # FAQ
+
 ### Can I use Tantivy in other languages?
+
 - Python → [tantivy-py](https://github.com/quickwit-oss/tantivy-py)
 - Ruby → [tantiny](https://github.com/baygeldin/tantiny)

@@ -152,13 +157,17 @@ You can also find other bindings on [GitHub](https://github.com/search?q=tantivy
 - and [more](https://github.com/search?q=tantivy)!

 ### On average, how much faster is Tantivy compared to Lucene?
+
 - According to our [search latency benchmark](https://tantivy-search.github.io/bench/), Tantivy is approximately 2x faster than Lucene.

 ### Does tantivy support incremental indexing?
+
 - Yes.

 ### How can I edit documents?
+
 - Data in tantivy is immutable. To edit a document, the document needs to be deleted and reindexed.

 ### When will my documents be searchable during indexing?
+
 - Documents will be searchable after a `commit` is called on an `IndexWriter`. Existing `IndexReader`s will also need to be reloaded in order to reflect the changes. Finally, changes are only visible to newly acquired `Searcher`.
--- a/bitpacker/Cargo.toml
+++ b/bitpacker/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy-bitpacker"
-version = "0.2.0"
+version = "0.3.0"
 edition = "2021"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
@@ -8,6 +8,8 @@ categories = []
 description = """Tantivy-sub crate: bitpacking"""
 repository = "https://github.com/quickwit-oss/tantivy"
 keywords = []
+documentation = "https://docs.rs/tantivy-bitpacker/latest/tantivy_bitpacker"
+homepage = "https://github.com/quickwit-oss/tantivy"


 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
--- a/bitpacker/src/bitpacker.rs
+++ b/bitpacker/src/bitpacker.rs
@@ -25,15 +25,14 @@ impl BitPacker {
        num_bits: u8,
        output: &mut TWrite,
    ) -> io::Result<()> {
-        let val_u64 = val as u64;
        let num_bits = num_bits as usize;
        if self.mini_buffer_written + num_bits > 64 {
-            self.mini_buffer |= val_u64.wrapping_shl(self.mini_buffer_written as u32);
+            self.mini_buffer |= val.wrapping_shl(self.mini_buffer_written as u32);
            output.write_all(self.mini_buffer.to_le_bytes().as_ref())?;
-            self.mini_buffer = val_u64.wrapping_shr((64 - self.mini_buffer_written) as u32);
+            self.mini_buffer = val.wrapping_shr((64 - self.mini_buffer_written) as u32);
            self.mini_buffer_written = self.mini_buffer_written + num_bits - 64;
        } else {
-            self.mini_buffer |= val_u64 << self.mini_buffer_written;
+            self.mini_buffer |= val << self.mini_buffer_written;
            self.mini_buffer_written += num_bits;
            if self.mini_buffer_written == 64 {
                output.write_all(self.mini_buffer.to_le_bytes().as_ref())?;
@@ -82,26 +81,28 @@ impl BitUnpacker {
        }
    }

+    pub fn bit_width(&self) -> u8 {
+        self.num_bits as u8
+    }
+
    #[inline]
-    pub fn get(&self, idx: u64, data: &[u8]) -> u64 {
+    pub fn get(&self, idx: u32, data: &[u8]) -> u64 {
        if self.num_bits == 0 {
            return 0u64;
        }
-        let num_bits = self.num_bits;
-        let mask = self.mask;
-        let addr_in_bits = idx * num_bits;
+        let addr_in_bits = idx * self.num_bits as u32;
        let addr = addr_in_bits >> 3;
        let bit_shift = addr_in_bits & 7;
        debug_assert!(
-            addr + 8 <= data.len() as u64,
+            addr + 8 <= data.len() as u32,
            "The fast field field should have been padded with 7 bytes."
        );
        let bytes: [u8; 8] = (&data[(addr as usize)..(addr as usize) + 8])
            .try_into()
            .unwrap();
        let val_unshifted_unmasked: u64 = u64::from_le_bytes(bytes);
-        let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
-        val_shifted & mask
+        let val_shifted: u64 = val_unshifted_unmasked >> bit_shift;
+        val_shifted & self.mask
    }
 }

@@ -128,7 +129,7 @@ mod test {
    fn test_bitpacker_util(len: usize, num_bits: u8) {
        let (bitunpacker, vals, data) = create_fastfield_bitpacker(len, num_bits);
        for (i, val) in vals.iter().enumerate() {
-            assert_eq!(bitunpacker.get(i as u64, &data), *val);
+            assert_eq!(bitunpacker.get(i as u32, &data), *val);
        }
    }

--- a/bitpacker/src/blocked_bitpacker.rs
+++ b/bitpacker/src/blocked_bitpacker.rs
@@ -58,6 +58,10 @@ fn metadata_test() {
    assert_eq!(meta.num_bits(), 6);
 }

+fn mem_usage<T>(items: &Vec<T>) -> usize {
+    items.capacity() * std::mem::size_of::<T>()
+}
+
 impl BlockedBitpacker {
    pub fn new() -> Self {
        let mut compressed_blocks = vec![];
@@ -73,16 +77,14 @@ impl BlockedBitpacker {
    pub fn mem_usage(&self) -> usize {
        std::mem::size_of::<BlockedBitpacker>()
            + self.compressed_blocks.capacity()
-            + self.offset_and_bits.capacity()
-                * std::mem::size_of_val(&self.offset_and_bits.get(0).cloned().unwrap_or_default())
-            + self.buffer.capacity()
-                * std::mem::size_of_val(&self.buffer.get(0).cloned().unwrap_or_default())
+            + mem_usage(&self.offset_and_bits)
+            + mem_usage(&self.buffer)
    }

    #[inline]
    pub fn add(&mut self, val: u64) {
        self.buffer.push(val);
-        if self.buffer.len() == BLOCK_SIZE as usize {
+        if self.buffer.len() == BLOCK_SIZE {
            self.flush();
        }
    }
@@ -124,11 +126,11 @@ impl BlockedBitpacker {
    }
    #[inline]
    pub fn get(&self, idx: usize) -> u64 {
-        let metadata_pos = idx / BLOCK_SIZE as usize;
-        let pos_in_block = idx % BLOCK_SIZE as usize;
+        let metadata_pos = idx / BLOCK_SIZE;
+        let pos_in_block = idx % BLOCK_SIZE;
        if let Some(metadata) = self.offset_and_bits.get(metadata_pos) {
            let unpacked = BitUnpacker::new(metadata.num_bits()).get(
-                pos_in_block as u64,
+                pos_in_block as u32,
                &self.compressed_blocks[metadata.offset() as usize..],
            );
            unpacked + metadata.base_value()
--- a/columnar/Cargo.toml
+++ b/columnar/Cargo.toml
@@ -0,0 +1,26 @@
+[package]
+name = "tantivy-columnar"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+stacker = { path = "../stacker", package="tantivy-stacker"}
+serde_json = "1"
+thiserror = "1"
+fnv = "1"
+tantivy-fst =  "0.4.0"
+sstable = { path = "../sstable", package = "tantivy-sstable" }
+common = { path = "../common", package = "tantivy-common" }
+fastfield_codecs = { path = "../fastfield_codecs"}
+ordered-float = "3.4"
+itertools = "0.10"
+
+[features]
+# default = ["quickwit"]
+# quickwit = ["common/quickwit"]
+
+
+
+
+[dev-dependencies]
+proptest = "1"
--- a/columnar/README.md
+++ b/columnar/README.md
@@ -0,0 +1,33 @@
+# Columnar format
+
+This crate describes columnar format used in tantivy.
+
+
+## Goals
+
+This format is special in the following way.
+- it needs to be compact
+- it does not required to be loaded in memory.
+- it is designed to fit well with quickwit's strange constraint:
+we need to be able to load columns rapidly.
+- columns of several types can be associated with the same column name.
+- it needs to support columns with different types `(str, u64, i64, f64)`
+and different cardinality `(required, optional, multivalued)`.
+- columns, once loaded, offer cheap random access.
+
+# Format
+
+A quickwit/tantivy style sstable associated
+`(column names, column_cardinality, column_type) to range of bytes.
+
+The format of the key is:
+`[column_name][ZERO_BYTE][column_type_header: u8]`
+
+Column name may not contain the zero byte.
+
+Listing all columns associated to `column_name` can therefore
+be done by listing all keys prefixed by
+`[column_name][ZERO_BYTE]`
+
+The associated range of bytes refer to a range of bytes
+
--- a/columnar/src/column_type_header.rs
+++ b/columnar/src/column_type_header.rs
@@ -0,0 +1,154 @@
+use crate::value::NumericalType;
+
+#[derive(Clone, Copy, Hash, Default, Debug, PartialEq, Eq, PartialOrd, Ord)]
+#[repr(u8)]
+pub enum Cardinality {
+    #[default]
+    Required = 0,
+    Optional = 1,
+    Multivalued = 2,
+}
+
+impl Cardinality {
+    pub fn to_code(self) -> u8 {
+        self as u8
+    }
+
+    pub fn try_from_code(code: u8) -> Option<Cardinality> {
+        match code {
+            0 => Some(Cardinality::Required),
+            1 => Some(Cardinality::Optional),
+            2 => Some(Cardinality::Multivalued),
+            _ => None,
+        }
+    }
+}
+
+#[derive(Hash, Eq, PartialEq, Debug, Clone, Copy)]
+pub enum ColumnType {
+    Bytes,
+    Numerical(NumericalType),
+}
+
+impl ColumnType {
+    pub fn to_code(self) -> u8 {
+        match self {
+            ColumnType::Bytes => 0u8,
+            ColumnType::Numerical(numerical_type) => 1u8 | (numerical_type.to_code() << 1),
+        }
+    }
+
+    pub fn try_from_code(code: u8) -> Option<ColumnType> {
+        if code == 0u8 {
+            return Some(ColumnType::Bytes);
+        }
+        if code & 1u8 == 0u8 {
+            return None;
+        }
+        let numerical_type = NumericalType::try_from_code(code >> 1)?;
+        Some(ColumnType::Numerical(numerical_type))
+    }
+}
+
+/// Represents the type and cardinality of a column.
+/// This is encoded over one-byte and added to a column key in the
+/// columnar sstable.
+///
+/// Cardinality is encoded as the first two highest two bits.
+/// The low 6 bits encode the column type.
+#[derive(Eq, Hash, PartialEq, Debug, Copy, Clone)]
+pub struct ColumnTypeAndCardinality {
+    pub cardinality: Cardinality,
+    pub typ: ColumnType,
+}
+
+#[inline]
+const fn compute_mask(num_bits: u8) -> u8 {
+    if num_bits == 8 {
+        u8::MAX
+    } else {
+        (1u8 << num_bits) - 1
+    }
+}
+
+#[inline]
+fn select_bits<const START: u8, const END: u8>(code: u8) -> u8 {
+    assert!(START <= END);
+    assert!(END <= 8);
+    let num_bits: u8 = END - START;
+    let mask: u8 = compute_mask(num_bits);
+    (code >> START) & mask
+}
+
+#[inline]
+fn place_bits<const START: u8, const END: u8>(code: u8) -> u8 {
+    assert!(START <= END);
+    assert!(END <= 8);
+    let num_bits: u8 = END - START;
+    let mask: u8 = compute_mask(num_bits);
+    assert!(code <= mask);
+    code << START
+}
+
+impl ColumnTypeAndCardinality {
+    pub fn to_code(self) -> u8 {
+        place_bits::<6, 8>(self.cardinality.to_code()) | place_bits::<0, 6>(self.typ.to_code())
+    }
+
+    pub fn try_from_code(code: u8) -> Option<ColumnTypeAndCardinality> {
+        let typ_code = select_bits::<0, 6>(code);
+        let cardinality_code = select_bits::<6, 8>(code);
+        let cardinality = Cardinality::try_from_code(cardinality_code)?;
+        let typ = ColumnType::try_from_code(typ_code)?;
+        assert_eq!(typ.to_code(), typ_code);
+        Some(ColumnTypeAndCardinality { cardinality, typ })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashSet;
+
+    use super::ColumnTypeAndCardinality;
+    use crate::column_type_header::{Cardinality, ColumnType};
+
+    #[test]
+    fn test_column_type_header_to_code() {
+        let mut column_type_header_set: HashSet<ColumnTypeAndCardinality> = HashSet::new();
+        for code in u8::MIN..=u8::MAX {
+            if let Some(column_type_header) = ColumnTypeAndCardinality::try_from_code(code) {
+                assert_eq!(column_type_header.to_code(), code);
+                assert!(column_type_header_set.insert(column_type_header));
+            }
+        }
+        assert_eq!(
+            column_type_header_set.len(),
+            3 /* cardinality */ * (1 + 3) // column_types
+        );
+    }
+
+    #[test]
+    fn test_column_type_to_code() {
+        let mut column_type_set: HashSet<ColumnType> = HashSet::new();
+        for code in u8::MIN..=u8::MAX {
+            if let Some(column_type) = ColumnType::try_from_code(code) {
+                assert_eq!(column_type.to_code(), code);
+                assert!(column_type_set.insert(column_type));
+            }
+        }
+        assert_eq!(column_type_set.len(), 1 + 3);
+    }
+
+    #[test]
+    fn test_cardinality_to_code() {
+        let mut num_cardinality = 0;
+        for code in u8::MIN..=u8::MAX {
+            let cardinality_opt = Cardinality::try_from_code(code);
+            if let Some(cardinality) = cardinality_opt {
+                assert_eq!(cardinality.to_code(), code);
+                num_cardinality += 1;
+            }
+        }
+        assert_eq!(num_cardinality, 3);
+    }
+}
--- a/columnar/src/dictionary.rs
+++ b/columnar/src/dictionary.rs
@@ -0,0 +1,78 @@
+use std::io;
+
+use fnv::FnvHashMap;
+
+fn fst_err_into_io_err(fst_err: tantivy_fst::Error) -> io::Error {
+    match fst_err {
+        tantivy_fst::Error::Fst(fst_err) => {
+            io::Error::new(io::ErrorKind::Other, format!("FST Error: {:?}", fst_err))
+        }
+        tantivy_fst::Error::Io(io_err) => io_err,
+    }
+}
+
+/// `DictionaryBuilder` for dictionary encoding.
+///
+/// It stores the different terms encounterred and assigns them a temporary value
+/// we call unordered id.
+///
+/// Upon serialization, we will sort the ids and hence build a `UnorderedId -> Term ordinal`
+/// mapping.
+#[derive(Default)]
+pub struct DictionaryBuilder {
+    dict: FnvHashMap<Vec<u8>, UnorderedId>,
+}
+
+pub struct IdMapping {
+    unordered_to_ord: Vec<OrderedId>,
+}
+
+impl IdMapping {
+    pub fn to_ord(&self, unordered: UnorderedId) -> OrderedId {
+        self.unordered_to_ord[unordered.0 as usize]
+    }
+}
+
+impl DictionaryBuilder {
+    /// Get or allocate an unordered id.
+    /// (This ID is simply an auto-incremented id.)
+    pub fn get_or_allocate_id(&mut self, term: &[u8]) -> UnorderedId {
+        if let Some(term_id) = self.dict.get(term) {
+            return *term_id;
+        }
+        let new_id = UnorderedId(self.dict.len() as u32);
+        self.dict.insert(term.to_vec(), new_id);
+        new_id
+    }
+
+    /// Serialize the dictionary into an fst, and returns the
+    /// `UnorderedId -> TermOrdinal` map.
+    pub fn serialize<'a, W: io::Write + 'a>(&self, wrt: &mut W) -> io::Result<IdMapping> {
+        serialize_inner(&self.dict, wrt).map_err(fst_err_into_io_err)
+    }
+}
+
+/// Helper function just there for error conversion.
+fn serialize_inner<'a, W: io::Write + 'a>(
+    dict: &FnvHashMap<Vec<u8>, UnorderedId>,
+    wrt: &mut W,
+) -> tantivy_fst::Result<IdMapping> {
+    let mut terms: Vec<(&[u8], UnorderedId)> =
+        dict.iter().map(|(k, v)| (k.as_slice(), *v)).collect();
+    terms.sort_unstable_by_key(|(key, _)| *key);
+    let mut unordered_to_ord: Vec<OrderedId> = vec![OrderedId(0u32); terms.len()];
+    let mut fst_builder = tantivy_fst::MapBuilder::new(wrt)?;
+    for (ord, (key, unordered_id)) in terms.into_iter().enumerate() {
+        let ordered_id = OrderedId(ord as u32);
+        fst_builder.insert(key, ord as u64)?;
+        unordered_to_ord[unordered_id.0 as usize] = ordered_id;
+    }
+    fst_builder.finish()?;
+    Ok(IdMapping { unordered_to_ord })
+}
+
+#[derive(Clone, Copy, Debug)]
+pub struct UnorderedId(pub u32);
+
+#[derive(Clone, Copy)]
+pub struct OrderedId(pub u32);
--- a/columnar/src/lib.rs
+++ b/columnar/src/lib.rs
@@ -0,0 +1,69 @@
+// Copyright (C) 2022 Quickwit, Inc.
+//
+// Quickwit is offered under the AGPL v3.0 and as commercial software.
+// For commercial licensing, contact us at hello@quickwit.io.
+//
+// AGPL:
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+mod column_type_header;
+mod dictionary;
+mod reader;
+mod serializer;
+mod value;
+mod writer;
+
+pub use column_type_header::Cardinality;
+pub use reader::ColumnarReader;
+pub use serializer::ColumnarSerializer;
+pub use writer::ColumnarWriter;
+
+pub type DocId = u32;
+
+#[cfg(test)]
+mod tests {
+    use std::ops::Range;
+
+    use common::file_slice::FileSlice;
+
+    use crate::column_type_header::ColumnTypeAndCardinality;
+    use crate::reader::ColumnarReader;
+    use crate::serializer::ColumnarSerializer;
+    use crate::value::NumericalValue;
+    use crate::ColumnarWriter;
+
+    #[test]
+    fn test_dataframe_writer() {
+        let mut dataframe_writer = ColumnarWriter::default();
+        dataframe_writer.record_numerical(1u32, b"srical.value", NumericalValue::U64(1u64));
+        dataframe_writer.record_numerical(2u32, b"srical.value", NumericalValue::U64(2u64));
+        dataframe_writer.record_numerical(4u32, b"srical.value", NumericalValue::I64(2i64));
+        let mut buffer: Vec<u8> = Vec::new();
+        let serializer = ColumnarSerializer::new(&mut buffer);
+        dataframe_writer.serialize(5, serializer).unwrap();
+        let columnar_fileslice = FileSlice::from(buffer);
+        let columnar = ColumnarReader::open(columnar_fileslice).unwrap();
+        assert_eq!(columnar.num_columns(), 1);
+        let cols: Vec<(ColumnTypeAndCardinality, Range<u64>)> =
+            columnar.read_columns("srical.value").unwrap();
+        assert_eq!(cols.len(), 1);
+        // Right now this 31 bytes are spent as follows
+        //
+        // - header 14 bytes
+        // - vals  8 //< due to padding? could have been 1byte?.
+        // - null footer 6 bytes
+        // - version footer 3 bytes // Should be file-wide
+        assert_eq!(cols[0].1, 0..31);
+    }
+}
--- a/columnar/src/reader/mod.rs
+++ b/columnar/src/reader/mod.rs
@@ -0,0 +1,66 @@
+use std::ops::Range;
+use std::{io, mem};
+
+use common::file_slice::FileSlice;
+use common::BinarySerializable;
+use sstable::{Dictionary, SSTableRange};
+
+use crate::column_type_header::ColumnTypeAndCardinality;
+
+fn io_invalid_data(msg: String) -> io::Error {
+    io::Error::new(io::ErrorKind::InvalidData, msg) // format!("Invalid key found.
+                                                    // {key_bytes:?}")));
+}
+pub struct ColumnarReader {
+    column_dictionary: Dictionary<SSTableRange>,
+    column_data: FileSlice,
+}
+
+impl ColumnarReader {
+    pub fn num_columns(&self) -> usize {
+        self.column_dictionary.num_terms()
+    }
+
+    pub fn open(file_slice: FileSlice) -> io::Result<ColumnarReader> {
+        let (file_slice_without_sstable_len, sstable_len_bytes) =
+            file_slice.split_from_end(mem::size_of::<u64>());
+        let mut sstable_len_bytes = sstable_len_bytes.read_bytes()?;
+        let sstable_len = u64::deserialize(&mut sstable_len_bytes)?;
+        let (column_data, sstable) =
+            file_slice_without_sstable_len.split_from_end(sstable_len as usize);
+        let column_dictionary = Dictionary::open(sstable)?;
+        Ok(ColumnarReader {
+            column_dictionary,
+            column_data,
+        })
+    }
+
+    pub fn read_columns(
+        &self,
+        field_name: &str,
+    ) -> io::Result<Vec<(ColumnTypeAndCardinality, Range<u64>)>> {
+        let mut start_key = field_name.to_string();
+        start_key.push('\0');
+        let mut end_key = field_name.to_string();
+        end_key.push(1u8 as char);
+        let mut stream = self
+            .column_dictionary
+            .range()
+            .ge(start_key.as_bytes())
+            .lt(end_key.as_bytes())
+            .into_stream()?;
+        let mut results = Vec::new();
+        while stream.advance() {
+            let key_bytes: &[u8] = stream.key();
+            if !key_bytes.starts_with(start_key.as_bytes()) {
+                return Err(io_invalid_data(format!("Invalid key found. {key_bytes:?}")));
+            }
+            let column_code: u8 = key_bytes.last().cloned().unwrap();
+            let column_type_and_cardinality = ColumnTypeAndCardinality::try_from_code(column_code)
+                .ok_or_else(|| io_invalid_data(format!("Unknown column code `{column_code}`")))?;
+            let range = stream.value().clone();
+            results.push((column_type_and_cardinality, range));
+        }
+        Ok(results)
+    }
+}
--- a/columnar/src/serializer.rs
+++ b/columnar/src/serializer.rs
@@ -0,0 +1,39 @@
+use std::io;
+use std::io::Write;
+use std::ops::Range;
+
+use common::CountingWriter;
+use sstable::value::RangeWriter;
+use sstable::SSTableRange;
+
+pub struct ColumnarSerializer<W: io::Write> {
+    wrt: CountingWriter<W>,
+    sstable_range: sstable::Writer<Vec<u8>, RangeWriter>,
+}
+
+impl<W: io::Write> ColumnarSerializer<W> {
+    pub fn new(wrt: W) -> ColumnarSerializer<W> {
+        let sstable_range: sstable::Writer<Vec<u8>, RangeWriter> =
+            sstable::Dictionary::<SSTableRange>::builder(Vec::with_capacity(100_000)).unwrap();
+        ColumnarSerializer {
+            wrt: CountingWriter::wrap(wrt),
+            sstable_range,
+        }
+    }
+
+    pub fn record_column_offsets(&mut self, key: &[u8], byte_range: Range<u64>) -> io::Result<()> {
+        self.sstable_range.insert(key, &byte_range)
+    }
+
+    pub fn wrt(&mut self) -> &mut CountingWriter<W> {
+        &mut self.wrt
+    }
+
+    pub fn finalize(mut self) -> io::Result<()> {
+        let sstable_bytes: Vec<u8> = self.sstable_range.finish()?;
+        let sstable_num_bytes: u64 = sstable_bytes.len() as u64;
+        self.wrt.write_all(&sstable_bytes)?;
+        self.wrt.write_all(&sstable_num_bytes.to_le_bytes()[..])?;
+        Ok(())
+    }
+}
--- a/columnar/src/value.rs
+++ b/columnar/src/value.rs
@@ -0,0 +1,123 @@
+use ordered_float::NotNan;
+
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub enum NumericalValue {
+    I64(i64),
+    U64(u64),
+    F64(NotNan<f64>),
+}
+
+impl From<u64> for NumericalValue {
+    fn from(val: u64) -> NumericalValue {
+        NumericalValue::U64(val)
+    }
+}
+
+impl From<i64> for NumericalValue {
+    fn from(val: i64) -> Self {
+        NumericalValue::I64(val)
+    }
+}
+
+impl From<NotNan<f64>> for NumericalValue {
+    fn from(val: NotNan<f64>) -> Self {
+        NumericalValue::F64(val)
+    }
+}
+
+impl NumericalValue {
+    pub fn numerical_type(&self) -> NumericalType {
+        match self {
+            NumericalValue::F64(_) => NumericalType::F64,
+            NumericalValue::I64(_) => NumericalType::I64,
+            NumericalValue::U64(_) => NumericalType::U64,
+        }
+    }
+}
+
+impl Eq for NumericalValue {}
+
+#[derive(Clone, Copy, Debug, Default, Hash, Eq, PartialEq)]
+#[repr(u8)]
+pub enum NumericalType {
+    #[default]
+    I64 = 0,
+    U64 = 1,
+    F64 = 2,
+}
+
+impl NumericalType {
+    pub fn to_code(self) -> u8 {
+        self as u8
+    }
+
+    pub fn try_from_code(code: u8) -> Option<NumericalType> {
+        match code {
+            0 => Some(NumericalType::I64),
+            1 => Some(NumericalType::U64),
+            2 => Some(NumericalType::F64),
+            _ => None,
+        }
+    }
+}
+
+/// We voluntarily avoid using `Into` here to keep this
+/// implementation quirk as private as possible.
+///
+/// This coercion trait actually panics if it is used
+/// to convert a loose types to a stricter type.
+///
+/// The level is strictness is somewhat arbitrary.
+/// - i64
+/// - u64
+/// - f64.
+pub(crate) trait Coerce {
+    fn coerce(numerical_value: NumericalValue) -> Self;
+}
+
+impl Coerce for i64 {
+    fn coerce(value: NumericalValue) -> Self {
+        match value {
+            NumericalValue::I64(val) => val,
+            NumericalValue::U64(val) => val as i64,
+            NumericalValue::F64(_) => unreachable!(),
+        }
+    }
+}
+
+impl Coerce for u64 {
+    fn coerce(value: NumericalValue) -> Self {
+        match value {
+            NumericalValue::I64(val) => val as u64,
+            NumericalValue::U64(val) => val,
+            NumericalValue::F64(_) => unreachable!(),
+        }
+    }
+}
+
+impl Coerce for NotNan<f64> {
+    fn coerce(value: NumericalValue) -> Self {
+        match value {
+            NumericalValue::I64(val) => unsafe { NotNan::new_unchecked(val as f64) },
+            NumericalValue::U64(val) => unsafe { NotNan::new_unchecked(val as f64) },
+            NumericalValue::F64(val) => val,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::NumericalType;
+
+    #[test]
+    fn test_numerical_type_code() {
+        let mut num_numerical_type = 0;
+        for code in u8::MIN..=u8::MAX {
+            if let Some(numerical_type) = NumericalType::try_from_code(code) {
+                assert_eq!(numerical_type.to_code(), code);
+                num_numerical_type += 1;
+            }
+        }
+        assert_eq!(num_numerical_type, 3);
+    }
+}
--- a/columnar/src/writer/column_operation.rs
+++ b/columnar/src/writer/column_operation.rs
@@ -0,0 +1,321 @@
+use std::fmt;
+use std::num::NonZeroU8;
+
+use ordered_float::NotNan;
+use thiserror::Error;
+
+use crate::dictionary::UnorderedId;
+use crate::value::NumericalValue;
+use crate::DocId;
+
+/// When we build a columnar dataframe, we first just group
+/// all mutations per column, and append them in append-only object.
+///
+/// We represents all of these operations as `ColumnOperation`.
+#[derive(Eq, PartialEq, Debug, Clone, Copy)]
+pub(crate) enum ColumnOperation<T> {
+    NewDoc(DocId),
+    Value(T),
+}
+
+impl<T> From<T> for ColumnOperation<T> {
+    fn from(value: T) -> Self {
+        ColumnOperation::Value(value)
+    }
+}
+
+#[allow(clippy::from_over_into)]
+pub(crate) trait SymbolValue: Into<MiniBuffer> + Clone + Copy + fmt::Debug {
+    fn deserialize(header: NonZeroU8, bytes: &mut &[u8]) -> Result<Self, ParseError>;
+}
+
+pub(crate) struct MiniBuffer {
+    pub bytes: [u8; 9],
+    pub len: usize,
+}
+
+impl MiniBuffer {
+    pub fn as_slice(&self) -> &[u8] {
+        &self.bytes[..self.len]
+    }
+}
+
+fn compute_header_byte(typ: SymbolType, len: usize) -> u8 {
+    assert!(len <= 9);
+    (len << 4) as u8 | typ as u8
+}
+
+impl SymbolValue for NumericalValue {
+    fn deserialize(header_byte: NonZeroU8, bytes: &mut &[u8]) -> Result<Self, ParseError> {
+        let (typ, len) = parse_header_byte(header_byte)?;
+        let value_bytes: &[u8];
+        (value_bytes, *bytes) = bytes.split_at(len);
+        let symbol: NumericalValue = match typ {
+            SymbolType::U64 => {
+                let mut octet: [u8; 8] = [0u8; 8];
+                octet[..value_bytes.len()].copy_from_slice(value_bytes);
+                let val: u64 = u64::from_le_bytes(octet);
+                NumericalValue::U64(val)
+            }
+            SymbolType::I64 => {
+                let mut octet: [u8; 8] = [0u8; 8];
+                octet[..value_bytes.len()].copy_from_slice(value_bytes);
+                let encoded: u64 = u64::from_le_bytes(octet);
+                let val: i64 = decode_zig_zag(encoded);
+                NumericalValue::I64(val)
+            }
+            SymbolType::Float => {
+                let octet: [u8; 8] =
+                    value_bytes.try_into().map_err(|_| ParseError::InvalidLen {
+                        typ: SymbolType::Float,
+                        len,
+                    })?;
+                let val_possibly_nan = f64::from_le_bytes(octet);
+                let val_not_nan = NotNan::new(val_possibly_nan)
+                    .map_err(|_| ParseError::NaN)?;
+                NumericalValue::F64(val_not_nan)
+            }
+        };
+        Ok(symbol)
+    }
+}
+
+#[allow(clippy::from_over_into)]
+impl Into<MiniBuffer> for NumericalValue {
+    fn into(self) -> MiniBuffer {
+        let mut bytes = [0u8; 9];
+        match self {
+            NumericalValue::F64(val) => {
+                let len = 8;
+                let header_byte = compute_header_byte(SymbolType::Float, len);
+                bytes[0] = header_byte;
+                bytes[1..].copy_from_slice(&val.to_le_bytes());
+                MiniBuffer {
+                    bytes,
+                    len: len + 1,
+                }
+            }
+            NumericalValue::U64(val) => {
+                let len = compute_num_bytes_for_u64(val);
+                let header_byte = compute_header_byte(SymbolType::U64, len);
+                bytes[0] = header_byte;
+                bytes[1..].copy_from_slice(&val.to_le_bytes());
+                MiniBuffer {
+                    bytes,
+                    len: len + 1,
+                }
+            }
+            NumericalValue::I64(val) => {
+                let encoded = encode_zig_zag(val);
+                let len = compute_num_bytes_for_u64(encoded);
+                let header_byte = compute_header_byte(SymbolType::I64, len);
+                bytes[0] = header_byte;
+                bytes[1..].copy_from_slice(&encoded.to_le_bytes());
+                MiniBuffer {
+                    bytes,
+                    len: len + 1,
+                }
+            }
+        }
+    }
+}
+
+#[allow(clippy::from_over_into)]
+impl Into<MiniBuffer> for UnorderedId {
+    fn into(self) -> MiniBuffer {
+        let mut bytes = [0u8; 9];
+        let val = self.0 as u64;
+        let len = compute_num_bytes_for_u64(val) + 1;
+        bytes[0] = len as u8;
+        bytes[1..].copy_from_slice(&val.to_le_bytes());
+        MiniBuffer { bytes, len }
+    }
+}
+
+impl SymbolValue for UnorderedId {
+    fn deserialize(header: NonZeroU8, bytes: &mut &[u8]) -> Result<UnorderedId, ParseError> {
+        let len = header.get() as usize;
+        let symbol_bytes: &[u8];
+        (symbol_bytes, *bytes) = bytes.split_at(len);
+        let mut value_bytes = [0u8; 4];
+        value_bytes[..len - 1].copy_from_slice(&symbol_bytes[1..]);
+        let value = u32::from_le_bytes(value_bytes);
+        Ok(UnorderedId(value))
+    }
+}
+
+const HEADER_MASK: u8 = (1u8 << 4) - 1u8;
+
+fn compute_num_bytes_for_u64(val: u64) -> usize {
+    let msb = (64u32 - val.leading_zeros()) as usize;
+    (msb + 7) / 8
+}
+
+fn parse_header_byte(byte: NonZeroU8) -> Result<(SymbolType, usize), ParseError> {
+    let len = (byte.get() as usize) >> 4;
+    let typ_code = byte.get() & HEADER_MASK;
+    let typ = SymbolType::try_from(typ_code)?;
+    Ok((typ, len))
+}
+
+#[derive(Error, Debug)]
+pub enum ParseError {
+    #[error("Type byte unknown `{0}`")]
+    UnknownType(u8),
+    #[error("Invalid len for type `{len}` for type `{typ:?}`.")]
+    InvalidLen { typ: SymbolType, len: usize },
+    #[error("Missing bytes.")]
+    MissingBytes,
+    #[error("Not a number value.")]
+    NaN,
+}
+
+impl<V: SymbolValue> ColumnOperation<V> {
+    pub fn serialize(self) -> MiniBuffer {
+        match self {
+            ColumnOperation::NewDoc(doc) => {
+                let mut minibuf: [u8; 9] = [0u8; 9];
+                minibuf[0] = 0u8;
+                minibuf[1..5].copy_from_slice(&doc.to_le_bytes());
+                MiniBuffer {
+                    bytes: minibuf,
+                    len: 5,
+                }
+            }
+            ColumnOperation::Value(val) => val.into(),
+        }
+    }
+
+    pub fn deserialize(bytes: &mut &[u8]) -> Result<Self, ParseError> {
+        if bytes.is_empty() {
+            return Err(ParseError::MissingBytes);
+        }
+        let header_byte = bytes[0];
+        *bytes = &bytes[1..];
+        if let Some(header_byte) = NonZeroU8::new(header_byte) {
+            let value = V::deserialize(header_byte, bytes)?;
+            Ok(ColumnOperation::Value(value))
+        } else {
+            let doc_bytes: &[u8];
+            (doc_bytes, *bytes) = bytes.split_at(4);
+            let doc: u32 =
+                u32::from_le_bytes(doc_bytes.try_into().map_err(|_| ParseError::MissingBytes)?);
+            Ok(ColumnOperation::NewDoc(doc))
+        }
+    }
+}
+
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+#[repr(u8)]
+pub enum SymbolType {
+    U64 = 1u8,
+    I64 = 2u8,
+    Float = 3u8,
+}
+
+impl TryFrom<u8> for SymbolType {
+    type Error = ParseError;
+
+    fn try_from(byte: u8) -> Result<Self, ParseError> {
+        match byte {
+            1u8 => Ok(SymbolType::U64),
+            2u8 => Ok(SymbolType::I64),
+            3u8 => Ok(SymbolType::Float),
+            _ => Err(ParseError::UnknownType(byte)),
+        }
+    }
+}
+
+fn encode_zig_zag(n: i64) -> u64 {
+    ((n << 1) ^ (n >> 63)) as u64
+}
+
+fn decode_zig_zag(n: u64) -> i64 {
+    ((n >> 1) as i64) ^ (-((n & 1) as i64))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{SymbolType, *};
+
+    #[track_caller]
+    fn test_zig_zag_aux(val: i64) {
+        let encoded = super::encode_zig_zag(val);
+        assert_eq!(decode_zig_zag(encoded), val);
+        if let Some(abs_val) = val.checked_abs() {
+            let abs_val = abs_val as u64;
+            assert!(encoded <= abs_val * 2);
+        }
+    }
+
+    #[test]
+    fn test_zig_zag() {
+        assert_eq!(encode_zig_zag(0i64), 0u64);
+        assert_eq!(encode_zig_zag(-1i64), 1u64);
+        assert_eq!(encode_zig_zag(1i64), 2u64);
+        test_zig_zag_aux(0i64);
+        test_zig_zag_aux(i64::MIN);
+        test_zig_zag_aux(i64::MAX);
+    }
+
+    use proptest::prelude::any;
+    use proptest::proptest;
+
+    proptest! {
+        #[test]
+        fn test_proptest_zig_zag(val in any::<i64>()) {
+            test_zig_zag_aux(val);
+        }
+    }
+
+    #[track_caller]
+    fn ser_deser_header_byte_aux(symbol_type: SymbolType, len: usize) {
+        let header_byte = compute_header_byte(symbol_type, len);
+        let (serdeser_numerical_type, serdeser_len) =
+            parse_header_byte(NonZeroU8::new(header_byte).unwrap()).unwrap();
+        assert_eq!(symbol_type, serdeser_numerical_type);
+        assert_eq!(len, serdeser_len);
+    }
+
+    #[test]
+    fn test_header_byte_serialization() {
+        for len in 1..9 {
+            ser_deser_header_byte_aux(SymbolType::Float, len);
+            ser_deser_header_byte_aux(SymbolType::I64, len);
+            ser_deser_header_byte_aux(SymbolType::U64, len);
+        }
+    }
+
+    #[track_caller]
+    fn ser_deser_symbol(symbol: ColumnOperation<NumericalValue>) {
+        let buf = symbol.serialize();
+        let mut bytes = &buf.bytes[..];
+        let serdeser_symbol = ColumnOperation::deserialize(&mut bytes).unwrap();
+        assert_eq!(bytes.len() + buf.len, buf.bytes.len());
+        assert_eq!(symbol, serdeser_symbol);
+    }
+
+    #[test]
+    fn test_compute_num_bytes_for_u64() {
+        assert_eq!(compute_num_bytes_for_u64(0), 0);
+        assert_eq!(compute_num_bytes_for_u64(1), 1);
+        assert_eq!(compute_num_bytes_for_u64(255), 1);
+        assert_eq!(compute_num_bytes_for_u64(256), 2);
+        assert_eq!(compute_num_bytes_for_u64((1 << 16) - 1), 2);
+        assert_eq!(compute_num_bytes_for_u64(1 << 16), 3);
+    }
+
+    #[test]
+    fn test_symbol_serialization() {
+        ser_deser_symbol(ColumnOperation::NewDoc(0));
+        ser_deser_symbol(ColumnOperation::NewDoc(3));
+        ser_deser_symbol(ColumnOperation::Value(NumericalValue::I64(0i64)));
+        ser_deser_symbol(ColumnOperation::Value(NumericalValue::I64(1i64)));
+        ser_deser_symbol(ColumnOperation::Value(NumericalValue::U64(257u64)));
+        ser_deser_symbol(ColumnOperation::Value(NumericalValue::I64(-257i64)));
+        ser_deser_symbol(ColumnOperation::Value(NumericalValue::I64(i64::MIN)));
+        ser_deser_symbol(ColumnOperation::Value(NumericalValue::U64(0u64)));
+        ser_deser_symbol(ColumnOperation::Value(NumericalValue::U64(u64::MIN)));
+        ser_deser_symbol(ColumnOperation::Value(NumericalValue::U64(u64::MAX)));
+    }
+}
--- a/columnar/src/writer/mod.rs
+++ b/columnar/src/writer/mod.rs
@@ -0,0 +1,675 @@
+mod column_operation;
+mod value_index;
+
+use std::io::{self, Write};
+
+use column_operation::ColumnOperation;
+use common::CountingWriter;
+use fastfield_codecs::serialize::ValueIndexInfo;
+use fastfield_codecs::{Column, MonotonicallyMappableToU64, VecColumn};
+use ordered_float::NotNan;
+use stacker::{Addr, ArenaHashMap, ExpUnrolledLinkedList, MemoryArena};
+
+use crate::column_type_header::{ColumnType, ColumnTypeAndCardinality};
+use crate::dictionary::{DictionaryBuilder, IdMapping, UnorderedId};
+use crate::value::{Coerce, NumericalType, NumericalValue};
+use crate::writer::column_operation::SymbolValue;
+use crate::writer::value_index::{IndexBuilder, SpareIndexBuilders};
+use crate::{Cardinality, ColumnarSerializer, DocId};
+
+#[derive(Copy, Clone, Default)]
+struct ColumnWriter {
+    // Detected cardinality of the column so far.
+    cardinality: Cardinality,
+    // Last document inserted.
+    // None if no doc has been added yet.
+    last_doc_opt: Option<u32>,
+    // Buffer containing the serialized values.
+    values: ExpUnrolledLinkedList,
+}
+
+#[derive(Clone, Copy, Default)]
+pub struct NumericalColumnWriter {
+    compatible_numerical_types: CompatibleNumericalTypes,
+    column_writer: ColumnWriter,
+}
+
+#[derive(Clone, Copy)]
+struct CompatibleNumericalTypes {
+    all_values_within_i64_range: bool,
+    all_values_within_u64_range: bool,
+}
+
+impl Default for CompatibleNumericalTypes {
+    fn default() -> CompatibleNumericalTypes {
+        CompatibleNumericalTypes {
+            all_values_within_i64_range: true,
+            all_values_within_u64_range: true,
+        }
+    }
+}
+
+impl CompatibleNumericalTypes {
+    pub fn accept_value(&mut self, numerical_value: NumericalValue) {
+        match numerical_value {
+            NumericalValue::I64(val_i64) => {
+                let value_within_u64_range = val_i64 >= 0i64;
+                self.all_values_within_u64_range &= value_within_u64_range;
+            }
+            NumericalValue::U64(val_u64) => {
+                let value_within_i64_range = val_u64 < i64::MAX as u64;
+                self.all_values_within_i64_range &= value_within_i64_range;
+            }
+            NumericalValue::F64(_) => {
+                self.all_values_within_i64_range = false;
+                self.all_values_within_u64_range = false;
+            }
+        }
+    }
+
+    pub fn to_numerical_type(self) -> NumericalType {
+        if self.all_values_within_i64_range {
+            NumericalType::I64
+        } else if self.all_values_within_u64_range {
+            NumericalType::U64
+        } else {
+            NumericalType::F64
+        }
+    }
+}
+
+impl NumericalColumnWriter {
+    pub fn record_numerical_value(
+        &mut self,
+        doc: DocId,
+        value: NumericalValue,
+        arena: &mut MemoryArena,
+    ) {
+        self.compatible_numerical_types.accept_value(value);
+        self.column_writer.record(doc, value, arena);
+    }
+}
+
+impl ColumnWriter {
+    fn symbol_iterator<'a, V: SymbolValue>(
+        &self,
+        arena: &MemoryArena,
+        buffer: &'a mut Vec<u8>,
+    ) -> impl Iterator<Item = ColumnOperation<V>> + 'a {
+        buffer.clear();
+        self.values.read_to_end(arena, buffer);
+        let mut cursor: &[u8] = &buffer[..];
+        std::iter::from_fn(move || {
+            if cursor.is_empty() {
+                return None;
+            }
+            let symbol = ColumnOperation::deserialize(&mut cursor)
+                .expect("Failed to deserialize symbol from in-memory. This should never happen.");
+            Some(symbol)
+        })
+    }
+
+    fn delta_with_last_doc(&self, doc: DocId) -> u32 {
+        self.last_doc_opt
+            .map(|last_doc| doc - last_doc)
+            .unwrap_or(doc + 1u32)
+    }
+
+    /// Records a change of the document being recorded.
+    ///
+    /// This function will also update the cardinality of the column
+    /// if necessary.
+    fn record(&mut self, doc: DocId, value: NumericalValue, arena: &mut MemoryArena) {
+        // Difference between `doc` and the last doc.
+        match self.delta_with_last_doc(doc) {
+            0 => {
+                // This is the last encounterred document.
+                self.cardinality = Cardinality::Multivalued;
+            }
+            1 => {
+                self.last_doc_opt = Some(doc);
+                self.write_symbol::<NumericalValue>(ColumnOperation::NewDoc(doc), arena);
+            }
+            _ => {
+                self.cardinality = self.cardinality.max(Cardinality::Optional);
+                self.last_doc_opt = Some(doc);
+                self.write_symbol::<NumericalValue>(ColumnOperation::NewDoc(doc), arena);
+            }
+        }
+        self.write_symbol(ColumnOperation::Value(value), arena);
+    }
+
+    // Get the cardinality.
+    // The overall number of docs in the column is necessary to
+    // deal with the case where the all docs contain 1 value, except some documents
+    // at the end of the column.
+    fn get_cardinality(&self, num_docs: DocId) -> Cardinality {
+        if self.delta_with_last_doc(num_docs) > 1 {
+            self.cardinality.max(Cardinality::Optional)
+        } else {
+            self.cardinality
+        }
+    }
+
+    fn write_symbol<V: SymbolValue>(
+        &mut self,
+        symbol: ColumnOperation<V>,
+        arena: &mut MemoryArena,
+    ) {
+        self.values
+            .writer(arena)
+            .extend_from_slice(symbol.serialize().as_slice());
+    }
+}
+
+#[derive(Copy, Clone, Default)]
+pub struct BytesColumnWriter {
+    dictionary_id: u32,
+    column_writer: ColumnWriter,
+}
+
+impl BytesColumnWriter {
+    pub fn with_dictionary_id(dictionary_id: u32) -> BytesColumnWriter {
+        BytesColumnWriter {
+            dictionary_id,
+            column_writer: Default::default(),
+        }
+    }
+
+    pub fn record_bytes(
+        &mut self,
+        doc: DocId,
+        bytes: &[u8],
+        dictionaries: &mut [DictionaryBuilder],
+        arena: &mut MemoryArena,
+    ) {
+        let unordered_id = dictionaries[self.dictionary_id as usize].get_or_allocate_id(bytes);
+        let numerical_value = NumericalValue::U64(unordered_id.0 as u64);
+        self.column_writer.record(doc, numerical_value, arena);
+    }
+}
+
+pub struct ColumnarWriter {
+    numerical_field_hash_map: ArenaHashMap,
+    bytes_field_hash_map: ArenaHashMap,
+    arena: MemoryArena,
+    // Dictionaries used to store dictionary-encoded values.
+    dictionaries: Vec<DictionaryBuilder>,
+    buffers: SpareBuffers,
+}
+
+#[derive(Default)]
+struct SpareBuffers {
+    byte_buffer: Vec<u8>,
+    value_index_builders: SpareIndexBuilders,
+    i64_values: Vec<i64>,
+    u64_values: Vec<u64>,
+    f64_values: Vec<ordered_float::NotNan<f64>>,
+}
+
+impl Default for ColumnarWriter {
+    fn default() -> Self {
+        ColumnarWriter {
+            numerical_field_hash_map: ArenaHashMap::new(10_000),
+            bytes_field_hash_map: ArenaHashMap::new(10_000),
+            dictionaries: Vec::new(),
+            arena: MemoryArena::default(),
+            buffers: SpareBuffers::default(),
+        }
+    }
+}
+
+#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Debug)]
+enum BytesOrNumerical {
+    Bytes,
+    Numerical,
+}
+
+impl ColumnarWriter {
+    pub fn record_numerical(&mut self, doc: DocId, key: &[u8], numerical_value: NumericalValue) {
+        let (hash_map, arena) = (&mut self.numerical_field_hash_map, &mut self.arena);
+        hash_map.mutate_or_create(key, |column_opt: Option<NumericalColumnWriter>| {
+            let mut column: NumericalColumnWriter = column_opt.unwrap_or_default();
+            column.record_numerical_value(doc, numerical_value, arena);
+            column
+        });
+    }
+
+    pub fn record_bytes(&mut self, doc: DocId, key: &[u8], value: &[u8]) {
+        let (hash_map, arena, dictionaries) = (
+            &mut self.bytes_field_hash_map,
+            &mut self.arena,
+            &mut self.dictionaries,
+        );
+        hash_map.mutate_or_create(key, |column_opt: Option<BytesColumnWriter>| {
+            let mut column: BytesColumnWriter = column_opt.unwrap_or_else(|| {
+                let dictionary_id = dictionaries.len() as u32;
+                dictionaries.push(DictionaryBuilder::default());
+                BytesColumnWriter::with_dictionary_id(dictionary_id)
+            });
+            column.record_bytes(doc, value, dictionaries, arena);
+            column
+        });
+    }
+
+    pub fn serialize<W: io::Write>(
+        &mut self,
+        num_docs: DocId,
+        mut serializer: ColumnarSerializer<W>,
+    ) -> io::Result<()> {
+        let mut field_columns: Vec<(&[u8], BytesOrNumerical, Addr)> = self
+            .numerical_field_hash_map
+            .iter()
+            .map(|(term, addr, _)| (term, BytesOrNumerical::Numerical, addr))
+            .collect();
+        field_columns.extend(
+            self.bytes_field_hash_map
+                .iter()
+                .map(|(term, addr, _)| (term, BytesOrNumerical::Bytes, addr)),
+        );
+        let mut key_buffer = Vec::new();
+        field_columns.sort_unstable_by_key(|(key, col_type, _)| (*key, *col_type));
+        let (arena, buffers, dictionaries) = (&self.arena, &mut self.buffers, &self.dictionaries);
+        for (key, bytes_or_numerical, addr) in field_columns {
+            let wrt = serializer.wrt();
+            let start_offset = wrt.written_bytes();
+            let column_type_and_cardinality: ColumnTypeAndCardinality =
+                match bytes_or_numerical {
+                BytesOrNumerical::Bytes => {
+                    let BytesColumnWriter { dictionary_id, column_writer } =
+                        self.bytes_field_hash_map.read(addr);
+                    let dictionary_builder =
+                        &dictionaries[dictionary_id as usize];
+                    serialize_bytes_column(
+                        &column_writer,
+                        num_docs,
+                        dictionary_builder,
+                        arena,
+                        buffers,
+                        wrt,
+                    )?;
+                    ColumnTypeAndCardinality {
+                        cardinality: column_writer.get_cardinality(num_docs),
+                        typ: ColumnType::Bytes,
+                    }
+                }
+                BytesOrNumerical::Numerical => {
+                    let NumericalColumnWriter { compatible_numerical_types, column_writer  } =
+                        self.numerical_field_hash_map.read(addr);
+                    let cardinality = column_writer.get_cardinality(num_docs);
+                    let numerical_type = compatible_numerical_types.to_numerical_type();
+                    serialize_numerical_column(
+                        cardinality,
+                        numerical_type,
+                        &column_writer,
+                        num_docs,
+                        arena,
+                        buffers,
+                        wrt,
+                    )?;
+                    ColumnTypeAndCardinality {
+                        cardinality,
+                        typ: ColumnType::Numerical(numerical_type),
+                    }
+                }
+            };
+            let end_offset = wrt.written_bytes();
+            let key_with_type = prepare_key(key, column_type_and_cardinality, &mut key_buffer);
+            serializer.record_column_offsets(key_with_type, start_offset..end_offset)?;
+        }
+        serializer.finalize()?;
+        Ok(())
+    }
+}
+
+/// Returns a key consisting of the concatenation of the key and the column_type_and_cardinality
+/// code.
+fn prepare_key<'a>(
+    key: &[u8],
+    column_type_cardinality: ColumnTypeAndCardinality,
+    buffer: &'a mut Vec<u8>,
+) -> &'a [u8] {
+    buffer.clear();
+    buffer.extend_from_slice(key);
+    buffer.push(0u8);
+    buffer.push(column_type_cardinality.to_code());
+    &buffer[..]
+}
+
+fn serialize_bytes_column<W: io::Write>(
+    column_writer: &ColumnWriter,
+    num_docs: DocId,
+    dictionary_builder: &DictionaryBuilder,
+    arena: &MemoryArena,
+    buffers: &mut SpareBuffers,
+    wrt: &mut CountingWriter<W>,
+) -> io::Result<()> {
+    let start_offset = wrt.written_bytes();
+    let id_mapping: IdMapping = dictionary_builder.serialize(wrt)?;
+    let dictionary_num_bytes: u32 = (wrt.written_bytes() - start_offset) as u32;
+    let cardinality = column_writer.get_cardinality(num_docs);
+    let SpareBuffers {
+        byte_buffer,
+        value_index_builders,
+        u64_values,
+        ..
+    } = buffers;
+    let symbol_iterator = column_writer
+        .symbol_iterator(arena, byte_buffer)
+        .map(|symbol: ColumnOperation<UnorderedId>| {
+            // We map unordered ids to ordered ids.
+            match symbol {
+                ColumnOperation::Value(unordered_id) => {
+                    let ordered_id = id_mapping.to_ord(unordered_id);
+                    ColumnOperation::Value(ordered_id.0 as u64)
+                }
+                ColumnOperation::NewDoc(doc) => ColumnOperation::NewDoc(doc),
+            }
+        });
+    serialize_column(
+        symbol_iterator,
+        cardinality,
+        num_docs,
+        value_index_builders,
+        u64_values,
+        wrt,
+    )?;
+    wrt.write_all(&dictionary_num_bytes.to_le_bytes()[..])?;
+    Ok(())
+}
+
+fn serialize_numerical_column<W: io::Write>(
+    cardinality: Cardinality,
+    numerical_type: NumericalType,
+    column_writer: &ColumnWriter,
+    num_docs: DocId,
+    arena: &MemoryArena,
+    buffers: &mut SpareBuffers,
+    wrt: &mut W,
+) -> io::Result<()> {
+    let SpareBuffers {
+        byte_buffer,
+        value_index_builders,
+        u64_values,
+        i64_values,
+        f64_values,
+    } = buffers;
+    let symbol_iterator = column_writer.symbol_iterator(arena, byte_buffer);
+    match numerical_type {
+        NumericalType::I64 => {
+            serialize_column(
+                coerce_numerical_symbol::<i64>(symbol_iterator),
+                cardinality,
+                num_docs,
+                value_index_builders,
+                i64_values,
+                wrt,
+            )?;
+        }
+        NumericalType::U64 => {
+            serialize_column(
+                coerce_numerical_symbol::<u64>(symbol_iterator),
+                cardinality,
+                num_docs,
+                value_index_builders,
+                u64_values,
+                wrt,
+            )?;
+        }
+        NumericalType::F64 => {
+            serialize_column(
+                coerce_numerical_symbol::<NotNan<f64>>(symbol_iterator),
+                cardinality,
+                num_docs,
+                value_index_builders,
+                f64_values,
+                wrt,
+            )?;
+        }
+    };
+    Ok(())
+}
+
+fn serialize_column<
+    T: Copy + Ord + Default + Send + Sync + MonotonicallyMappableToU64,
+    W: io::Write,
+>(
+    symbol_iterator: impl Iterator<Item = ColumnOperation<T>>,
+    cardinality: Cardinality,
+    num_docs: DocId,
+    value_index_builders: &mut SpareIndexBuilders,
+    values: &mut Vec<T>,
+    wrt: &mut W,
+) -> io::Result<()>
+where
+    for<'a> VecColumn<'a, T>: Column<T>,
+{
+    match cardinality {
+        Cardinality::Required => {
+            consume_symbol_iterator(
+                symbol_iterator,
+                value_index_builders.borrow_required_index_builder(),
+                values,
+            );
+            fastfield_codecs::serialize(
+                VecColumn::from(&values[..]),
+                wrt,
+                &fastfield_codecs::ALL_CODEC_TYPES[..],
+            )?;
+        }
+        Cardinality::Optional => {
+            let optional_index_builder = value_index_builders.borrow_optional_index_builder();
+            consume_symbol_iterator(symbol_iterator, optional_index_builder, values);
+            let optional_index = optional_index_builder.finish(num_docs);
+            fastfield_codecs::serialize::serialize_new(
+                ValueIndexInfo::SingleValue(Box::new(optional_index)),
+                VecColumn::from(&values[..]),
+                wrt,
+                &fastfield_codecs::ALL_CODEC_TYPES[..],
+            )?;
+        }
+        Cardinality::Multivalued => {
+            let multivalued_index_builder = value_index_builders.borrow_multivalued_index_builder();
+            consume_symbol_iterator(symbol_iterator, multivalued_index_builder, values);
+            let multivalued_index = multivalued_index_builder.finish(num_docs);
+            fastfield_codecs::serialize::serialize_new(
+                ValueIndexInfo::MultiValue(Box::new(multivalued_index)),
+                VecColumn::from(&values[..]),
+                wrt,
+                &fastfield_codecs::ALL_CODEC_TYPES[..],
+            )?;
+        }
+    }
+    Ok(())
+}
+
+fn coerce_numerical_symbol<T>(
+    symbol_iterator: impl Iterator<Item = ColumnOperation<NumericalValue>>,
+) -> impl Iterator<Item = ColumnOperation<T>>
+where T: Coerce {
+    symbol_iterator.map(|symbol| match symbol {
+        ColumnOperation::NewDoc(doc) => ColumnOperation::NewDoc(doc),
+        ColumnOperation::Value(numerical_value) => {
+            ColumnOperation::Value(Coerce::coerce(numerical_value))
+        }
+    })
+}
+
+fn consume_symbol_iterator<T, TIndexBuilder: IndexBuilder>(
+    symbol_iterator: impl Iterator<Item = ColumnOperation<T>>,
+    index_builder: &mut TIndexBuilder,
+    values: &mut Vec<T>,
+) {
+    for symbol in symbol_iterator {
+        match symbol {
+            ColumnOperation::NewDoc(doc) => {
+                index_builder.record_doc(doc);
+            }
+            ColumnOperation::Value(value) => {
+                index_builder.record_value();
+                values.push(value);
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+
+    use ordered_float::NotNan;
+    use stacker::MemoryArena;
+
+    use super::prepare_key;
+    use crate::column_type_header::{ColumnType, ColumnTypeAndCardinality};
+    use crate::value::{NumericalType, NumericalValue};
+    use crate::writer::column_operation::ColumnOperation;
+    use crate::writer::CompatibleNumericalTypes;
+    use crate::Cardinality;
+
+    #[test]
+    fn test_prepare_key_bytes() {
+        let mut buffer: Vec<u8> = b"somegarbage".to_vec();
+        let column_type_and_cardinality = ColumnTypeAndCardinality {
+            typ: ColumnType::Bytes,
+            cardinality: Cardinality::Optional,
+        };
+        let prepared_key = prepare_key(b"root\0child", column_type_and_cardinality, &mut buffer);
+        assert_eq!(prepared_key.len(), 12);
+        assert_eq!(&prepared_key[..10], b"root\0child");
+        assert_eq!(prepared_key[10], 0u8);
+        assert_eq!(prepared_key[11], column_type_and_cardinality.to_code());
+    }
+
+    #[test]
+    fn test_column_writer_required_simple() {
+        let mut arena = MemoryArena::default();
+        let mut column_writer = super::ColumnWriter::default();
+        column_writer.record(0u32, 14i64.into(), &mut arena);
+        column_writer.record(1u32, 15i64.into(), &mut arena);
+        column_writer.record(2u32, (-16i64).into(), &mut arena);
+        assert_eq!(column_writer.get_cardinality(3), Cardinality::Required);
+        let mut buffer = Vec::new();
+        let symbols: Vec<ColumnOperation<NumericalValue>> = column_writer
+            .symbol_iterator(&mut arena, &mut buffer)
+            .collect();
+        assert_eq!(symbols.len(), 6);
+        assert!(matches!(symbols[0], ColumnOperation::NewDoc(0u32)));
+        assert!(matches!(
+            symbols[1],
+            ColumnOperation::Value(NumericalValue::I64(14i64))
+        ));
+        assert!(matches!(symbols[2], ColumnOperation::NewDoc(1u32)));
+        assert!(matches!(
+            symbols[3],
+            ColumnOperation::Value(NumericalValue::I64(15i64))
+        ));
+        assert!(matches!(symbols[4], ColumnOperation::NewDoc(2u32)));
+        assert!(matches!(
+            symbols[5],
+            ColumnOperation::Value(NumericalValue::I64(-16i64))
+        ));
+    }
+
+    #[test]
+    fn test_column_writer_optional_cardinality_missing_first() {
+        let mut arena = MemoryArena::default();
+        let mut column_writer = super::ColumnWriter::default();
+        column_writer.record(1u32, 15i64.into(), &mut arena);
+        column_writer.record(2u32, (-16i64).into(), &mut arena);
+        assert_eq!(column_writer.get_cardinality(3), Cardinality::Optional);
+        let mut buffer = Vec::new();
+        let symbols: Vec<ColumnOperation<NumericalValue>> = column_writer
+            .symbol_iterator(&mut arena, &mut buffer)
+            .collect();
+        assert_eq!(symbols.len(), 4);
+        assert!(matches!(symbols[0], ColumnOperation::NewDoc(1u32)));
+        assert!(matches!(
+            symbols[1],
+            ColumnOperation::Value(NumericalValue::I64(15i64))
+        ));
+        assert!(matches!(symbols[2], ColumnOperation::NewDoc(2u32)));
+        assert!(matches!(
+            symbols[3],
+            ColumnOperation::Value(NumericalValue::I64(-16i64))
+        ));
+    }
+
+    #[test]
+    fn test_column_writer_optional_cardinality_missing_last() {
+        let mut arena = MemoryArena::default();
+        let mut column_writer = super::ColumnWriter::default();
+        column_writer.record(0u32, 15i64.into(), &mut arena);
+        assert_eq!(column_writer.get_cardinality(2), Cardinality::Optional);
+        let mut buffer = Vec::new();
+        let symbols: Vec<ColumnOperation<NumericalValue>> = column_writer
+            .symbol_iterator(&mut arena, &mut buffer)
+            .collect();
+        assert_eq!(symbols.len(), 2);
+        assert!(matches!(symbols[0], ColumnOperation::NewDoc(0u32)));
+        assert!(matches!(
+            symbols[1],
+            ColumnOperation::Value(NumericalValue::I64(15i64))
+        ));
+    }
+
+    #[test]
+    fn test_column_writer_multivalued() {
+        let mut arena = MemoryArena::default();
+        let mut column_writer = super::ColumnWriter::default();
+        column_writer.record(0u32, 16i64.into(), &mut arena);
+        column_writer.record(0u32, 17i64.into(), &mut arena);
+        assert_eq!(column_writer.get_cardinality(1), Cardinality::Multivalued);
+        let mut buffer = Vec::new();
+        let symbols: Vec<ColumnOperation<NumericalValue>> = column_writer
+            .symbol_iterator(&mut arena, &mut buffer)
+            .collect();
+        assert_eq!(symbols.len(), 3);
+        assert!(matches!(symbols[0], ColumnOperation::NewDoc(0u32)));
+        assert!(matches!(
+            symbols[1],
+            ColumnOperation::Value(NumericalValue::I64(16i64))
+        ));
+        assert!(matches!(
+            symbols[2],
+            ColumnOperation::Value(NumericalValue::I64(17i64))
+        ));
+    }
+
+    #[track_caller]
+    fn test_column_writer_coercion_iter_aux(
+        values: impl Iterator<Item = NumericalValue>,
+        expected_numerical_type: NumericalType,
+    ) {
+        let mut compatible_numerical_types = CompatibleNumericalTypes::default();
+        for value in values {
+            compatible_numerical_types.accept_value(value);
+        }
+        assert_eq!(
+            compatible_numerical_types.to_numerical_type(),
+            expected_numerical_type
+        );
+    }
+
+    #[track_caller]
+    fn test_column_writer_coercion_aux(
+        values: &[NumericalValue],
+        expected_numerical_type: NumericalType,
+    ) {
+        test_column_writer_coercion_iter_aux(values.iter().copied(), expected_numerical_type);
+        test_column_writer_coercion_iter_aux(values.iter().rev().copied(), expected_numerical_type);
+    }
+
+    #[test]
+    fn test_column_writer_coercion() {
+        test_column_writer_coercion_aux(&[], NumericalType::I64);
+        test_column_writer_coercion_aux(&[1i64.into()], NumericalType::I64);
+        test_column_writer_coercion_aux(&[1u64.into()], NumericalType::I64);
+        // We don't detect exact integer at the moment. We could!
+        test_column_writer_coercion_aux(&[NotNan::new(1f64).unwrap().into()], NumericalType::F64);
+        test_column_writer_coercion_aux(&[u64::MAX.into()], NumericalType::U64);
+        test_column_writer_coercion_aux(&[(i64::MAX as u64).into()], NumericalType::U64);
+        test_column_writer_coercion_aux(&[(1u64 << 63).into()], NumericalType::U64);
+        test_column_writer_coercion_aux(&[1i64.into(), 1u64.into()], NumericalType::I64);
+        test_column_writer_coercion_aux(&[u64::MAX.into(), (-1i64).into()], NumericalType::F64);
+    }
+}
--- a/columnar/src/writer/value_index.rs
+++ b/columnar/src/writer/value_index.rs
@@ -0,0 +1,218 @@
+use fastfield_codecs::serialize::{MultiValueIndexInfo, SingleValueIndexInfo};
+
+use crate::DocId;
+
+/// The `IndexBuilder` interprets a sequence of
+/// calls of the form:
+/// (record_doc,record_value+)*
+/// and can then serialize the results into an index.
+///
+/// It has different implementation depending on whether the
+/// cardinality is required, optional, or multivalued.
+pub(crate) trait IndexBuilder {
+    fn record_doc(&mut self, doc: DocId);
+    #[inline]
+    fn record_value(&mut self) {}
+}
+
+/// The RequiredIndexBuilder does nothing.
+#[derive(Default)]
+pub struct RequiredIndexBuilder;
+
+impl IndexBuilder for RequiredIndexBuilder {
+    #[inline(always)]
+    fn record_doc(&mut self, _doc: DocId) {}
+}
+
+#[derive(Default)]
+pub struct OptionalIndexBuilder {
+    docs: Vec<DocId>,
+}
+
+struct SingleValueArrayIndex<'a> {
+    docs: &'a [DocId],
+    num_docs: DocId,
+}
+
+impl<'a> SingleValueIndexInfo for SingleValueArrayIndex<'a> {
+    fn num_vals(&self) -> u32 {
+        self.num_docs as u32
+    }
+
+    fn num_non_nulls(&self) -> u32 {
+        self.docs.len() as u32
+    }
+
+    fn iter(&self) -> Box<dyn Iterator<Item = u32> + '_> {
+        Box::new(self.docs.iter().copied())
+    }
+}
+
+impl OptionalIndexBuilder {
+    pub fn finish(&mut self, num_docs: DocId) -> impl SingleValueIndexInfo + '_ {
+        debug_assert!(self
+            .docs
+            .last()
+            .copied()
+            .map(|last_doc| last_doc < num_docs)
+            .unwrap_or(true));
+        SingleValueArrayIndex {
+            docs: &self.docs[..],
+            num_docs,
+        }
+    }
+
+    fn reset(&mut self) {
+        self.docs.clear();
+    }
+}
+
+impl IndexBuilder for OptionalIndexBuilder {
+    #[inline(always)]
+    fn record_doc(&mut self, doc: DocId) {
+        debug_assert!(self
+            .docs
+            .last()
+            .copied()
+            .map(|prev_doc| doc > prev_doc)
+            .unwrap_or(true));
+        self.docs.push(doc);
+    }
+}
+
+#[derive(Default)]
+pub struct MultivaluedIndexBuilder {
+    // TODO should we switch to `start_offset`?
+    end_values: Vec<DocId>,
+    total_num_vals_seen: u32,
+}
+
+pub struct MultivaluedValueArrayIndex<'a> {
+    end_offsets: &'a [DocId],
+}
+
+impl<'a> MultiValueIndexInfo for MultivaluedValueArrayIndex<'a> {
+    fn num_docs(&self) -> u32 {
+        self.end_offsets.len() as u32
+    }
+
+    fn num_vals(&self) -> u32 {
+        self.end_offsets.last().copied().unwrap_or(0u32)
+    }
+
+    fn iter(&self) -> Box<dyn Iterator<Item = u32> + '_> {
+        if self.end_offsets.is_empty() {
+            return Box::new(std::iter::empty());
+        }
+        let n = self.end_offsets.len();
+        Box::new(std::iter::once(0u32).chain(self.end_offsets[..n - 1].iter().copied()))
+    }
+}
+
+impl MultivaluedIndexBuilder {
+    pub fn finish(&mut self, num_docs: DocId) -> impl MultiValueIndexInfo + '_ {
+        self.end_values
+            .resize(num_docs as usize, self.total_num_vals_seen);
+        MultivaluedValueArrayIndex {
+            end_offsets: &self.end_values[..],
+        }
+    }
+
+    fn reset(&mut self) {
+        self.end_values.clear();
+        self.total_num_vals_seen = 0;
+    }
+}
+
+impl IndexBuilder for MultivaluedIndexBuilder {
+    fn record_doc(&mut self, doc: DocId) {
+        self.end_values
+            .resize(doc as usize, self.total_num_vals_seen);
+    }
+
+    fn record_value(&mut self) {
+        self.total_num_vals_seen += 1;
+    }
+}
+
+/// The `SpareIndexBuilders` is there to avoid allocating a
+/// new index builder for every single column.
+#[derive(Default)]
+pub struct SpareIndexBuilders {
+    required_index_builder: RequiredIndexBuilder,
+    optional_index_builder: OptionalIndexBuilder,
+    multivalued_index_builder: MultivaluedIndexBuilder,
+}
+
+impl SpareIndexBuilders {
+    pub fn borrow_required_index_builder(&mut self) -> &mut RequiredIndexBuilder {
+        &mut self.required_index_builder
+    }
+
+    pub fn borrow_optional_index_builder(&mut self) -> &mut OptionalIndexBuilder {
+        self.optional_index_builder.reset();
+        &mut self.optional_index_builder
+    }
+
+    pub fn borrow_multivalued_index_builder(&mut self) -> &mut MultivaluedIndexBuilder {
+        self.multivalued_index_builder.reset();
+        &mut self.multivalued_index_builder
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_optional_value_index_builder() {
+        let mut opt_value_index_builder = OptionalIndexBuilder::default();
+        opt_value_index_builder.record_doc(0u32);
+        opt_value_index_builder.record_value();
+        assert_eq!(
+            &opt_value_index_builder
+                .finish(1u32)
+                .iter()
+                .collect::<Vec<u32>>(),
+            &[0]
+        );
+        opt_value_index_builder.reset();
+        opt_value_index_builder.record_doc(1u32);
+        opt_value_index_builder.record_value();
+        assert_eq!(
+            &opt_value_index_builder
+                .finish(2u32)
+                .iter()
+                .collect::<Vec<u32>>(),
+            &[1]
+        );
+    }
+
+    #[test]
+    fn test_multivalued_value_index_builder() {
+        let mut multivalued_value_index_builder = MultivaluedIndexBuilder::default();
+        multivalued_value_index_builder.record_doc(1u32);
+        multivalued_value_index_builder.record_value();
+        multivalued_value_index_builder.record_value();
+        multivalued_value_index_builder.record_doc(2u32);
+        multivalued_value_index_builder.record_value();
+        assert_eq!(
+            multivalued_value_index_builder
+                .finish(4u32)
+                .iter()
+                .collect::<Vec<u32>>(),
+            vec![0, 0, 2, 3]
+        );
+        multivalued_value_index_builder.reset();
+        multivalued_value_index_builder.record_doc(2u32);
+        multivalued_value_index_builder.record_value();
+        multivalued_value_index_builder.record_value();
+        assert_eq!(
+            multivalued_value_index_builder
+                .finish(4u32)
+                .iter()
+                .collect::<Vec<u32>>(),
+            vec![0, 0, 0, 2]
+        );
+    }
+}
--- a/common/Cargo.toml
+++ b/common/Cargo.toml
@@ -1,16 +1,21 @@
 [package]
 name = "tantivy-common"
-version = "0.3.0"
+version = "0.5.0"
 authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
 license = "MIT"
 edition = "2021"
 description = "common traits and utility functions used by multiple tantivy subcrates"
+documentation = "https://docs.rs/tantivy_common/"
+homepage = "https://github.com/quickwit-oss/tantivy"
+repository = "https://github.com/quickwit-oss/tantivy"
+

 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]
 byteorder = "1.4.3"
-ownedbytes = { version="0.3", path="../ownedbytes" }
+ownedbytes = { version= "0.5", path="../ownedbytes" }
+async-trait = "0.1"

 [dev-dependencies]
 proptest = "1.0.0"
--- a/common/src/bitset.rs
+++ b/common/src/bitset.rs
@@ -151,7 +151,7 @@ impl TinySet {
        if self.is_empty() {
            None
        } else {
-            let lowest = self.0.trailing_zeros() as u32;
+            let lowest = self.0.trailing_zeros();
            self.0 ^= TinySet::singleton(lowest).0;
            Some(lowest)
        }
@@ -259,11 +259,7 @@ impl BitSet {
        // we do not check saturated els.
        let higher = el / 64u32;
        let lower = el % 64u32;
-        self.len += if self.tinysets[higher as usize].insert_mut(lower) {
-            1
-        } else {
-            0
-        };
+        self.len += u64::from(self.tinysets[higher as usize].insert_mut(lower));
    }

    /// Inserts an element in the `BitSet`
@@ -272,11 +268,7 @@ impl BitSet {
        // we do not check saturated els.
        let higher = el / 64u32;
        let lower = el % 64u32;
-        self.len -= if self.tinysets[higher as usize].remove_mut(lower) {
-            1
-        } else {
-            0
-        };
+        self.len -= u64::from(self.tinysets[higher as usize].remove_mut(lower));
    }

    /// Returns true iff the elements is in the `BitSet`.
@@ -285,7 +277,7 @@ impl BitSet {
        self.tinyset(el / 64u32).contains(el % 64)
    }

-    /// Returns the first non-empty `TinySet` associated to a bucket lower
+    /// Returns the first non-empty `TinySet` associated with a bucket lower
    /// or greater than bucket.
    ///
    /// Reminder: the tiny set with the bucket `bucket`, represents the
@@ -429,7 +421,7 @@ mod tests {
            bitset.serialize(&mut out).unwrap();

            let bitset = ReadOnlyBitSet::open(OwnedBytes::new(out));
-            assert_eq!(bitset.len() as usize, i as usize);
+            assert_eq!(bitset.len(), i as usize);
        }
    }

@@ -440,7 +432,7 @@ mod tests {
        bitset.serialize(&mut out).unwrap();

        let bitset = ReadOnlyBitSet::open(OwnedBytes::new(out));
-        assert_eq!(bitset.len() as usize, 64);
+        assert_eq!(bitset.len(), 64);
    }

    #[test]
--- a/src/directory/file_slice.rs
+++ b/src/directory/file_slice.rs
@@ -1,23 +1,19 @@
-use std::ops::{Deref, Range};
-use std::sync::{Arc, Weak};
+use std::ops::{Deref, Range, RangeBounds};
+use std::sync::Arc;
 use std::{fmt, io};

 use async_trait::async_trait;
-use common::HasLen;
-use stable_deref_trait::StableDeref;
+use ownedbytes::{OwnedBytes, StableDeref};

-use crate::directory::OwnedBytes;
-
-pub type ArcBytes = Arc<dyn Deref<Target = [u8]> + Send + Sync + 'static>;
-pub type WeakArcBytes = Weak<dyn Deref<Target = [u8]> + Send + Sync + 'static>;
+use crate::HasLen;

 /// Objects that represents files sections in tantivy.
 ///
 /// By contract, whatever happens to the directory file, as long as a FileHandle
 /// is alive, the data associated with it cannot be altered or destroyed.
 ///
-/// The underlying behavior is therefore specific to the `Directory` that created it.
-/// Despite its name, a `FileSlice` may or may not directly map to an actual file
+/// The underlying behavior is therefore specific to the `Directory` that
+/// created it. Despite its name, a [`FileSlice`] may or may not directly map to an actual file
 /// on the filesystem.

 #[async_trait]
@@ -27,13 +23,9 @@ pub trait FileHandle: 'static + Send + Sync + HasLen + fmt::Debug {
    /// This method may panic if the range requested is invalid.
    fn read_bytes(&self, range: Range<usize>) -> io::Result<OwnedBytes>;

-    #[cfg(feature = "quickwit")]
    #[doc(hidden)]
-    async fn read_bytes_async(
-        &self,
-        _byte_range: Range<usize>,
-    ) -> crate::AsyncIoResult<OwnedBytes> {
-        Err(crate::error::AsyncIoError::AsyncUnsupported)
+    async fn read_bytes_async(&self, byte_range: Range<usize>) -> io::Result<OwnedBytes> {
+        self.read_bytes(byte_range)
    }
 }

@@ -45,7 +37,7 @@ impl FileHandle for &'static [u8] {
    }

    #[cfg(feature = "quickwit")]
-    async fn read_bytes_async(&self, byte_range: Range<usize>) -> crate::AsyncIoResult<OwnedBytes> {
+    async fn read_bytes_async(&self, byte_range: Range<usize>) -> io::Result<OwnedBytes> {
        Ok(self.read_bytes(byte_range)?)
    }
 }
@@ -73,6 +65,25 @@ impl fmt::Debug for FileSlice {
    }
 }

+#[inline]
+fn combine_ranges<R: RangeBounds<usize>>(orig_range: Range<usize>, rel_range: R) -> Range<usize> {
+    let start: usize = orig_range.start
+        + match rel_range.start_bound().cloned() {
+            std::ops::Bound::Included(rel_start) => rel_start,
+            std::ops::Bound::Excluded(rel_start) => rel_start + 1,
+            std::ops::Bound::Unbounded => 0,
+        };
+    assert!(start <= orig_range.end);
+    let end: usize = match rel_range.end_bound().cloned() {
+        std::ops::Bound::Included(rel_end) => orig_range.start + rel_end + 1,
+        std::ops::Bound::Excluded(rel_end) => orig_range.start + rel_end,
+        std::ops::Bound::Unbounded => orig_range.end,
+    };
+    assert!(end >= start);
+    assert!(end <= orig_range.end);
+    start..end
+}
+
 impl FileSlice {
    /// Wraps a FileHandle.
    pub fn new(file_handle: Arc<dyn FileHandle>) -> Self {
@@ -96,11 +107,11 @@ impl FileSlice {
    ///
    /// Panics if `byte_range.end` exceeds the filesize.
    #[must_use]
-    pub fn slice(&self, byte_range: Range<usize>) -> FileSlice {
-        assert!(byte_range.end <= self.len());
+    #[inline]
+    pub fn slice<R: RangeBounds<usize>>(&self, byte_range: R) -> FileSlice {
        FileSlice {
            data: self.data.clone(),
-            range: self.range.start + byte_range.start..self.range.start + byte_range.end,
+            range: combine_ranges(self.range.clone(), byte_range),
        }
    }

@@ -112,7 +123,7 @@ impl FileSlice {

    /// Returns a `OwnedBytes` with all of the data in the `FileSlice`.
    ///
-    /// The behavior is strongly dependant on the implementation of the underlying
+    /// The behavior is strongly dependent on the implementation of the underlying
    /// `Directory` and the `FileSliceTrait` it creates.
    /// In particular, it is  up to the `Directory` implementation
    /// to handle caching if needed.
@@ -120,9 +131,8 @@ impl FileSlice {
        self.data.read_bytes(self.range.clone())
    }

-    #[cfg(feature = "quickwit")]
    #[doc(hidden)]
-    pub async fn read_bytes_async(&self) -> crate::AsyncIoResult<OwnedBytes> {
+    pub async fn read_bytes_async(&self) -> io::Result<OwnedBytes> {
        self.data.read_bytes_async(self.range.clone()).await
    }

@@ -140,12 +150,8 @@ impl FileSlice {
            .read_bytes(self.range.start + range.start..self.range.start + range.end)
    }

-    #[cfg(feature = "quickwit")]
    #[doc(hidden)]
-    pub async fn read_bytes_slice_async(
-        &self,
-        byte_range: Range<usize>,
-    ) -> crate::AsyncIoResult<OwnedBytes> {
+    pub async fn read_bytes_slice_async(&self, byte_range: Range<usize>) -> io::Result<OwnedBytes> {
        assert!(
            self.range.start + byte_range.end <= self.range.end,
            "`to` exceeds the fileslice length"
@@ -208,7 +214,7 @@ impl FileHandle for FileSlice {
    }

    #[cfg(feature = "quickwit")]
-    async fn read_bytes_async(&self, byte_range: Range<usize>) -> crate::AsyncIoResult<OwnedBytes> {
+    async fn read_bytes_async(&self, byte_range: Range<usize>) -> io::Result<OwnedBytes> {
        self.read_bytes_slice_async(byte_range).await
    }
 }
@@ -226,7 +232,7 @@ impl FileHandle for OwnedBytes {
    }

    #[cfg(feature = "quickwit")]
-    async fn read_bytes_async(&self, range: Range<usize>) -> crate::AsyncIoResult<OwnedBytes> {
+    async fn read_bytes_async(&self, range: Range<usize>) -> io::Result<OwnedBytes> {
        let bytes = self.read_bytes(range)?;
        Ok(bytes)
    }
@@ -237,9 +243,9 @@ mod tests {
    use std::io;
    use std::sync::Arc;

-    use common::HasLen;
-
    use super::{FileHandle, FileSlice};
+    use crate::file_slice::combine_ranges;
+    use crate::HasLen;

    #[test]
    fn test_file_slice() -> io::Result<()> {
@@ -310,4 +316,18 @@ mod tests {
            b"bcd"
        );
    }
+
+    #[test]
+    fn test_combine_range() {
+        assert_eq!(combine_ranges(1..3, 0..1), 1..2);
+        assert_eq!(combine_ranges(1..3, 1..), 2..3);
+        assert_eq!(combine_ranges(1..4, ..2), 1..3);
+        assert_eq!(combine_ranges(3..10, 2..5), 5..8);
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_combine_range_panics() {
+        let _ = combine_ranges(3..5, 1..4);
+    }
 }
--- a/common/src/lib.rs
+++ b/common/src/lib.rs
@@ -5,13 +5,17 @@ use std::ops::Deref;
 pub use byteorder::LittleEndian as Endianness;

 mod bitset;
+pub mod file_slice;
 mod serialize;
 mod vint;
 mod writer;
-
 pub use bitset::*;
+pub use ownedbytes::OwnedBytes;
 pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize};
-pub use vint::{read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt};
+pub use vint::{
+    deserialize_vint_u128, read_u32_vint, read_u32_vint_no_advance, serialize_vint_u128,
+    serialize_vint_u32, write_u32_vint, VInt, VIntU128,
+};
 pub use writer::{AntiCallToken, CountingWriter, TerminatingWrite};

 /// Has length trait
@@ -52,13 +56,13 @@ const HIGHEST_BIT: u64 = 1 << 63;
 /// to values over 2^63, and all values end up requiring 64 bits.
 ///
 /// # See also
-/// The [reverse mapping is `u64_to_i64`](./fn.u64_to_i64.html).
+/// The reverse mapping is [`u64_to_i64()`].
 #[inline]
 pub fn i64_to_u64(val: i64) -> u64 {
    (val as u64) ^ HIGHEST_BIT
 }

-/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
+/// Reverse the mapping given by [`i64_to_u64()`].
 #[inline]
 pub fn u64_to_i64(val: u64) -> i64 {
    (val ^ HIGHEST_BIT) as i64
@@ -80,7 +84,7 @@ pub fn u64_to_i64(val: u64) -> i64 {
 /// explains the mapping in a clear manner.
 ///
 /// # See also
-/// The [reverse mapping is `u64_to_f64`](./fn.u64_to_f64.html).
+/// The reverse mapping is [`u64_to_f64()`].
 #[inline]
 pub fn f64_to_u64(val: f64) -> u64 {
    let bits = val.to_bits();
@@ -91,7 +95,7 @@ pub fn f64_to_u64(val: f64) -> u64 {
    }
 }

-/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
+/// Reverse the mapping given by [`f64_to_u64()`].
 #[inline]
 pub fn u64_to_f64(val: u64) -> f64 {
    f64::from_bits(if val & HIGHEST_BIT != 0 {
--- a/common/src/serialize.rs
+++ b/common/src/serialize.rs
@@ -19,7 +19,7 @@ pub trait DeserializeFrom<T: BinarySerializable> {

 /// Implement deserialize from &[u8] for all types which implement BinarySerializable.
 ///
-/// TryFrom would actually be preferrable, but not possible because of the orphan
+/// TryFrom would actually be preferable, but not possible because of the orphan
 /// rules (not completely sure if this could be resolved)
 impl<T: BinarySerializable> DeserializeFrom<T> for &[u8] {
    fn deserialize(&mut self) -> io::Result<T> {
@@ -94,6 +94,20 @@ impl FixedSize for u32 {
    const SIZE_IN_BYTES: usize = 4;
 }

+impl BinarySerializable for u16 {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
+        writer.write_u16::<Endianness>(*self)
+    }
+
+    fn deserialize<R: Read>(reader: &mut R) -> io::Result<u16> {
+        reader.read_u16::<Endianness>()
+    }
+}
+
+impl FixedSize for u16 {
+    const SIZE_IN_BYTES: usize = 2;
+}
+
 impl BinarySerializable for u64 {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_u64::<Endianness>(*self)
@@ -107,6 +121,19 @@ impl FixedSize for u64 {
    const SIZE_IN_BYTES: usize = 8;
 }

+impl BinarySerializable for u128 {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
+        writer.write_u128::<Endianness>(*self)
+    }
+    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
+        reader.read_u128::<Endianness>()
+    }
+}
+
+impl FixedSize for u128 {
+    const SIZE_IN_BYTES: usize = 16;
+}
+
 impl BinarySerializable for f32 {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
        writer.write_f32::<Endianness>(*self)
@@ -161,8 +188,7 @@ impl FixedSize for u8 {

 impl BinarySerializable for bool {
    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
-        let val = if *self { 1 } else { 0 };
-        writer.write_u8(val)
+        writer.write_u8(u8::from(*self))
    }
    fn deserialize<R: Read>(reader: &mut R) -> io::Result<bool> {
        let val = reader.read_u8()?;
--- a/common/src/vint.rs
+++ b/common/src/vint.rs
@@ -5,6 +5,75 @@ use byteorder::{ByteOrder, LittleEndian};

 use super::BinarySerializable;

+/// Variable int serializes a u128 number
+pub fn serialize_vint_u128(mut val: u128, output: &mut Vec<u8>) {
+    loop {
+        let next_byte: u8 = (val % 128u128) as u8;
+        val /= 128u128;
+        if val == 0 {
+            output.push(next_byte | STOP_BIT);
+            return;
+        } else {
+            output.push(next_byte);
+        }
+    }
+}
+
+/// Deserializes a u128 number
+///
+/// Returns the number and the slice after the vint
+pub fn deserialize_vint_u128(data: &[u8]) -> io::Result<(u128, &[u8])> {
+    let mut result = 0u128;
+    let mut shift = 0u64;
+    for i in 0..19 {
+        let b = data[i];
+        result |= u128::from(b % 128u8) << shift;
+        if b >= STOP_BIT {
+            return Ok((result, &data[i + 1..]));
+        }
+        shift += 7;
+    }
+    Err(io::Error::new(
+        io::ErrorKind::InvalidData,
+        "Failed to deserialize u128 vint",
+    ))
+}
+
+///   Wrapper over a `u128` that serializes as a variable int.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub struct VIntU128(pub u128);
+
+impl BinarySerializable for VIntU128 {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
+        let mut buffer = vec![];
+        serialize_vint_u128(self.0, &mut buffer);
+        writer.write_all(&buffer)
+    }
+
+    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
+        let mut bytes = reader.bytes();
+        let mut result = 0u128;
+        let mut shift = 0u64;
+        loop {
+            match bytes.next() {
+                Some(Ok(b)) => {
+                    result |= u128::from(b % 128u8) << shift;
+                    if b >= STOP_BIT {
+                        return Ok(VIntU128(result));
+                    }
+                    shift += 7;
+                }
+                _ => {
+                    return Err(io::Error::new(
+                        io::ErrorKind::InvalidData,
+                        "Reach end of buffer while reading VInt",
+                    ));
+                }
+            }
+        }
+    }
+}
+
 ///   Wrapper over a `u64` that serializes as a variable int.
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub struct VInt(pub u64);
@@ -88,7 +157,7 @@ fn vint_len(data: &[u8]) -> usize {
 /// If the buffer does not start by a valid
 /// vint payload
 pub fn read_u32_vint(data: &mut &[u8]) -> u32 {
-    let (result, vlen) = read_u32_vint_no_advance(*data);
+    let (result, vlen) = read_u32_vint_no_advance(data);
    *data = &data[vlen..];
    result
 }
@@ -176,6 +245,7 @@ impl BinarySerializable for VInt {
 mod tests {

    use super::{serialize_vint_u32, BinarySerializable, VInt};
+    use crate::vint::{deserialize_vint_u128, serialize_vint_u128, VIntU128};

    fn aux_test_vint(val: u64) {
        let mut v = [14u8; 10];
@@ -217,6 +287,26 @@ mod tests {
        assert_eq!(&buffer[..len_vint], res2, "array wrong for {}", val);
    }

+    fn aux_test_vint_u128(val: u128) {
+        let mut data = vec![];
+        serialize_vint_u128(val, &mut data);
+        let (deser_val, _data) = deserialize_vint_u128(&data).unwrap();
+        assert_eq!(val, deser_val);
+
+        let mut out = vec![];
+        VIntU128(val).serialize(&mut out).unwrap();
+        let deser_val = VIntU128::deserialize(&mut &out[..]).unwrap();
+        assert_eq!(val, deser_val.0);
+    }
+
+    #[test]
+    fn test_vint_u128() {
+        aux_test_vint_u128(0);
+        aux_test_vint_u128(1);
+        aux_test_vint_u128(u128::MAX / 3);
+        aux_test_vint_u128(u128::MAX);
+    }
+
    #[test]
    fn test_vint_u32() {
        aux_test_serialize_vint_u32(0);
--- a/common/src/writer.rs
+++ b/common/src/writer.rs
@@ -55,14 +55,14 @@ impl<W: TerminatingWrite> TerminatingWrite for CountingWriter<W> {
 }

 /// Struct used to prevent from calling
-/// [`terminate_ref`](trait.TerminatingWrite.html#tymethod.terminate_ref) directly
+/// [`terminate_ref`](TerminatingWrite::terminate_ref) directly
 ///
 /// The point is that while the type is public, it cannot be built by anyone
 /// outside of this module.
 pub struct AntiCallToken(());

 /// Trait used to indicate when no more write need to be done on a writer
-pub trait TerminatingWrite: Write + Send {
+pub trait TerminatingWrite: Write + Send + Sync {
    /// Indicate that the writer will no longer be used. Internally call terminate_ref.
    fn terminate(mut self) -> io::Result<()>
    where Self: Sized {
--- a/doc/assets/images/etsy.png
+++ b/doc/assets/images/etsy.png
--- a/doc/src/SUMMARY.md
+++ b/doc/src/SUMMARY.md
@@ -1,7 +1,5 @@
 # Summary

-
-
 [Avant Propos](./avant-propos.md)

 - [Segments](./basis.md)
--- a/doc/src/avant-propos.md
+++ b/doc/src/avant-propos.md
@@ -3,7 +3,7 @@
 > Tantivy is a **search** engine **library** for Rust.

 If you are familiar with Lucene, it's an excellent approximation to consider tantivy as Lucene for rust. tantivy is heavily inspired by Lucene's design and
-they both have the same scope and targetted use cases.
+they both have the same scope and targeted use cases.

 If you are not familiar with Lucene, let's break down our little tagline.

@@ -31,4 +31,4 @@ relevancy, collapsing, highlighting, spatial search.
  index from a different format.

  Tantivy exposes a lot of low level API to do all of these things.
-  
+  
--- a/doc/src/basis.md
+++ b/doc/src/basis.md
@@ -11,7 +11,7 @@ directory shipped with tantivy is the `MmapDirectory`.
 While this design has some downsides, this greatly simplifies the source code of
 tantivy. Caching is also entirely delegated to the OS.

-`tantivy` works entirely (or almost) by directly reading the datastructures as they are layed on disk. As a result, the act of opening an indexing does not involve loading different datastructures from the disk into random access memory : starting a process, opening an index, and performing your first query can typically be done in a matter of milliseconds.
+`tantivy` works entirely (or almost) by directly reading the datastructures as they are laid on disk. As a result, the act of opening an indexing does not involve loading different datastructures from the disk into random access memory : starting a process, opening an index, and performing your first query can typically be done in a matter of milliseconds.

 This is an interesting property for a command line search engine, or for some multi-tenant log search engine : spawning a new process for each new query can be a perfectly sensible solution in some use case.

@@ -22,7 +22,6 @@ Of course this is crucial to reduce IO, and ensure that as much of our index can
 Also, whenever possible its data is accessed sequentially. Of course, this is an amazing property when tantivy needs to access the data from your spinning hard disk, but this is also
 critical for performance, if your data is read from and an `SSD` or even already in your pagecache.

-
 ## Segments, and the log method

 That kind of compact layout comes at one cost: it prevents our datastructures from being dynamic.
@@ -51,13 +50,9 @@ to get tantivy to fit your use case:

 *Example 1* You could for instance use hadoop to build a very large search index in a timely manner, copy all of the resulting segment files in the same directory and edit the `meta.json` to get a functional index.[^2]

-*Example 2* You could also disable your merge policy and enforce daily segments. Removing data after one week can then be done very efficiently by just editing the `meta.json` and deleting the files associated to segment `D-7`.
+*Example 2* You could also disable your merge policy and enforce daily segments. Removing data after one week can then be done very efficiently by just editing the `meta.json` and deleting the files associated with segment `D-7`.

-
-
-
-
-# Merging
+## Merging

 As you index more and more data, your index will accumulate more and more segments.
 Having a lot of small segments is not really optimal. There is a bit of redundancy in having
@@ -66,11 +61,7 @@ all these term dictionary. Also when searching, we will need to do term lookups
 That's where merging or compacting comes into place. Tantivy will continuously consider merge
 opportunities and start merging segments in the background.

-
-# Indexing throughput, number of indexing threads
-
-
-
+## Indexing throughput, number of indexing threads

 [^1]: This may eventually change.

--- a/doc/src/examples.md
+++ b/doc/src/examples.md
@@ -1,3 +1,3 @@
 # Examples

- [Basic search](/examples/basic_search.html)
+- [Basic search](/examples/basic_search.html)
--- a/doc/src/index_sorting.md
+++ b/doc/src/index_sorting.md
@@ -1,11 +1,11 @@

 - [Index Sorting](#index-sorting)
-    + [Why Sorting](#why-sorting)
-        * [Compression](#compression)
-        * [Top-N Optimization](#top-n-optimization)
-        * [Pruning](#pruning)
-        * [Other](#other)
-    + [Usage](#usage)
+  - [Why Sorting](#why-sorting)
+    - [Compression](#compression)
+    - [Top-N Optimization](#top-n-optimization)
+    - [Pruning](#pruning)
+    - [Other](#other)
+  - [Usage](#usage)

 # Index Sorting

@@ -15,32 +15,34 @@ Tantivy allows you to sort the index according to a property.

 Presorting an index has several advantages:

-###### Compression
+### Compression

-When data is sorted it is easier to compress the data. E.g. the numbers sequence [5, 2, 3, 1, 4] would be sorted to [1, 2, 3, 4, 5]. 
+When data is sorted it is easier to compress the data. E.g. the numbers sequence [5, 2, 3, 1, 4] would be sorted to [1, 2, 3, 4, 5].
 If we apply delta encoding this list would be unsorted [5, -3, 1, -2, 3] vs. [1, 1, 1, 1, 1].
-Compression ratio is mainly affected on the fast field of the sorted property, every thing else is likely unaffected. 
-###### Top-N Optimization
+Compression ratio is mainly affected on the fast field of the sorted property, every thing else is likely unaffected.

-When data is presorted by a field and search queries request sorting by the same field, we can leverage the natural order of the documents. 
+### Top-N Optimization
+
+When data is presorted by a field and search queries request sorting by the same field, we can leverage the natural order of the documents.
 E.g. if the data is sorted by timestamp and want the top n newest docs containing a term, we can simply leveraging the order of the docids.

 Note: Tantivy 0.16 does not do this optimization yet.

-###### Pruning
+### Pruning

 Let's say we want all documents and want to apply the filter `>= 2010-08-11`. When the data is sorted, we could make a lookup in the fast field to find the docid range and use this as the filter.

 Note: Tantivy 0.16 does not do this optimization yet.

-###### Other?
+### Other?

 In principle there are many algorithms possible that exploit the monotonically increasing nature. (aggregations maybe?)

 ## Usage
+
 The index sorting can be configured setting [`sort_by_field`](https://github.com/quickwit-oss/tantivy/blob/000d76b11a139a84b16b9b95060a1c93e8b9851c/src/core/index_meta.rs#L238) on `IndexSettings` and passing it to a `IndexBuilder`. As of Tantivy 0.16 only fast fields are allowed to be used.

-```
+```rust
 let settings = IndexSettings {
    sort_by_field: Some(IndexSortByField {
        field: "intval".to_string(),
@@ -58,4 +60,3 @@ let index = index_builder.create_in_ram().unwrap();
 Sorting an index is applied in the serialization step. In general there are two serialization steps: [Finishing a single segment](https://github.com/quickwit-oss/tantivy/blob/000d76b11a139a84b16b9b95060a1c93e8b9851c/src/indexer/segment_writer.rs#L338) and [merging multiple segments](https://github.com/quickwit-oss/tantivy/blob/000d76b11a139a84b16b9b95060a1c93e8b9851c/src/indexer/merger.rs#L1073).

 In both cases we generate a docid mapping reflecting the sort. This mapping is used when serializing the different components (doc store, fastfields, posting list, normfield, facets).
-
--- a/doc/src/json.md
+++ b/doc/src/json.md
@@ -21,16 +21,17 @@ For instance,  if user is a json field, the following document:
 ```

 emits the following tokens:
-  ("name", Text, "Paul")
-  ("name", Text, "Masurel")
-  ("address.city", Text, "Tokyo")
-  ("address.country", Text, "Japan")
-  ("created_at", Date, 15420648505)

+- ("name", Text, "Paul")
+- ("name", Text, "Masurel")
+- ("address.city", Text, "Tokyo")
+- ("address.country", Text, "Japan")
+- ("created_at", Date, 15420648505)

-# Bytes-encoding and lexicographical sort.
+## Bytes-encoding and lexicographical sort

 Like any other terms, these triplets are encoded into a binary format as follows.
+
 - `json_path`: the json path is a sequence of "segments". In the example above, `address.city`
 is just a debug representation of the json path `["address", "city"]`.
 Its representation is done by separating segments by a unicode char `\x01`, and ending the path by `\x00`.
@@ -41,16 +42,16 @@ This representation is designed to align the natural sort of Terms with the lexi
 of their binary representation (Tantivy's dictionary (whether fst or sstable) is sorted and does prefix encoding).

 In the example above, the terms will be sorted as
-  ("address.city", Text, "Tokyo")
-  ("address.country", Text, "Japan")
-  ("name", Text, "Masurel")
-  ("name", Text, "Paul")
-  ("created_at", Date, 15420648505)
+
+- ("address.city", Text, "Tokyo")
+- ("address.country", Text, "Japan")
+- ("name", Text, "Masurel")
+- ("name", Text, "Paul")
+- ("created_at", Date, 15420648505)

 As seen in "pitfalls", we may end up having to search for a value for a same path in several different fields. Putting the field code after the path makes it maximizes compression opportunities but also increases the chances for the two terms to end up in the actual same term dictionary block.

-
-# Pitfalls, limitation and corner cases.
+## Pitfalls, limitation and corner cases

 Json gives very little information about the type of the literals it stores.
 All numeric types end up mapped as a "Number" and there are no types for dates.
@@ -70,19 +71,21 @@ For instance, we do not even know if the type is a number or string based.

 So the query

-```
+```rust
 my_path.my_segment:233
 ```

 Will be interpreted as
-`(my_path.my_segment, String, 233) or (my_path.my_segment, u64, 233)`
+
+```rust
+(my_path.my_segment, String, 233) or (my_path.my_segment, u64, 233)
+```

 Likewise, we need to emit two tokens if the query contains an rfc3999 date.
 Indeed the date could have been actually a single token inside the text of a document at ingestion time. Generally speaking, we will always at least emit a string token in query parsing, and sometimes more.

 If one more json field is defined, things get even more complicated.

-
 ## Default json field

 If the schema contains a text field called "text" and a json field that is set as a default field:
@@ -96,11 +99,11 @@ This is a product decision.
 The user can still target the JSON field by specifying its name explicitly:
 `json_dynamic.text:hello`.

-## Range queries are not supported.
+## Range queries are not supported

 Json field do not support range queries.

-## Arrays do not work like nested object.
+## Arrays do not work like nested object

 If json object contains an array, a search query might return more documents
 than what might be expected.
@@ -120,9 +123,8 @@ Let's take an example.
 Despite the array structure, a document in tantivy is a bag of terms.
 The query:

-```
+```rust
 cart.product_type:sneakers AND cart.attributes.color:red
 ```

 Actually match the document above.
-
--- a/examples/aggregation.rs
+++ b/examples/aggregation.rs
@@ -118,7 +118,7 @@ fn main() -> tantivy::Result<()> {
    .into_iter()
    .collect();

-    let collector = AggregationCollector::from_aggs(agg_req_1, None);
+    let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

    let searcher = reader.searcher();
    let agg_res: AggregationResults = searcher.search(&term_query, &collector).unwrap();
--- a/examples/custom_collector.rs
+++ b/examples/custom_collector.rs
@@ -7,10 +7,12 @@
 // Of course, you can have a look at the tantivy's built-in collectors
 // such as the `CountCollector` for more examples.

+use std::sync::Arc;
+
+use fastfield_codecs::Column;
 // ---
 // Importing tantivy...
 use tantivy::collector::{Collector, SegmentCollector};
-use tantivy::fastfield::{DynamicFastFieldReader, FastFieldReader};
 use tantivy::query::QueryParser;
 use tantivy::schema::{Field, Schema, FAST, INDEXED, TEXT};
 use tantivy::{doc, Index, Score, SegmentReader};
@@ -95,7 +97,7 @@ impl Collector for StatsCollector {
 }

 struct StatsSegmentCollector {
-    fast_field_reader: DynamicFastFieldReader<u64>,
+    fast_field_reader: Arc<dyn Column<u64>>,
    stats: Stats,
 }

@@ -103,7 +105,7 @@ impl SegmentCollector for StatsSegmentCollector {
    type Fruit = Option<Stats>;

    fn collect(&mut self, doc: u32, _score: Score) {
-        let value = self.fast_field_reader.get(doc) as f64;
+        let value = self.fast_field_reader.get_val(doc) as f64;
        self.stats.count += 1;
        self.stats.sum += value;
        self.stats.squared_sum += value * value;
--- a/examples/custom_tokenizer.rs
+++ b/examples/custom_tokenizer.rs
@@ -36,8 +36,7 @@ fn main() -> tantivy::Result<()> {
    // need to be able to be able to retrieve it
    // for our application.
    //
-    // We can make our index lighter and
-    // by omitting `STORED` flag.
+    // We can make our index lighter by omitting the `STORED` flag.
    let body = schema_builder.add_text_field("body", TEXT);

    let schema = schema_builder.build();
@@ -50,7 +49,7 @@ fn main() -> tantivy::Result<()> {
    // for your unit tests... Or this example.
    let index = Index::create_in_ram(schema.clone());

-    // here we are registering our custome tokenizer
+    // here we are registering our custom tokenizer
    // this will store tokens of 3 characters each
    index
        .tokenizers()
--- a/examples/deleting_updating_documents.rs
+++ b/examples/deleting_updating_documents.rs
@@ -113,7 +113,7 @@ fn main() -> tantivy::Result<()> {
    // on its id.
    //
    // Note that `tantivy` does nothing to enforce the idea that
-    // there is only one document associated to this id.
+    // there is only one document associated with this id.
    //
    // Also you might have noticed that we apply the delete before
    // having committed. This does not matter really...
--- a/examples/iterating_docs_and_positions.rs
+++ b/examples/iterating_docs_and_positions.rs
@@ -44,7 +44,7 @@ fn main() -> tantivy::Result<()> {
        // A segment contains different data structure.
        // Inverted index stands for the combination of
        // - the term dictionary
-        // - the inverted lists associated to each terms and their positions
+        // - the inverted lists associated with each terms and their positions
        let inverted_index = segment_reader.inverted_index(title)?;

        // A `Term` is a text token associated with a field.
@@ -105,7 +105,7 @@ fn main() -> tantivy::Result<()> {
        // A segment contains different data structure.
        // Inverted index stands for the combination of
        // - the term dictionary
-        // - the inverted lists associated to each terms and their positions
+        // - the inverted lists associated with each terms and their positions
        let inverted_index = segment_reader.inverted_index(title)?;

        // This segment posting object is like a cursor over the documents matching the term.
--- a/examples/warmer.rs
+++ b/examples/warmer.rs
@@ -3,7 +3,6 @@ use std::collections::{HashMap, HashSet};
 use std::sync::{Arc, RwLock, Weak};

 use tantivy::collector::TopDocs;
-use tantivy::fastfield::FastFieldReader;
 use tantivy::query::QueryParser;
 use tantivy::schema::{Field, Schema, FAST, TEXT};
 use tantivy::{
@@ -52,7 +51,7 @@ impl Warmer for DynamicPriceColumn {
            let product_id_reader = segment.fast_fields().u64(self.field)?;
            let product_ids: Vec<ProductId> = segment
                .doc_ids_alive()
-                .map(|doc| product_id_reader.get(doc))
+                .map(|doc| product_id_reader.get_val(doc))
                .collect();
            let mut prices_it = self.price_fetcher.fetch_prices(&product_ids).into_iter();
            let mut price_vals: Vec<Price> = Vec::new();
--- a/fastfield_codecs/Cargo.toml
+++ b/fastfield_codecs/Cargo.toml
@@ -1,24 +1,35 @@
 [package]
 name = "fastfield_codecs"
-version = "0.2.0"
+version = "0.3.0"
 authors = ["Pascal Seitz <pascal@quickwit.io>"]
 license = "MIT"
 edition = "2021"
 description = "Fast field codecs used by tantivy"
+documentation = "https://docs.rs/fastfield_codecs/"
+homepage = "https://github.com/quickwit-oss/tantivy"
+repository = "https://github.com/quickwit-oss/tantivy"

 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]
-common = { version = "0.3", path = "../common/", package = "tantivy-common" }
-tantivy-bitpacker = { version="0.2", path = "../bitpacker/" }
-prettytable-rs = {version="0.8.0", optional= true}
+common = { version = "0.5", path = "../common/", package = "tantivy-common" }
+tantivy-bitpacker = { version= "0.3", path = "../bitpacker/" }
+ownedbytes = { version = "0.5", path = "../ownedbytes" }
+prettytable-rs = {version="0.9.0", optional= true}
 rand = {version="0.8.3", optional= true}
+fastdivide = "0.4"
+log = "0.4"
+itertools = { version = "0.10.3" }
+measure_time = { version="0.8.2", optional=true}
+ordered-float = "3.4"

 [dev-dependencies]
 more-asserts = "0.3.0"
+proptest = "1.0.0"
 rand = "0.8.3"

 [features]
-bin = ["prettytable-rs", "rand"]
+bin = ["prettytable-rs", "rand", "measure_time"]
 default = ["bin"]
+unstable = []

--- a/fastfield_codecs/benches/bench.rs
+++ b/fastfield_codecs/benches/bench.rs
@@ -4,105 +4,243 @@ extern crate test;

 #[cfg(test)]
 mod tests {
-    use fastfield_codecs::bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer};
-    use fastfield_codecs::linearinterpol::{
-        LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer,
-    };
-    use fastfield_codecs::multilinearinterpol::{
-        MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer,
-    };
-    use fastfield_codecs::*;
+    use std::iter;
+    use std::sync::Arc;

-    fn get_data() -> Vec<u64> {
-        let mut data: Vec<_> = (100..55000_u64)
-            .map(|num| num + rand::random::<u8>() as u64)
+    use fastfield_codecs::*;
+    use ownedbytes::OwnedBytes;
+    use rand::prelude::*;
+    use test::Bencher;
+
+    use super::*;
+
+    // Warning: this generates the same permutation at each call
+    fn generate_permutation() -> Vec<u64> {
+        let mut permutation: Vec<u64> = (0u64..100_000u64).collect();
+        permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
+        permutation
+    }
+
+    fn generate_random() -> Vec<u64> {
+        let mut permutation: Vec<u64> = (0u64..100_000u64)
+            .map(|el| el + random::<u16>() as u64)
            .collect();
-        data.push(99_000);
-        data.insert(1000, 2000);
-        data.insert(2000, 100);
-        data.insert(3000, 4100);
-        data.insert(4000, 100);
-        data.insert(5000, 800);
+        permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
+        permutation
+    }
+
+    // Warning: this generates the same permutation at each call
+    fn generate_permutation_gcd() -> Vec<u64> {
+        let mut permutation: Vec<u64> = (1u64..100_000u64).map(|el| el * 1000).collect();
+        permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
+        permutation
+    }
+
+    pub fn serialize_and_load<T: MonotonicallyMappableToU64 + Ord + Default>(
+        column: &[T],
+    ) -> Arc<dyn Column<T>> {
+        let mut buffer = Vec::new();
+        serialize(VecColumn::from(&column), &mut buffer, &ALL_CODEC_TYPES).unwrap();
+        open(OwnedBytes::new(buffer)).unwrap()
+    }
+
+    #[bench]
+    fn bench_intfastfield_jumpy_veclookup(b: &mut Bencher) {
+        let permutation = generate_permutation();
+        let n = permutation.len();
+        b.iter(|| {
+            let mut a = 0u64;
+            for _ in 0..n {
+                a = permutation[a as usize];
+            }
+            a
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_jumpy_fflookup(b: &mut Bencher) {
+        let permutation = generate_permutation();
+        let n = permutation.len();
+        let column: Arc<dyn Column<u64>> = serialize_and_load(&permutation);
+        b.iter(|| {
+            let mut a = 0u64;
+            for _ in 0..n {
+                a = column.get_val(a as u32);
+            }
+            a
+        });
+    }
+
+    fn get_exp_data() -> Vec<u64> {
+        let mut data = vec![];
+        for i in 0..100 {
+            let num = i * i;
+            data.extend(iter::repeat(i as u64).take(num));
+        }
+        data.shuffle(&mut StdRng::from_seed([1u8; 32]));
+
+        // lengt = 328350
        data
    }

-    fn value_iter() -> impl Iterator<Item = u64> {
-        0..20_000
+    fn get_data_50percent_item() -> (u128, u128, Vec<u128>) {
+        let mut permutation = get_exp_data();
+        let major_item = 20;
+        let minor_item = 10;
+        permutation.extend(iter::repeat(major_item).take(permutation.len()));
+        permutation.shuffle(&mut StdRng::from_seed([1u8; 32]));
+        let permutation = permutation.iter().map(|el| *el as u128).collect::<Vec<_>>();
+        (major_item as u128, minor_item as u128, permutation)
    }
-    fn bench_get<S: FastFieldCodecSerializer, R: FastFieldCodecReader>(
-        b: &mut Bencher,
-        data: &[u64],
-    ) {
-        let mut bytes = vec![];
-        S::serialize(
-            &mut bytes,
-            &data,
-            stats_from_vec(data),
-            data.iter().cloned(),
-            data.iter().cloned(),
-        )
-        .unwrap();
-        let reader = R::open_from_bytes(&bytes).unwrap();
-        b.iter(|| {
-            for pos in value_iter() {
-                reader.get_u64(pos as u64, &bytes);
-            }
-        });
+    fn get_u128_column_random() -> Arc<dyn Column<u128>> {
+        let permutation = generate_random();
+        let permutation = permutation.iter().map(|el| *el as u128).collect::<Vec<_>>();
+        get_u128_column_from_data(&permutation)
    }
-    fn bench_create<S: FastFieldCodecSerializer>(b: &mut Bencher, data: &[u64]) {
-        let mut bytes = vec![];
+
+    fn get_u128_column_from_data(data: &[u128]) -> Arc<dyn Column<u128>> {
+        let mut out = vec![];
+        let iter_gen = || data.iter().cloned();
+        serialize_u128(iter_gen, data.len() as u32, &mut out).unwrap();
+        let out = OwnedBytes::new(out);
+        open_u128::<u128>(out).unwrap()
+    }
+
+    #[bench]
+    fn bench_intfastfield_getrange_u128_50percent_hit(b: &mut Bencher) {
+        let (major_item, _minor_item, data) = get_data_50percent_item();
+        let column = get_u128_column_from_data(&data);
+
        b.iter(|| {
-            S::serialize(
-                &mut bytes,
-                &data,
-                stats_from_vec(data),
-                data.iter().cloned(),
-                data.iter().cloned(),
-            )
-            .unwrap();
+            let mut positions = Vec::new();
+            column.get_docids_for_value_range(
+                major_item..=major_item,
+                0..data.len() as u32,
+                &mut positions,
+            );
+            positions
        });
    }

-    use test::Bencher;
    #[bench]
-    fn bench_fastfield_bitpack_create(b: &mut Bencher) {
-        let data: Vec<_> = get_data();
-        bench_create::<BitpackedFastFieldSerializer>(b, &data);
+    fn bench_intfastfield_getrange_u128_single_hit(b: &mut Bencher) {
+        let (_major_item, minor_item, data) = get_data_50percent_item();
+        let column = get_u128_column_from_data(&data);
+
+        b.iter(|| {
+            let mut positions = Vec::new();
+            column.get_docids_for_value_range(
+                minor_item..=minor_item,
+                0..data.len() as u32,
+                &mut positions,
+            );
+            positions
+        });
    }
+
    #[bench]
-    fn bench_fastfield_linearinterpol_create(b: &mut Bencher) {
-        let data: Vec<_> = get_data();
-        bench_create::<LinearInterpolFastFieldSerializer>(b, &data);
+    fn bench_intfastfield_getrange_u128_hit_all(b: &mut Bencher) {
+        let (_major_item, _minor_item, data) = get_data_50percent_item();
+        let column = get_u128_column_from_data(&data);
+
+        b.iter(|| {
+            let mut positions = Vec::new();
+            column.get_docids_for_value_range(0..=u128::MAX, 0..data.len() as u32, &mut positions);
+            positions
+        });
    }
+
    #[bench]
-    fn bench_fastfield_multilinearinterpol_create(b: &mut Bencher) {
-        let data: Vec<_> = get_data();
-        bench_create::<MultiLinearInterpolFastFieldSerializer>(b, &data);
+    fn bench_intfastfield_scan_all_fflookup_u128(b: &mut Bencher) {
+        let column = get_u128_column_random();
+
+        b.iter(|| {
+            let mut a = 0u128;
+            for i in 0u64..column.num_vals() as u64 {
+                a += column.get_val(i as u32);
+            }
+            a
+        });
    }
+
    #[bench]
-    fn bench_fastfield_bitpack_get(b: &mut Bencher) {
-        let data: Vec<_> = get_data();
-        bench_get::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>(b, &data);
+    fn bench_intfastfield_jumpy_stride5_u128(b: &mut Bencher) {
+        let column = get_u128_column_random();
+
+        b.iter(|| {
+            let n = column.num_vals();
+            let mut a = 0u128;
+            for i in (0..n / 5).map(|val| val * 5) {
+                a += column.get_val(i);
+            }
+            a
+        });
    }
+
    #[bench]
-    fn bench_fastfield_linearinterpol_get(b: &mut Bencher) {
-        let data: Vec<_> = get_data();
-        bench_get::<LinearInterpolFastFieldSerializer, LinearInterpolFastFieldReader>(b, &data);
+    fn bench_intfastfield_stride7_vec(b: &mut Bencher) {
+        let permutation = generate_permutation();
+        let n = permutation.len();
+        b.iter(|| {
+            let mut a = 0u64;
+            for i in (0..n / 7).map(|val| val * 7) {
+                a += permutation[i as usize];
+            }
+            a
+        });
    }
+
    #[bench]
-    fn bench_fastfield_multilinearinterpol_get(b: &mut Bencher) {
-        let data: Vec<_> = get_data();
-        bench_get::<MultiLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>(
-            b, &data,
-        );
+    fn bench_intfastfield_stride7_fflookup(b: &mut Bencher) {
+        let permutation = generate_permutation();
+        let n = permutation.len();
+        let column: Arc<dyn Column<u64>> = serialize_and_load(&permutation);
+        b.iter(|| {
+            let mut a = 0;
+            for i in (0..n / 7).map(|val| val * 7) {
+                a += column.get_val(i as u32);
+            }
+            a
+        });
    }
-    pub fn stats_from_vec(data: &[u64]) -> FastFieldStats {
-        let min_value = data.iter().cloned().min().unwrap_or(0);
-        let max_value = data.iter().cloned().max().unwrap_or(0);
-        FastFieldStats {
-            min_value,
-            max_value,
-            num_vals: data.len() as u64,
-        }
+
+    #[bench]
+    fn bench_intfastfield_scan_all_fflookup(b: &mut Bencher) {
+        let permutation = generate_permutation();
+        let n = permutation.len();
+        let column: Arc<dyn Column<u64>> = serialize_and_load(&permutation);
+        b.iter(|| {
+            let mut a = 0u64;
+            for i in 0u32..n as u32 {
+                a += column.get_val(i);
+            }
+            a
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_scan_all_fflookup_gcd(b: &mut Bencher) {
+        let permutation = generate_permutation_gcd();
+        let n = permutation.len();
+        let column: Arc<dyn Column<u64>> = serialize_and_load(&permutation);
+        b.iter(|| {
+            let mut a = 0u64;
+            for i in 0..n {
+                a += column.get_val(i as u32);
+            }
+            a
+        });
+    }
+
+    #[bench]
+    fn bench_intfastfield_scan_all_vec(b: &mut Bencher) {
+        let permutation = generate_permutation();
+        b.iter(|| {
+            let mut a = 0u64;
+            for i in 0..permutation.len() {
+                a += permutation[i as usize] as u64;
+            }
+            a
+        });
    }
 }
--- a/fastfield_codecs/src/bitpacked.rs
+++ b/fastfield_codecs/src/bitpacked.rs
@@ -1,155 +1,99 @@
 use std::io::{self, Write};

-use common::BinarySerializable;
+use ownedbytes::OwnedBytes;
 use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};

-use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats};
+use crate::serialize::NormalizedHeader;
+use crate::{Column, FastFieldCodec, FastFieldCodecType};

 /// Depending on the field type, a different
 /// fast field is required.
 #[derive(Clone)]
-pub struct BitpackedFastFieldReader {
+pub struct BitpackedReader {
+    data: OwnedBytes,
    bit_unpacker: BitUnpacker,
-    pub min_value_u64: u64,
-    pub max_value_u64: u64,
+    normalized_header: NormalizedHeader,
 }

-impl FastFieldCodecReader for BitpackedFastFieldReader {
-    /// Opens a fast field given a file.
-    fn open_from_bytes(bytes: &[u8]) -> io::Result<Self> {
-        let (_data, mut footer) = bytes.split_at(bytes.len() - 16);
-        let min_value = u64::deserialize(&mut footer)?;
-        let amplitude = u64::deserialize(&mut footer)?;
-        let max_value = min_value + amplitude;
-        let num_bits = compute_num_bits(amplitude);
-        let bit_unpacker = BitUnpacker::new(num_bits);
-        Ok(BitpackedFastFieldReader {
-            min_value_u64: min_value,
-            max_value_u64: max_value,
-            bit_unpacker,
-        })
-    }
+impl Column for BitpackedReader {
    #[inline]
-    fn get_u64(&self, doc: u64, data: &[u8]) -> u64 {
-        self.min_value_u64 + self.bit_unpacker.get(doc, data)
+    fn get_val(&self, doc: u32) -> u64 {
+        self.bit_unpacker.get(doc, &self.data)
    }
    #[inline]
    fn min_value(&self) -> u64 {
-        self.min_value_u64
+        // The BitpackedReader assumes a normalized vector.
+        0
    }
    #[inline]
    fn max_value(&self) -> u64 {
-        self.max_value_u64
+        self.normalized_header.max_value
+    }
+    #[inline]
+    fn num_vals(&self) -> u32 {
+        self.normalized_header.num_vals
    }
 }
-pub struct BitpackedFastFieldSerializerLegacy<'a, W: 'a + Write> {
-    bit_packer: BitPacker,
-    write: &'a mut W,
-    min_value: u64,
-    amplitude: u64,
-    num_bits: u8,
-}

-impl<'a, W: Write> BitpackedFastFieldSerializerLegacy<'a, W> {
-    /// Creates a new fast field serializer.
-    ///
-    /// The serializer in fact encode the values by bitpacking
-    /// `(val - min_value)`.
-    ///
-    /// It requires a `min_value` and a `max_value` to compute
-    /// compute the minimum number of bits required to encode
-    /// values.
-    pub fn open(
-        write: &'a mut W,
-        min_value: u64,
-        max_value: u64,
-    ) -> io::Result<BitpackedFastFieldSerializerLegacy<'a, W>> {
-        assert!(min_value <= max_value);
-        let amplitude = max_value - min_value;
-        let num_bits = compute_num_bits(amplitude);
-        let bit_packer = BitPacker::new();
-        Ok(BitpackedFastFieldSerializerLegacy {
-            bit_packer,
-            write,
-            min_value,
-            amplitude,
-            num_bits,
+pub struct BitpackedCodec;
+
+impl FastFieldCodec for BitpackedCodec {
+    /// The CODEC_TYPE is an enum value used for serialization.
+    const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::Bitpacked;
+
+    type Reader = BitpackedReader;
+
+    /// Opens a fast field given a file.
+    fn open_from_bytes(
+        data: OwnedBytes,
+        normalized_header: NormalizedHeader,
+    ) -> io::Result<Self::Reader> {
+        let num_bits = compute_num_bits(normalized_header.max_value);
+        let bit_unpacker = BitUnpacker::new(num_bits);
+        Ok(BitpackedReader {
+            data,
+            bit_unpacker,
+            normalized_header,
        })
    }
-    /// Pushes a new value to the currently open u64 fast field.
-    #[inline]
-    pub fn add_val(&mut self, val: u64) -> io::Result<()> {
-        let val_to_write: u64 = val - self.min_value;
-        self.bit_packer
-            .write(val_to_write, self.num_bits, &mut self.write)?;
-        Ok(())
-    }
-    pub fn close_field(mut self) -> io::Result<()> {
-        self.bit_packer.close(&mut self.write)?;
-        self.min_value.serialize(&mut self.write)?;
-        self.amplitude.serialize(&mut self.write)?;
-        Ok(())
-    }
-}

-pub struct BitpackedFastFieldSerializer {}
-
-impl FastFieldCodecSerializer for BitpackedFastFieldSerializer {
-    const NAME: &'static str = "Bitpacked";
-    const ID: u8 = 1;
    /// Serializes data with the BitpackedFastFieldSerializer.
    ///
-    /// The serializer in fact encode the values by bitpacking
-    /// `(val - min_value)`.
+    /// The bitpacker assumes that the column has been normalized.
+    /// i.e. It has already been shifted by its minimum value, so that its
+    /// current minimum value is 0.
    ///
-    /// It requires a `min_value` and a `max_value` to compute
-    /// compute the minimum number of bits required to encode
-    /// values.
-    fn serialize(
-        write: &mut impl Write,
-        _fastfield_accessor: &dyn FastFieldDataAccess,
-        stats: FastFieldStats,
-        data_iter: impl Iterator<Item = u64>,
-        _data_iter1: impl Iterator<Item = u64>,
-    ) -> io::Result<()> {
-        let mut serializer =
-            BitpackedFastFieldSerializerLegacy::open(write, stats.min_value, stats.max_value)?;
-
-        for val in data_iter {
-            serializer.add_val(val)?;
+    /// Ideally, we made a shift upstream on the column so that `col.min_value() == 0`.
+    fn serialize(column: &dyn Column, write: &mut impl Write) -> io::Result<()> {
+        assert_eq!(column.min_value(), 0u64);
+        let num_bits = compute_num_bits(column.max_value());
+        let mut bit_packer = BitPacker::new();
+        for val in column.iter() {
+            bit_packer.write(val, num_bits, write)?;
        }
-        serializer.close_field()?;
-
+        bit_packer.close(write)?;
        Ok(())
    }
-    fn is_applicable(
-        _fastfield_accessor: &impl FastFieldDataAccess,
-        _stats: FastFieldStats,
-    ) -> bool {
-        true
-    }
-    fn estimate(_fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> f32 {
-        let amplitude = stats.max_value - stats.min_value;
-        let num_bits = compute_num_bits(amplitude);
+
+    fn estimate(column: &dyn Column) -> Option<f32> {
+        let num_bits = compute_num_bits(column.max_value());
        let num_bits_uncompressed = 64;
-        num_bits as f32 / num_bits_uncompressed as f32
+        Some(num_bits as f32 / num_bits_uncompressed as f32)
    }
 }

 #[cfg(test)]
 mod tests {
    use super::*;
-    use crate::tests::get_codec_test_data_sets;
+    use crate::tests::get_codec_test_datasets;

    fn create_and_validate(data: &[u64], name: &str) {
-        crate::tests::create_and_validate::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>(
-            data, name,
-        );
+        crate::tests::create_and_validate::<BitpackedCodec>(data, name);
    }

    #[test]
    fn test_with_codec_data_sets() {
-        let data_sets = get_codec_test_data_sets();
+        let data_sets = get_codec_test_datasets();
        for (mut data, name) in data_sets {
            create_and_validate(&data, name);
            data.reverse();
--- a/fastfield_codecs/src/blockwise_linear.rs
+++ b/fastfield_codecs/src/blockwise_linear.rs
@@ -0,0 +1,186 @@
+use std::sync::Arc;
+use std::{io, iter};
+
+use common::{BinarySerializable, CountingWriter, DeserializeFrom};
+use ownedbytes::OwnedBytes;
+use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
+
+use crate::line::Line;
+use crate::serialize::NormalizedHeader;
+use crate::{Column, FastFieldCodec, FastFieldCodecType, VecColumn};
+
+const CHUNK_SIZE: usize = 512;
+
+#[derive(Debug, Default)]
+struct Block {
+    line: Line,
+    bit_unpacker: BitUnpacker,
+    data_start_offset: usize,
+}
+
+impl BinarySerializable for Block {
+    fn serialize<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
+        self.line.serialize(writer)?;
+        self.bit_unpacker.bit_width().serialize(writer)?;
+        Ok(())
+    }
+
+    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
+        let line = Line::deserialize(reader)?;
+        let bit_width = u8::deserialize(reader)?;
+        Ok(Block {
+            line,
+            bit_unpacker: BitUnpacker::new(bit_width),
+            data_start_offset: 0,
+        })
+    }
+}
+
+fn compute_num_blocks(num_vals: u32) -> usize {
+    (num_vals as usize + CHUNK_SIZE - 1) / CHUNK_SIZE
+}
+
+pub struct BlockwiseLinearCodec;
+
+impl FastFieldCodec for BlockwiseLinearCodec {
+    const CODEC_TYPE: crate::FastFieldCodecType = FastFieldCodecType::BlockwiseLinear;
+    type Reader = BlockwiseLinearReader;
+
+    fn open_from_bytes(
+        bytes: ownedbytes::OwnedBytes,
+        normalized_header: NormalizedHeader,
+    ) -> io::Result<Self::Reader> {
+        let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?;
+        let footer_offset = bytes.len() - 4 - footer_len as usize;
+        let (data, mut footer) = bytes.split(footer_offset);
+        let num_blocks = compute_num_blocks(normalized_header.num_vals);
+        let mut blocks: Vec<Block> = iter::repeat_with(|| Block::deserialize(&mut footer))
+            .take(num_blocks)
+            .collect::<io::Result<_>>()?;
+
+        let mut start_offset = 0;
+        for block in &mut blocks {
+            block.data_start_offset = start_offset;
+            start_offset += (block.bit_unpacker.bit_width() as usize) * CHUNK_SIZE / 8;
+        }
+        Ok(BlockwiseLinearReader {
+            blocks: Arc::new(blocks),
+            data,
+            normalized_header,
+        })
+    }
+
+    // Estimate first_chunk and extrapolate
+    fn estimate(column: &dyn crate::Column) -> Option<f32> {
+        if column.num_vals() < 10 * CHUNK_SIZE as u32 {
+            return None;
+        }
+        let mut first_chunk: Vec<u64> = column.iter().take(CHUNK_SIZE).collect();
+        let line = Line::train(&VecColumn::from(&first_chunk));
+        for (i, buffer_val) in first_chunk.iter_mut().enumerate() {
+            let interpolated_val = line.eval(i as u32);
+            *buffer_val = buffer_val.wrapping_sub(interpolated_val);
+        }
+        let estimated_bit_width = first_chunk
+            .iter()
+            .map(|el| ((el + 1) as f32 * 3.0) as u64)
+            .map(compute_num_bits)
+            .max()
+            .unwrap();
+
+        let metadata_per_block = {
+            let mut out = vec![];
+            Block::default().serialize(&mut out).unwrap();
+            out.len()
+        };
+        let num_bits = estimated_bit_width as u64 * column.num_vals() as u64
+            // function metadata per block
+            + metadata_per_block as u64 * (column.num_vals() as u64 / CHUNK_SIZE as u64);
+        let num_bits_uncompressed = 64 * column.num_vals();
+        Some(num_bits as f32 / num_bits_uncompressed as f32)
+    }
+
+    fn serialize(column: &dyn Column, wrt: &mut impl io::Write) -> io::Result<()> {
+        // The BitpackedReader assumes a normalized vector.
+        assert_eq!(column.min_value(), 0);
+        let mut buffer = Vec::with_capacity(CHUNK_SIZE);
+        let num_vals = column.num_vals();
+
+        let num_blocks = compute_num_blocks(num_vals);
+        let mut blocks = Vec::with_capacity(num_blocks);
+
+        let mut vals = column.iter();
+
+        let mut bit_packer = BitPacker::new();
+
+        for _ in 0..num_blocks {
+            buffer.clear();
+            buffer.extend((&mut vals).take(CHUNK_SIZE));
+            let line = Line::train(&VecColumn::from(&buffer));
+
+            assert!(!buffer.is_empty());
+
+            for (i, buffer_val) in buffer.iter_mut().enumerate() {
+                let interpolated_val = line.eval(i as u32);
+                *buffer_val = buffer_val.wrapping_sub(interpolated_val);
+            }
+            let bit_width = buffer.iter().copied().map(compute_num_bits).max().unwrap();
+
+            for &buffer_val in &buffer {
+                bit_packer.write(buffer_val, bit_width, wrt)?;
+            }
+
+            blocks.push(Block {
+                line,
+                bit_unpacker: BitUnpacker::new(bit_width),
+                data_start_offset: 0,
+            });
+        }
+
+        bit_packer.close(wrt)?;
+
+        assert_eq!(blocks.len(), compute_num_blocks(num_vals));
+
+        let mut counting_wrt = CountingWriter::wrap(wrt);
+        for block in &blocks {
+            block.serialize(&mut counting_wrt)?;
+        }
+        let footer_len = counting_wrt.written_bytes();
+        (footer_len as u32).serialize(&mut counting_wrt)?;
+
+        Ok(())
+    }
+}
+
+#[derive(Clone)]
+pub struct BlockwiseLinearReader {
+    blocks: Arc<Vec<Block>>,
+    normalized_header: NormalizedHeader,
+    data: OwnedBytes,
+}
+
+impl Column for BlockwiseLinearReader {
+    #[inline(always)]
+    fn get_val(&self, idx: u32) -> u64 {
+        let block_id = (idx / CHUNK_SIZE as u32) as usize;
+        let idx_within_block = idx % (CHUNK_SIZE as u32);
+        let block = &self.blocks[block_id];
+        let interpoled_val: u64 = block.line.eval(idx_within_block);
+        let block_bytes = &self.data[block.data_start_offset..];
+        let bitpacked_diff = block.bit_unpacker.get(idx_within_block, block_bytes);
+        interpoled_val.wrapping_add(bitpacked_diff)
+    }
+
+    fn min_value(&self) -> u64 {
+        // The BlockwiseLinearReader assumes a normalized vector.
+        0u64
+    }
+
+    fn max_value(&self) -> u64 {
+        self.normalized_header.max_value
+    }
+
+    fn num_vals(&self) -> u32 {
+        self.normalized_header.num_vals
+    }
+}
--- a/fastfield_codecs/src/column.rs
+++ b/fastfield_codecs/src/column.rs
@@ -0,0 +1,348 @@
+use std::marker::PhantomData;
+use std::ops::{Range, RangeInclusive};
+
+use tantivy_bitpacker::minmax;
+
+use crate::monotonic_mapping::StrictlyMonotonicFn;
+
+/// `Column` provides columnar access on a field.
+pub trait Column<T: PartialOrd = u64>: Send + Sync {
+    /// Return the value associated with the given idx.
+    ///
+    /// This accessor should return as fast as possible.
+    ///
+    /// # Panics
+    ///
+    /// May panic if `idx` is greater than the column length.
+    fn get_val(&self, idx: u32) -> T;
+
+    /// Fills an output buffer with the fast field values
+    /// associated with the `DocId` going from
+    /// `start` to `start + output.len()`.
+    ///
+    /// # Panics
+    ///
+    /// Must panic if `start + output.len()` is greater than
+    /// the segment's `maxdoc`.
+    #[inline]
+    fn get_range(&self, start: u64, output: &mut [T]) {
+        for (out, idx) in output.iter_mut().zip(start..) {
+            *out = self.get_val(idx as u32);
+        }
+    }
+
+    /// Get the positions of values which are in the provided value range.
+    ///
+    /// Note that position == docid for single value fast fields
+    #[inline]
+    fn get_docids_for_value_range(
+        &self,
+        value_range: RangeInclusive<T>,
+        doc_id_range: Range<u32>,
+        positions: &mut Vec<u32>,
+    ) {
+        let doc_id_range = doc_id_range.start..doc_id_range.end.min(self.num_vals());
+
+        for idx in doc_id_range.start..doc_id_range.end {
+            let val = self.get_val(idx);
+            if value_range.contains(&val) {
+                positions.push(idx);
+            }
+        }
+    }
+
+    /// Returns the minimum value for this fast field.
+    ///
+    /// This min_value may not be exact.
+    /// For instance, the min value does not take in account of possible
+    /// deleted document. All values are however guaranteed to be higher than
+    /// `.min_value()`.
+    fn min_value(&self) -> T;
+
+    /// Returns the maximum value for this fast field.
+    ///
+    /// This max_value may not be exact.
+    /// For instance, the max value does not take in account of possible
+    /// deleted document. All values are however guaranteed to be higher than
+    /// `.max_value()`.
+    fn max_value(&self) -> T;
+
+    /// The number of values in the column.
+    fn num_vals(&self) -> u32;
+
+    /// Returns a iterator over the data
+    fn iter<'a>(&'a self) -> Box<dyn Iterator<Item = T> + 'a> {
+        Box::new((0..self.num_vals()).map(|idx| self.get_val(idx)))
+    }
+}
+
+/// VecColumn provides `Column` over a slice.
+pub struct VecColumn<'a, T = u64> {
+    values: &'a [T],
+    min_value: T,
+    max_value: T,
+}
+
+impl<'a, C: Column<T>, T: Copy + PartialOrd> Column<T> for &'a C {
+    fn get_val(&self, idx: u32) -> T {
+        (*self).get_val(idx)
+    }
+
+    fn min_value(&self) -> T {
+        (*self).min_value()
+    }
+
+    fn max_value(&self) -> T {
+        (*self).max_value()
+    }
+
+    fn num_vals(&self) -> u32 {
+        (*self).num_vals()
+    }
+
+    fn iter<'b>(&'b self) -> Box<dyn Iterator<Item = T> + 'b> {
+        (*self).iter()
+    }
+
+    fn get_range(&self, start: u64, output: &mut [T]) {
+        (*self).get_range(start, output)
+    }
+}
+
+impl<'a, T: Copy + PartialOrd + Send + Sync> Column<T> for VecColumn<'a, T> {
+    fn get_val(&self, position: u32) -> T {
+        self.values[position as usize]
+    }
+
+    fn iter(&self) -> Box<dyn Iterator<Item = T> + '_> {
+        Box::new(self.values.iter().copied())
+    }
+
+    fn min_value(&self) -> T {
+        self.min_value
+    }
+
+    fn max_value(&self) -> T {
+        self.max_value
+    }
+
+    fn num_vals(&self) -> u32 {
+        self.values.len() as u32
+    }
+
+    fn get_range(&self, start: u64, output: &mut [T]) {
+        output.copy_from_slice(&self.values[start as usize..][..output.len()])
+    }
+}
+
+impl<'a, T: Copy + Ord + Default, V> From<&'a V> for VecColumn<'a, T>
+where V: AsRef<[T]> + ?Sized
+{
+    fn from(values: &'a V) -> Self {
+        let values = values.as_ref();
+        let (min_value, max_value) = minmax(values.iter().copied()).unwrap_or_default();
+        Self {
+            values,
+            min_value,
+            max_value,
+        }
+    }
+}
+
+struct MonotonicMappingColumn<C, T, Input> {
+    from_column: C,
+    monotonic_mapping: T,
+    _phantom: PhantomData<Input>,
+}
+
+/// Creates a view of a column transformed by a strictly monotonic mapping. See
+/// [`StrictlyMonotonicFn`].
+///
+/// E.g. apply a gcd monotonic_mapping([100, 200, 300]) == [1, 2, 3]
+/// monotonic_mapping.mapping() is expected to be injective, and we should always have
+/// monotonic_mapping.inverse(monotonic_mapping.mapping(el)) == el
+///
+/// The inverse of the mapping is required for:
+/// `fn get_positions_for_value_range(&self, range: RangeInclusive<T>) -> Vec<u64> `
+/// The user provides the original value range and we need to monotonic map them in the same way the
+/// serialization does before calling the underlying column.
+///
+/// Note that when opening a codec, the monotonic_mapping should be the inverse of the mapping
+/// during serialization. And therefore the monotonic_mapping_inv when opening is the same as
+/// monotonic_mapping during serialization.
+pub fn monotonic_map_column<C, T, Input, Output>(
+    from_column: C,
+    monotonic_mapping: T,
+) -> impl Column<Output>
+where
+    C: Column<Input>,
+    T: StrictlyMonotonicFn<Input, Output> + Send + Sync,
+    Input: PartialOrd + Send + Sync + Clone,
+    Output: PartialOrd + Send + Sync + Clone,
+{
+    MonotonicMappingColumn {
+        from_column,
+        monotonic_mapping,
+        _phantom: PhantomData,
+    }
+}
+
+impl<C, T, Input, Output> Column<Output> for MonotonicMappingColumn<C, T, Input>
+where
+    C: Column<Input>,
+    T: StrictlyMonotonicFn<Input, Output> + Send + Sync,
+    Input: PartialOrd + Send + Sync + Clone,
+    Output: PartialOrd + Send + Sync + Clone,
+{
+    #[inline]
+    fn get_val(&self, idx: u32) -> Output {
+        let from_val = self.from_column.get_val(idx);
+        self.monotonic_mapping.mapping(from_val)
+    }
+
+    fn min_value(&self) -> Output {
+        let from_min_value = self.from_column.min_value();
+        self.monotonic_mapping.mapping(from_min_value)
+    }
+
+    fn max_value(&self) -> Output {
+        let from_max_value = self.from_column.max_value();
+        self.monotonic_mapping.mapping(from_max_value)
+    }
+
+    fn num_vals(&self) -> u32 {
+        self.from_column.num_vals()
+    }
+
+    fn iter(&self) -> Box<dyn Iterator<Item = Output> + '_> {
+        Box::new(
+            self.from_column
+                .iter()
+                .map(|el| self.monotonic_mapping.mapping(el)),
+        )
+    }
+
+    fn get_docids_for_value_range(
+        &self,
+        range: RangeInclusive<Output>,
+        doc_id_range: Range<u32>,
+        positions: &mut Vec<u32>,
+    ) {
+        self.from_column.get_docids_for_value_range(
+            self.monotonic_mapping.inverse(range.start().clone())
+                ..=self.monotonic_mapping.inverse(range.end().clone()),
+            doc_id_range,
+            positions,
+        )
+    }
+
+    // We voluntarily do not implement get_range as it yields a regression,
+    // and we do not have any specialized implementation anyway.
+}
+
+/// Wraps an iterator into a `Column`.
+pub struct IterColumn<T>(T);
+
+impl<T> From<T> for IterColumn<T>
+where T: Iterator + Clone + ExactSizeIterator
+{
+    fn from(iter: T) -> Self {
+        IterColumn(iter)
+    }
+}
+
+impl<T> Column<T::Item> for IterColumn<T>
+where
+    T: Iterator + Clone + ExactSizeIterator + Send + Sync,
+    T::Item: PartialOrd,
+{
+    fn get_val(&self, idx: u32) -> T::Item {
+        self.0.clone().nth(idx as usize).unwrap()
+    }
+
+    fn min_value(&self) -> T::Item {
+        self.0.clone().next().unwrap()
+    }
+
+    fn max_value(&self) -> T::Item {
+        self.0.clone().last().unwrap()
+    }
+
+    fn num_vals(&self) -> u32 {
+        self.0.len() as u32
+    }
+
+    fn iter(&self) -> Box<dyn Iterator<Item = T::Item> + '_> {
+        Box::new(self.0.clone())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::monotonic_mapping::{
+        StrictlyMonotonicMappingInverter, StrictlyMonotonicMappingToInternalBaseval,
+        StrictlyMonotonicMappingToInternalGCDBaseval,
+    };
+
+    #[test]
+    fn test_monotonic_mapping() {
+        let vals = &[3u64, 5u64][..];
+        let col = VecColumn::from(vals);
+        let mapped = monotonic_map_column(col, StrictlyMonotonicMappingToInternalBaseval::new(2));
+        assert_eq!(mapped.min_value(), 1u64);
+        assert_eq!(mapped.max_value(), 3u64);
+        assert_eq!(mapped.num_vals(), 2);
+        assert_eq!(mapped.num_vals(), 2);
+        assert_eq!(mapped.get_val(0), 1);
+        assert_eq!(mapped.get_val(1), 3);
+    }
+
+    #[test]
+    fn test_range_as_col() {
+        let col = IterColumn::from(10..100);
+        assert_eq!(col.num_vals(), 90);
+        assert_eq!(col.max_value(), 99);
+    }
+
+    #[test]
+    fn test_monotonic_mapping_iter() {
+        let vals: Vec<u64> = (10..110u64).map(|el| el * 10).collect();
+        let col = VecColumn::from(&vals);
+        let mapped = monotonic_map_column(
+            col,
+            StrictlyMonotonicMappingInverter::from(
+                StrictlyMonotonicMappingToInternalGCDBaseval::new(10, 100),
+            ),
+        );
+        let val_i64s: Vec<u64> = mapped.iter().collect();
+        for i in 0..100 {
+            assert_eq!(val_i64s[i as usize], mapped.get_val(i));
+        }
+    }
+
+    #[test]
+    fn test_monotonic_mapping_get_range() {
+        let vals: Vec<u64> = (0..100u64).map(|el| el * 10).collect();
+        let col = VecColumn::from(&vals);
+        let mapped = monotonic_map_column(
+            col,
+            StrictlyMonotonicMappingInverter::from(
+                StrictlyMonotonicMappingToInternalGCDBaseval::new(10, 0),
+            ),
+        );
+
+        assert_eq!(mapped.min_value(), 0u64);
+        assert_eq!(mapped.max_value(), 9900u64);
+        assert_eq!(mapped.num_vals(), 100);
+        let val_u64s: Vec<u64> = mapped.iter().collect();
+        assert_eq!(val_u64s.len(), 100);
+        for i in 0..100 {
+            assert_eq!(val_u64s[i as usize], mapped.get_val(i));
+            assert_eq!(val_u64s[i as usize], vals[i as usize] * 10);
+        }
+        let mut buf = [0u64; 20];
+        mapped.get_range(7, &mut buf[..]);
+        assert_eq!(&val_u64s[7..][..20], &buf);
+    }
+}
--- a/fastfield_codecs/src/compact_space/blank_range.rs
+++ b/fastfield_codecs/src/compact_space/blank_range.rs
@@ -0,0 +1,43 @@
+use std::ops::RangeInclusive;
+
+/// The range of a blank in value space.
+///
+/// A blank is an unoccupied space in the data.
+/// Use try_into() to construct.
+/// A range has to have at least length of 3. Invalid ranges will be rejected.
+///
+/// Ordered by range length.
+#[derive(Debug, Eq, PartialEq, Clone)]
+pub(crate) struct BlankRange {
+    blank_range: RangeInclusive<u128>,
+}
+impl TryFrom<RangeInclusive<u128>> for BlankRange {
+    type Error = &'static str;
+    fn try_from(range: RangeInclusive<u128>) -> Result<Self, Self::Error> {
+        let blank_size = range.end().saturating_sub(*range.start());
+        if blank_size < 2 {
+            Err("invalid range")
+        } else {
+            Ok(BlankRange { blank_range: range })
+        }
+    }
+}
+impl BlankRange {
+    pub(crate) fn blank_size(&self) -> u128 {
+        self.blank_range.end() - self.blank_range.start() + 1
+    }
+    pub(crate) fn blank_range(&self) -> RangeInclusive<u128> {
+        self.blank_range.clone()
+    }
+}
+
+impl Ord for BlankRange {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        self.blank_size().cmp(&other.blank_size())
+    }
+}
+impl PartialOrd for BlankRange {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        Some(self.blank_size().cmp(&other.blank_size()))
+    }
+}
--- a/fastfield_codecs/src/compact_space/build_compact_space.rs
+++ b/fastfield_codecs/src/compact_space/build_compact_space.rs
@@ -0,0 +1,231 @@
+use std::collections::{BTreeSet, BinaryHeap};
+use std::iter;
+use std::ops::RangeInclusive;
+
+use itertools::Itertools;
+
+use super::blank_range::BlankRange;
+use super::{CompactSpace, RangeMapping};
+
+/// Put the blanks for the sorted values into a binary heap
+fn get_blanks(values_sorted: &BTreeSet<u128>) -> BinaryHeap<BlankRange> {
+    let mut blanks: BinaryHeap<BlankRange> = BinaryHeap::new();
+    for (first, second) in values_sorted.iter().tuple_windows() {
+        // Correctness Overflow: the values are deduped and sorted (BTreeSet property), that means
+        // there's always space between two values.
+        let blank_range = first + 1..=second - 1;
+        let blank_range: Result<BlankRange, _> = blank_range.try_into();
+        if let Ok(blank_range) = blank_range {
+            blanks.push(blank_range);
+        }
+    }
+
+    blanks
+}
+
+struct BlankCollector {
+    blanks: Vec<BlankRange>,
+    staged_blanks_sum: u128,
+}
+impl BlankCollector {
+    fn new() -> Self {
+        Self {
+            blanks: vec![],
+            staged_blanks_sum: 0,
+        }
+    }
+    fn stage_blank(&mut self, blank: BlankRange) {
+        self.staged_blanks_sum += blank.blank_size();
+        self.blanks.push(blank);
+    }
+    fn drain(&mut self) -> impl Iterator<Item = BlankRange> + '_ {
+        self.staged_blanks_sum = 0;
+        self.blanks.drain(..)
+    }
+    fn staged_blanks_sum(&self) -> u128 {
+        self.staged_blanks_sum
+    }
+    fn num_staged_blanks(&self) -> usize {
+        self.blanks.len()
+    }
+}
+fn num_bits(val: u128) -> u8 {
+    (128u32 - val.leading_zeros()) as u8
+}
+
+/// Will collect blanks and add them to compact space if more bits are saved than cost from
+/// metadata.
+pub fn get_compact_space(
+    values_deduped_sorted: &BTreeSet<u128>,
+    total_num_values: u32,
+    cost_per_blank: usize,
+) -> CompactSpace {
+    let mut compact_space_builder = CompactSpaceBuilder::new();
+    if values_deduped_sorted.is_empty() {
+        return compact_space_builder.finish();
+    }
+
+    let mut blanks: BinaryHeap<BlankRange> = get_blanks(values_deduped_sorted);
+    // Replace after stabilization of https://github.com/rust-lang/rust/issues/62924
+
+    // We start by space that's limited to min_value..=max_value
+    let min_value = *values_deduped_sorted.iter().next().unwrap_or(&0);
+    let max_value = *values_deduped_sorted.iter().last().unwrap_or(&0);
+
+    // +1 for null, in case min and max covers the whole space, we are off by one.
+    let mut amplitude_compact_space = (max_value - min_value).saturating_add(1);
+    if min_value != 0 {
+        compact_space_builder.add_blanks(iter::once(0..=min_value - 1));
+    }
+    if max_value != u128::MAX {
+        compact_space_builder.add_blanks(iter::once(max_value + 1..=u128::MAX));
+    }
+
+    let mut amplitude_bits: u8 = num_bits(amplitude_compact_space);
+
+    let mut blank_collector = BlankCollector::new();
+    // We will stage blanks until they reduce the compact space by at least 1 bit and then flush
+    // them if the metadata cost is lower than the total number of saved bits.
+    // Binary heap to process the gaps by their size
+    while let Some(blank_range) = blanks.pop() {
+        blank_collector.stage_blank(blank_range);
+
+        let staged_spaces_sum: u128 = blank_collector.staged_blanks_sum();
+        let amplitude_new_compact_space = amplitude_compact_space - staged_spaces_sum;
+        let amplitude_new_bits = num_bits(amplitude_new_compact_space);
+        if amplitude_bits == amplitude_new_bits {
+            continue;
+        }
+        let saved_bits = (amplitude_bits - amplitude_new_bits) as usize * total_num_values as usize;
+        // TODO: Maybe calculate exact cost of blanks and run this more expensive computation only,
+        // when amplitude_new_bits changes
+        let cost = blank_collector.num_staged_blanks() * cost_per_blank;
+        if cost >= saved_bits {
+            // Continue here, since although we walk over the blanks by size,
+            // we can potentially save a lot at the last bits, which are smaller blanks
+            //
+            // E.g. if the first range reduces the compact space by 1000 from 2000 to 1000, which
+            // saves 11-10=1 bit and the next range reduces the compact space by 950 to
+            // 50, which saves 10-6=4 bit
+            continue;
+        }
+
+        amplitude_compact_space = amplitude_new_compact_space;
+        amplitude_bits = amplitude_new_bits;
+        compact_space_builder.add_blanks(blank_collector.drain().map(|blank| blank.blank_range()));
+    }
+
+    // special case, when we don't collected any blanks because:
+    // * the data is empty (early exit)
+    // * the algorithm did decide it's not worth the cost, which can be the case for single values
+    //
+    // We drain one collected blank unconditionally, so the empty case is reserved for empty
+    // data, and therefore empty compact_space means the data is empty and no data is covered
+    // (conversely to all data) and we can assign null to it.
+    if compact_space_builder.is_empty() {
+        compact_space_builder.add_blanks(
+            blank_collector
+                .drain()
+                .map(|blank| blank.blank_range())
+                .take(1),
+        );
+    }
+
+    let compact_space = compact_space_builder.finish();
+    if max_value - min_value != u128::MAX {
+        debug_assert_eq!(
+            compact_space.amplitude_compact_space(),
+            amplitude_compact_space
+        );
+    }
+    compact_space
+}
+
+#[derive(Debug, Clone, Eq, PartialEq)]
+struct CompactSpaceBuilder {
+    blanks: Vec<RangeInclusive<u128>>,
+}
+
+impl CompactSpaceBuilder {
+    /// Creates a new compact space builder which will initially cover the whole space.
+    fn new() -> Self {
+        Self { blanks: Vec::new() }
+    }
+
+    /// Assumes that repeated add_blank calls don't overlap and are not adjacent,
+    /// e.g. [3..=5, 5..=10] is not allowed
+    ///
+    /// Both of those assumptions are true when blanks are produced from sorted values.
+    fn add_blanks(&mut self, blank: impl Iterator<Item = RangeInclusive<u128>>) {
+        self.blanks.extend(blank);
+    }
+
+    fn is_empty(&self) -> bool {
+        self.blanks.is_empty()
+    }
+
+    /// Convert blanks to covered space and assign null value
+    fn finish(mut self) -> CompactSpace {
+        // sort by start. ranges are not allowed to overlap
+        self.blanks.sort_unstable_by_key(|blank| *blank.start());
+
+        let mut covered_space = Vec::with_capacity(self.blanks.len());
+
+        // begining of the blanks
+        if let Some(first_blank_start) = self.blanks.first().map(RangeInclusive::start) {
+            if *first_blank_start != 0 {
+                covered_space.push(0..=first_blank_start - 1);
+            }
+        }
+
+        // Between the blanks
+        let between_blanks = self.blanks.iter().tuple_windows().map(|(left, right)| {
+            assert!(
+                left.end() < right.start(),
+                "overlapping or adjacent ranges detected"
+            );
+            *left.end() + 1..=*right.start() - 1
+        });
+        covered_space.extend(between_blanks);
+
+        // end of the blanks
+        if let Some(last_blank_end) = self.blanks.last().map(RangeInclusive::end) {
+            if *last_blank_end != u128::MAX {
+                covered_space.push(last_blank_end + 1..=u128::MAX);
+            }
+        }
+
+        if covered_space.is_empty() {
+            covered_space.push(0..=0); // empty data case
+        };
+
+        let mut compact_start: u64 = 1; // 0 is reserved for `null`
+        let mut ranges_mapping: Vec<RangeMapping> = Vec::with_capacity(covered_space.len());
+        for cov in covered_space {
+            let range_mapping = super::RangeMapping {
+                value_range: cov,
+                compact_start,
+            };
+            let covered_range_len = range_mapping.range_length();
+            ranges_mapping.push(range_mapping);
+            compact_start += covered_range_len;
+        }
+        // println!("num ranges {}", ranges_mapping.len());
+        CompactSpace { ranges_mapping }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_binary_heap_pop_order() {
+        let mut blanks: BinaryHeap<BlankRange> = BinaryHeap::new();
+        blanks.push((0..=10).try_into().unwrap());
+        blanks.push((100..=200).try_into().unwrap());
+        blanks.push((100..=110).try_into().unwrap());
+        assert_eq!(blanks.pop().unwrap().blank_size(), 101);
+        assert_eq!(blanks.pop().unwrap().blank_size(), 11);
+    }
+}
--- a/fastfield_codecs/src/compact_space/mod.rs
+++ b/fastfield_codecs/src/compact_space/mod.rs
@@ -0,0 +1,814 @@
+/// This codec takes a large number space (u128) and reduces it to a compact number space.
+///
+/// It will find spaces in the number range. For example:
+///
+/// 100, 101, 102, 103, 104, 50000, 50001
+/// could be mapped to
+/// 100..104 -> 0..4
+/// 50000..50001 -> 5..6
+///
+/// Compact space 0..=6 requires much less bits than 100..=50001
+///
+/// The codec is created to compress ip addresses, but may be employed in other use cases.
+use std::{
+    cmp::Ordering,
+    collections::BTreeSet,
+    io::{self, Write},
+    ops::{Range, RangeInclusive},
+};
+
+use common::{BinarySerializable, CountingWriter, VInt, VIntU128};
+use ownedbytes::OwnedBytes;
+use tantivy_bitpacker::{self, BitPacker, BitUnpacker};
+
+use crate::compact_space::build_compact_space::get_compact_space;
+use crate::Column;
+
+mod blank_range;
+mod build_compact_space;
+
+/// The cost per blank is quite hard actually, since blanks are delta encoded, the actual cost of
+/// blanks depends on the number of blanks.
+///
+/// The number is taken by looking at a real dataset. It is optimized for larger datasets.
+const COST_PER_BLANK_IN_BITS: usize = 36;
+
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub struct CompactSpace {
+    ranges_mapping: Vec<RangeMapping>,
+}
+
+/// Maps the range from the original space to compact_start + range.len()
+#[derive(Debug, Clone, Eq, PartialEq)]
+struct RangeMapping {
+    value_range: RangeInclusive<u128>,
+    compact_start: u64,
+}
+impl RangeMapping {
+    fn range_length(&self) -> u64 {
+        (self.value_range.end() - self.value_range.start()) as u64 + 1
+    }
+
+    // The last value of the compact space in this range
+    fn compact_end(&self) -> u64 {
+        self.compact_start + self.range_length() - 1
+    }
+}
+
+impl BinarySerializable for CompactSpace {
+    fn serialize<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
+        VInt(self.ranges_mapping.len() as u64).serialize(writer)?;
+
+        let mut prev_value = 0;
+        for value_range in self
+            .ranges_mapping
+            .iter()
+            .map(|range_mapping| &range_mapping.value_range)
+        {
+            let blank_delta_start = value_range.start() - prev_value;
+            VIntU128(blank_delta_start).serialize(writer)?;
+            prev_value = *value_range.start();
+
+            let blank_delta_end = value_range.end() - prev_value;
+            VIntU128(blank_delta_end).serialize(writer)?;
+            prev_value = *value_range.end();
+        }
+
+        Ok(())
+    }
+
+    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
+        let num_ranges = VInt::deserialize(reader)?.0;
+        let mut ranges_mapping: Vec<RangeMapping> = vec![];
+        let mut value = 0u128;
+        let mut compact_start = 1u64; // 0 is reserved for `null`
+        for _ in 0..num_ranges {
+            let blank_delta_start = VIntU128::deserialize(reader)?.0;
+            value += blank_delta_start;
+            let blank_start = value;
+
+            let blank_delta_end = VIntU128::deserialize(reader)?.0;
+            value += blank_delta_end;
+            let blank_end = value;
+
+            let range_mapping = RangeMapping {
+                value_range: blank_start..=blank_end,
+                compact_start,
+            };
+            let range_length = range_mapping.range_length();
+            ranges_mapping.push(range_mapping);
+            compact_start += range_length;
+        }
+
+        Ok(Self { ranges_mapping })
+    }
+}
+
+impl CompactSpace {
+    /// Amplitude is the value range of the compact space including the sentinel value used to
+    /// identify null values. The compact space is 0..=amplitude .
+    ///
+    /// It's only used to verify we don't exceed u64 number space, which would indicate a bug.
+    fn amplitude_compact_space(&self) -> u128 {
+        self.ranges_mapping
+            .last()
+            .map(|last_range| last_range.compact_end() as u128)
+            .unwrap_or(1) // compact space starts at 1, 0 == null
+    }
+
+    fn get_range_mapping(&self, pos: usize) -> &RangeMapping {
+        &self.ranges_mapping[pos]
+    }
+
+    /// Returns either Ok(the value in the compact space) or if it is outside the compact space the
+    /// Err(position where it would be inserted)
+    fn u128_to_compact(&self, value: u128) -> Result<u64, usize> {
+        self.ranges_mapping
+            .binary_search_by(|probe| {
+                let value_range = &probe.value_range;
+                if value < *value_range.start() {
+                    Ordering::Greater
+                } else if value > *value_range.end() {
+                    Ordering::Less
+                } else {
+                    Ordering::Equal
+                }
+            })
+            .map(|pos| {
+                let range_mapping = &self.ranges_mapping[pos];
+                let pos_in_range = (value - range_mapping.value_range.start()) as u64;
+                range_mapping.compact_start + pos_in_range
+            })
+    }
+
+    /// Unpacks a value from compact space u64 to u128 space
+    fn compact_to_u128(&self, compact: u64) -> u128 {
+        let pos = self
+            .ranges_mapping
+            .binary_search_by_key(&compact, |range_mapping| range_mapping.compact_start)
+            // Correctness: Overflow. The first range starts at compact space 0, the error from
+            // binary search can never be 0
+            .map_or_else(|e| e - 1, |v| v);
+
+        let range_mapping = &self.ranges_mapping[pos];
+        let diff = compact - range_mapping.compact_start;
+        range_mapping.value_range.start() + diff as u128
+    }
+}
+
+pub struct CompactSpaceCompressor {
+    params: IPCodecParams,
+}
+#[derive(Debug, Clone)]
+pub struct IPCodecParams {
+    compact_space: CompactSpace,
+    bit_unpacker: BitUnpacker,
+    min_value: u128,
+    max_value: u128,
+    num_vals: u32,
+    num_bits: u8,
+}
+
+impl CompactSpaceCompressor {
+    /// Taking the vals as Vec may cost a lot of memory. It is used to sort the vals.
+    pub fn train_from(iter: impl Iterator<Item = u128>, num_vals: u32) -> Self {
+        let mut values_sorted = BTreeSet::new();
+        values_sorted.extend(iter);
+        let total_num_values = num_vals;
+
+        let compact_space =
+            get_compact_space(&values_sorted, total_num_values, COST_PER_BLANK_IN_BITS);
+        let amplitude_compact_space = compact_space.amplitude_compact_space();
+
+        assert!(
+            amplitude_compact_space <= u64::MAX as u128,
+            "case unsupported."
+        );
+
+        let num_bits = tantivy_bitpacker::compute_num_bits(amplitude_compact_space as u64);
+        let min_value = *values_sorted.iter().next().unwrap_or(&0);
+        let max_value = *values_sorted.iter().last().unwrap_or(&0);
+        assert_eq!(
+            compact_space
+                .u128_to_compact(max_value)
+                .expect("could not convert max value to compact space"),
+            amplitude_compact_space as u64
+        );
+        CompactSpaceCompressor {
+            params: IPCodecParams {
+                compact_space,
+                bit_unpacker: BitUnpacker::new(num_bits),
+                min_value,
+                max_value,
+                num_vals: total_num_values,
+                num_bits,
+            },
+        }
+    }
+
+    fn write_footer(self, writer: &mut impl Write) -> io::Result<()> {
+        let writer = &mut CountingWriter::wrap(writer);
+        self.params.serialize(writer)?;
+
+        let footer_len = writer.written_bytes() as u32;
+        footer_len.serialize(writer)?;
+
+        Ok(())
+    }
+
+    pub fn compress_into(
+        self,
+        vals: impl Iterator<Item = u128>,
+        write: &mut impl Write,
+    ) -> io::Result<()> {
+        let mut bitpacker = BitPacker::default();
+        for val in vals {
+            let compact = self
+                .params
+                .compact_space
+                .u128_to_compact(val)
+                .map_err(|_| {
+                    io::Error::new(
+                        io::ErrorKind::InvalidData,
+                        "Could not convert value to compact_space. This is a bug.",
+                    )
+                })?;
+            bitpacker.write(compact, self.params.num_bits, write)?;
+        }
+        bitpacker.close(write)?;
+        self.write_footer(write)?;
+        Ok(())
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct CompactSpaceDecompressor {
+    data: OwnedBytes,
+    params: IPCodecParams,
+}
+
+impl BinarySerializable for IPCodecParams {
+    fn serialize<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
+        // header flags for future optional dictionary encoding
+        let footer_flags = 0u64;
+        footer_flags.serialize(writer)?;
+
+        VIntU128(self.min_value).serialize(writer)?;
+        VIntU128(self.max_value).serialize(writer)?;
+        VIntU128(self.num_vals as u128).serialize(writer)?;
+        self.num_bits.serialize(writer)?;
+
+        self.compact_space.serialize(writer)?;
+
+        Ok(())
+    }
+
+    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
+        let _header_flags = u64::deserialize(reader)?;
+        let min_value = VIntU128::deserialize(reader)?.0;
+        let max_value = VIntU128::deserialize(reader)?.0;
+        let num_vals = VIntU128::deserialize(reader)?.0 as u32;
+        let num_bits = u8::deserialize(reader)?;
+        let compact_space = CompactSpace::deserialize(reader)?;
+
+        Ok(Self {
+            compact_space,
+            bit_unpacker: BitUnpacker::new(num_bits),
+            min_value,
+            max_value,
+            num_vals,
+            num_bits,
+        })
+    }
+}
+
+impl Column<u128> for CompactSpaceDecompressor {
+    #[inline]
+    fn get_val(&self, doc: u32) -> u128 {
+        self.get(doc)
+    }
+
+    fn min_value(&self) -> u128 {
+        self.min_value()
+    }
+
+    fn max_value(&self) -> u128 {
+        self.max_value()
+    }
+
+    fn num_vals(&self) -> u32 {
+        self.params.num_vals
+    }
+
+    #[inline]
+    fn iter(&self) -> Box<dyn Iterator<Item = u128> + '_> {
+        Box::new(self.iter())
+    }
+
+    #[inline]
+    fn get_docids_for_value_range(
+        &self,
+        value_range: RangeInclusive<u128>,
+        positions_range: Range<u32>,
+        positions: &mut Vec<u32>,
+    ) {
+        self.get_positions_for_value_range(value_range, positions_range, positions)
+    }
+}
+
+impl CompactSpaceDecompressor {
+    pub fn open(data: OwnedBytes) -> io::Result<CompactSpaceDecompressor> {
+        let (data_slice, footer_len_bytes) = data.split_at(data.len() - 4);
+        let footer_len = u32::deserialize(&mut &footer_len_bytes[..])?;
+
+        let data_footer = &data_slice[data_slice.len() - footer_len as usize..];
+        let params = IPCodecParams::deserialize(&mut &data_footer[..])?;
+        let decompressor = CompactSpaceDecompressor { data, params };
+
+        Ok(decompressor)
+    }
+
+    /// Converting to compact space for the decompressor is more complex, since we may get values
+    /// which are outside the compact space. e.g. if we map
+    /// 1000 => 5
+    /// 2000 => 6
+    ///
+    /// and we want a mapping for 1005, there is no equivalent compact space. We instead return an
+    /// error with the index of the next range.
+    fn u128_to_compact(&self, value: u128) -> Result<u64, usize> {
+        self.params.compact_space.u128_to_compact(value)
+    }
+
+    fn compact_to_u128(&self, compact: u64) -> u128 {
+        self.params.compact_space.compact_to_u128(compact)
+    }
+
+    /// Comparing on compact space: Random dataset 0,24 (50% random hit) - 1.05 GElements/s
+    /// Comparing on compact space: Real dataset 1.08 GElements/s
+    ///
+    /// Comparing on original space: Real dataset .06 GElements/s (not completely optimized)
+    #[inline]
+    pub fn get_positions_for_value_range(
+        &self,
+        value_range: RangeInclusive<u128>,
+        position_range: Range<u32>,
+        positions: &mut Vec<u32>,
+    ) {
+        if value_range.start() > value_range.end() {
+            return;
+        }
+        let position_range = position_range.start..position_range.end.min(self.num_vals());
+        let from_value = *value_range.start();
+        let to_value = *value_range.end();
+        assert!(to_value >= from_value);
+        let compact_from = self.u128_to_compact(from_value);
+        let compact_to = self.u128_to_compact(to_value);
+
+        // Quick return, if both ranges fall into the same non-mapped space, the range can't cover
+        // any values, so we can early exit
+        match (compact_to, compact_from) {
+            (Err(pos1), Err(pos2)) if pos1 == pos2 => return,
+            _ => {}
+        }
+
+        let compact_from = compact_from.unwrap_or_else(|pos| {
+            // Correctness: Out of bounds, if this value is Err(last_index + 1), we early exit,
+            // since the to_value also mapps into the same non-mapped space
+            let range_mapping = self.params.compact_space.get_range_mapping(pos);
+            range_mapping.compact_start
+        });
+        // If there is no compact space, we go to the closest upperbound compact space
+        let compact_to = compact_to.unwrap_or_else(|pos| {
+            // Correctness: Overflow, if this value is Err(0), we early exit,
+            // since the from_value also mapps into the same non-mapped space
+
+            // Get end of previous range
+            let pos = pos - 1;
+            let range_mapping = self.params.compact_space.get_range_mapping(pos);
+            range_mapping.compact_end()
+        });
+
+        let range = compact_from..=compact_to;
+
+        let scan_num_docs = position_range.end - position_range.start;
+
+        let step_size = 4;
+        let cutoff = position_range.start + scan_num_docs - scan_num_docs % step_size;
+
+        let mut push_if_in_range = |idx, val| {
+            if range.contains(&val) {
+                positions.push(idx);
+            }
+        };
+        let get_val = |idx| self.params.bit_unpacker.get(idx, &self.data);
+        // unrolled loop
+        for idx in (position_range.start..cutoff).step_by(step_size as usize) {
+            let idx1 = idx;
+            let idx2 = idx + 1;
+            let idx3 = idx + 2;
+            let idx4 = idx + 3;
+            let val1 = get_val(idx1);
+            let val2 = get_val(idx2);
+            let val3 = get_val(idx3);
+            let val4 = get_val(idx4);
+            push_if_in_range(idx1, val1);
+            push_if_in_range(idx2, val2);
+            push_if_in_range(idx3, val3);
+            push_if_in_range(idx4, val4);
+        }
+
+        // handle rest
+        for idx in cutoff..position_range.end {
+            push_if_in_range(idx, get_val(idx));
+        }
+    }
+
+    #[inline]
+    fn iter_compact(&self) -> impl Iterator<Item = u64> + '_ {
+        (0..self.params.num_vals).map(move |idx| self.params.bit_unpacker.get(idx, &self.data))
+    }
+
+    #[inline]
+    fn iter(&self) -> impl Iterator<Item = u128> + '_ {
+        // TODO: Performance. It would be better to iterate on the ranges and check existence via
+        // the bit_unpacker.
+        self.iter_compact()
+            .map(|compact| self.compact_to_u128(compact))
+    }
+
+    #[inline]
+    pub fn get(&self, idx: u32) -> u128 {
+        let compact = self.params.bit_unpacker.get(idx, &self.data);
+        self.compact_to_u128(compact)
+    }
+
+    pub fn min_value(&self) -> u128 {
+        self.params.min_value
+    }
+
+    pub fn max_value(&self) -> u128 {
+        self.params.max_value
+    }
+}
+
+#[cfg(test)]
+mod tests {
+
+    use super::*;
+    use crate::format_version::read_format_version;
+    use crate::null_index_footer::read_null_index_footer;
+    use crate::serialize::U128Header;
+    use crate::{open_u128, serialize_u128};
+
+    #[test]
+    fn compact_space_test() {
+        let ips = &[
+            2u128, 4u128, 1000, 1001, 1002, 1003, 1004, 1005, 1008, 1010, 1012, 1260,
+        ]
+        .into_iter()
+        .collect();
+        let compact_space = get_compact_space(ips, ips.len() as u32, 11);
+        let amplitude = compact_space.amplitude_compact_space();
+        assert_eq!(amplitude, 17);
+        assert_eq!(1, compact_space.u128_to_compact(2).unwrap());
+        assert_eq!(2, compact_space.u128_to_compact(3).unwrap());
+        assert_eq!(compact_space.u128_to_compact(100).unwrap_err(), 1);
+
+        for (num1, num2) in (0..3).tuple_windows() {
+            assert_eq!(
+                compact_space.get_range_mapping(num1).compact_end() + 1,
+                compact_space.get_range_mapping(num2).compact_start
+            );
+        }
+
+        let mut output: Vec<u8> = Vec::new();
+        compact_space.serialize(&mut output).unwrap();
+
+        assert_eq!(
+            compact_space,
+            CompactSpace::deserialize(&mut &output[..]).unwrap()
+        );
+
+        for ip in ips {
+            let compact = compact_space.u128_to_compact(*ip).unwrap();
+            assert_eq!(compact_space.compact_to_u128(compact), *ip);
+        }
+    }
+
+    #[test]
+    fn compact_space_amplitude_test() {
+        let ips = &[100000u128, 1000000].into_iter().collect();
+        let compact_space = get_compact_space(ips, ips.len() as u32, 1);
+        let amplitude = compact_space.amplitude_compact_space();
+        assert_eq!(amplitude, 2);
+    }
+
+    fn test_all(mut data: OwnedBytes, expected: &[u128]) {
+        let _header = U128Header::deserialize(&mut data);
+        let decompressor = CompactSpaceDecompressor::open(data).unwrap();
+        for (idx, expected_val) in expected.iter().cloned().enumerate() {
+            let val = decompressor.get(idx as u32);
+            assert_eq!(val, expected_val);
+
+            let test_range = |range: RangeInclusive<u128>| {
+                let expected_positions = expected
+                    .iter()
+                    .positions(|val| range.contains(val))
+                    .map(|pos| pos as u32)
+                    .collect::<Vec<_>>();
+                let mut positions = Vec::new();
+                decompressor.get_positions_for_value_range(
+                    range,
+                    0..decompressor.num_vals(),
+                    &mut positions,
+                );
+                assert_eq!(positions, expected_positions);
+            };
+
+            test_range(expected_val.saturating_sub(1)..=expected_val);
+            test_range(expected_val..=expected_val);
+            test_range(expected_val..=expected_val.saturating_add(1));
+            test_range(expected_val.saturating_sub(1)..=expected_val.saturating_add(1));
+        }
+    }
+
+    fn test_aux_vals(u128_vals: &[u128]) -> OwnedBytes {
+        let mut out = Vec::new();
+        serialize_u128(
+            || u128_vals.iter().cloned(),
+            u128_vals.len() as u32,
+            &mut out,
+        )
+        .unwrap();
+
+        let data = OwnedBytes::new(out);
+        let (data, _format_version) = read_format_version(data).unwrap();
+        let (data, _null_index_footer) = read_null_index_footer(data).unwrap();
+        test_all(data.clone(), u128_vals);
+
+        data
+    }
+
+    #[test]
+    fn test_range_1() {
+        let vals = &[
+            1u128,
+            100u128,
+            3u128,
+            99999u128,
+            100000u128,
+            100001u128,
+            4_000_211_221u128,
+            4_000_211_222u128,
+            333u128,
+        ];
+        let mut data = test_aux_vals(vals);
+
+        let _header = U128Header::deserialize(&mut data);
+        let decomp = CompactSpaceDecompressor::open(data).unwrap();
+        let complete_range = 0..vals.len() as u32;
+        for (pos, val) in vals.iter().enumerate() {
+            let val = *val;
+            let pos = pos as u32;
+            let mut positions = Vec::new();
+            decomp.get_positions_for_value_range(val..=val, pos..pos + 1, &mut positions);
+            assert_eq!(positions, vec![pos]);
+        }
+
+        // handle docid range out of bounds
+        let positions = get_positions_for_value_range_helper(&decomp, 0..=1, 1..u32::MAX);
+        assert_eq!(positions, vec![]);
+
+        let positions =
+            get_positions_for_value_range_helper(&decomp, 0..=1, complete_range.clone());
+        assert_eq!(positions, vec![0]);
+        let positions =
+            get_positions_for_value_range_helper(&decomp, 0..=2, complete_range.clone());
+        assert_eq!(positions, vec![0]);
+        let positions =
+            get_positions_for_value_range_helper(&decomp, 0..=3, complete_range.clone());
+        assert_eq!(positions, vec![0, 2]);
+        assert_eq!(
+            get_positions_for_value_range_helper(
+                &decomp,
+                99999u128..=99999u128,
+                complete_range.clone()
+            ),
+            vec![3]
+        );
+        assert_eq!(
+            get_positions_for_value_range_helper(
+                &decomp,
+                99999u128..=100000u128,
+                complete_range.clone()
+            ),
+            vec![3, 4]
+        );
+        assert_eq!(
+            get_positions_for_value_range_helper(
+                &decomp,
+                99998u128..=100000u128,
+                complete_range.clone()
+            ),
+            vec![3, 4]
+        );
+        assert_eq!(
+            get_positions_for_value_range_helper(
+                &decomp,
+                99998u128..=99999u128,
+                complete_range.clone()
+            ),
+            vec![3]
+        );
+        assert_eq!(
+            get_positions_for_value_range_helper(
+                &decomp,
+                99998u128..=99998u128,
+                complete_range.clone()
+            ),
+            vec![]
+        );
+        assert_eq!(
+            get_positions_for_value_range_helper(
+                &decomp,
+                333u128..=333u128,
+                complete_range.clone()
+            ),
+            vec![8]
+        );
+        assert_eq!(
+            get_positions_for_value_range_helper(
+                &decomp,
+                332u128..=333u128,
+                complete_range.clone()
+            ),
+            vec![8]
+        );
+        assert_eq!(
+            get_positions_for_value_range_helper(
+                &decomp,
+                332u128..=334u128,
+                complete_range.clone()
+            ),
+            vec![8]
+        );
+        assert_eq!(
+            get_positions_for_value_range_helper(
+                &decomp,
+                333u128..=334u128,
+                complete_range.clone()
+            ),
+            vec![8]
+        );
+
+        assert_eq!(
+            get_positions_for_value_range_helper(
+                &decomp,
+                4_000_211_221u128..=5_000_000_000u128,
+                complete_range
+            ),
+            vec![6, 7]
+        );
+    }
+
+    #[test]
+    fn test_empty() {
+        let vals = &[];
+        let data = test_aux_vals(vals);
+        let _decomp = CompactSpaceDecompressor::open(data).unwrap();
+    }
+
+    #[test]
+    fn test_range_2() {
+        let vals = &[
+            100u128,
+            99999u128,
+            100000u128,
+            100001u128,
+            4_000_211_221u128,
+            4_000_211_222u128,
+            333u128,
+        ];
+        let mut data = test_aux_vals(vals);
+        let _header = U128Header::deserialize(&mut data);
+        let decomp = CompactSpaceDecompressor::open(data).unwrap();
+        let complete_range = 0..vals.len() as u32;
+        assert_eq!(
+            get_positions_for_value_range_helper(&decomp, 0..=5, complete_range.clone()),
+            vec![]
+        );
+        assert_eq!(
+            get_positions_for_value_range_helper(&decomp, 0..=100, complete_range.clone()),
+            vec![0]
+        );
+        assert_eq!(
+            get_positions_for_value_range_helper(&decomp, 0..=105, complete_range),
+            vec![0]
+        );
+    }
+
+    fn get_positions_for_value_range_helper<C: Column<T> + ?Sized, T: PartialOrd>(
+        column: &C,
+        value_range: RangeInclusive<T>,
+        doc_id_range: Range<u32>,
+    ) -> Vec<u32> {
+        let mut positions = Vec::new();
+        column.get_docids_for_value_range(value_range, doc_id_range, &mut positions);
+        positions
+    }
+
+    #[test]
+    fn test_range_3() {
+        let vals = &[
+            200u128,
+            201,
+            202,
+            203,
+            204,
+            204,
+            206,
+            207,
+            208,
+            209,
+            210,
+            1_000_000,
+            5_000_000_000,
+        ];
+        let mut out = Vec::new();
+        serialize_u128(|| vals.iter().cloned(), vals.len() as u32, &mut out).unwrap();
+        let decomp = open_u128::<u128>(OwnedBytes::new(out)).unwrap();
+        let complete_range = 0..vals.len() as u32;
+
+        assert_eq!(
+            get_positions_for_value_range_helper(&*decomp, 199..=200, complete_range.clone()),
+            vec![0]
+        );
+
+        assert_eq!(
+            get_positions_for_value_range_helper(&*decomp, 199..=201, complete_range.clone()),
+            vec![0, 1]
+        );
+
+        assert_eq!(
+            get_positions_for_value_range_helper(&*decomp, 200..=200, complete_range.clone()),
+            vec![0]
+        );
+
+        assert_eq!(
+            get_positions_for_value_range_helper(&*decomp, 1_000_000..=1_000_000, complete_range),
+            vec![11]
+        );
+    }
+
+    #[test]
+    fn test_bug1() {
+        let vals = &[9223372036854775806];
+        let _data = test_aux_vals(vals);
+    }
+
+    #[test]
+    fn test_bug2() {
+        let vals = &[340282366920938463463374607431768211455u128];
+        let _data = test_aux_vals(vals);
+    }
+
+    #[test]
+    fn test_bug3() {
+        let vals = &[340282366920938463463374607431768211454];
+        let _data = test_aux_vals(vals);
+    }
+
+    #[test]
+    fn test_bug4() {
+        let vals = &[340282366920938463463374607431768211455, 0];
+        let _data = test_aux_vals(vals);
+    }
+
+    #[test]
+    fn test_first_large_gaps() {
+        let vals = &[1_000_000_000u128; 100];
+        let _data = test_aux_vals(vals);
+    }
+    use itertools::Itertools;
+    use proptest::prelude::*;
+
+    fn num_strategy() -> impl Strategy<Value = u128> {
+        prop_oneof![
+            1 => prop::num::u128::ANY.prop_map(|num| u128::MAX - (num % 10) ),
+            1 => prop::num::u128::ANY.prop_map(|num| i64::MAX as u128 + 5 - (num % 10) ),
+            1 => prop::num::u128::ANY.prop_map(|num| i128::MAX as u128 + 5 - (num % 10) ),
+            1 => prop::num::u128::ANY.prop_map(|num| num % 10 ),
+            20 => prop::num::u128::ANY,
+        ]
+    }
+
+    proptest! {
+        #![proptest_config(ProptestConfig::with_cases(10))]
+
+            #[test]
+            fn compress_decompress_random(vals in proptest::collection::vec(num_strategy()
+    , 1..1000)) {
+                let _data = test_aux_vals(&vals);
+            }
+        }
+}
--- a/fastfield_codecs/src/format_version.rs
+++ b/fastfield_codecs/src/format_version.rs
@@ -0,0 +1,39 @@
+use std::io;
+
+use common::BinarySerializable;
+use ownedbytes::OwnedBytes;
+
+const MAGIC_NUMBER: u16 = 4335u16;
+const FASTFIELD_FORMAT_VERSION: u8 = 1;
+
+pub(crate) fn append_format_version(output: &mut impl io::Write) -> io::Result<()> {
+    FASTFIELD_FORMAT_VERSION.serialize(output)?;
+    MAGIC_NUMBER.serialize(output)?;
+
+    Ok(())
+}
+
+pub(crate) fn read_format_version(data: OwnedBytes) -> io::Result<(OwnedBytes, u8)> {
+    let (data, magic_number_bytes) = data.rsplit(2);
+
+    let magic_number = u16::deserialize(&mut magic_number_bytes.as_slice())?;
+    if magic_number != MAGIC_NUMBER {
+        return Err(io::Error::new(
+            io::ErrorKind::InvalidData,
+            format!("magic number mismatch {} != {}", magic_number, MAGIC_NUMBER),
+        ));
+    }
+    let (data, format_version_bytes) = data.rsplit(1);
+    let format_version = u8::deserialize(&mut format_version_bytes.as_slice())?;
+    if format_version > FASTFIELD_FORMAT_VERSION {
+        return Err(io::Error::new(
+            io::ErrorKind::InvalidData,
+            format!(
+                "Unsupported fastfield format version: {}. Max supported version: {}",
+                format_version, FASTFIELD_FORMAT_VERSION
+            ),
+        ));
+    }
+
+    Ok((data, format_version))
+}
--- a/fastfield_codecs/src/gcd.rs
+++ b/fastfield_codecs/src/gcd.rs
@@ -0,0 +1,170 @@
+use std::num::NonZeroU64;
+
+use fastdivide::DividerU64;
+
+/// Compute the gcd of two non null numbers.
+///
+/// It is recommended, but not required, to feed values such that `large >= small`.
+fn compute_gcd(mut large: NonZeroU64, mut small: NonZeroU64) -> NonZeroU64 {
+    loop {
+        let rem: u64 = large.get() % small;
+        if let Some(new_small) = NonZeroU64::new(rem) {
+            (large, small) = (small, new_small);
+        } else {
+            return small;
+        }
+    }
+}
+
+// Find GCD for iterator of numbers
+pub fn find_gcd(numbers: impl Iterator<Item = u64>) -> Option<NonZeroU64> {
+    let mut numbers = numbers.flat_map(NonZeroU64::new);
+    let mut gcd: NonZeroU64 = numbers.next()?;
+    if gcd.get() == 1 {
+        return Some(gcd);
+    }
+
+    let mut gcd_divider = DividerU64::divide_by(gcd.get());
+    for val in numbers {
+        let remainder = val.get() - (gcd_divider.divide(val.get())) * gcd.get();
+        if remainder == 0 {
+            continue;
+        }
+        gcd = compute_gcd(val, gcd);
+        if gcd.get() == 1 {
+            return Some(gcd);
+        }
+
+        gcd_divider = DividerU64::divide_by(gcd.get());
+    }
+    Some(gcd)
+}
+
+#[cfg(test)]
+mod tests {
+    use std::io;
+    use std::num::NonZeroU64;
+
+    use ownedbytes::OwnedBytes;
+
+    use crate::gcd::{compute_gcd, find_gcd};
+    use crate::{FastFieldCodecType, VecColumn};
+
+    fn test_fastfield_gcd_i64_with_codec(
+        codec_type: FastFieldCodecType,
+        num_vals: usize,
+    ) -> io::Result<()> {
+        let mut vals: Vec<i64> = (-4..=(num_vals as i64) - 5).map(|val| val * 1000).collect();
+        let mut buffer: Vec<u8> = Vec::new();
+        crate::serialize(VecColumn::from(&vals), &mut buffer, &[codec_type])?;
+        let buffer = OwnedBytes::new(buffer);
+        let column = crate::open::<i64>(buffer.clone())?;
+        assert_eq!(column.get_val(0), -4000i64);
+        assert_eq!(column.get_val(1), -3000i64);
+        assert_eq!(column.get_val(2), -2000i64);
+        assert_eq!(column.max_value(), (num_vals as i64 - 5) * 1000);
+        assert_eq!(column.min_value(), -4000i64);
+
+        // Can't apply gcd
+        let mut buffer_without_gcd = Vec::new();
+        vals.pop();
+        vals.push(1001i64);
+        crate::serialize(
+            VecColumn::from(&vals),
+            &mut buffer_without_gcd,
+            &[codec_type],
+        )?;
+        let buffer_without_gcd = OwnedBytes::new(buffer_without_gcd);
+        assert!(buffer_without_gcd.len() > buffer.len());
+
+        Ok(())
+    }
+
+    #[test]
+    fn test_fastfield_gcd_i64() -> io::Result<()> {
+        for &codec_type in &[
+            FastFieldCodecType::Bitpacked,
+            FastFieldCodecType::BlockwiseLinear,
+            FastFieldCodecType::Linear,
+        ] {
+            test_fastfield_gcd_i64_with_codec(codec_type, 5500)?;
+        }
+        Ok(())
+    }
+
+    fn test_fastfield_gcd_u64_with_codec(
+        codec_type: FastFieldCodecType,
+        num_vals: usize,
+    ) -> io::Result<()> {
+        let mut vals: Vec<u64> = (1..=num_vals).map(|i| i as u64 * 1000u64).collect();
+        let mut buffer: Vec<u8> = Vec::new();
+        crate::serialize(VecColumn::from(&vals), &mut buffer, &[codec_type])?;
+        let buffer = OwnedBytes::new(buffer);
+        let column = crate::open::<u64>(buffer.clone())?;
+        assert_eq!(column.get_val(0), 1000u64);
+        assert_eq!(column.get_val(1), 2000u64);
+        assert_eq!(column.get_val(2), 3000u64);
+        assert_eq!(column.max_value(), num_vals as u64 * 1000);
+        assert_eq!(column.min_value(), 1000u64);
+
+        // Can't apply gcd
+        let mut buffer_without_gcd = Vec::new();
+        vals.pop();
+        vals.push(1001u64);
+        crate::serialize(
+            VecColumn::from(&vals),
+            &mut buffer_without_gcd,
+            &[codec_type],
+        )?;
+        let buffer_without_gcd = OwnedBytes::new(buffer_without_gcd);
+        assert!(buffer_without_gcd.len() > buffer.len());
+        Ok(())
+    }
+
+    #[test]
+    fn test_fastfield_gcd_u64() -> io::Result<()> {
+        for &codec_type in &[
+            FastFieldCodecType::Bitpacked,
+            FastFieldCodecType::BlockwiseLinear,
+            FastFieldCodecType::Linear,
+        ] {
+            test_fastfield_gcd_u64_with_codec(codec_type, 5500)?;
+        }
+        Ok(())
+    }
+
+    #[test]
+    pub fn test_fastfield2() {
+        let test_fastfield = crate::serialize_and_load(&[100u64, 200u64, 300u64]);
+        assert_eq!(test_fastfield.get_val(0), 100);
+        assert_eq!(test_fastfield.get_val(1), 200);
+        assert_eq!(test_fastfield.get_val(2), 300);
+    }
+
+    #[test]
+    fn test_compute_gcd() {
+        let test_compute_gcd_aux = |large, small, expected| {
+            let large = NonZeroU64::new(large).unwrap();
+            let small = NonZeroU64::new(small).unwrap();
+            let expected = NonZeroU64::new(expected).unwrap();
+            assert_eq!(compute_gcd(small, large), expected);
+            assert_eq!(compute_gcd(large, small), expected);
+        };
+        test_compute_gcd_aux(1, 4, 1);
+        test_compute_gcd_aux(2, 4, 2);
+        test_compute_gcd_aux(10, 25, 5);
+        test_compute_gcd_aux(25, 25, 25);
+    }
+
+    #[test]
+    fn find_gcd_test() {
+        assert_eq!(find_gcd([0].into_iter()), None);
+        assert_eq!(find_gcd([0, 10].into_iter()), NonZeroU64::new(10));
+        assert_eq!(find_gcd([10, 0].into_iter()), NonZeroU64::new(10));
+        assert_eq!(find_gcd([].into_iter()), None);
+        assert_eq!(find_gcd([15, 30, 5, 10].into_iter()), NonZeroU64::new(5));
+        assert_eq!(find_gcd([15, 16, 10].into_iter()), NonZeroU64::new(1));
+        assert_eq!(find_gcd([0, 5, 5, 5].into_iter()), NonZeroU64::new(5));
+        assert_eq!(find_gcd([0, 0].into_iter()), None);
+    }
+}
--- a/fastfield_codecs/src/lib.rs
+++ b/fastfield_codecs/src/lib.rs
@@ -1,130 +1,330 @@
+#![warn(missing_docs)]
+#![cfg_attr(all(feature = "unstable", test), feature(test))]
+
+//! # `fastfield_codecs`
+//!
+//! - Columnar storage of data for tantivy [`Column`].
+//! - Encode data in different codecs.
+//! - Monotonically map values to u64/u128
+
 #[cfg(test)]
 #[macro_use]
 extern crate more_asserts;

+#[cfg(all(test, feature = "unstable"))]
+extern crate test;
+
 use std::io;
 use std::io::Write;
+use std::sync::Arc;

-pub mod bitpacked;
-pub mod linearinterpol;
-pub mod multilinearinterpol;
+use common::BinarySerializable;
+use compact_space::CompactSpaceDecompressor;
+use format_version::read_format_version;
+use monotonic_mapping::{
+    StrictlyMonotonicMappingInverter, StrictlyMonotonicMappingToInternal,
+    StrictlyMonotonicMappingToInternalBaseval, StrictlyMonotonicMappingToInternalGCDBaseval,
+};
+use null_index_footer::read_null_index_footer;
+use ownedbytes::OwnedBytes;
+use serialize::{Header, U128Header};

-pub trait FastFieldCodecReader: Sized {
-    /// reads the metadata and returns the CodecReader
-    fn open_from_bytes(bytes: &[u8]) -> std::io::Result<Self>;
+mod bitpacked;
+mod blockwise_linear;
+mod compact_space;
+mod format_version;
+mod line;
+mod linear;
+mod monotonic_mapping;
+mod monotonic_mapping_u128;
+#[allow(dead_code)]
+mod null_index;
+mod null_index_footer;

-    fn get_u64(&self, doc: u64, data: &[u8]) -> u64;
+mod column;
+mod gcd;
+pub mod serialize;

-    fn min_value(&self) -> u64;
-    fn max_value(&self) -> u64;
+pub use ordered_float;
+
+use self::bitpacked::BitpackedCodec;
+use self::blockwise_linear::BlockwiseLinearCodec;
+pub use self::column::{monotonic_map_column, Column, IterColumn, VecColumn};
+use self::linear::LinearCodec;
+pub use self::monotonic_mapping::{MonotonicallyMappableToU64, StrictlyMonotonicFn};
+pub use self::monotonic_mapping_u128::MonotonicallyMappableToU128;
+pub use self::serialize::{
+    estimate, serialize, serialize_and_load, serialize_u128, NormalizedHeader,
+};
+
+#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
+#[repr(u8)]
+/// Available codecs to use to encode the u64 (via [`MonotonicallyMappableToU64`]) converted data.
+pub enum FastFieldCodecType {
+    /// Bitpack all values in the value range. The number of bits is defined by the amplitude
+    /// `column.max_value() - column.min_value()`
+    Bitpacked = 1,
+    /// Linear interpolation puts a line between the first and last value and then bitpacks the
+    /// values by the offset from the line. The number of bits is defined by the max deviation from
+    /// the line.
+    Linear = 2,
+    /// Same as [`FastFieldCodecType::Linear`], but encodes in blocks of 512 elements.
+    BlockwiseLinear = 3,
+}
+
+impl BinarySerializable for FastFieldCodecType {
+    fn serialize<W: Write>(&self, wrt: &mut W) -> io::Result<()> {
+        self.to_code().serialize(wrt)
+    }
+
+    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
+        let code = u8::deserialize(reader)?;
+        let codec_type: Self = Self::from_code(code)
+            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Unknown code `{code}.`"))?;
+        Ok(codec_type)
+    }
+}
+
+impl FastFieldCodecType {
+    pub(crate) fn to_code(self) -> u8 {
+        self as u8
+    }
+
+    pub(crate) fn from_code(code: u8) -> Option<Self> {
+        match code {
+            1 => Some(Self::Bitpacked),
+            2 => Some(Self::Linear),
+            3 => Some(Self::BlockwiseLinear),
+            _ => None,
+        }
+    }
+}
+
+#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
+#[repr(u8)]
+/// Available codecs to use to encode the u128 (via [`MonotonicallyMappableToU128`]) converted data.
+pub enum U128FastFieldCodecType {
+    /// This codec takes a large number space (u128) and reduces it to a compact number space, by
+    /// removing the holes.
+    CompactSpace = 1,
+}
+
+impl BinarySerializable for U128FastFieldCodecType {
+    fn serialize<W: Write>(&self, wrt: &mut W) -> io::Result<()> {
+        self.to_code().serialize(wrt)
+    }
+
+    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
+        let code = u8::deserialize(reader)?;
+        let codec_type: Self = Self::from_code(code)
+            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Unknown code `{code}.`"))?;
+        Ok(codec_type)
+    }
+}
+
+impl U128FastFieldCodecType {
+    pub(crate) fn to_code(self) -> u8 {
+        self as u8
+    }
+
+    pub(crate) fn from_code(code: u8) -> Option<Self> {
+        match code {
+            1 => Some(Self::CompactSpace),
+            _ => None,
+        }
+    }
+}
+
+/// Returns the correct codec reader wrapped in the `Arc` for the data.
+pub fn open_u128<Item: MonotonicallyMappableToU128>(
+    bytes: OwnedBytes,
+) -> io::Result<Arc<dyn Column<Item>>> {
+    let (bytes, _format_version) = read_format_version(bytes)?;
+    let (mut bytes, _null_index_footer) = read_null_index_footer(bytes)?;
+    let header = U128Header::deserialize(&mut bytes)?;
+    assert_eq!(header.codec_type, U128FastFieldCodecType::CompactSpace);
+    let reader = CompactSpaceDecompressor::open(bytes)?;
+    let inverted: StrictlyMonotonicMappingInverter<StrictlyMonotonicMappingToInternal<Item>> =
+        StrictlyMonotonicMappingToInternal::<Item>::new().into();
+    Ok(Arc::new(monotonic_map_column(reader, inverted)))
+}
+
+/// Returns the correct codec reader wrapped in the `Arc` for the data.
+pub fn open<T: MonotonicallyMappableToU64>(bytes: OwnedBytes) -> io::Result<Arc<dyn Column<T>>> {
+    let (bytes, _format_version) = read_format_version(bytes)?;
+    let (mut bytes, _null_index_footer) = read_null_index_footer(bytes)?;
+    let header = Header::deserialize(&mut bytes)?;
+    match header.codec_type {
+        FastFieldCodecType::Bitpacked => open_specific_codec::<BitpackedCodec, _>(bytes, &header),
+        FastFieldCodecType::Linear => open_specific_codec::<LinearCodec, _>(bytes, &header),
+        FastFieldCodecType::BlockwiseLinear => {
+            open_specific_codec::<BlockwiseLinearCodec, _>(bytes, &header)
+        }
+    }
+}
+
+fn open_specific_codec<C: FastFieldCodec, Item: MonotonicallyMappableToU64>(
+    bytes: OwnedBytes,
+    header: &Header,
+) -> io::Result<Arc<dyn Column<Item>>> {
+    let normalized_header = header.normalized();
+    let reader = C::open_from_bytes(bytes, normalized_header)?;
+    let min_value = header.min_value;
+    if let Some(gcd) = header.gcd {
+        let mapping = StrictlyMonotonicMappingInverter::from(
+            StrictlyMonotonicMappingToInternalGCDBaseval::new(gcd.get(), min_value),
+        );
+        Ok(Arc::new(monotonic_map_column(reader, mapping)))
+    } else {
+        let mapping = StrictlyMonotonicMappingInverter::from(
+            StrictlyMonotonicMappingToInternalBaseval::new(min_value),
+        );
+        Ok(Arc::new(monotonic_map_column(reader, mapping)))
+    }
 }

 /// The FastFieldSerializerEstimate trait is required on all variants
 /// of fast field compressions, to decide which one to choose.
-pub trait FastFieldCodecSerializer {
+trait FastFieldCodec: 'static {
    /// A codex needs to provide a unique name and id, which is
    /// used for debugging and de/serialization.
-    const NAME: &'static str;
-    const ID: u8;
+    const CODEC_TYPE: FastFieldCodecType;

-    /// Check if the Codec is able to compress the data
-    fn is_applicable(fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> bool;
+    type Reader: Column<u64> + 'static;
+
+    /// Reads the metadata and returns the CodecReader
+    fn open_from_bytes(bytes: OwnedBytes, header: NormalizedHeader) -> io::Result<Self::Reader>;
+
+    /// Serializes the data using the serializer into write.
+    ///
+    /// The column iterator should be preferred over using column `get_val` method for
+    /// performance reasons.
+    fn serialize(column: &dyn Column, write: &mut impl Write) -> io::Result<()>;

    /// Returns an estimate of the compression ratio.
+    /// If the codec is not applicable, returns `None`.
+    ///
    /// The baseline is uncompressed 64bit data.
    ///
    /// It could make sense to also return a value representing
    /// computational complexity.
-    fn estimate(fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> f32;
-
-    /// Serializes the data using the serializer into write.
-    /// There are multiple iterators, in case the codec needs to read the data multiple times.
-    /// The iterators should be preferred over using fastfield_accessor for performance reasons.
-    fn serialize(
-        write: &mut impl Write,
-        fastfield_accessor: &dyn FastFieldDataAccess,
-        stats: FastFieldStats,
-        data_iter: impl Iterator<Item = u64>,
-        data_iter1: impl Iterator<Item = u64>,
-    ) -> io::Result<()>;
+    fn estimate(column: &dyn Column) -> Option<f32>;
 }

-/// FastFieldDataAccess is the trait to access fast field data during serialization and estimation.
-pub trait FastFieldDataAccess {
-    /// Return the value associated to the given position.
-    ///
-    /// Whenever possible use the Iterator passed to the fastfield creation instead, for performance
-    /// reasons.
-    ///
-    /// # Panics
-    ///
-    /// May panic if `position` is greater than the index.
-    fn get_val(&self, position: u64) -> u64;
-}
-
-#[derive(Debug, Clone)]
-/// Statistics are used in codec detection and stored in the fast field footer.
-pub struct FastFieldStats {
-    pub min_value: u64,
-    pub max_value: u64,
-    pub num_vals: u64,
-}
-
-impl<'a> FastFieldDataAccess for &'a [u64] {
-    fn get_val(&self, position: u64) -> u64 {
-        self[position as usize]
-    }
-}
-
-impl FastFieldDataAccess for Vec<u64> {
-    fn get_val(&self, position: u64) -> u64 {
-        self[position as usize]
-    }
-}
+/// The list of all available codecs for u64 convertible data.
+pub const ALL_CODEC_TYPES: [FastFieldCodecType; 3] = [
+    FastFieldCodecType::Bitpacked,
+    FastFieldCodecType::BlockwiseLinear,
+    FastFieldCodecType::Linear,
+];

 #[cfg(test)]
 mod tests {
-    use crate::bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer};
-    use crate::linearinterpol::{LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer};
-    use crate::multilinearinterpol::{
-        MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer,
-    };

-    pub fn create_and_validate<S: FastFieldCodecSerializer, R: FastFieldCodecReader>(
+    use proptest::prelude::*;
+    use proptest::strategy::Strategy;
+    use proptest::{prop_oneof, proptest};
+
+    use crate::bitpacked::BitpackedCodec;
+    use crate::blockwise_linear::BlockwiseLinearCodec;
+    use crate::linear::LinearCodec;
+    use crate::serialize::Header;
+
+    pub(crate) fn create_and_validate<Codec: FastFieldCodec>(
        data: &[u64],
        name: &str,
-    ) -> (f32, f32) {
-        if !S::is_applicable(&data, crate::tests::stats_from_vec(data)) {
-            return (f32::MAX, 0.0);
-        }
-        let estimation = S::estimate(&data, crate::tests::stats_from_vec(data));
-        let mut out = vec![];
-        S::serialize(
-            &mut out,
-            &data,
-            crate::tests::stats_from_vec(data),
-            data.iter().cloned(),
-            data.iter().cloned(),
-        )
-        .unwrap();
+    ) -> Option<(f32, f32)> {
+        let col = &VecColumn::from(data);
+        let header = Header::compute_header(col, &[Codec::CODEC_TYPE])?;
+        let normalized_col = header.normalize_column(col);
+        let estimation = Codec::estimate(&normalized_col)?;
+
+        let mut out = Vec::new();
+        let col = VecColumn::from(data);
+        serialize(col, &mut out, &[Codec::CODEC_TYPE]).unwrap();

-        let reader = R::open_from_bytes(&out).unwrap();
-        for (doc, orig_val) in data.iter().enumerate() {
-            let val = reader.get_u64(doc as u64, &out);
-            if val != *orig_val {
-                panic!(
-                    "val {:?} does not match orig_val {:?}, in data set {}, data {:?}",
-                    val, orig_val, name, data
-                );
-            }
-        }
        let actual_compression = out.len() as f32 / (data.len() as f32 * 8.0);
-        (estimation, actual_compression)
+
+        let reader = crate::open::<u64>(OwnedBytes::new(out)).unwrap();
+        assert_eq!(reader.num_vals(), data.len() as u32);
+        for (doc, orig_val) in data.iter().copied().enumerate() {
+            let val = reader.get_val(doc as u32);
+            assert_eq!(
+                val, orig_val,
+                "val `{val}` does not match orig_val {orig_val:?}, in data set {name}, data \
+                 `{data:?}`",
+            );
+        }
+
+        if !data.is_empty() {
+            let test_rand_idx = rand::thread_rng().gen_range(0..=data.len() - 1);
+            let expected_positions: Vec<u32> = data
+                .iter()
+                .enumerate()
+                .filter(|(_, el)| **el == data[test_rand_idx])
+                .map(|(pos, _)| pos as u32)
+                .collect();
+            let mut positions = Vec::new();
+            reader.get_docids_for_value_range(
+                data[test_rand_idx]..=data[test_rand_idx],
+                0..data.len() as u32,
+                &mut positions,
+            );
+            assert_eq!(expected_positions, positions);
+        }
+        Some((estimation, actual_compression))
    }
-    pub fn get_codec_test_data_sets() -> Vec<(Vec<u64>, &'static str)> {
+
+    proptest! {
+        #![proptest_config(ProptestConfig::with_cases(100))]
+
+        #[test]
+        fn test_proptest_small_bitpacked(data in proptest::collection::vec(num_strategy(), 1..10)) {
+            create_and_validate::<BitpackedCodec>(&data, "proptest bitpacked");
+        }
+
+        #[test]
+        fn test_proptest_small_linear(data in proptest::collection::vec(num_strategy(), 1..10)) {
+            create_and_validate::<LinearCodec>(&data, "proptest linearinterpol");
+        }
+
+        #[test]
+        fn test_proptest_small_blockwise_linear(data in proptest::collection::vec(num_strategy(), 1..10)) {
+            create_and_validate::<BlockwiseLinearCodec>(&data, "proptest multilinearinterpol");
+        }
+    }
+
+    proptest! {
+        #![proptest_config(ProptestConfig::with_cases(10))]
+
+        #[test]
+        fn test_proptest_large_bitpacked(data in proptest::collection::vec(num_strategy(), 1..6000)) {
+            create_and_validate::<BitpackedCodec>(&data, "proptest bitpacked");
+        }
+
+        #[test]
+        fn test_proptest_large_linear(data in proptest::collection::vec(num_strategy(), 1..6000)) {
+            create_and_validate::<LinearCodec>(&data, "proptest linearinterpol");
+        }
+
+        #[test]
+        fn test_proptest_large_blockwise_linear(data in proptest::collection::vec(num_strategy(), 1..6000)) {
+            create_and_validate::<BlockwiseLinearCodec>(&data, "proptest multilinearinterpol");
+        }
+    }
+
+    fn num_strategy() -> impl Strategy<Value = u64> {
+        prop_oneof![
+            1 => prop::num::u64::ANY.prop_map(|num| u64::MAX - (num % 10) ),
+            1 => prop::num::u64::ANY.prop_map(|num| num % 10 ),
+            20 => prop::num::u64::ANY,
+        ]
+    }
+
+    pub fn get_codec_test_datasets() -> Vec<(Vec<u64>, &'static str)> {
        let mut data_and_names = vec![];

-        let data = (10..=20_u64).collect::<Vec<_>>();
+        let data = (10..=10_000_u64).collect::<Vec<_>>();
        data_and_names.push((data, "simple monotonically increasing"));

        data_and_names.push((
@@ -134,92 +334,230 @@ mod tests {
        data_and_names.push((vec![5, 50, 3, 13, 1, 1000, 35], "rand small"));
        data_and_names.push((vec![10], "single value"));

+        data_and_names.push((
+            vec![1572656989877777, 1170935903116329, 720575940379279, 0],
+            "overflow error",
+        ));
+
        data_and_names
    }

-    fn test_codec<S: FastFieldCodecSerializer, R: FastFieldCodecReader>() {
-        let codec_name = S::NAME;
-        for (data, data_set_name) in get_codec_test_data_sets() {
-            let (estimate, actual) =
-                crate::tests::create_and_validate::<S, R>(&data, data_set_name);
-            let result = if estimate == f32::MAX {
-                "Disabled".to_string()
+    fn test_codec<C: FastFieldCodec>() {
+        let codec_name = format!("{:?}", C::CODEC_TYPE);
+        for (data, dataset_name) in get_codec_test_datasets() {
+            let estimate_actual_opt: Option<(f32, f32)> =
+                crate::tests::create_and_validate::<C>(&data, dataset_name);
+            let result = if let Some((estimate, actual)) = estimate_actual_opt {
+                format!("Estimate `{estimate}` Actual `{actual}`")
            } else {
-                format!("Estimate {:?} Actual {:?} ", estimate, actual)
+                "Disabled".to_string()
            };
-            println!(
-                "Codec {}, DataSet {}, {}",
-                codec_name, data_set_name, result
-            );
+            println!("Codec {codec_name}, DataSet {dataset_name}, {result}");
        }
    }
    #[test]
    fn test_codec_bitpacking() {
-        test_codec::<BitpackedFastFieldSerializer, BitpackedFastFieldReader>();
+        test_codec::<BitpackedCodec>();
    }
    #[test]
    fn test_codec_interpolation() {
-        test_codec::<LinearInterpolFastFieldSerializer, LinearInterpolFastFieldReader>();
+        test_codec::<LinearCodec>();
    }
    #[test]
    fn test_codec_multi_interpolation() {
-        test_codec::<MultiLinearInterpolFastFieldSerializer, MultiLinearInterpolFastFieldReader>();
+        test_codec::<BlockwiseLinearCodec>();
    }

    use super::*;
-    pub fn stats_from_vec(data: &[u64]) -> FastFieldStats {
-        let min_value = data.iter().cloned().min().unwrap_or(0);
-        let max_value = data.iter().cloned().max().unwrap_or(0);
-        FastFieldStats {
-            min_value,
-            max_value,
-            num_vals: data.len() as u64,
-        }
-    }

    #[test]
    fn estimation_good_interpolation_case() {
        let data = (10..=20000_u64).collect::<Vec<_>>();
+        let data: VecColumn = data.as_slice().into();

-        let linear_interpol_estimation =
-            LinearInterpolFastFieldSerializer::estimate(&data, stats_from_vec(&data));
+        let linear_interpol_estimation = LinearCodec::estimate(&data).unwrap();
        assert_le!(linear_interpol_estimation, 0.01);

-        let multi_linear_interpol_estimation =
-            MultiLinearInterpolFastFieldSerializer::estimate(&data, stats_from_vec(&data));
+        let multi_linear_interpol_estimation = BlockwiseLinearCodec::estimate(&data).unwrap();
        assert_le!(multi_linear_interpol_estimation, 0.2);
-        assert_le!(linear_interpol_estimation, multi_linear_interpol_estimation);
+        assert_lt!(linear_interpol_estimation, multi_linear_interpol_estimation);

-        let bitpacked_estimation =
-            BitpackedFastFieldSerializer::estimate(&data, stats_from_vec(&data));
-        assert_le!(linear_interpol_estimation, bitpacked_estimation);
+        let bitpacked_estimation = BitpackedCodec::estimate(&data).unwrap();
+        assert_lt!(linear_interpol_estimation, bitpacked_estimation);
    }
    #[test]
    fn estimation_test_bad_interpolation_case() {
-        let data = vec![200, 10, 10, 10, 10, 1000, 20];
+        let data: &[u64] = &[200, 10, 10, 10, 10, 1000, 20];

-        let linear_interpol_estimation =
-            LinearInterpolFastFieldSerializer::estimate(&data, stats_from_vec(&data));
-        assert_le!(linear_interpol_estimation, 0.32);
+        let data: VecColumn = data.into();
+        let linear_interpol_estimation = LinearCodec::estimate(&data).unwrap();
+        assert_le!(linear_interpol_estimation, 0.34);

-        let bitpacked_estimation =
-            BitpackedFastFieldSerializer::estimate(&data, stats_from_vec(&data));
-        assert_le!(bitpacked_estimation, linear_interpol_estimation);
+        let bitpacked_estimation = BitpackedCodec::estimate(&data).unwrap();
+        assert_lt!(bitpacked_estimation, linear_interpol_estimation);
    }
+
+    #[test]
+    fn estimation_prefer_bitpacked() {
+        let data = VecColumn::from(&[10, 10, 10, 10]);
+        let linear_interpol_estimation = LinearCodec::estimate(&data).unwrap();
+        let bitpacked_estimation = BitpackedCodec::estimate(&data).unwrap();
+        assert_lt!(bitpacked_estimation, linear_interpol_estimation);
+    }
+
    #[test]
    fn estimation_test_bad_interpolation_case_monotonically_increasing() {
-        let mut data = (200..=20000_u64).collect::<Vec<_>>();
+        let mut data: Vec<u64> = (201..=20000_u64).collect();
        data.push(1_000_000);
+        let data: VecColumn = data.as_slice().into();

        // in this case the linear interpolation can't in fact not be worse than bitpacking,
        // but the estimator adds some threshold, which leads to estimated worse behavior
-        let linear_interpol_estimation =
-            LinearInterpolFastFieldSerializer::estimate(&data, stats_from_vec(&data));
+        let linear_interpol_estimation = LinearCodec::estimate(&data).unwrap();
        assert_le!(linear_interpol_estimation, 0.35);

-        let bitpacked_estimation =
-            BitpackedFastFieldSerializer::estimate(&data, stats_from_vec(&data));
+        let bitpacked_estimation = BitpackedCodec::estimate(&data).unwrap();
        assert_le!(bitpacked_estimation, 0.32);
        assert_le!(bitpacked_estimation, linear_interpol_estimation);
    }
+
+    #[test]
+    fn test_fast_field_codec_type_to_code() {
+        let mut count_codec = 0;
+        for code in 0..=255 {
+            if let Some(codec_type) = FastFieldCodecType::from_code(code) {
+                assert_eq!(codec_type.to_code(), code);
+                count_codec += 1;
+            }
+        }
+        assert_eq!(count_codec, 3);
+    }
+}
+
+#[cfg(all(test, feature = "unstable"))]
+mod bench {
+    use std::sync::Arc;
+
+    use ownedbytes::OwnedBytes;
+    use rand::rngs::StdRng;
+    use rand::{Rng, SeedableRng};
+    use test::{self, Bencher};
+
+    use super::*;
+    use crate::Column;
+
+    fn get_data() -> Vec<u64> {
+        let mut rng = StdRng::seed_from_u64(2u64);
+        let mut data: Vec<_> = (100..55000_u64)
+            .map(|num| num + rng.gen::<u8>() as u64)
+            .collect();
+        data.push(99_000);
+        data.insert(1000, 2000);
+        data.insert(2000, 100);
+        data.insert(3000, 4100);
+        data.insert(4000, 100);
+        data.insert(5000, 800);
+        data
+    }
+
+    #[inline(never)]
+    fn value_iter() -> impl Iterator<Item = u64> {
+        0..20_000
+    }
+    fn get_reader_for_bench<Codec: FastFieldCodec>(data: &[u64]) -> Codec::Reader {
+        let mut bytes = Vec::new();
+        let min_value = *data.iter().min().unwrap();
+        let data = data.iter().map(|el| *el - min_value).collect::<Vec<_>>();
+        let col = VecColumn::from(&data);
+        let normalized_header = crate::NormalizedHeader {
+            num_vals: col.num_vals(),
+            max_value: col.max_value(),
+        };
+        Codec::serialize(&VecColumn::from(&data), &mut bytes).unwrap();
+        Codec::open_from_bytes(OwnedBytes::new(bytes), normalized_header).unwrap()
+    }
+    fn bench_get<Codec: FastFieldCodec>(b: &mut Bencher, data: &[u64]) {
+        let col = get_reader_for_bench::<Codec>(data);
+        b.iter(|| {
+            let mut sum = 0u64;
+            for pos in value_iter() {
+                let val = col.get_val(pos as u32);
+                sum = sum.wrapping_add(val);
+            }
+            sum
+        });
+    }
+
+    #[inline(never)]
+    fn bench_get_dynamic_helper(b: &mut Bencher, col: Arc<dyn Column>) {
+        b.iter(|| {
+            let mut sum = 0u64;
+            for pos in value_iter() {
+                let val = col.get_val(pos as u32);
+                sum = sum.wrapping_add(val);
+            }
+            sum
+        });
+    }
+
+    fn bench_get_dynamic<Codec: FastFieldCodec>(b: &mut Bencher, data: &[u64]) {
+        let col = Arc::new(get_reader_for_bench::<Codec>(data));
+        bench_get_dynamic_helper(b, col);
+    }
+    fn bench_create<Codec: FastFieldCodec>(b: &mut Bencher, data: &[u64]) {
+        let min_value = *data.iter().min().unwrap();
+        let data = data.iter().map(|el| *el - min_value).collect::<Vec<_>>();
+
+        let mut bytes = Vec::new();
+        b.iter(|| {
+            bytes.clear();
+            Codec::serialize(&VecColumn::from(&data), &mut bytes).unwrap();
+        });
+    }
+
+    #[bench]
+    fn bench_fastfield_bitpack_create(b: &mut Bencher) {
+        let data: Vec<_> = get_data();
+        bench_create::<BitpackedCodec>(b, &data);
+    }
+    #[bench]
+    fn bench_fastfield_linearinterpol_create(b: &mut Bencher) {
+        let data: Vec<_> = get_data();
+        bench_create::<LinearCodec>(b, &data);
+    }
+    #[bench]
+    fn bench_fastfield_multilinearinterpol_create(b: &mut Bencher) {
+        let data: Vec<_> = get_data();
+        bench_create::<BlockwiseLinearCodec>(b, &data);
+    }
+    #[bench]
+    fn bench_fastfield_bitpack_get(b: &mut Bencher) {
+        let data: Vec<_> = get_data();
+        bench_get::<BitpackedCodec>(b, &data);
+    }
+    #[bench]
+    fn bench_fastfield_bitpack_get_dynamic(b: &mut Bencher) {
+        let data: Vec<_> = get_data();
+        bench_get_dynamic::<BitpackedCodec>(b, &data);
+    }
+    #[bench]
+    fn bench_fastfield_linearinterpol_get(b: &mut Bencher) {
+        let data: Vec<_> = get_data();
+        bench_get::<LinearCodec>(b, &data);
+    }
+    #[bench]
+    fn bench_fastfield_linearinterpol_get_dynamic(b: &mut Bencher) {
+        let data: Vec<_> = get_data();
+        bench_get_dynamic::<LinearCodec>(b, &data);
+    }
+    #[bench]
+    fn bench_fastfield_multilinearinterpol_get(b: &mut Bencher) {
+        let data: Vec<_> = get_data();
+        bench_get::<BlockwiseLinearCodec>(b, &data);
+    }
+    #[bench]
+    fn bench_fastfield_multilinearinterpol_get_dynamic(b: &mut Bencher) {
+        let data: Vec<_> = get_data();
+        bench_get_dynamic::<BlockwiseLinearCodec>(b, &data);
+    }
 }
--- a/fastfield_codecs/src/line.rs
+++ b/fastfield_codecs/src/line.rs
@@ -0,0 +1,222 @@
+use std::io;
+use std::num::NonZeroU32;
+
+use common::{BinarySerializable, VInt};
+
+use crate::Column;
+
+const MID_POINT: u64 = (1u64 << 32) - 1u64;
+
+/// `Line` describes a line function `y: ax + b` using integer
+/// arithmetics.
+///
+/// The slope is in fact a decimal split into a 32 bit integer value,
+/// and a 32-bit decimal value.
+///
+/// The multiplication then becomes.
+/// `y = m * x >> 32 + b`
+#[derive(Debug, Clone, Copy, Default)]
+pub struct Line {
+    slope: u64,
+    intercept: u64,
+}
+
+/// Compute the line slope.
+///
+/// This function has the nice property of being
+/// invariant by translation.
+/// `
+///   compute_slope(y0, y1)
+/// = compute_slope(y0 + X % 2^64, y1 + X % 2^64)
+/// `
+fn compute_slope(y0: u64, y1: u64, num_vals: NonZeroU32) -> u64 {
+    let dy = y1.wrapping_sub(y0);
+    let sign = dy <= (1 << 63);
+    let abs_dy = if sign {
+        y1.wrapping_sub(y0)
+    } else {
+        y0.wrapping_sub(y1)
+    };
+    if abs_dy >= 1 << 32 {
+        // This is outside of realm we handle.
+        // Let's just bail.
+        return 0u64;
+    }
+
+    let abs_slope = (abs_dy << 32) / num_vals.get() as u64;
+    if sign {
+        abs_slope
+    } else {
+        // The complement does indeed create the
+        // opposite decreasing slope...
+        //
+        // Intuitively (without the bitshifts and % u64::MAX)
+        // ```
+        //    (x + shift)*(u64::MAX - abs_slope)
+        // -  (x * (u64::MAX - abs_slope))
+        // = - shift * abs_slope
+        // ```
+        u64::MAX - abs_slope
+    }
+}
+
+impl Line {
+    #[inline(always)]
+    pub fn eval(&self, x: u32) -> u64 {
+        let linear_part = ((x as u64).wrapping_mul(self.slope) >> 32) as i32 as u64;
+        self.intercept.wrapping_add(linear_part)
+    }
+
+    // Same as train, but the intercept is only estimated from provided sample positions
+    pub fn estimate(sample_positions_and_values: &[(u64, u64)]) -> Self {
+        let first_val = sample_positions_and_values[0].1;
+        let last_val = sample_positions_and_values[sample_positions_and_values.len() - 1].1;
+        let num_vals = sample_positions_and_values[sample_positions_and_values.len() - 1].0 + 1;
+        Self::train_from(
+            first_val,
+            last_val,
+            num_vals as u32,
+            sample_positions_and_values.iter().cloned(),
+        )
+    }
+
+    // Intercept is only computed from provided positions
+    fn train_from(
+        first_val: u64,
+        last_val: u64,
+        num_vals: u32,
+        positions_and_values: impl Iterator<Item = (u64, u64)>,
+    ) -> Self {
+        // TODO replace with let else
+        let idx_last_val = if let Some(idx_last_val) = NonZeroU32::new(num_vals - 1) {
+            idx_last_val
+        } else {
+            return Line::default();
+        };
+
+        let y0 = first_val;
+        let y1 = last_val;
+
+        // We first independently pick our slope.
+        let slope = compute_slope(y0, y1, idx_last_val);
+
+        // We picked our slope. Note that it does not have to be perfect.
+        // Now we need to compute the best intercept.
+        //
+        // Intuitively, the best intercept is such that line passes through one of the
+        // `(i, ys[])`.
+        //
+        // The best intercept therefore has the form
+        // `y[i] - line.eval(i)` (using wrapping arithmetics).
+        // In other words, the best intercept is one of the `y - Line::eval(ys[i])`
+        // and our task is just to pick the one that minimizes our error.
+        //
+        // Without sorting our values, this is a difficult problem.
+        // We however rely on the following trick...
+        //
+        // We only focus on the case where the interpolation is half decent.
+        // If the line interpolation is doing its job on a dataset suited for it,
+        // we can hope that the maximum error won't be larger than `u64::MAX / 2`.
+        //
+        // In other words, even without the intercept the values `y - Line::eval(ys[i])` will all be
+        // within an interval that takes less than half of the modulo space of `u64`.
+        //
+        // Our task is therefore to identify this interval.
+        // Here we simply translate all of our values by `y0 - 2^63` and pick the min.
+        let mut line = Line {
+            slope,
+            intercept: 0,
+        };
+        let heuristic_shift = y0.wrapping_sub(MID_POINT);
+        line.intercept = positions_and_values
+            .map(|(pos, y)| y.wrapping_sub(line.eval(pos as u32)))
+            .min_by_key(|&val| val.wrapping_sub(heuristic_shift))
+            .unwrap_or(0u64); //< Never happens.
+        line
+    }
+
+    /// Returns a line that attemps to approximate a function
+    /// f: i in 0..[ys.num_vals()) -> ys[i].
+    ///
+    /// - The approximation is always lower than the actual value.
+    /// Or more rigorously, formally `f(i).wrapping_sub(ys[i])` is small
+    /// for any i in [0..ys.len()).
+    /// - It computes without panicking for any value of it.
+    ///
+    /// This function is only invariable by translation if all of the
+    /// `ys` are packaged into half of the space. (See heuristic below)
+    pub fn train(ys: &dyn Column) -> Self {
+        let first_val = ys.iter().next().unwrap();
+        let last_val = ys.iter().nth(ys.num_vals() as usize - 1).unwrap();
+        Self::train_from(
+            first_val,
+            last_val,
+            ys.num_vals(),
+            ys.iter().enumerate().map(|(pos, val)| (pos as u64, val)),
+        )
+    }
+}
+
+impl BinarySerializable for Line {
+    fn serialize<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
+        VInt(self.slope).serialize(writer)?;
+        VInt(self.intercept).serialize(writer)?;
+        Ok(())
+    }
+
+    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
+        let slope = VInt::deserialize(reader)?.0;
+        let intercept = VInt::deserialize(reader)?.0;
+        Ok(Line { slope, intercept })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::VecColumn;
+
+    /// Test training a line and ensuring that the maximum difference between
+    /// the data points and the line is `expected`.
+    ///
+    /// This function operates translation over the data for better coverage.
+    #[track_caller]
+    fn test_line_interpol_with_translation(ys: &[u64], expected: Option<u64>) {
+        let mut translations = vec![0, 100, u64::MAX / 2, u64::MAX, u64::MAX - 1];
+        translations.extend_from_slice(ys);
+        for translation in translations {
+            let translated_ys: Vec<u64> = ys
+                .iter()
+                .copied()
+                .map(|y| y.wrapping_add(translation))
+                .collect();
+            let largest_err = test_eval_max_err(&translated_ys);
+            assert_eq!(largest_err, expected);
+        }
+    }
+
+    fn test_eval_max_err(ys: &[u64]) -> Option<u64> {
+        let line = Line::train(&VecColumn::from(&ys));
+        ys.iter()
+            .enumerate()
+            .map(|(x, y)| y.wrapping_sub(line.eval(x as u32)))
+            .max()
+    }
+
+    #[test]
+    fn test_train() {
+        test_line_interpol_with_translation(&[11, 11, 11, 12, 12, 13], Some(1));
+        test_line_interpol_with_translation(&[13, 12, 12, 11, 11, 11], Some(1));
+        test_line_interpol_with_translation(&[13, 13, 12, 11, 11, 11], Some(1));
+        test_line_interpol_with_translation(&[13, 13, 12, 11, 11, 11], Some(1));
+        test_line_interpol_with_translation(&[u64::MAX - 1, 0, 0, 1], Some(1));
+        test_line_interpol_with_translation(&[u64::MAX - 1, u64::MAX, 0, 1], Some(0));
+        test_line_interpol_with_translation(&[0, 1, 2, 3, 5], Some(0));
+        test_line_interpol_with_translation(&[1, 2, 3, 4], Some(0));
+
+        let data: Vec<u64> = (0..255).collect();
+        test_line_interpol_with_translation(&data, Some(0));
+        let data: Vec<u64> = (0..255).map(|el| el * 2).collect();
+        test_line_interpol_with_translation(&data, Some(0));
+    }
+}
--- a/fastfield_codecs/src/linear.rs
+++ b/fastfield_codecs/src/linear.rs
@@ -0,0 +1,231 @@
+use std::io::{self, Write};
+
+use common::BinarySerializable;
+use ownedbytes::OwnedBytes;
+use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
+
+use crate::line::Line;
+use crate::serialize::NormalizedHeader;
+use crate::{Column, FastFieldCodec, FastFieldCodecType};
+
+/// Depending on the field type, a different
+/// fast field is required.
+#[derive(Clone)]
+pub struct LinearReader {
+    data: OwnedBytes,
+    linear_params: LinearParams,
+    header: NormalizedHeader,
+}
+
+impl Column for LinearReader {
+    #[inline]
+    fn get_val(&self, doc: u32) -> u64 {
+        let interpoled_val: u64 = self.linear_params.line.eval(doc);
+        let bitpacked_diff = self.linear_params.bit_unpacker.get(doc, &self.data);
+        interpoled_val.wrapping_add(bitpacked_diff)
+    }
+
+    #[inline]
+    fn min_value(&self) -> u64 {
+        // The LinearReader assumes a normalized vector.
+        0u64
+    }
+
+    #[inline]
+    fn max_value(&self) -> u64 {
+        self.header.max_value
+    }
+
+    #[inline]
+    fn num_vals(&self) -> u32 {
+        self.header.num_vals
+    }
+}
+
+/// Fastfield serializer, which tries to guess values by linear interpolation
+/// and stores the difference bitpacked.
+pub struct LinearCodec;
+
+#[derive(Debug, Clone)]
+struct LinearParams {
+    line: Line,
+    bit_unpacker: BitUnpacker,
+}
+
+impl BinarySerializable for LinearParams {
+    fn serialize<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
+        self.line.serialize(writer)?;
+        self.bit_unpacker.bit_width().serialize(writer)?;
+        Ok(())
+    }
+
+    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
+        let line = Line::deserialize(reader)?;
+        let bit_width = u8::deserialize(reader)?;
+        Ok(Self {
+            line,
+            bit_unpacker: BitUnpacker::new(bit_width),
+        })
+    }
+}
+
+impl FastFieldCodec for LinearCodec {
+    const CODEC_TYPE: FastFieldCodecType = FastFieldCodecType::Linear;
+
+    type Reader = LinearReader;
+
+    /// Opens a fast field given a file.
+    fn open_from_bytes(mut data: OwnedBytes, header: NormalizedHeader) -> io::Result<Self::Reader> {
+        let linear_params = LinearParams::deserialize(&mut data)?;
+        Ok(LinearReader {
+            data,
+            linear_params,
+            header,
+        })
+    }
+
+    /// Creates a new fast field serializer.
+    fn serialize(column: &dyn Column, write: &mut impl Write) -> io::Result<()> {
+        assert_eq!(column.min_value(), 0);
+        let line = Line::train(column);
+
+        let max_offset_from_line = column
+            .iter()
+            .enumerate()
+            .map(|(pos, actual_value)| {
+                let calculated_value = line.eval(pos as u32);
+                actual_value.wrapping_sub(calculated_value)
+            })
+            .max()
+            .unwrap();
+
+        let num_bits = compute_num_bits(max_offset_from_line);
+        let linear_params = LinearParams {
+            line,
+            bit_unpacker: BitUnpacker::new(num_bits),
+        };
+        linear_params.serialize(write)?;
+
+        let mut bit_packer = BitPacker::new();
+        for (pos, actual_value) in column.iter().enumerate() {
+            let calculated_value = line.eval(pos as u32);
+            let offset = actual_value.wrapping_sub(calculated_value);
+            bit_packer.write(offset, num_bits, write)?;
+        }
+        bit_packer.close(write)?;
+
+        Ok(())
+    }
+
+    /// estimation for linear interpolation is hard because, you don't know
+    /// where the local maxima for the deviation of the calculated value are and
+    /// the offset to shift all values to >=0 is also unknown.
+    #[allow(clippy::question_mark)]
+    fn estimate(column: &dyn Column) -> Option<f32> {
+        if column.num_vals() < 3 {
+            return None; // disable compressor for this case
+        }
+
+        let limit_num_vals = column.num_vals().min(100_000);
+
+        let num_samples = 100;
+        let step_size = (limit_num_vals / num_samples).max(1); // 20 samples
+        let mut sample_positions_and_values: Vec<_> = Vec::new();
+        for (pos, val) in column.iter().enumerate().step_by(step_size as usize) {
+            sample_positions_and_values.push((pos as u64, val));
+        }
+
+        let line = Line::estimate(&sample_positions_and_values);
+
+        let estimated_bit_width = sample_positions_and_values
+            .into_iter()
+            .map(|(pos, actual_value)| {
+                let interpolated_val = line.eval(pos as u32);
+                actual_value.wrapping_sub(interpolated_val)
+            })
+            .map(|diff| ((diff as f32 * 1.5) * 2.0) as u64)
+            .map(compute_num_bits)
+            .max()
+            .unwrap_or(0);
+
+        // Extrapolate to whole column
+        let num_bits = (estimated_bit_width as u64 * column.num_vals() as u64) + 64;
+        let num_bits_uncompressed = 64 * column.num_vals();
+        Some(num_bits as f32 / num_bits_uncompressed as f32)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use rand::RngCore;
+
+    use super::*;
+    use crate::tests::get_codec_test_datasets;
+
+    fn create_and_validate(data: &[u64], name: &str) -> Option<(f32, f32)> {
+        crate::tests::create_and_validate::<LinearCodec>(data, name)
+    }
+
+    #[test]
+    fn test_compression() {
+        let data = (10..=6_000_u64).collect::<Vec<_>>();
+        let (estimate, actual_compression) =
+            create_and_validate(&data, "simple monotonically large").unwrap();
+
+        assert_le!(actual_compression, 0.001);
+        assert_le!(estimate, 0.02);
+    }
+
+    #[test]
+    fn test_with_codec_datasets() {
+        let data_sets = get_codec_test_datasets();
+        for (mut data, name) in data_sets {
+            create_and_validate(&data, name);
+            data.reverse();
+            create_and_validate(&data, name);
+        }
+    }
+    #[test]
+    fn linear_interpol_fast_field_test_large_amplitude() {
+        let data = vec![
+            i64::MAX as u64 / 2,
+            i64::MAX as u64 / 3,
+            i64::MAX as u64 / 2,
+        ];
+
+        create_and_validate(&data, "large amplitude");
+    }
+
+    #[test]
+    fn overflow_error_test() {
+        let data = vec![1572656989877777, 1170935903116329, 720575940379279, 0];
+        create_and_validate(&data, "overflow test");
+    }
+
+    #[test]
+    fn linear_interpol_fast_concave_data() {
+        let data = vec![0, 1, 2, 5, 8, 10, 20, 50];
+        create_and_validate(&data, "concave data");
+    }
+    #[test]
+    fn linear_interpol_fast_convex_data() {
+        let data = vec![0, 40, 60, 70, 75, 77];
+        create_and_validate(&data, "convex data");
+    }
+    #[test]
+    fn linear_interpol_fast_field_test_simple() {
+        let data = (10..=20_u64).collect::<Vec<_>>();
+        create_and_validate(&data, "simple monotonically");
+    }
+
+    #[test]
+    fn linear_interpol_fast_field_rand() {
+        let mut rng = rand::thread_rng();
+        for _ in 0..50 {
+            let mut data = (0..10_000).map(|_| rng.next_u64()).collect::<Vec<_>>();
+            create_and_validate(&data, "random");
+            data.reverse();
+            create_and_validate(&data, "random");
+        }
+    }
+}
--- a/fastfield_codecs/src/linearinterpol.rs
+++ b/fastfield_codecs/src/linearinterpol.rs
@@ -1,300 +0,0 @@
-use std::io::{self, Read, Write};
-use std::ops::Sub;
-
-use common::{BinarySerializable, FixedSize};
-use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
-
-use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats};
-
-/// Depending on the field type, a different
-/// fast field is required.
-#[derive(Clone)]
-pub struct LinearInterpolFastFieldReader {
-    bit_unpacker: BitUnpacker,
-    pub footer: LinearInterpolFooter,
-    pub slope: f32,
-}
-
-#[derive(Clone, Debug)]
-pub struct LinearInterpolFooter {
-    pub relative_max_value: u64,
-    pub offset: u64,
-    pub first_val: u64,
-    pub last_val: u64,
-    pub num_vals: u64,
-    pub min_value: u64,
-    pub max_value: u64,
-}
-
-impl BinarySerializable for LinearInterpolFooter {
-    fn serialize<W: Write>(&self, write: &mut W) -> io::Result<()> {
-        self.relative_max_value.serialize(write)?;
-        self.offset.serialize(write)?;
-        self.first_val.serialize(write)?;
-        self.last_val.serialize(write)?;
-        self.num_vals.serialize(write)?;
-        self.min_value.serialize(write)?;
-        self.max_value.serialize(write)?;
-        Ok(())
-    }
-
-    fn deserialize<R: Read>(reader: &mut R) -> io::Result<LinearInterpolFooter> {
-        Ok(LinearInterpolFooter {
-            relative_max_value: u64::deserialize(reader)?,
-            offset: u64::deserialize(reader)?,
-            first_val: u64::deserialize(reader)?,
-            last_val: u64::deserialize(reader)?,
-            num_vals: u64::deserialize(reader)?,
-            min_value: u64::deserialize(reader)?,
-            max_value: u64::deserialize(reader)?,
-        })
-    }
-}
-
-impl FixedSize for LinearInterpolFooter {
-    const SIZE_IN_BYTES: usize = 56;
-}
-
-impl FastFieldCodecReader for LinearInterpolFastFieldReader {
-    /// Opens a fast field given a file.
-    fn open_from_bytes(bytes: &[u8]) -> io::Result<Self> {
-        let (_data, mut footer) = bytes.split_at(bytes.len() - LinearInterpolFooter::SIZE_IN_BYTES);
-        let footer = LinearInterpolFooter::deserialize(&mut footer)?;
-        let slope = get_slope(footer.first_val, footer.last_val, footer.num_vals);
-
-        let num_bits = compute_num_bits(footer.relative_max_value);
-        let bit_unpacker = BitUnpacker::new(num_bits);
-        Ok(LinearInterpolFastFieldReader {
-            bit_unpacker,
-            footer,
-            slope,
-        })
-    }
-    #[inline]
-    fn get_u64(&self, doc: u64, data: &[u8]) -> u64 {
-        let calculated_value = get_calculated_value(self.footer.first_val, doc, self.slope);
-        (calculated_value + self.bit_unpacker.get(doc, data)) - self.footer.offset
-    }
-
-    #[inline]
-    fn min_value(&self) -> u64 {
-        self.footer.min_value
-    }
-    #[inline]
-    fn max_value(&self) -> u64 {
-        self.footer.max_value
-    }
-}
-
-/// Fastfield serializer, which tries to guess values by linear interpolation
-/// and stores the difference bitpacked.
-pub struct LinearInterpolFastFieldSerializer {}
-
-#[inline]
-fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
-    if num_vals <= 1 {
-        return 0.0;
-    }
-    //  We calculate the slope with f64 high precision and use the result in lower precision f32
-    //  This is done in order to handle estimations for very large values like i64::MAX
-    ((last_val as f64 - first_val as f64) / (num_vals as u64 - 1) as f64) as f32
-}
-
-#[inline]
-fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
-    first_val + (pos as f32 * slope) as u64
-}
-
-impl FastFieldCodecSerializer for LinearInterpolFastFieldSerializer {
-    const NAME: &'static str = "LinearInterpol";
-    const ID: u8 = 2;
-    /// Creates a new fast field serializer.
-    fn serialize(
-        write: &mut impl Write,
-        fastfield_accessor: &dyn FastFieldDataAccess,
-        stats: FastFieldStats,
-        data_iter: impl Iterator<Item = u64>,
-        data_iter1: impl Iterator<Item = u64>,
-    ) -> io::Result<()> {
-        assert!(stats.min_value <= stats.max_value);
-
-        let first_val = fastfield_accessor.get_val(0);
-        let last_val = fastfield_accessor.get_val(stats.num_vals as u64 - 1);
-        let slope = get_slope(first_val, last_val, stats.num_vals);
-        // calculate offset to ensure all values are positive
-        let mut offset = 0;
-        let mut rel_positive_max = 0;
-        for (pos, actual_value) in data_iter1.enumerate() {
-            let calculated_value = get_calculated_value(first_val, pos as u64, slope);
-            if calculated_value > actual_value {
-                // negative value we need to apply an offset
-                // we ignore negative values in the max value calculation, because negative values
-                // will be offset to 0
-                offset = offset.max(calculated_value - actual_value);
-            } else {
-                // positive value no offset reuqired
-                rel_positive_max = rel_positive_max.max(actual_value - calculated_value);
-            }
-        }
-
-        // rel_positive_max will be adjusted by offset
-        let relative_max_value = rel_positive_max + offset;
-
-        let num_bits = compute_num_bits(relative_max_value);
-        let mut bit_packer = BitPacker::new();
-        for (pos, val) in data_iter.enumerate() {
-            let calculated_value = get_calculated_value(first_val, pos as u64, slope);
-            let diff = (val + offset) - calculated_value;
-            bit_packer.write(diff, num_bits, write)?;
-        }
-        bit_packer.close(write)?;
-
-        let footer = LinearInterpolFooter {
-            relative_max_value,
-            offset,
-            first_val,
-            last_val,
-            num_vals: stats.num_vals,
-            min_value: stats.min_value,
-            max_value: stats.max_value,
-        };
-        footer.serialize(write)?;
-        Ok(())
-    }
-    fn is_applicable(
-        _fastfield_accessor: &impl FastFieldDataAccess,
-        stats: FastFieldStats,
-    ) -> bool {
-        if stats.num_vals < 3 {
-            return false; // disable compressor for this case
-        }
-        // On serialisation the offset is added to the actual value.
-        // We need to make sure this won't run into overflow calculation issues.
-        // For this we take the maximum theroretical offset and add this to the max value.
-        // If this doesn't overflow the algortihm should be fine
-        let theorethical_maximum_offset = stats.max_value - stats.min_value;
-        if stats
-            .max_value
-            .checked_add(theorethical_maximum_offset)
-            .is_none()
-        {
-            return false;
-        }
-        true
-    }
-    /// estimation for linear interpolation is hard because, you don't know
-    /// where the local maxima for the deviation of the calculated value are and
-    /// the offset to shift all values to >=0 is also unknown.
-    fn estimate(fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> f32 {
-        let first_val = fastfield_accessor.get_val(0);
-        let last_val = fastfield_accessor.get_val(stats.num_vals as u64 - 1);
-        let slope = get_slope(first_val, last_val, stats.num_vals);
-
-        // let's sample at 0%, 5%, 10% .. 95%, 100%
-        let num_vals = stats.num_vals as f32 / 100.0;
-        let sample_positions = (0..20)
-            .map(|pos| (num_vals * pos as f32 * 5.0) as usize)
-            .collect::<Vec<_>>();
-
-        let max_distance = sample_positions
-            .iter()
-            .map(|pos| {
-                let calculated_value = get_calculated_value(first_val, *pos as u64, slope);
-                let actual_value = fastfield_accessor.get_val(*pos as u64);
-                distance(calculated_value, actual_value)
-            })
-            .max()
-            .unwrap_or(0);
-
-        // the theory would be that we don't have the actual max_distance, but we are close within
-        // 50% threshold.
-        // It is multiplied by 2 because in a log case scenario the line would be as much above as
-        // below. So the offset would = max_distance
-        //
-        let relative_max_value = (max_distance as f32 * 1.5) * 2.0;
-
-        let num_bits = compute_num_bits(relative_max_value as u64) as u64 * stats.num_vals as u64
-            + LinearInterpolFooter::SIZE_IN_BYTES as u64;
-        let num_bits_uncompressed = 64 * stats.num_vals;
-        num_bits as f32 / num_bits_uncompressed as f32
-    }
-}
-
-#[inline]
-fn distance<T: Sub<Output = T> + Ord>(x: T, y: T) -> T {
-    if x < y {
-        y - x
-    } else {
-        x - y
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::tests::get_codec_test_data_sets;
-
-    fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) {
-        crate::tests::create_and_validate::<
-            LinearInterpolFastFieldSerializer,
-            LinearInterpolFastFieldReader,
-        >(data, name)
-    }
-
-    #[test]
-    fn test_compression() {
-        let data = (10..=6_000_u64).collect::<Vec<_>>();
-        let (estimate, actual_compression) =
-            create_and_validate(&data, "simple monotonically large");
-
-        assert!(actual_compression < 0.01);
-        assert!(estimate < 0.01);
-    }
-
-    #[test]
-    fn test_with_codec_data_sets() {
-        let data_sets = get_codec_test_data_sets();
-        for (mut data, name) in data_sets {
-            create_and_validate(&data, name);
-            data.reverse();
-            create_and_validate(&data, name);
-        }
-    }
-    #[test]
-    fn linear_interpol_fast_field_test_large_amplitude() {
-        let data = vec![
-            i64::MAX as u64 / 2,
-            i64::MAX as u64 / 3,
-            i64::MAX as u64 / 2,
-        ];
-
-        create_and_validate(&data, "large amplitude");
-    }
-    #[test]
-    fn linear_interpol_fast_concave_data() {
-        let data = vec![0, 1, 2, 5, 8, 10, 20, 50];
-        create_and_validate(&data, "concave data");
-    }
-    #[test]
-    fn linear_interpol_fast_convex_data() {
-        let data = vec![0, 40, 60, 70, 75, 77];
-        create_and_validate(&data, "convex data");
-    }
-    #[test]
-    fn linear_interpol_fast_field_test_simple() {
-        let data = (10..=20_u64).collect::<Vec<_>>();
-
-        create_and_validate(&data, "simple monotonically");
-    }
-
-    #[test]
-    fn linear_interpol_fast_field_rand() {
-        for _ in 0..5000 {
-            let mut data = (0..50).map(|_| rand::random::<u64>()).collect::<Vec<_>>();
-            create_and_validate(&data, "random");
-
-            data.reverse();
-            create_and_validate(&data, "random");
-        }
-    }
-}
--- a/fastfield_codecs/src/main.rs
+++ b/fastfield_codecs/src/main.rs
@@ -1,52 +1,170 @@
 #[macro_use]
 extern crate prettytable;
-use fastfield_codecs::linearinterpol::LinearInterpolFastFieldSerializer;
-use fastfield_codecs::multilinearinterpol::MultiLinearInterpolFastFieldSerializer;
-use fastfield_codecs::{FastFieldCodecSerializer, FastFieldStats};
+use std::collections::HashSet;
+use std::env;
+use std::io::BufRead;
+use std::net::{IpAddr, Ipv6Addr};
+use std::str::FromStr;
+
+use fastfield_codecs::{open_u128, serialize_u128, Column, FastFieldCodecType, VecColumn};
+use itertools::Itertools;
+use measure_time::print_time;
+use ownedbytes::OwnedBytes;
 use prettytable::{Cell, Row, Table};

+fn print_set_stats(ip_addrs: &[u128]) {
+    println!("NumIps\t{}", ip_addrs.len());
+    let ip_addr_set: HashSet<u128> = ip_addrs.iter().cloned().collect();
+    println!("NumUniqueIps\t{}", ip_addr_set.len());
+    let ratio_unique = ip_addr_set.len() as f64 / ip_addrs.len() as f64;
+    println!("RatioUniqueOverTotal\t{ratio_unique:.4}");
+
+    // histogram
+    let mut ip_addrs = ip_addrs.to_vec();
+    ip_addrs.sort();
+    let mut cnts: Vec<usize> = ip_addrs
+        .into_iter()
+        .dedup_with_count()
+        .map(|(cnt, _)| cnt)
+        .collect();
+    cnts.sort();
+
+    let top_256_cnt: usize = cnts.iter().rev().take(256).sum();
+    let top_128_cnt: usize = cnts.iter().rev().take(128).sum();
+    let top_64_cnt: usize = cnts.iter().rev().take(64).sum();
+    let top_8_cnt: usize = cnts.iter().rev().take(8).sum();
+    let total: usize = cnts.iter().sum();
+
+    println!("{}", total);
+    println!("{}", top_256_cnt);
+    println!("{}", top_128_cnt);
+    println!("Percentage Top8 {:02}", top_8_cnt as f32 / total as f32);
+    println!("Percentage Top64 {:02}", top_64_cnt as f32 / total as f32);
+    println!("Percentage Top128 {:02}", top_128_cnt as f32 / total as f32);
+    println!("Percentage Top256 {:02}", top_256_cnt as f32 / total as f32);
+
+    let mut cnts: Vec<(usize, usize)> = cnts.into_iter().dedup_with_count().collect();
+    cnts.sort_by(|a, b| {
+        if a.1 == b.1 {
+            a.0.cmp(&b.0)
+        } else {
+            b.1.cmp(&a.1)
+        }
+    });
+}
+
+fn ip_dataset() -> Vec<u128> {
+    let mut ip_addr_v4 = 0;
+
+    let stdin = std::io::stdin();
+    let ip_addrs: Vec<u128> = stdin
+        .lock()
+        .lines()
+        .flat_map(|line| {
+            let line = line.unwrap();
+            let line = line.trim();
+            let ip_addr = IpAddr::from_str(line.trim()).ok()?;
+            if ip_addr.is_ipv4() {
+                ip_addr_v4 += 1;
+            }
+            let ip_addr_v6: Ipv6Addr = match ip_addr {
+                IpAddr::V4(v4) => v4.to_ipv6_mapped(),
+                IpAddr::V6(v6) => v6,
+            };
+            Some(ip_addr_v6)
+        })
+        .map(|ip_v6| u128::from_be_bytes(ip_v6.octets()))
+        .collect();
+
+    println!("IpAddrsAny\t{}", ip_addrs.len());
+    println!("IpAddrsV4\t{}", ip_addr_v4);
+
+    ip_addrs
+}
+
+fn bench_ip() {
+    let dataset = ip_dataset();
+    print_set_stats(&dataset);
+
+    // Chunks
+    {
+        let mut data = vec![];
+        for dataset in dataset.chunks(500_000) {
+            serialize_u128(|| dataset.iter().cloned(), dataset.len() as u32, &mut data).unwrap();
+        }
+        let compression = data.len() as f64 / (dataset.len() * 16) as f64;
+        println!("Compression 50_000 chunks {:.4}", compression);
+        println!(
+            "Num Bits per elem {:.2}",
+            (data.len() * 8) as f32 / dataset.len() as f32
+        );
+    }
+
+    let mut data = vec![];
+    {
+        print_time!("creation");
+        serialize_u128(|| dataset.iter().cloned(), dataset.len() as u32, &mut data).unwrap();
+    }
+
+    let compression = data.len() as f64 / (dataset.len() * 16) as f64;
+    println!("Compression {:.2}", compression);
+    println!(
+        "Num Bits per elem {:.2}",
+        (data.len() * 8) as f32 / dataset.len() as f32
+    );
+
+    let decompressor = open_u128::<u128>(OwnedBytes::new(data)).unwrap();
+    // Sample some ranges
+    let mut doc_values = Vec::new();
+    for value in dataset.iter().take(1110).skip(1100).cloned() {
+        doc_values.clear();
+        print_time!("get range");
+        decompressor.get_docids_for_value_range(
+            value..=value,
+            0..decompressor.num_vals(),
+            &mut doc_values,
+        );
+        println!("{:?}", doc_values.len());
+    }
+}
+
 fn main() {
+    if env::args().nth(1).unwrap() == "bench_ip" {
+        bench_ip();
+        return;
+    }
+
    let mut table = Table::new();

    // Add a row per time
    table.add_row(row!["", "Compression Ratio", "Compression Estimation"]);

    for (data, data_set_name) in get_codec_test_data_sets() {
-        let mut results = vec![];
-        let res = serialize_with_codec::<LinearInterpolFastFieldSerializer>(&data);
-        results.push(res);
-        let res = serialize_with_codec::<MultiLinearInterpolFastFieldSerializer>(&data);
-        results.push(res);
-        let res = serialize_with_codec::<fastfield_codecs::bitpacked::BitpackedFastFieldSerializer>(
-            &data,
-        );
-        results.push(res);
-
-        // let best_estimation_codec = results
-        //.iter()
-        //.min_by(|res1, res2| res1.partial_cmp(&res2).unwrap())
-        //.unwrap();
+        let results: Vec<(f32, f32, FastFieldCodecType)> = [
+            serialize_with_codec(&data, FastFieldCodecType::Bitpacked),
+            serialize_with_codec(&data, FastFieldCodecType::Linear),
+            serialize_with_codec(&data, FastFieldCodecType::BlockwiseLinear),
+        ]
+        .into_iter()
+        .flatten()
+        .collect();
        let best_compression_ratio_codec = results
            .iter()
-            .min_by(|res1, res2| res1.partial_cmp(res2).unwrap())
+            .min_by(|&res1, &res2| res1.partial_cmp(res2).unwrap())
            .cloned()
            .unwrap();

        table.add_row(Row::new(vec![Cell::new(data_set_name).style_spec("Bbb")]));
-        for (is_applicable, est, comp, name) in results {
-            let (est_cell, ratio_cell) = if !is_applicable {
-                ("Codec Disabled".to_string(), "".to_string())
-            } else {
-                (est.to_string(), comp.to_string())
-            };
+        for (est, comp, codec_type) in results {
+            let est_cell = est.to_string();
+            let ratio_cell = comp.to_string();
            let style = if comp == best_compression_ratio_codec.1 {
                "Fb"
            } else {
                ""
            };
-
            table.add_row(Row::new(vec![
-                Cell::new(name).style_spec("bFg"),
+                Cell::new(&format!("{codec_type:?}")).style_spec("bFg"),
                Cell::new(&ratio_cell).style_spec(style),
                Cell::new(&est_cell).style_spec(""),
            ]));
@@ -91,34 +209,14 @@ pub fn get_codec_test_data_sets() -> Vec<(Vec<u64>, &'static str)> {
    data_and_names
 }

-pub fn serialize_with_codec<S: FastFieldCodecSerializer>(
+pub fn serialize_with_codec(
    data: &[u64],
-) -> (bool, f32, f32, &'static str) {
-    let is_applicable = S::is_applicable(&data, stats_from_vec(data));
-    if !is_applicable {
-        return (false, 0.0, 0.0, S::NAME);
-    }
-    let estimation = S::estimate(&data, stats_from_vec(data));
-    let mut out = vec![];
-    S::serialize(
-        &mut out,
-        &data,
-        stats_from_vec(data),
-        data.iter().cloned(),
-        data.iter().cloned(),
-    )
-    .unwrap();
-
-    let actual_compression = out.len() as f32 / (data.len() * 8) as f32;
-    (true, estimation, actual_compression, S::NAME)
-}
-
-pub fn stats_from_vec(data: &[u64]) -> FastFieldStats {
-    let min_value = data.iter().cloned().min().unwrap_or(0);
-    let max_value = data.iter().cloned().max().unwrap_or(0);
-    FastFieldStats {
-        min_value,
-        max_value,
-        num_vals: data.len() as u64,
-    }
+    codec_type: FastFieldCodecType,
+) -> Option<(f32, f32, FastFieldCodecType)> {
+    let col = VecColumn::from(data);
+    let estimation = fastfield_codecs::estimate(&col, codec_type)?;
+    let mut out = Vec::new();
+    fastfield_codecs::serialize(&col, &mut out, &[codec_type]).ok()?;
+    let actual_compression = out.len() as f32 / (col.num_vals() * 8) as f32;
+    Some((estimation, actual_compression, codec_type))
 }
--- a/fastfield_codecs/src/monotonic_mapping.rs
+++ b/fastfield_codecs/src/monotonic_mapping.rs
@@ -0,0 +1,267 @@
+use std::marker::PhantomData;
+
+use fastdivide::DividerU64;
+use ordered_float::NotNan;
+
+use crate::MonotonicallyMappableToU128;
+
+/// Monotonic maps a value to u64 value space.
+/// Monotonic mapping enables `PartialOrd` on u64 space without conversion to original space.
+pub trait MonotonicallyMappableToU64: 'static + PartialOrd + Copy + Send + Sync {
+    /// Converts a value to u64.
+    ///
+    /// Internally all fast field values are encoded as u64.
+    fn to_u64(self) -> u64;
+
+    /// Converts a value from u64
+    ///
+    /// Internally all fast field values are encoded as u64.
+    /// **Note: To be used for converting encoded Term, Posting values.**
+    fn from_u64(val: u64) -> Self;
+}
+
+/// Values need to be strictly monotonic mapped to a `Internal` value (u64 or u128) that can be
+/// used in fast field codecs.
+///
+/// The monotonic mapping is required so that `PartialOrd` can be used on `Internal` without
+/// converting to `External`.
+///
+/// All strictly monotonic functions are invertible because they are guaranteed to have a one-to-one
+/// mapping from their range to their domain. The `inverse` method is required when opening a codec,
+/// so a value can be converted back to its original domain (e.g. ip address or f64) from its
+/// internal representation.
+pub trait StrictlyMonotonicFn<External, Internal> {
+    /// Strictly monotonically maps the value from External to Internal.
+    fn mapping(&self, inp: External) -> Internal;
+    /// Inverse of `mapping`. Maps the value from Internal to External.
+    fn inverse(&self, out: Internal) -> External;
+}
+
+/// Inverts a strictly monotonic mapping from `StrictlyMonotonicFn<A, B>` to
+/// `StrictlyMonotonicFn<B, A>`.
+///
+/// # Warning
+///
+/// This type comes with a footgun. A type being strictly monotonic does not impose that the inverse
+/// mapping is strictly monotonic over the entire space External. e.g. a -> a * 2. Use at your own
+/// risks.
+pub(crate) struct StrictlyMonotonicMappingInverter<T> {
+    orig_mapping: T,
+}
+impl<T> From<T> for StrictlyMonotonicMappingInverter<T> {
+    fn from(orig_mapping: T) -> Self {
+        Self { orig_mapping }
+    }
+}
+
+impl<From, To, T> StrictlyMonotonicFn<To, From> for StrictlyMonotonicMappingInverter<T>
+where T: StrictlyMonotonicFn<From, To>
+{
+    fn mapping(&self, val: To) -> From {
+        self.orig_mapping.inverse(val)
+    }
+
+    fn inverse(&self, val: From) -> To {
+        self.orig_mapping.mapping(val)
+    }
+}
+
+/// Applies the strictly monotonic mapping from `T` without any additional changes.
+pub(crate) struct StrictlyMonotonicMappingToInternal<T> {
+    _phantom: PhantomData<T>,
+}
+
+impl<T> StrictlyMonotonicMappingToInternal<T> {
+    pub(crate) fn new() -> StrictlyMonotonicMappingToInternal<T> {
+        Self {
+            _phantom: PhantomData,
+        }
+    }
+}
+
+impl<External: MonotonicallyMappableToU128, T: MonotonicallyMappableToU128>
+    StrictlyMonotonicFn<External, u128> for StrictlyMonotonicMappingToInternal<T>
+where T: MonotonicallyMappableToU128
+{
+    fn mapping(&self, inp: External) -> u128 {
+        External::to_u128(inp)
+    }
+
+    fn inverse(&self, out: u128) -> External {
+        External::from_u128(out)
+    }
+}
+
+impl<External: MonotonicallyMappableToU64, T: MonotonicallyMappableToU64>
+    StrictlyMonotonicFn<External, u64> for StrictlyMonotonicMappingToInternal<T>
+where T: MonotonicallyMappableToU64
+{
+    fn mapping(&self, inp: External) -> u64 {
+        External::to_u64(inp)
+    }
+
+    fn inverse(&self, out: u64) -> External {
+        External::from_u64(out)
+    }
+}
+
+/// Mapping dividing by  gcd and a base value.
+///
+/// The function is assumed to be only called on values divided by passed
+/// gcd value. (It is necessary for the function to be monotonic.)
+pub(crate) struct StrictlyMonotonicMappingToInternalGCDBaseval {
+    gcd_divider: DividerU64,
+    gcd: u64,
+    min_value: u64,
+}
+impl StrictlyMonotonicMappingToInternalGCDBaseval {
+    pub(crate) fn new(gcd: u64, min_value: u64) -> Self {
+        let gcd_divider = DividerU64::divide_by(gcd);
+        Self {
+            gcd_divider,
+            gcd,
+            min_value,
+        }
+    }
+}
+impl<External: MonotonicallyMappableToU64> StrictlyMonotonicFn<External, u64>
+    for StrictlyMonotonicMappingToInternalGCDBaseval
+{
+    fn mapping(&self, inp: External) -> u64 {
+        self.gcd_divider
+            .divide(External::to_u64(inp) - self.min_value)
+    }
+
+    fn inverse(&self, out: u64) -> External {
+        External::from_u64(self.min_value + out * self.gcd)
+    }
+}
+
+/// Strictly monotonic mapping with a base value.
+pub(crate) struct StrictlyMonotonicMappingToInternalBaseval {
+    min_value: u64,
+}
+impl StrictlyMonotonicMappingToInternalBaseval {
+    pub(crate) fn new(min_value: u64) -> Self {
+        Self { min_value }
+    }
+}
+
+impl<External: MonotonicallyMappableToU64> StrictlyMonotonicFn<External, u64>
+    for StrictlyMonotonicMappingToInternalBaseval
+{
+    fn mapping(&self, val: External) -> u64 {
+        External::to_u64(val) - self.min_value
+    }
+
+    fn inverse(&self, val: u64) -> External {
+        External::from_u64(self.min_value + val)
+    }
+}
+
+impl MonotonicallyMappableToU64 for u64 {
+    fn to_u64(self) -> u64 {
+        self
+    }
+
+    fn from_u64(val: u64) -> Self {
+        val
+    }
+}
+
+impl MonotonicallyMappableToU64 for i64 {
+    #[inline(always)]
+    fn to_u64(self) -> u64 {
+        common::i64_to_u64(self)
+    }
+
+    #[inline(always)]
+    fn from_u64(val: u64) -> Self {
+        common::u64_to_i64(val)
+    }
+}
+
+impl MonotonicallyMappableToU64 for bool {
+    #[inline(always)]
+    fn to_u64(self) -> u64 {
+        u64::from(self)
+    }
+
+    #[inline(always)]
+    fn from_u64(val: u64) -> Self {
+        val > 0
+    }
+}
+
+// TODO remove me.
+// Tantivy should refuse NaN values and work with NotNaN internally.
+impl MonotonicallyMappableToU64 for f64 {
+    fn to_u64(self) -> u64 {
+        common::f64_to_u64(self)
+    }
+
+    fn from_u64(val: u64) -> Self {
+        common::u64_to_f64(val)
+    }
+}
+
+impl MonotonicallyMappableToU64 for ordered_float::NotNan<f64> {
+    fn to_u64(self) -> u64 {
+        common::f64_to_u64(self.into_inner())
+    }
+
+    fn from_u64(val: u64) -> Self {
+        NotNan::new(common::u64_to_f64(val)).expect("Invalid NotNaN f64 value.")
+    }
+}
+
+#[cfg(test)]
+mod tests {
+
+    use super::*;
+
+    #[test]
+    fn test_from_u64_pos_inf() {
+        let inf_as_u64 = common::f64_to_u64(f64::INFINITY);
+        let inf_back_to_f64 = NotNan::from_u64(inf_as_u64);
+        assert_eq!(inf_back_to_f64, NotNan::new(f64::INFINITY).unwrap());
+    }
+
+    #[test]
+    fn test_from_u64_neg_inf() {
+        let inf_as_u64 = common::f64_to_u64(-f64::INFINITY);
+        let inf_back_to_f64 = NotNan::from_u64(inf_as_u64);
+        assert_eq!(inf_back_to_f64, NotNan::new(-f64::INFINITY).unwrap());
+    }
+
+    #[test]
+    #[should_panic(expected = "Invalid NotNaN")]
+    fn test_from_u64_nan_panics() {
+        let nan_as_u64 = common::f64_to_u64(f64::NAN);
+        NotNan::from_u64(nan_as_u64);
+    }
+
+    #[test]
+    fn strictly_monotonic_test() {
+        // identity mapping
+        test_round_trip(&StrictlyMonotonicMappingToInternal::<u64>::new(), 100u64);
+        // round trip to i64
+        test_round_trip(&StrictlyMonotonicMappingToInternal::<i64>::new(), 100u64);
+        // identity mapping
+        test_round_trip(&StrictlyMonotonicMappingToInternal::<u128>::new(), 100u128);
+
+        // base value to i64 round trip
+        let mapping = StrictlyMonotonicMappingToInternalBaseval::new(100);
+        test_round_trip::<_, _, u64>(&mapping, 100i64);
+        // base value and gcd to u64 round trip
+        let mapping = StrictlyMonotonicMappingToInternalGCDBaseval::new(10, 100);
+        test_round_trip::<_, _, u64>(&mapping, 100u64);
+    }
+
+    fn test_round_trip<T: StrictlyMonotonicFn<K, L>, K: std::fmt::Debug + Eq + Copy, L>(
+        mapping: &T,
+        test_val: K,
+    ) {
+        assert_eq!(mapping.inverse(mapping.mapping(test_val)), test_val);
+    }
+}
--- a/fastfield_codecs/src/monotonic_mapping_u128.rs
+++ b/fastfield_codecs/src/monotonic_mapping_u128.rs
@@ -0,0 +1,40 @@
+use std::net::Ipv6Addr;
+
+/// Montonic maps a value to u128 value space
+/// Monotonic mapping enables `PartialOrd` on u128 space without conversion to original space.
+pub trait MonotonicallyMappableToU128: 'static + PartialOrd + Copy + Send + Sync {
+    /// Converts a value to u128.
+    ///
+    /// Internally all fast field values are encoded as u64.
+    fn to_u128(self) -> u128;
+
+    /// Converts a value from u128
+    ///
+    /// Internally all fast field values are encoded as u64.
+    /// **Note: To be used for converting encoded Term, Posting values.**
+    fn from_u128(val: u128) -> Self;
+}
+
+impl MonotonicallyMappableToU128 for u128 {
+    fn to_u128(self) -> u128 {
+        self
+    }
+
+    fn from_u128(val: u128) -> Self {
+        val
+    }
+}
+
+impl MonotonicallyMappableToU128 for Ipv6Addr {
+    fn to_u128(self) -> u128 {
+        ip_to_u128(self)
+    }
+
+    fn from_u128(val: u128) -> Self {
+        Ipv6Addr::from(val.to_be_bytes())
+    }
+}
+
+fn ip_to_u128(ip_addr: Ipv6Addr) -> u128 {
+    u128::from_be_bytes(ip_addr.octets())
+}
--- a/fastfield_codecs/src/multilinearinterpol.rs
+++ b/fastfield_codecs/src/multilinearinterpol.rs
@@ -1,427 +0,0 @@
-//! MultiLinearInterpol compressor uses linear interpolation to guess a values and stores the
-//! offset, but in blocks of 512.
-//!
-//! With a CHUNK_SIZE of 512 and 29 byte metadata per block, we get a overhead for metadata of 232 /
-//! 512 = 0,45 bits per element. The additional space required per element in a block is the the
-//! maximum deviation of the linear interpolation estimation function.
-//!
-//! E.g. if the maximum deviation of an element is 12, all elements cost 4bits.
-//!
-//! Size per block:
-//! Num Elements * Maximum Deviation from Interpolation + 29 Byte Metadata
-
-use std::io::{self, Read, Write};
-use std::ops::Sub;
-
-use common::{BinarySerializable, CountingWriter, DeserializeFrom};
-use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
-
-use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats};
-
-const CHUNK_SIZE: u64 = 512;
-
-/// Depending on the field type, a different
-/// fast field is required.
-#[derive(Clone)]
-pub struct MultiLinearInterpolFastFieldReader {
-    pub footer: MultiLinearInterpolFooter,
-}
-
-#[derive(Clone, Debug, Default)]
-struct Function {
-    // The offset in the data is required, because we have diffrent bit_widths per block
-    data_start_offset: u64,
-    // start_pos in the block will be CHUNK_SIZE * BLOCK_NUM
-    start_pos: u64,
-    // only used during serialization, 0 after deserialization
-    end_pos: u64,
-    // only used during serialization, 0 after deserialization
-    value_start_pos: u64,
-    // only used during serialization, 0 after deserialization
-    value_end_pos: u64,
-    slope: f32,
-    // The offset so that all values are positive when writing them
-    positive_val_offset: u64,
-    num_bits: u8,
-    bit_unpacker: BitUnpacker,
-}
-
-impl Function {
-    fn calc_slope(&mut self) {
-        let num_vals = self.end_pos - self.start_pos;
-        self.slope = get_slope(self.value_start_pos, self.value_end_pos, num_vals);
-    }
-    // split the interpolation into two function, change self and return the second split
-    fn split(&mut self, split_pos: u64, split_pos_value: u64) -> Function {
-        let mut new_function = Function {
-            start_pos: split_pos,
-            end_pos: self.end_pos,
-            value_start_pos: split_pos_value,
-            value_end_pos: self.value_end_pos,
-            ..Default::default()
-        };
-        new_function.calc_slope();
-        self.end_pos = split_pos;
-        self.value_end_pos = split_pos_value;
-        self.calc_slope();
-        new_function
-    }
-}
-
-impl BinarySerializable for Function {
-    fn serialize<W: Write>(&self, write: &mut W) -> io::Result<()> {
-        self.data_start_offset.serialize(write)?;
-        self.value_start_pos.serialize(write)?;
-        self.positive_val_offset.serialize(write)?;
-        self.slope.serialize(write)?;
-        self.num_bits.serialize(write)?;
-        Ok(())
-    }
-
-    fn deserialize<R: Read>(reader: &mut R) -> io::Result<Function> {
-        let data_start_offset = u64::deserialize(reader)?;
-        let value_start_pos = u64::deserialize(reader)?;
-        let offset = u64::deserialize(reader)?;
-        let slope = f32::deserialize(reader)?;
-        let num_bits = u8::deserialize(reader)?;
-        let interpolation = Function {
-            data_start_offset,
-            value_start_pos,
-            positive_val_offset: offset,
-            num_bits,
-            bit_unpacker: BitUnpacker::new(num_bits),
-            slope,
-            ..Default::default()
-        };
-
-        Ok(interpolation)
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct MultiLinearInterpolFooter {
-    pub num_vals: u64,
-    pub min_value: u64,
-    pub max_value: u64,
-    interpolations: Vec<Function>,
-}
-
-impl BinarySerializable for MultiLinearInterpolFooter {
-    fn serialize<W: Write>(&self, write: &mut W) -> io::Result<()> {
-        let mut out = vec![];
-        self.num_vals.serialize(&mut out)?;
-        self.min_value.serialize(&mut out)?;
-        self.max_value.serialize(&mut out)?;
-        self.interpolations.serialize(&mut out)?;
-        write.write_all(&out)?;
-        (out.len() as u32).serialize(write)?;
-        Ok(())
-    }
-
-    fn deserialize<R: Read>(reader: &mut R) -> io::Result<MultiLinearInterpolFooter> {
-        let mut footer = MultiLinearInterpolFooter {
-            num_vals: u64::deserialize(reader)?,
-            min_value: u64::deserialize(reader)?,
-            max_value: u64::deserialize(reader)?,
-            interpolations: Vec::<Function>::deserialize(reader)?,
-        };
-        for (num, interpol) in footer.interpolations.iter_mut().enumerate() {
-            interpol.start_pos = CHUNK_SIZE * num as u64;
-        }
-        Ok(footer)
-    }
-}
-
-#[inline]
-fn get_interpolation_position(doc: u64) -> usize {
-    let index = doc / CHUNK_SIZE;
-    index as usize
-}
-
-#[inline]
-fn get_interpolation_function(doc: u64, interpolations: &[Function]) -> &Function {
-    &interpolations[get_interpolation_position(doc)]
-}
-
-impl FastFieldCodecReader for MultiLinearInterpolFastFieldReader {
-    /// Opens a fast field given a file.
-    fn open_from_bytes(bytes: &[u8]) -> io::Result<Self> {
-        let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?;
-
-        let (_data, mut footer) = bytes.split_at(bytes.len() - (4 + footer_len) as usize);
-        let footer = MultiLinearInterpolFooter::deserialize(&mut footer)?;
-
-        Ok(MultiLinearInterpolFastFieldReader { footer })
-    }
-
-    #[inline]
-    fn get_u64(&self, doc: u64, data: &[u8]) -> u64 {
-        let interpolation = get_interpolation_function(doc, &self.footer.interpolations);
-        let doc = doc - interpolation.start_pos;
-        let calculated_value =
-            get_calculated_value(interpolation.value_start_pos, doc, interpolation.slope);
-        let diff = interpolation
-            .bit_unpacker
-            .get(doc, &data[interpolation.data_start_offset as usize..]);
-        (calculated_value + diff) - interpolation.positive_val_offset
-    }
-
-    #[inline]
-    fn min_value(&self) -> u64 {
-        self.footer.min_value
-    }
-    #[inline]
-    fn max_value(&self) -> u64 {
-        self.footer.max_value
-    }
-}
-
-#[inline]
-fn get_slope(first_val: u64, last_val: u64, num_vals: u64) -> f32 {
-    ((last_val as f64 - first_val as f64) / (num_vals as u64 - 1) as f64) as f32
-}
-
-#[inline]
-fn get_calculated_value(first_val: u64, pos: u64, slope: f32) -> u64 {
-    (first_val as i64 + (pos as f32 * slope) as i64) as u64
-}
-
-/// Same as LinearInterpolFastFieldSerializer, but working on chunks of CHUNK_SIZE elements.
-pub struct MultiLinearInterpolFastFieldSerializer {}
-
-impl FastFieldCodecSerializer for MultiLinearInterpolFastFieldSerializer {
-    const NAME: &'static str = "MultiLinearInterpol";
-    const ID: u8 = 3;
-    /// Creates a new fast field serializer.
-    fn serialize(
-        write: &mut impl Write,
-        fastfield_accessor: &dyn FastFieldDataAccess,
-        stats: FastFieldStats,
-        data_iter: impl Iterator<Item = u64>,
-        _data_iter1: impl Iterator<Item = u64>,
-    ) -> io::Result<()> {
-        assert!(stats.min_value <= stats.max_value);
-
-        let first_val = fastfield_accessor.get_val(0);
-        let last_val = fastfield_accessor.get_val(stats.num_vals as u64 - 1);
-
-        let mut first_function = Function {
-            end_pos: stats.num_vals,
-            value_start_pos: first_val,
-            value_end_pos: last_val,
-            ..Default::default()
-        };
-        first_function.calc_slope();
-        let mut interpolations = vec![first_function];
-
-        // Since we potentially apply multiple passes over the data, the data is cached.
-        // Multiple iteration can be expensive (merge with index sorting can add lot of overhead per
-        // iteration)
-        let data = data_iter.collect::<Vec<_>>();
-
-        //// let's split this into chunks of CHUNK_SIZE
-        for data_pos in (0..data.len() as u64).step_by(CHUNK_SIZE as usize).skip(1) {
-            let new_fun = {
-                let current_interpolation = interpolations.last_mut().unwrap();
-                current_interpolation.split(data_pos, data[data_pos as usize])
-            };
-            interpolations.push(new_fun);
-        }
-        // calculate offset and max (-> numbits) for each function
-        for interpolation in &mut interpolations {
-            let mut offset = 0;
-            let mut rel_positive_max = 0;
-            for (pos, actual_value) in data
-                [interpolation.start_pos as usize..interpolation.end_pos as usize]
-                .iter()
-                .cloned()
-                .enumerate()
-            {
-                let calculated_value = get_calculated_value(
-                    interpolation.value_start_pos,
-                    pos as u64,
-                    interpolation.slope,
-                );
-                if calculated_value > actual_value {
-                    // negative value we need to apply an offset
-                    // we ignore negative values in the max value calculation, because negative
-                    // values will be offset to 0
-                    offset = offset.max(calculated_value - actual_value);
-                } else {
-                    // positive value no offset reuqired
-                    rel_positive_max = rel_positive_max.max(actual_value - calculated_value);
-                }
-            }
-
-            interpolation.positive_val_offset = offset;
-            interpolation.num_bits = compute_num_bits(rel_positive_max + offset);
-        }
-        let mut bit_packer = BitPacker::new();
-
-        let write = &mut CountingWriter::wrap(write);
-        for interpolation in &mut interpolations {
-            interpolation.data_start_offset = write.written_bytes();
-            let num_bits = interpolation.num_bits;
-            for (pos, actual_value) in data
-                [interpolation.start_pos as usize..interpolation.end_pos as usize]
-                .iter()
-                .cloned()
-                .enumerate()
-            {
-                let calculated_value = get_calculated_value(
-                    interpolation.value_start_pos,
-                    pos as u64,
-                    interpolation.slope,
-                );
-                let diff = (actual_value + interpolation.positive_val_offset) - calculated_value;
-                bit_packer.write(diff, num_bits, write)?;
-            }
-            bit_packer.flush(write)?;
-        }
-        bit_packer.close(write)?;
-
-        let footer = MultiLinearInterpolFooter {
-            num_vals: stats.num_vals,
-            min_value: stats.min_value,
-            max_value: stats.max_value,
-            interpolations,
-        };
-        footer.serialize(write)?;
-        Ok(())
-    }
-
-    fn is_applicable(
-        _fastfield_accessor: &impl FastFieldDataAccess,
-        stats: FastFieldStats,
-    ) -> bool {
-        if stats.num_vals < 5_000 {
-            return false;
-        }
-        // On serialization the offset is added to the actual value.
-        // We need to make sure this won't run into overflow calculation issues.
-        // For this we take the maximum theroretical offset and add this to the max value.
-        // If this doesn't overflow the algortihm should be fine
-        let theorethical_maximum_offset = stats.max_value - stats.min_value;
-        if stats
-            .max_value
-            .checked_add(theorethical_maximum_offset)
-            .is_none()
-        {
-            return false;
-        }
-        true
-    }
-    /// estimation for linear interpolation is hard because, you don't know
-    /// where the local maxima are for the deviation of the calculated value and
-    /// the offset is also unknown.
-    fn estimate(fastfield_accessor: &impl FastFieldDataAccess, stats: FastFieldStats) -> f32 {
-        let first_val_in_first_block = fastfield_accessor.get_val(0);
-        let last_elem_in_first_chunk = CHUNK_SIZE.min(stats.num_vals);
-        let last_val_in_first_block =
-            fastfield_accessor.get_val(last_elem_in_first_chunk as u64 - 1);
-        let slope = get_slope(
-            first_val_in_first_block,
-            last_val_in_first_block,
-            stats.num_vals,
-        );
-
-        // let's sample at 0%, 5%, 10% .. 95%, 100%, but for the first block only
-        let sample_positions = (0..20)
-            .map(|pos| (last_elem_in_first_chunk as f32 / 100.0 * pos as f32 * 5.0) as usize)
-            .collect::<Vec<_>>();
-
-        let max_distance = sample_positions
-            .iter()
-            .map(|pos| {
-                let calculated_value =
-                    get_calculated_value(first_val_in_first_block, *pos as u64, slope);
-                let actual_value = fastfield_accessor.get_val(*pos as u64);
-                distance(calculated_value, actual_value)
-            })
-            .max()
-            .unwrap();
-
-        // Estimate one block and extrapolate the cost to all blocks.
-        // the theory would be that we don't have the actual max_distance, but we are close within
-        // 50% threshold.
-        // It is multiplied by 2 because in a log case scenario the line would be as much above as
-        // below. So the offset would = max_distance
-        //
-        let relative_max_value = (max_distance as f32 * 1.5) * 2.0;
-
-        let num_bits = compute_num_bits(relative_max_value as u64) as u64 * stats.num_vals as u64
-            // function metadata per block
-            + 29 * (stats.num_vals / CHUNK_SIZE);
-        let num_bits_uncompressed = 64 * stats.num_vals;
-        num_bits as f32 / num_bits_uncompressed as f32
-    }
-}
-
-fn distance<T: Sub<Output = T> + Ord>(x: T, y: T) -> T {
-    if x < y {
-        y - x
-    } else {
-        x - y
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::tests::get_codec_test_data_sets;
-
-    fn create_and_validate(data: &[u64], name: &str) -> (f32, f32) {
-        crate::tests::create_and_validate::<
-            MultiLinearInterpolFastFieldSerializer,
-            MultiLinearInterpolFastFieldReader,
-        >(data, name)
-    }
-
-    #[test]
-    fn test_compression() {
-        let data = (10..=6_000_u64).collect::<Vec<_>>();
-        let (estimate, actual_compression) =
-            create_and_validate(&data, "simple monotonically large");
-        assert!(actual_compression < 0.2);
-        assert!(estimate < 0.20);
-        assert!(estimate > 0.15);
-        assert!(actual_compression > 0.01);
-    }
-
-    #[test]
-    fn test_with_codec_data_sets() {
-        let data_sets = get_codec_test_data_sets();
-        for (mut data, name) in data_sets {
-            create_and_validate(&data, name);
-            data.reverse();
-            create_and_validate(&data, name);
-        }
-    }
-    #[test]
-    fn test_simple() {
-        let data = (10..=20_u64).collect::<Vec<_>>();
-        create_and_validate(&data, "simple monotonically");
-    }
-
-    #[test]
-    fn border_cases_1() {
-        let data = (0..1024).collect::<Vec<_>>();
-        create_and_validate(&data, "border case");
-    }
-    #[test]
-    fn border_case_2() {
-        let data = (0..1025).collect::<Vec<_>>();
-        create_and_validate(&data, "border case");
-    }
-    #[test]
-    fn rand() {
-        for _ in 0..10 {
-            let mut data = (5_000..20_000)
-                .map(|_| rand::random::<u32>() as u64)
-                .collect::<Vec<_>>();
-            let _ = create_and_validate(&data, "random");
-            data.reverse();
-            create_and_validate(&data, "random");
-        }
-    }
-}
--- a/fastfield_codecs/src/null_index/dense.rs
+++ b/fastfield_codecs/src/null_index/dense.rs
@@ -0,0 +1,454 @@
+use std::convert::TryInto;
+use std::io::{self, Write};
+
+use common::BinarySerializable;
+use itertools::Itertools;
+use ownedbytes::OwnedBytes;
+
+use super::{get_bit_at, set_bit_at};
+
+/// For the `DenseCodec`, `data` which contains the encoded blocks.
+/// Each block consists of [u8; 12]. The first 8 bytes is a bitvec for 64 elements.
+/// The last 4 bytes are the offset, the number of set bits so far.
+///
+/// When translating the original index to a dense index, the correct block can be computed
+/// directly `orig_idx/64`. Inside the block the position is `orig_idx%64`.
+///
+/// When translating a dense index to the original index, we can use the offset to find the correct
+/// block. Direct computation is not possible, but we can employ a linear or binary search.
+#[derive(Clone)]
+pub struct DenseCodec {
+    // data consists of blocks of 64 bits.
+    //
+    // The format is &[(u64, u32)]
+    // u64 is the bitvec
+    // u32 is the offset of the block, the number of set bits so far.
+    //
+    // At the end one block is appended, to store the number of values in the index in offset.
+    data: OwnedBytes,
+}
+const ELEMENTS_PER_BLOCK: u32 = 64;
+const BLOCK_BITVEC_SIZE: usize = 8;
+const BLOCK_OFFSET_SIZE: usize = 4;
+const SERIALIZED_BLOCK_SIZE: usize = BLOCK_BITVEC_SIZE + BLOCK_OFFSET_SIZE;
+
+#[inline]
+fn count_ones(bitvec: u64, pos_in_bitvec: u32) -> u32 {
+    if pos_in_bitvec == 63 {
+        bitvec.count_ones()
+    } else {
+        let mask = (1u64 << (pos_in_bitvec + 1)) - 1;
+        let masked_bitvec = bitvec & mask;
+        masked_bitvec.count_ones()
+    }
+}
+
+#[derive(Clone, Copy)]
+struct DenseIndexBlock {
+    bitvec: u64,
+    offset: u32,
+}
+
+impl From<[u8; SERIALIZED_BLOCK_SIZE]> for DenseIndexBlock {
+    fn from(data: [u8; SERIALIZED_BLOCK_SIZE]) -> Self {
+        let bitvec = u64::from_le_bytes(data[..BLOCK_BITVEC_SIZE].try_into().unwrap());
+        let offset = u32::from_le_bytes(data[BLOCK_BITVEC_SIZE..].try_into().unwrap());
+        Self { bitvec, offset }
+    }
+}
+
+impl DenseCodec {
+    /// Open the DenseCodec from OwnedBytes
+    pub fn open(data: OwnedBytes) -> Self {
+        Self { data }
+    }
+    #[inline]
+    /// Check if value at position is not null.
+    pub fn exists(&self, idx: u32) -> bool {
+        let block_pos = idx / ELEMENTS_PER_BLOCK;
+        let bitvec = self.dense_index_block(block_pos).bitvec;
+
+        let pos_in_bitvec = idx % ELEMENTS_PER_BLOCK;
+
+        get_bit_at(bitvec, pos_in_bitvec)
+    }
+    #[inline]
+    fn dense_index_block(&self, block_pos: u32) -> DenseIndexBlock {
+        dense_index_block(&self.data, block_pos)
+    }
+
+    /// Return the number of non-null values in an index
+    pub fn num_non_nulls(&self) -> u32 {
+        let last_block = (self.data.len() / SERIALIZED_BLOCK_SIZE) - 1;
+        self.dense_index_block(last_block as u32).offset
+    }
+
+    #[inline]
+    /// Translate from the original index to the codec index.
+    pub fn translate_to_codec_idx(&self, idx: u32) -> Option<u32> {
+        let block_pos = idx / ELEMENTS_PER_BLOCK;
+        let index_block = self.dense_index_block(block_pos);
+        let pos_in_block_bit_vec = idx % ELEMENTS_PER_BLOCK;
+        let ones_in_block = count_ones(index_block.bitvec, pos_in_block_bit_vec);
+        if get_bit_at(index_block.bitvec, pos_in_block_bit_vec) {
+            // -1 is ok, since idx does exist, so there's at least one
+            Some(index_block.offset + ones_in_block - 1)
+        } else {
+            None
+        }
+    }
+
+    /// Translate positions from the codec index to the original index.
+    ///
+    /// # Panics
+    ///
+    /// May panic if any `idx` is greater than the max codec index.
+    pub fn translate_codec_idx_to_original_idx<'a>(
+        &'a self,
+        iter: impl Iterator<Item = u32> + 'a,
+    ) -> impl Iterator<Item = u32> + 'a {
+        let mut block_pos = 0u32;
+        iter.map(move |dense_idx| {
+            // update block_pos to limit search scope
+            block_pos = find_block(dense_idx, block_pos, &self.data);
+            let index_block = self.dense_index_block(block_pos);
+
+            // The next offset is higher than dense_idx and therefore:
+            // dense_idx <= offset + num_set_bits in block
+            let mut num_set_bits = 0;
+            for idx_in_bitvec in 0..ELEMENTS_PER_BLOCK {
+                if get_bit_at(index_block.bitvec, idx_in_bitvec) {
+                    num_set_bits += 1;
+                }
+                if num_set_bits == (dense_idx - index_block.offset + 1) {
+                    let orig_idx = block_pos * ELEMENTS_PER_BLOCK + idx_in_bitvec;
+                    return orig_idx;
+                }
+            }
+            panic!("Internal Error: Offset calculation in dense idx seems to be wrong.");
+        })
+    }
+}
+
+#[inline]
+fn dense_index_block(data: &[u8], block_pos: u32) -> DenseIndexBlock {
+    let data_start_pos = block_pos as usize * SERIALIZED_BLOCK_SIZE;
+    let block_data: [u8; SERIALIZED_BLOCK_SIZE] = data[data_start_pos..][..SERIALIZED_BLOCK_SIZE]
+        .try_into()
+        .unwrap();
+    block_data.into()
+}
+
+#[inline]
+/// Finds the block position containing the dense_idx.
+///
+/// # Correctness
+/// dense_idx needs to be smaller than the number of values in the index
+///
+/// The last offset number is equal to the number of values in the index.
+fn find_block(dense_idx: u32, mut block_pos: u32, data: &[u8]) -> u32 {
+    loop {
+        let offset = dense_index_block(data, block_pos).offset;
+        if offset > dense_idx {
+            return block_pos - 1;
+        }
+        block_pos += 1;
+    }
+}
+
+/// Iterator over all values, true if set, otherwise false
+pub fn serialize_dense_codec(
+    iter: impl Iterator<Item = bool>,
+    mut out: impl Write,
+) -> io::Result<()> {
+    let mut offset: u32 = 0;
+
+    for chunk in &iter.chunks(ELEMENTS_PER_BLOCK as usize) {
+        let mut block: u64 = 0;
+        for (pos, is_bit_set) in chunk.enumerate() {
+            if is_bit_set {
+                set_bit_at(&mut block, pos as u64);
+            }
+        }
+
+        block.serialize(&mut out)?;
+        offset.serialize(&mut out)?;
+
+        offset += block.count_ones();
+    }
+    // Add sentinal block for the offset
+    let block: u64 = 0;
+    block.serialize(&mut out)?;
+    offset.serialize(&mut out)?;
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use proptest::prelude::{any, prop, *};
+    use proptest::strategy::Strategy;
+    use proptest::{prop_oneof, proptest};
+
+    use super::*;
+
+    fn random_bitvec() -> BoxedStrategy<Vec<bool>> {
+        prop_oneof![
+            1 => prop::collection::vec(proptest::bool::weighted(1.0), 0..100),
+            1 => prop::collection::vec(proptest::bool::weighted(1.0), 0..64),
+            1 => prop::collection::vec(proptest::bool::weighted(0.0), 0..100),
+            1 => prop::collection::vec(proptest::bool::weighted(0.0), 0..64),
+            8 => vec![any::<bool>()],
+            2 => prop::collection::vec(any::<bool>(), 0..50),
+        ]
+        .boxed()
+    }
+
+    proptest! {
+        #![proptest_config(ProptestConfig::with_cases(500))]
+        #[test]
+        fn test_with_random_bitvecs(bitvec1 in random_bitvec(), bitvec2 in random_bitvec(), bitvec3 in random_bitvec()) {
+            let mut bitvec = Vec::new();
+            bitvec.extend_from_slice(&bitvec1);
+            bitvec.extend_from_slice(&bitvec2);
+            bitvec.extend_from_slice(&bitvec3);
+            test_null_index(bitvec);
+        }
+    }
+
+    #[test]
+    fn dense_codec_test_one_block_false() {
+        let mut iter = vec![false; 64];
+        iter.push(true);
+        test_null_index(iter);
+    }
+
+    fn test_null_index(data: Vec<bool>) {
+        let mut out = vec![];
+
+        serialize_dense_codec(data.iter().cloned(), &mut out).unwrap();
+        let null_index = DenseCodec::open(OwnedBytes::new(out));
+
+        let orig_idx_with_value: Vec<u32> = data
+            .iter()
+            .enumerate()
+            .filter(|(_pos, val)| **val)
+            .map(|(pos, _val)| pos as u32)
+            .collect();
+
+        assert_eq!(
+            null_index
+                .translate_codec_idx_to_original_idx(0..orig_idx_with_value.len() as u32)
+                .collect_vec(),
+            orig_idx_with_value
+        );
+
+        for (dense_idx, orig_idx) in orig_idx_with_value.iter().enumerate() {
+            assert_eq!(
+                null_index.translate_to_codec_idx(*orig_idx),
+                Some(dense_idx as u32)
+            );
+        }
+
+        for (pos, value) in data.iter().enumerate() {
+            assert_eq!(null_index.exists(pos as u32), *value);
+        }
+    }
+
+    #[test]
+    fn dense_codec_test_translation() {
+        let mut out = vec![];
+
+        let iter = ([true, false, true, false]).iter().cloned();
+        serialize_dense_codec(iter, &mut out).unwrap();
+        let null_index = DenseCodec::open(OwnedBytes::new(out));
+
+        assert_eq!(
+            null_index
+                .translate_codec_idx_to_original_idx(0..2)
+                .collect_vec(),
+            vec![0, 2]
+        );
+    }
+
+    #[test]
+    fn dense_codec_translate() {
+        let mut out = vec![];
+
+        let iter = ([true, false, true, false]).iter().cloned();
+        serialize_dense_codec(iter, &mut out).unwrap();
+        let null_index = DenseCodec::open(OwnedBytes::new(out));
+        assert_eq!(null_index.translate_to_codec_idx(0), Some(0));
+        assert_eq!(null_index.translate_to_codec_idx(2), Some(1));
+    }
+
+    #[test]
+    fn dense_codec_test_small() {
+        let mut out = vec![];
+
+        let iter = ([true, false, true, false]).iter().cloned();
+        serialize_dense_codec(iter, &mut out).unwrap();
+        let null_index = DenseCodec::open(OwnedBytes::new(out));
+        assert!(null_index.exists(0));
+        assert!(!null_index.exists(1));
+        assert!(null_index.exists(2));
+        assert!(!null_index.exists(3));
+    }
+
+    #[test]
+    fn dense_codec_test_large() {
+        let mut docs = vec![];
+        docs.extend((0..1000).map(|_idx| false));
+        docs.extend((0..=1000).map(|_idx| true));
+
+        let iter = docs.iter().cloned();
+        let mut out = vec![];
+        serialize_dense_codec(iter, &mut out).unwrap();
+        let null_index = DenseCodec::open(OwnedBytes::new(out));
+        assert!(!null_index.exists(0));
+        assert!(!null_index.exists(100));
+        assert!(!null_index.exists(999));
+        assert!(null_index.exists(1000));
+        assert!(null_index.exists(1999));
+        assert!(null_index.exists(2000));
+        assert!(!null_index.exists(2001));
+    }
+
+    #[test]
+    fn test_count_ones() {
+        let mut block = 0;
+        set_bit_at(&mut block, 0);
+        set_bit_at(&mut block, 2);
+
+        assert_eq!(count_ones(block, 0), 1);
+        assert_eq!(count_ones(block, 1), 1);
+        assert_eq!(count_ones(block, 2), 2);
+    }
+}
+
+#[cfg(all(test, feature = "unstable"))]
+mod bench {
+
+    use rand::rngs::StdRng;
+    use rand::{Rng, SeedableRng};
+    use test::Bencher;
+
+    use super::*;
+
+    const TOTAL_NUM_VALUES: u32 = 1_000_000;
+    fn gen_bools(fill_ratio: f64) -> DenseCodec {
+        let mut out = Vec::new();
+        let mut rng: StdRng = StdRng::from_seed([1u8; 32]);
+        let bools: Vec<_> = (0..TOTAL_NUM_VALUES)
+            .map(|_| rng.gen_bool(fill_ratio))
+            .collect();
+        serialize_dense_codec(bools.into_iter(), &mut out).unwrap();
+
+        let codec = DenseCodec::open(OwnedBytes::new(out));
+        codec
+    }
+
+    fn random_range_iterator(start: u32, end: u32, step_size: u32) -> impl Iterator<Item = u32> {
+        let mut rng: StdRng = StdRng::from_seed([1u8; 32]);
+        let mut current = start;
+        std::iter::from_fn(move || {
+            current += rng.gen_range(1..step_size + 1);
+            if current >= end {
+                None
+            } else {
+                Some(current)
+            }
+        })
+    }
+
+    fn walk_over_data(codec: &DenseCodec, max_step_size: u32) -> Option<u32> {
+        walk_over_data_from_positions(
+            codec,
+            random_range_iterator(0, TOTAL_NUM_VALUES, max_step_size),
+        )
+    }
+
+    fn walk_over_data_from_positions(
+        codec: &DenseCodec,
+        positions: impl Iterator<Item = u32>,
+    ) -> Option<u32> {
+        let mut dense_idx: Option<u32> = None;
+        for idx in positions {
+            dense_idx = dense_idx.or(codec.translate_to_codec_idx(idx));
+        }
+        dense_idx
+    }
+
+    #[bench]
+    fn bench_dense_codec_translate_orig_to_codec_90percent_filled_random_stride(
+        bench: &mut Bencher,
+    ) {
+        let codec = gen_bools(0.9f64);
+        bench.iter(|| walk_over_data(&codec, 100));
+    }
+
+    #[bench]
+    fn bench_dense_codec_translate_orig_to_codec_50percent_filled_random_stride(
+        bench: &mut Bencher,
+    ) {
+        let codec = gen_bools(0.5f64);
+        bench.iter(|| walk_over_data(&codec, 100));
+    }
+
+    #[bench]
+    fn bench_dense_codec_translate_orig_to_codec_full_scan_10percent(bench: &mut Bencher) {
+        let codec = gen_bools(0.1f64);
+        bench.iter(|| walk_over_data_from_positions(&codec, 0..TOTAL_NUM_VALUES));
+    }
+
+    #[bench]
+    fn bench_dense_codec_translate_orig_to_codec_full_scan_90percent(bench: &mut Bencher) {
+        let codec = gen_bools(0.9f64);
+        bench.iter(|| walk_over_data_from_positions(&codec, 0..TOTAL_NUM_VALUES));
+    }
+
+    #[bench]
+    fn bench_dense_codec_translate_orig_to_codec_10percent_filled_random_stride(
+        bench: &mut Bencher,
+    ) {
+        let codec = gen_bools(0.1f64);
+        bench.iter(|| walk_over_data(&codec, 100));
+    }
+
+    #[bench]
+    fn bench_dense_codec_translate_codec_to_orig_90percent_filled_random_stride_big_step(
+        bench: &mut Bencher,
+    ) {
+        let codec = gen_bools(0.9f64);
+        let num_vals = codec.num_non_nulls();
+        bench.iter(|| {
+            codec
+                .translate_codec_idx_to_original_idx(random_range_iterator(0, num_vals, 50_000))
+                .last()
+        });
+    }
+
+    #[bench]
+    fn bench_dense_codec_translate_codec_to_orig_90percent_filled_random_stride(
+        bench: &mut Bencher,
+    ) {
+        let codec = gen_bools(0.9f64);
+        let num_vals = codec.num_non_nulls();
+        bench.iter(|| {
+            codec
+                .translate_codec_idx_to_original_idx(random_range_iterator(0, num_vals, 100))
+                .last()
+        });
+    }
+
+    #[bench]
+    fn bench_dense_codec_translate_codec_to_orig_90percent_filled_full_scan(bench: &mut Bencher) {
+        let codec = gen_bools(0.9f64);
+        let num_vals = codec.num_non_nulls();
+        bench.iter(|| {
+            codec
+                .translate_codec_idx_to_original_idx(0..num_vals)
+                .last()
+        });
+    }
+}
--- a/fastfield_codecs/src/null_index/mod.rs
+++ b/fastfield_codecs/src/null_index/mod.rs
@@ -0,0 +1,14 @@
+pub use dense::{serialize_dense_codec, DenseCodec};
+
+mod dense;
+mod sparse;
+
+#[inline]
+fn get_bit_at(input: u64, n: u32) -> bool {
+    input & (1 << n) != 0
+}
+
+#[inline]
+fn set_bit_at(input: &mut u64, n: u64) {
+    *input |= 1 << n;
+}
--- a/fastfield_codecs/src/null_index/sparse.rs
+++ b/fastfield_codecs/src/null_index/sparse.rs
@@ -0,0 +1,752 @@
+use std::io::{self, Write};
+
+use common::BitSet;
+use ownedbytes::OwnedBytes;
+
+use super::{serialize_dense_codec, DenseCodec};
+
+/// `SparseCodec` is the codec for data, when only few documents have values.
+/// In contrast to `DenseCodec` opening a `SparseCodec` causes runtime data to be produced, for
+/// faster access.
+///
+/// The lower 16 bits of doc ids are stored as u16 while the upper 16 bits are given by the block
+/// id. Each block contains 1<<16 docids.
+///
+/// # Serialized Data Layout
+/// The data starts with the block data. Each block is either dense or sparse encoded, depending on
+/// the number of values in the block. A block is sparse when it contains less than
+/// DENSE_BLOCK_THRESHOLD (6144) values.
+/// [Sparse data block | dense data block, .. #repeat*; Desc: Either a sparse or dense encoded
+/// block]
+/// ### Sparse block data
+/// [u16 LE, .. #repeat*; Desc: Positions with values in a block]
+/// ### Dense block data
+/// [Dense codec for the whole block; Desc: Similar to a bitvec(0..ELEMENTS_PER_BLOCK) + Metadata
+/// for faster lookups. See dense.rs]
+///
+/// The data is followed by block metadata, to know which area of the raw block data belongs to
+/// which block. Only metadata for blocks with elements is recorded to
+/// keep the overhead low for scenarios with many very sparse columns. The block metadata consists
+/// of the block index and the number of values in the block. Since we don't store empty blocks
+/// num_vals is incremented by 1, e.g. 0 means 1 value.
+///
+/// The last u16 is storing the number of metadata blocks.
+/// [u16 LE, .. #repeat*; Desc: Positions with values in a block][(u16 LE, u16 LE), .. #repeat*;
+/// Desc: (Block Id u16, Num Elements u16)][u16 LE; Desc: num blocks with values u16]
+///
+/// # Opening
+/// When opening the data layout, the data is expanded to `Vec<SparseCodecBlockVariant>`, where the
+/// index is the block index. For each block `byte_start` and `offset` is computed.
+pub struct SparseCodec {
+    data: OwnedBytes,
+    blocks: Vec<SparseCodecBlockVariant>,
+}
+
+/// The threshold for for number of elements after which we switch to dense block encoding
+const DENSE_BLOCK_THRESHOLD: u32 = 6144;
+
+const ELEMENTS_PER_BLOCK: u32 = u16::MAX as u32 + 1;
+
+/// 1.5 bit per Element + 12 bytes for the sentinal block
+const NUM_BYTES_DENSE_BLOCK: u32 = (ELEMENTS_PER_BLOCK + ELEMENTS_PER_BLOCK / 2 + 64 + 32) / 8;
+
+#[derive(Clone)]
+enum SparseCodecBlockVariant {
+    Empty { offset: u32 },
+    Dense(DenseBlock),
+    Sparse(SparseBlock),
+}
+
+impl SparseCodecBlockVariant {
+    /// The number of non-null values that preceeded that block.
+    fn offset(&self) -> u32 {
+        match self {
+            SparseCodecBlockVariant::Empty { offset } => *offset,
+            SparseCodecBlockVariant::Dense(dense) => dense.offset,
+            SparseCodecBlockVariant::Sparse(sparse) => sparse.offset,
+        }
+    }
+}
+
+/// A block consists of max u16 values
+#[derive(Clone)]
+struct DenseBlock {
+    /// The number of values set before the block
+    offset: u32,
+    /// The data for the dense encoding
+    codec: DenseCodec,
+}
+
+impl DenseBlock {
+    pub fn exists(&self, idx: u32) -> bool {
+        self.codec.exists(idx)
+    }
+    pub fn translate_to_codec_idx(&self, idx: u32) -> Option<u32> {
+        self.codec.translate_to_codec_idx(idx)
+    }
+    pub fn translate_codec_idx_to_original_idx(&self, idx: u32) -> u32 {
+        self.codec
+            .translate_codec_idx_to_original_idx(idx..=idx)
+            .next()
+            .unwrap()
+    }
+}
+
+/// A block consists of max u16 values
+#[derive(Debug, Copy, Clone)]
+struct SparseBlock {
+    /// The number of values in the block
+    num_vals: u32,
+    /// The number of values set before the block
+    offset: u32,
+    /// The start position of the data for the block
+    byte_start: u32,
+}
+
+impl SparseBlock {
+    fn empty_block(offset: u32) -> Self {
+        Self {
+            num_vals: 0,
+            byte_start: 0,
+            offset,
+        }
+    }
+
+    #[inline]
+    fn value_at_idx(&self, data: &[u8], idx: u16) -> u16 {
+        let start_offset: usize = self.byte_start as usize + (idx as u32 as usize * 2);
+        get_u16(data, start_offset)
+    }
+
+    #[inline]
+    #[allow(clippy::comparison_chain)]
+    // Looks for the element in the block. Returns the positions if found.
+    fn binary_search(&self, data: &[u8], target: u16) -> Option<u16> {
+        let mut size = self.num_vals as u16;
+        let mut left = 0;
+        let mut right = size;
+        // TODO try different implem.
+        //  e.g. exponential search into binary search
+        while left < right {
+            let mid = left + size / 2;
+
+            // TODO do boundary check only once, and then use an
+            // unsafe `value_at_idx`
+            let mid_val = self.value_at_idx(data, mid);
+
+            if target > mid_val {
+                left = mid + 1;
+            } else if target < mid_val {
+                right = mid;
+            } else {
+                return Some(mid);
+            }
+
+            size = right - left;
+        }
+        None
+    }
+}
+
+#[inline]
+fn get_u16(data: &[u8], byte_position: usize) -> u16 {
+    let bytes: [u8; 2] = data[byte_position..byte_position + 2].try_into().unwrap();
+    u16::from_le_bytes(bytes)
+}
+
+const SERIALIZED_BLOCK_METADATA_SIZE: usize = 4;
+
+fn deserialize_sparse_codec_block(data: &OwnedBytes) -> Vec<SparseCodecBlockVariant> {
+    // The number of vals so far
+    let mut offset = 0;
+    let mut sparse_codec_blocks = Vec::new();
+    let num_blocks = get_u16(data, data.len() - 2);
+    let block_data_index_start =
+        data.len() - 2 - num_blocks as usize * SERIALIZED_BLOCK_METADATA_SIZE;
+    let mut byte_start = 0;
+    for block_num in 0..num_blocks as usize {
+        let block_data_index = block_data_index_start + SERIALIZED_BLOCK_METADATA_SIZE * block_num;
+        let block_idx = get_u16(data, block_data_index);
+        let num_vals = get_u16(data, block_data_index + 2) as u32 + 1;
+        sparse_codec_blocks.resize(
+            block_idx as usize,
+            SparseCodecBlockVariant::Empty { offset },
+        );
+
+        if is_sparse(num_vals) {
+            let block = SparseBlock {
+                num_vals,
+                offset,
+                byte_start,
+            };
+            sparse_codec_blocks.push(SparseCodecBlockVariant::Sparse(block));
+            byte_start += 2 * num_vals;
+        } else {
+            let block = DenseBlock {
+                offset,
+                codec: DenseCodec::open(data.slice(byte_start as usize..data.len()).clone()),
+            };
+            sparse_codec_blocks.push(SparseCodecBlockVariant::Dense(block));
+            // Dense blocks have a fixed size spanning ELEMENTS_PER_BLOCK.
+            byte_start += NUM_BYTES_DENSE_BLOCK;
+        }
+
+        offset += num_vals;
+    }
+    sparse_codec_blocks.push(SparseCodecBlockVariant::Empty { offset });
+    sparse_codec_blocks
+}
+
+/// Splits a value address into lower and upper 16bits.
+/// The lower 16 bits are the value in the block
+/// The upper 16 bits are the block index
+#[derive(Debug, Clone, Copy)]
+struct ValueAddr {
+    block_idx: u16,
+    value_in_block: u16,
+}
+
+/// Splits a idx into block index and value in the block
+fn value_addr(idx: u32) -> ValueAddr {
+    /// Static assert number elements per block this method expects
+    #[allow(clippy::assertions_on_constants)]
+    const _: () = assert!(ELEMENTS_PER_BLOCK == (1 << 16));
+
+    let value_in_block = idx as u16;
+    let block_idx = (idx >> 16) as u16;
+    ValueAddr {
+        block_idx,
+        value_in_block,
+    }
+}
+
+impl SparseCodec {
+    /// Open the SparseCodec from OwnedBytes
+    pub fn open(data: OwnedBytes) -> Self {
+        let blocks = deserialize_sparse_codec_block(&data);
+        Self { data, blocks }
+    }
+
+    #[inline]
+    /// Check if value at position is not null.
+    pub fn exists(&self, idx: u32) -> bool {
+        let value_addr = value_addr(idx);
+        // There may be trailing nulls without data, those are not stored as blocks. It would be
+        // possible to create empty blocks, but for that we would need to serialize the number of
+        // values or pass them when opening
+
+        if let Some(block) = self.blocks.get(value_addr.block_idx as usize) {
+            match block {
+                SparseCodecBlockVariant::Empty { offset: _ } => false,
+                SparseCodecBlockVariant::Dense(block) => {
+                    block.exists(value_addr.value_in_block as u32)
+                }
+                SparseCodecBlockVariant::Sparse(block) => block
+                    .binary_search(&self.data, value_addr.value_in_block)
+                    .is_some(),
+            }
+        } else {
+            false
+        }
+    }
+
+    /// Return the number of non-null values in an index
+    pub fn num_non_nulls(&self) -> u32 {
+        self.blocks.last().map(|block| block.offset()).unwrap_or(0)
+    }
+
+    #[inline]
+    /// Translate from the original index to the codec index.
+    pub fn translate_to_codec_idx(&self, idx: u32) -> Option<u32> {
+        let value_addr = value_addr(idx);
+        let block = self.blocks.get(value_addr.block_idx as usize)?;
+
+        match block {
+            SparseCodecBlockVariant::Empty { offset: _ } => None,
+            SparseCodecBlockVariant::Dense(block) => block
+                .translate_to_codec_idx(value_addr.value_in_block as u32)
+                .map(|pos_in_block| pos_in_block + block.offset),
+            SparseCodecBlockVariant::Sparse(block) => {
+                let pos_in_block = block.binary_search(&self.data, value_addr.value_in_block);
+                pos_in_block.map(|pos_in_block: u16| block.offset + pos_in_block as u32)
+            }
+        }
+    }
+
+    fn find_block(&self, dense_idx: u32, mut block_pos: u32) -> u32 {
+        loop {
+            let offset = self.blocks[block_pos as usize].offset();
+            if offset > dense_idx {
+                return block_pos - 1;
+            }
+            block_pos += 1;
+        }
+    }
+
+    /// Translate positions from the codec index to the original index.
+    ///
+    /// # Panics
+    ///
+    /// May panic if any `idx` is greater than the max codec index.
+    pub fn translate_codec_idx_to_original_idx<'a>(
+        &'a self,
+        iter: impl Iterator<Item = u32> + 'a,
+    ) -> impl Iterator<Item = u32> + 'a {
+        // TODO: There's a big potential performance gain, by using iterators per block instead of
+        // random access for each element in a block
+        // group_by itertools won't help though, since it requires a temporary local variable
+        let mut block_pos = 0u32;
+        iter.map(move |codec_idx| {
+            // update block_pos to limit search scope
+            block_pos = self.find_block(codec_idx, block_pos);
+            let block_doc_idx_start = block_pos * ELEMENTS_PER_BLOCK;
+            let block = &self.blocks[block_pos as usize];
+            let idx_in_block = codec_idx - block.offset();
+            match block {
+                SparseCodecBlockVariant::Empty { offset: _ } => {
+                    panic!(
+                        "invalid input, cannot translate to original index. associated empty \
+                         block with dense idx. block_pos {}, idx_in_block {}",
+                        block_pos, idx_in_block
+                    )
+                }
+                SparseCodecBlockVariant::Dense(dense) => {
+                    dense.translate_codec_idx_to_original_idx(idx_in_block) + block_doc_idx_start
+                }
+                SparseCodecBlockVariant::Sparse(block) => {
+                    block.value_at_idx(&self.data, idx_in_block as u16) as u32 + block_doc_idx_start
+                }
+            }
+        })
+    }
+}
+
+fn is_sparse(num_elem_in_block: u32) -> bool {
+    num_elem_in_block < DENSE_BLOCK_THRESHOLD
+}
+
+#[derive(Default)]
+struct BlockDataSerialized {
+    block_idx: u16,
+    num_vals: u32,
+}
+
+/// Iterator over positions of set values.
+pub fn serialize_sparse_codec<W: Write>(
+    mut iter: impl Iterator<Item = u32>,
+    mut out: W,
+) -> io::Result<()> {
+    let mut block_metadata: Vec<BlockDataSerialized> = Vec::new();
+    let mut current_block = Vec::new();
+    // This if-statement for the first element ensures that
+    // `block_metadata` is not empty in the loop below.
+    if let Some(idx) = iter.next() {
+        let value_addr = value_addr(idx);
+        block_metadata.push(BlockDataSerialized {
+            block_idx: value_addr.block_idx,
+            num_vals: 1,
+        });
+        current_block.push(value_addr.value_in_block);
+    }
+    let flush_block = |current_block: &mut Vec<u16>, out: &mut W| -> io::Result<()> {
+        let is_sparse = is_sparse(current_block.len() as u32);
+        if is_sparse {
+            for val_in_block in current_block.iter() {
+                out.write_all(val_in_block.to_le_bytes().as_ref())?;
+            }
+        } else {
+            let mut bitset = BitSet::with_max_value(ELEMENTS_PER_BLOCK + 1);
+            for val_in_block in current_block.iter() {
+                bitset.insert(*val_in_block as u32);
+            }
+
+            let iter = (0..ELEMENTS_PER_BLOCK).map(|idx| bitset.contains(idx));
+            serialize_dense_codec(iter, out)?;
+        }
+        current_block.clear();
+        Ok(())
+    };
+    for idx in iter {
+        let value_addr = value_addr(idx);
+        if block_metadata[block_metadata.len() - 1].block_idx == value_addr.block_idx {
+            let last_idx_metadata = block_metadata.len() - 1;
+            block_metadata[last_idx_metadata].num_vals += 1;
+        } else {
+            // flush prev block
+            flush_block(&mut current_block, &mut out)?;
+
+            block_metadata.push(BlockDataSerialized {
+                block_idx: value_addr.block_idx,
+                num_vals: 1,
+            });
+        }
+        current_block.push(value_addr.value_in_block);
+    }
+    // handle last block
+    flush_block(&mut current_block, &mut out)?;
+
+    for block in &block_metadata {
+        out.write_all(block.block_idx.to_le_bytes().as_ref())?;
+        // We don't store empty blocks, therefore we can subtract 1.
+        // This way we will be able to use u16 when the number of elements is 1 << 16 or u16::MAX+1
+        out.write_all(((block.num_vals - 1) as u16).to_le_bytes().as_ref())?;
+    }
+    out.write_all((block_metadata.len() as u16).to_le_bytes().as_ref())?;
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use itertools::Itertools;
+    use proptest::prelude::{any, prop, *};
+    use proptest::strategy::Strategy;
+    use proptest::{prop_oneof, proptest};
+
+    use super::*;
+
+    fn random_bitvec() -> BoxedStrategy<Vec<bool>> {
+        prop_oneof![
+            1 => prop::collection::vec(proptest::bool::weighted(1.0), 0..100),
+            1 => prop::collection::vec(proptest::bool::weighted(0.00), 0..(ELEMENTS_PER_BLOCK as usize * 3)), // empty blocks
+            1 => prop::collection::vec(proptest::bool::weighted(1.00), 0..(ELEMENTS_PER_BLOCK as usize + 10)), // full block
+            1 => prop::collection::vec(proptest::bool::weighted(0.01), 0..100),
+            1 => prop::collection::vec(proptest::bool::weighted(0.01), 0..u16::MAX as usize),
+            8 => vec![any::<bool>()],
+        ]
+        .boxed()
+    }
+
+    proptest! {
+        #![proptest_config(ProptestConfig::with_cases(50))]
+        #[test]
+        fn test_with_random_bitvecs(bitvec1 in random_bitvec(), bitvec2 in random_bitvec(), bitvec3 in random_bitvec()) {
+            let mut bitvec = Vec::new();
+            bitvec.extend_from_slice(&bitvec1);
+            bitvec.extend_from_slice(&bitvec2);
+            bitvec.extend_from_slice(&bitvec3);
+            test_null_index(bitvec);
+        }
+    }
+
+    #[test]
+    fn sparse_codec_test_one_block_false() {
+        let mut iter = vec![false; ELEMENTS_PER_BLOCK as usize];
+        iter.push(true);
+        test_null_index(iter);
+    }
+
+    #[test]
+    fn sparse_codec_test_one_block_true() {
+        let mut iter = vec![true; ELEMENTS_PER_BLOCK as usize];
+        iter.push(true);
+        test_null_index(iter);
+    }
+
+    fn test_null_index(data: Vec<bool>) {
+        let mut out = vec![];
+
+        serialize_sparse_codec(
+            data.iter()
+                .cloned()
+                .enumerate()
+                .filter(|(_pos, val)| *val)
+                .map(|(pos, _val)| pos as u32),
+            &mut out,
+        )
+        .unwrap();
+        let null_index = SparseCodec::open(OwnedBytes::new(out));
+
+        let orig_idx_with_value: Vec<u32> = data
+            .iter()
+            .enumerate()
+            .filter(|(_pos, val)| **val)
+            .map(|(pos, _val)| pos as u32)
+            .collect();
+
+        assert_eq!(
+            null_index
+                .translate_codec_idx_to_original_idx(0..orig_idx_with_value.len() as u32)
+                .collect_vec(),
+            orig_idx_with_value
+        );
+
+        let step_size = (orig_idx_with_value.len() / 100).max(1);
+        for (dense_idx, orig_idx) in orig_idx_with_value.iter().enumerate().step_by(step_size) {
+            assert_eq!(
+                null_index.translate_to_codec_idx(*orig_idx),
+                Some(dense_idx as u32)
+            );
+        }
+
+        // 100 samples
+        let step_size = (data.len() / 100).max(1);
+        for (pos, value) in data.iter().enumerate().step_by(step_size) {
+            assert_eq!(null_index.exists(pos as u32), *value);
+        }
+    }
+
+    #[test]
+    fn sparse_codec_test_translation() {
+        let mut out = vec![];
+
+        let iter = ([true, false, true, false]).iter().cloned();
+        serialize_sparse_codec(
+            iter.enumerate()
+                .filter(|(_pos, val)| *val)
+                .map(|(pos, _val)| pos as u32),
+            &mut out,
+        )
+        .unwrap();
+        let null_index = SparseCodec::open(OwnedBytes::new(out));
+
+        assert_eq!(
+            null_index
+                .translate_codec_idx_to_original_idx(0..2)
+                .collect_vec(),
+            vec![0, 2]
+        );
+    }
+
+    #[test]
+    fn sparse_codec_translate() {
+        let mut out = vec![];
+
+        let iter = ([true, false, true, false]).iter().cloned();
+        serialize_sparse_codec(
+            iter.enumerate()
+                .filter(|(_pos, val)| *val)
+                .map(|(pos, _val)| pos as u32),
+            &mut out,
+        )
+        .unwrap();
+        let null_index = SparseCodec::open(OwnedBytes::new(out));
+        assert_eq!(null_index.translate_to_codec_idx(0), Some(0));
+        assert_eq!(null_index.translate_to_codec_idx(2), Some(1));
+    }
+
+    #[test]
+    fn sparse_codec_test_small() {
+        let mut out = vec![];
+
+        let iter = ([true, false, true, false]).iter().cloned();
+        serialize_sparse_codec(
+            iter.enumerate()
+                .filter(|(_pos, val)| *val)
+                .map(|(pos, _val)| pos as u32),
+            &mut out,
+        )
+        .unwrap();
+        let null_index = SparseCodec::open(OwnedBytes::new(out));
+        assert!(null_index.exists(0));
+        assert!(!null_index.exists(1));
+        assert!(null_index.exists(2));
+        assert!(!null_index.exists(3));
+    }
+
+    #[test]
+    fn sparse_codec_test_large() {
+        let mut docs = vec![];
+        docs.extend((0..ELEMENTS_PER_BLOCK).map(|_idx| false));
+        docs.extend((0..=1).map(|_idx| true));
+
+        let iter = docs.iter().cloned();
+        let mut out = vec![];
+        serialize_sparse_codec(
+            iter.enumerate()
+                .filter(|(_pos, val)| *val)
+                .map(|(pos, _val)| pos as u32),
+            &mut out,
+        )
+        .unwrap();
+        let null_index = SparseCodec::open(OwnedBytes::new(out));
+        assert!(!null_index.exists(0));
+        assert!(!null_index.exists(100));
+        assert!(!null_index.exists(ELEMENTS_PER_BLOCK - 1));
+        assert!(null_index.exists(ELEMENTS_PER_BLOCK));
+        assert!(null_index.exists(ELEMENTS_PER_BLOCK + 1));
+    }
+}
+
+#[cfg(all(test, feature = "unstable"))]
+mod bench {
+
+    use rand::rngs::StdRng;
+    use rand::{Rng, SeedableRng};
+    use test::Bencher;
+
+    use super::*;
+
+    const TOTAL_NUM_VALUES: u32 = 1_000_000;
+    fn gen_bools(fill_ratio: f64) -> SparseCodec {
+        let mut out = Vec::new();
+        let mut rng: StdRng = StdRng::from_seed([1u8; 32]);
+        serialize_sparse_codec(
+            (0..TOTAL_NUM_VALUES)
+                .map(|_| rng.gen_bool(fill_ratio))
+                .enumerate()
+                .filter(|(_pos, val)| *val)
+                .map(|(pos, _val)| pos as u32),
+            &mut out,
+        )
+        .unwrap();
+
+        let codec = SparseCodec::open(OwnedBytes::new(out));
+        codec
+    }
+
+    fn random_range_iterator(start: u32, end: u32, step_size: u32) -> impl Iterator<Item = u32> {
+        let mut rng: StdRng = StdRng::from_seed([1u8; 32]);
+        let mut current = start;
+        std::iter::from_fn(move || {
+            current += rng.gen_range(1..step_size + 1);
+            if current >= end {
+                None
+            } else {
+                Some(current)
+            }
+        })
+    }
+
+    fn walk_over_data(codec: &SparseCodec, max_step_size: u32) -> Option<u32> {
+        walk_over_data_from_positions(
+            codec,
+            random_range_iterator(0, TOTAL_NUM_VALUES, max_step_size),
+        )
+    }
+
+    fn walk_over_data_from_positions(
+        codec: &SparseCodec,
+        positions: impl Iterator<Item = u32>,
+    ) -> Option<u32> {
+        let mut dense_idx: Option<u32> = None;
+        for idx in positions {
+            dense_idx = dense_idx.or(codec.translate_to_codec_idx(idx));
+        }
+        dense_idx
+    }
+
+    #[bench]
+    fn bench_sparse_codec_translate_orig_to_codec_1percent_filled_random_stride(
+        bench: &mut Bencher,
+    ) {
+        let codec = gen_bools(0.01f64);
+        bench.iter(|| walk_over_data(&codec, 100));
+    }
+
+    #[bench]
+    fn bench_sparse_codec_translate_orig_to_codec_5percent_filled_random_stride(
+        bench: &mut Bencher,
+    ) {
+        let codec = gen_bools(0.05f64);
+        bench.iter(|| walk_over_data(&codec, 100));
+    }
+
+    #[bench]
+    fn bench_sparse_codec_translate_orig_to_codec_full_scan_10percent(bench: &mut Bencher) {
+        let codec = gen_bools(0.1f64);
+        bench.iter(|| walk_over_data_from_positions(&codec, 0..TOTAL_NUM_VALUES));
+    }
+
+    #[bench]
+    fn bench_sparse_codec_translate_orig_to_codec_full_scan_90percent(bench: &mut Bencher) {
+        let codec = gen_bools(0.9f64);
+        bench.iter(|| walk_over_data_from_positions(&codec, 0..TOTAL_NUM_VALUES));
+    }
+
+    #[bench]
+    fn bench_sparse_codec_translate_orig_to_codec_full_scan_1percent(bench: &mut Bencher) {
+        let codec = gen_bools(0.01f64);
+        bench.iter(|| walk_over_data_from_positions(&codec, 0..TOTAL_NUM_VALUES));
+    }
+
+    #[bench]
+    fn bench_sparse_codec_translate_orig_to_codec_10percent_filled_random_stride(
+        bench: &mut Bencher,
+    ) {
+        let codec = gen_bools(0.1f64);
+        bench.iter(|| walk_over_data(&codec, 100));
+    }
+
+    #[bench]
+    fn bench_sparse_codec_translate_orig_to_codec_90percent_filled_random_stride(
+        bench: &mut Bencher,
+    ) {
+        let codec = gen_bools(0.9f64);
+        bench.iter(|| walk_over_data(&codec, 100));
+    }
+
+    #[bench]
+    fn bench_sparse_codec_translate_codec_to_orig_1percent_filled_random_stride_big_step(
+        bench: &mut Bencher,
+    ) {
+        let codec = gen_bools(0.01f64);
+        let num_vals = codec.num_non_nulls();
+        bench.iter(|| {
+            codec
+                .translate_codec_idx_to_original_idx(random_range_iterator(0, num_vals, 50_000))
+                .last()
+        });
+    }
+
+    #[bench]
+    fn bench_sparse_codec_translate_codec_to_orig_1percent_filled_random_stride(
+        bench: &mut Bencher,
+    ) {
+        let codec = gen_bools(0.01f64);
+        let num_vals = codec.num_non_nulls();
+        bench.iter(|| {
+            codec
+                .translate_codec_idx_to_original_idx(random_range_iterator(0, num_vals, 100))
+                .last()
+        });
+    }
+
+    #[bench]
+    fn bench_sparse_codec_translate_codec_to_orig_1percent_filled_full_scan(bench: &mut Bencher) {
+        let codec = gen_bools(0.01f64);
+        let num_vals = codec.num_non_nulls();
+        bench.iter(|| {
+            codec
+                .translate_codec_idx_to_original_idx(0..num_vals)
+                .last()
+        });
+    }
+
+    #[bench]
+    fn bench_sparse_codec_translate_codec_to_orig_90percent_filled_random_stride_big_step(
+        bench: &mut Bencher,
+    ) {
+        let codec = gen_bools(0.90f64);
+        let num_vals = codec.num_non_nulls();
+        bench.iter(|| {
+            codec
+                .translate_codec_idx_to_original_idx(random_range_iterator(0, num_vals, 50_000))
+                .last()
+        });
+    }
+
+    #[bench]
+    fn bench_sparse_codec_translate_codec_to_orig_90percent_filled_random_stride(
+        bench: &mut Bencher,
+    ) {
+        let codec = gen_bools(0.9f64);
+        let num_vals = codec.num_non_nulls();
+        bench.iter(|| {
+            codec
+                .translate_codec_idx_to_original_idx(random_range_iterator(0, num_vals, 100))
+                .last()
+        });
+    }
+
+    #[bench]
+    fn bench_sparse_codec_translate_codec_to_orig_90percent_filled_full_scan(bench: &mut Bencher) {
+        let codec = gen_bools(0.9f64);
+        let num_vals = codec.num_non_nulls();
+        bench.iter(|| {
+            codec
+                .translate_codec_idx_to_original_idx(0..num_vals)
+                .last()
+        });
+    }
+}
--- a/fastfield_codecs/src/null_index_footer.rs
+++ b/fastfield_codecs/src/null_index_footer.rs
@@ -0,0 +1,146 @@
+use std::io::{self, Write};
+use std::ops::Range;
+
+use common::{BinarySerializable, CountingWriter, VInt};
+use ownedbytes::OwnedBytes;
+
+#[derive(Debug, Clone, Copy, Eq, PartialEq)]
+pub(crate) enum FastFieldCardinality {
+    Single = 1,
+    Multi = 2,
+}
+
+impl BinarySerializable for FastFieldCardinality {
+    fn serialize<W: Write>(&self, wrt: &mut W) -> io::Result<()> {
+        self.to_code().serialize(wrt)
+    }
+
+    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
+        let code = u8::deserialize(reader)?;
+        let codec_type: Self = Self::from_code(code)
+            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Unknown code `{code}.`"))?;
+        Ok(codec_type)
+    }
+}
+
+impl FastFieldCardinality {
+    pub(crate) fn to_code(self) -> u8 {
+        self as u8
+    }
+
+    pub(crate) fn from_code(code: u8) -> Option<Self> {
+        match code {
+            1 => Some(Self::Single),
+            2 => Some(Self::Multi),
+            _ => None,
+        }
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub(crate) enum NullIndexCodec {
+    Full = 1,
+}
+
+impl BinarySerializable for NullIndexCodec {
+    fn serialize<W: Write>(&self, wrt: &mut W) -> io::Result<()> {
+        self.to_code().serialize(wrt)
+    }
+
+    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
+        let code = u8::deserialize(reader)?;
+        let codec_type: Self = Self::from_code(code)
+            .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Unknown code `{code}.`"))?;
+        Ok(codec_type)
+    }
+}
+
+impl NullIndexCodec {
+    pub(crate) fn to_code(self) -> u8 {
+        self as u8
+    }
+
+    pub(crate) fn from_code(code: u8) -> Option<Self> {
+        match code {
+            1 => Some(Self::Full),
+            _ => None,
+        }
+    }
+}
+
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub(crate) struct NullIndexFooter {
+    pub(crate) cardinality: FastFieldCardinality,
+    pub(crate) null_index_codec: NullIndexCodec,
+    // Unused for NullIndexCodec::Full
+    pub(crate) null_index_byte_range: Range<u64>,
+}
+
+impl BinarySerializable for NullIndexFooter {
+    fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
+        self.cardinality.serialize(writer)?;
+        self.null_index_codec.serialize(writer)?;
+        VInt(self.null_index_byte_range.start).serialize(writer)?;
+        VInt(self.null_index_byte_range.end - self.null_index_byte_range.start)
+            .serialize(writer)?;
+        Ok(())
+    }
+
+    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
+        let cardinality = FastFieldCardinality::deserialize(reader)?;
+        let null_index_codec = NullIndexCodec::deserialize(reader)?;
+        let null_index_byte_range_start = VInt::deserialize(reader)?.0;
+        let null_index_byte_range_end = VInt::deserialize(reader)?.0 + null_index_byte_range_start;
+        Ok(Self {
+            cardinality,
+            null_index_codec,
+            null_index_byte_range: null_index_byte_range_start..null_index_byte_range_end,
+        })
+    }
+}
+
+pub(crate) fn append_null_index_footer(
+    output: &mut impl io::Write,
+    null_index_footer: NullIndexFooter,
+) -> io::Result<()> {
+    let mut counting_write = CountingWriter::wrap(output);
+    null_index_footer.serialize(&mut counting_write)?;
+    let footer_payload_len = counting_write.written_bytes();
+    BinarySerializable::serialize(&(footer_payload_len as u16), &mut counting_write)?;
+
+    Ok(())
+}
+
+pub(crate) fn read_null_index_footer(
+    data: OwnedBytes,
+) -> io::Result<(OwnedBytes, NullIndexFooter)> {
+    let (data, null_footer_length_bytes) = data.rsplit(2);
+
+    let footer_length = u16::deserialize(&mut null_footer_length_bytes.as_slice())?;
+    let (data, null_index_footer_bytes) = data.rsplit(footer_length as usize);
+    let null_index_footer = NullIndexFooter::deserialize(&mut null_index_footer_bytes.as_ref())?;
+
+    Ok((data, null_index_footer))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn null_index_footer_deser_test() {
+        let null_index_footer = NullIndexFooter {
+            cardinality: FastFieldCardinality::Single,
+            null_index_codec: NullIndexCodec::Full,
+            null_index_byte_range: 100..120,
+        };
+
+        let mut out = vec![];
+        null_index_footer.serialize(&mut out).unwrap();
+
+        assert_eq!(
+            null_index_footer,
+            NullIndexFooter::deserialize(&mut &out[..]).unwrap()
+        );
+    }
+}
--- a/fastfield_codecs/src/serialize.rs
+++ b/fastfield_codecs/src/serialize.rs
@@ -0,0 +1,408 @@
+use std::io;
+use std::num::NonZeroU64;
+use std::sync::Arc;
+
+use common::{BinarySerializable, VInt};
+use log::warn;
+use ownedbytes::OwnedBytes;
+
+use crate::bitpacked::BitpackedCodec;
+use crate::blockwise_linear::BlockwiseLinearCodec;
+use crate::compact_space::CompactSpaceCompressor;
+use crate::format_version::append_format_version;
+use crate::linear::LinearCodec;
+use crate::monotonic_mapping::{
+    StrictlyMonotonicFn, StrictlyMonotonicMappingToInternal,
+    StrictlyMonotonicMappingToInternalGCDBaseval,
+};
+use crate::null_index_footer::{
+    append_null_index_footer, FastFieldCardinality, NullIndexCodec, NullIndexFooter,
+};
+use crate::{
+    monotonic_map_column, Column, FastFieldCodec, FastFieldCodecType, MonotonicallyMappableToU64,
+    U128FastFieldCodecType, VecColumn, ALL_CODEC_TYPES,
+};
+
+/// The normalized header gives some parameters after applying the following
+/// normalization of the vector:
+/// `val -> (val - min_value) / gcd`
+///
+/// By design, after normalization, `min_value = 0` and `gcd = 1`.
+#[derive(Debug, Copy, Clone)]
+pub struct NormalizedHeader {
+    /// The number of values in the underlying column.
+    pub num_vals: u32,
+    /// The max value of the underlying column.
+    pub max_value: u64,
+}
+
+#[derive(Debug, Copy, Clone)]
+pub(crate) struct Header {
+    pub num_vals: u32,
+    pub min_value: u64,
+    pub max_value: u64,
+    pub gcd: Option<NonZeroU64>,
+    pub codec_type: FastFieldCodecType,
+}
+
+impl Header {
+    pub fn normalized(self) -> NormalizedHeader {
+        let gcd = self.gcd.map(|gcd| gcd.get()).unwrap_or(1);
+        let gcd_min_val_mapping =
+            StrictlyMonotonicMappingToInternalGCDBaseval::new(gcd, self.min_value);
+
+        let max_value = gcd_min_val_mapping.mapping(self.max_value);
+        NormalizedHeader {
+            num_vals: self.num_vals,
+            max_value,
+        }
+    }
+
+    pub fn normalize_column<C: Column>(&self, from_column: C) -> impl Column {
+        normalize_column(from_column, self.min_value, self.gcd)
+    }
+
+    pub fn compute_header(
+        column: impl Column<u64>,
+        codecs: &[FastFieldCodecType],
+    ) -> Option<Header> {
+        let num_vals = column.num_vals();
+        let min_value = column.min_value();
+        let max_value = column.max_value();
+        let gcd = crate::gcd::find_gcd(column.iter().map(|val| val - min_value))
+            .filter(|gcd| gcd.get() > 1u64);
+        let normalized_column = normalize_column(column, min_value, gcd);
+        let codec_type = detect_codec(normalized_column, codecs)?;
+        Some(Header {
+            num_vals,
+            min_value,
+            max_value,
+            gcd,
+            codec_type,
+        })
+    }
+}
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub(crate) struct U128Header {
+    pub num_vals: u32,
+    pub codec_type: U128FastFieldCodecType,
+}
+
+impl BinarySerializable for U128Header {
+    fn serialize<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
+        VInt(self.num_vals as u64).serialize(writer)?;
+        self.codec_type.serialize(writer)?;
+        Ok(())
+    }
+
+    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
+        let num_vals = VInt::deserialize(reader)?.0 as u32;
+        let codec_type = U128FastFieldCodecType::deserialize(reader)?;
+        Ok(U128Header {
+            num_vals,
+            codec_type,
+        })
+    }
+}
+
+pub fn normalize_column<C: Column>(
+    from_column: C,
+    min_value: u64,
+    gcd: Option<NonZeroU64>,
+) -> impl Column {
+    let gcd = gcd.map(|gcd| gcd.get()).unwrap_or(1);
+    let mapping = StrictlyMonotonicMappingToInternalGCDBaseval::new(gcd, min_value);
+    monotonic_map_column(from_column, mapping)
+}
+
+impl BinarySerializable for Header {
+    fn serialize<W: io::Write>(&self, writer: &mut W) -> io::Result<()> {
+        VInt(self.num_vals as u64).serialize(writer)?;
+        VInt(self.min_value).serialize(writer)?;
+        VInt(self.max_value - self.min_value).serialize(writer)?;
+        if let Some(gcd) = self.gcd {
+            VInt(gcd.get()).serialize(writer)?;
+        } else {
+            VInt(0u64).serialize(writer)?;
+        }
+        self.codec_type.serialize(writer)?;
+        Ok(())
+    }
+
+    fn deserialize<R: io::Read>(reader: &mut R) -> io::Result<Self> {
+        let num_vals = VInt::deserialize(reader)?.0 as u32;
+        let min_value = VInt::deserialize(reader)?.0;
+        let amplitude = VInt::deserialize(reader)?.0;
+        let max_value = min_value + amplitude;
+        let gcd_u64 = VInt::deserialize(reader)?.0;
+        let codec_type = FastFieldCodecType::deserialize(reader)?;
+        Ok(Header {
+            num_vals,
+            min_value,
+            max_value,
+            gcd: NonZeroU64::new(gcd_u64),
+            codec_type,
+        })
+    }
+}
+
+/// Return estimated compression for given codec in the value range [0.0..1.0], where 1.0 means no
+/// compression.
+pub fn estimate<T: MonotonicallyMappableToU64>(
+    typed_column: impl Column<T>,
+    codec_type: FastFieldCodecType,
+) -> Option<f32> {
+    let column = monotonic_map_column(typed_column, StrictlyMonotonicMappingToInternal::<T>::new());
+    let min_value = column.min_value();
+    let gcd = crate::gcd::find_gcd(column.iter().map(|val| val - min_value))
+        .filter(|gcd| gcd.get() > 1u64);
+    let mapping = StrictlyMonotonicMappingToInternalGCDBaseval::new(
+        gcd.map(|gcd| gcd.get()).unwrap_or(1u64),
+        min_value,
+    );
+    let normalized_column = monotonic_map_column(&column, mapping);
+    match codec_type {
+        FastFieldCodecType::Bitpacked => BitpackedCodec::estimate(&normalized_column),
+        FastFieldCodecType::Linear => LinearCodec::estimate(&normalized_column),
+        FastFieldCodecType::BlockwiseLinear => BlockwiseLinearCodec::estimate(&normalized_column),
+    }
+}
+
+/// Serializes u128 values with the compact space codec.
+pub fn serialize_u128<F: Fn() -> I, I: Iterator<Item = u128>>(
+    iter_gen: F,
+    num_vals: u32,
+    output: &mut impl io::Write,
+) -> io::Result<()> {
+    serialize_u128_new(ValueIndexInfo::default(), iter_gen, num_vals, output)
+}
+
+#[allow(dead_code)]
+pub enum ValueIndexInfo<'a> {
+    MultiValue(Box<dyn MultiValueIndexInfo + 'a>),
+    SingleValue(Box<dyn SingleValueIndexInfo + 'a>),
+}
+
+impl Default for ValueIndexInfo<'static> {
+    fn default() -> Self {
+        struct Dummy {}
+        impl SingleValueIndexInfo for Dummy {
+            fn num_vals(&self) -> u32 {
+                todo!()
+            }
+            fn num_non_nulls(&self) -> u32 {
+                todo!()
+            }
+            fn iter(&self) -> Box<dyn Iterator<Item = u32>> {
+                todo!()
+            }
+        }
+
+        Self::SingleValue(Box::new(Dummy {}))
+    }
+}
+
+impl<'a> ValueIndexInfo<'a> {
+    fn get_cardinality(&self) -> FastFieldCardinality {
+        match self {
+            ValueIndexInfo::MultiValue(_) => FastFieldCardinality::Multi,
+            ValueIndexInfo::SingleValue(_) => FastFieldCardinality::Single,
+        }
+    }
+}
+
+pub trait MultiValueIndexInfo {
+    /// The number of docs in the column.
+    fn num_docs(&self) -> u32;
+    /// The number of values in the column.
+    fn num_vals(&self) -> u32;
+    /// Return the start index of the values for each doc
+    fn iter(&self) -> Box<dyn Iterator<Item = u32> + '_>;
+}
+
+pub trait SingleValueIndexInfo {
+    /// The number of values including nulls in the column.
+    fn num_vals(&self) -> u32;
+    /// The number of non-null values in the column.
+    fn num_non_nulls(&self) -> u32;
+    /// Return a iterator of the positions of docs with a value
+    fn iter(&self) -> Box<dyn Iterator<Item = u32> + '_>;
+}
+
+/// Serializes u128 values with the compact space codec.
+pub fn serialize_u128_new<F: Fn() -> I, I: Iterator<Item = u128>>(
+    value_index: ValueIndexInfo,
+    iter_gen: F,
+    num_vals: u32,
+    output: &mut impl io::Write,
+) -> io::Result<()> {
+    let header = U128Header {
+        num_vals,
+        codec_type: U128FastFieldCodecType::CompactSpace,
+    };
+    header.serialize(output)?;
+    let compressor = CompactSpaceCompressor::train_from(iter_gen(), num_vals);
+    compressor.compress_into(iter_gen(), output).unwrap();
+
+    let null_index_footer = NullIndexFooter {
+        cardinality: value_index.get_cardinality(),
+        null_index_codec: NullIndexCodec::Full,
+        null_index_byte_range: 0..0,
+    };
+    append_null_index_footer(output, null_index_footer)?;
+    append_format_version(output)?;
+
+    Ok(())
+}
+
+/// Serializes the column with the codec with the best estimate on the data.
+pub fn serialize<T: MonotonicallyMappableToU64>(
+    typed_column: impl Column<T>,
+    output: &mut impl io::Write,
+    codecs: &[FastFieldCodecType],
+) -> io::Result<()> {
+    serialize_new(ValueIndexInfo::default(), typed_column, output, codecs)
+}
+
+/// Serializes the column with the codec with the best estimate on the data.
+pub fn serialize_new<T: MonotonicallyMappableToU64>(
+    value_index: ValueIndexInfo,
+    typed_column: impl Column<T>,
+    output: &mut impl io::Write,
+    codecs: &[FastFieldCodecType],
+) -> io::Result<()> {
+    let column = monotonic_map_column(typed_column, StrictlyMonotonicMappingToInternal::<T>::new());
+    let header = Header::compute_header(&column, codecs).ok_or_else(|| {
+        io::Error::new(
+            io::ErrorKind::InvalidInput,
+            format!(
+                "Data cannot be serialized with this list of codec. {:?}",
+                codecs
+            ),
+        )
+    })?;
+    header.serialize(output)?;
+    let normalized_column = header.normalize_column(column);
+    assert_eq!(normalized_column.min_value(), 0u64);
+    serialize_given_codec(normalized_column, header.codec_type, output)?;
+
+    let null_index_footer = NullIndexFooter {
+        cardinality: value_index.get_cardinality(),
+        null_index_codec: NullIndexCodec::Full,
+        null_index_byte_range: 0..0,
+    };
+    append_null_index_footer(output, null_index_footer)?;
+    append_format_version(output)?;
+
+    Ok(())
+}
+
+fn detect_codec(
+    column: impl Column<u64>,
+    codecs: &[FastFieldCodecType],
+) -> Option<FastFieldCodecType> {
+    let mut estimations = Vec::new();
+    for &codec in codecs {
+        let estimation_opt = match codec {
+            FastFieldCodecType::Bitpacked => BitpackedCodec::estimate(&column),
+            FastFieldCodecType::Linear => LinearCodec::estimate(&column),
+            FastFieldCodecType::BlockwiseLinear => BlockwiseLinearCodec::estimate(&column),
+        };
+        if let Some(estimation) = estimation_opt {
+            estimations.push((estimation, codec));
+        }
+    }
+    if let Some(broken_estimation) = estimations.iter().find(|estimation| estimation.0.is_nan()) {
+        warn!(
+            "broken estimation for fast field codec {:?}",
+            broken_estimation.1
+        );
+    }
+    // removing nan values for codecs with broken calculations, and max values which disables
+    // codecs
+    estimations.retain(|estimation| !estimation.0.is_nan() && estimation.0 != f32::MAX);
+    estimations.sort_by(|(score_left, _), (score_right, _)| score_left.total_cmp(score_right));
+    Some(estimations.first()?.1)
+}
+
+fn serialize_given_codec(
+    column: impl Column<u64>,
+    codec_type: FastFieldCodecType,
+    output: &mut impl io::Write,
+) -> io::Result<()> {
+    match codec_type {
+        FastFieldCodecType::Bitpacked => {
+            BitpackedCodec::serialize(&column, output)?;
+        }
+        FastFieldCodecType::Linear => {
+            LinearCodec::serialize(&column, output)?;
+        }
+        FastFieldCodecType::BlockwiseLinear => {
+            BlockwiseLinearCodec::serialize(&column, output)?;
+        }
+    }
+    output.flush()?;
+    Ok(())
+}
+
+/// Helper function to serialize a column (autodetect from all codecs) and then open it
+pub fn serialize_and_load<T: MonotonicallyMappableToU64 + Ord + Default>(
+    column: &[T],
+) -> Arc<dyn Column<T>> {
+    let mut buffer = Vec::new();
+    super::serialize(VecColumn::from(&column), &mut buffer, &ALL_CODEC_TYPES).unwrap();
+    super::open(OwnedBytes::new(buffer)).unwrap()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_serialize_deserialize_u128_header() {
+        let original = U128Header {
+            num_vals: 11,
+            codec_type: U128FastFieldCodecType::CompactSpace,
+        };
+        let mut out = Vec::new();
+        original.serialize(&mut out).unwrap();
+        let restored = U128Header::deserialize(&mut &out[..]).unwrap();
+        assert_eq!(restored, original);
+    }
+
+    #[test]
+    fn test_serialize_deserialize() {
+        let original = [1u64, 5u64, 10u64];
+        let restored: Vec<u64> = serialize_and_load(&original[..]).iter().collect();
+        assert_eq!(&restored, &original[..]);
+    }
+
+    #[test]
+    fn test_fastfield_bool_size_bitwidth_1() {
+        let mut buffer = Vec::new();
+        let col = VecColumn::from(&[false, true][..]);
+        serialize(col, &mut buffer, &ALL_CODEC_TYPES).unwrap();
+        // 5 bytes of header, 1 byte of value, 7 bytes of padding.
+        assert_eq!(buffer.len(), 3 + 5 + 8 + 4 + 2);
+    }
+
+    #[test]
+    fn test_fastfield_bool_bit_size_bitwidth_0() {
+        let mut buffer = Vec::new();
+        let col = VecColumn::from(&[true][..]);
+        serialize(col, &mut buffer, &ALL_CODEC_TYPES).unwrap();
+        // 5 bytes of header, 0 bytes of value, 7 bytes of padding.
+        assert_eq!(buffer.len(), 3 + 5 + 7 + 4 + 2);
+    }
+
+    #[test]
+    fn test_fastfield_gcd() {
+        let mut buffer = Vec::new();
+        let vals: Vec<u64> = (0..80).map(|val| (val % 7) * 1_000u64).collect();
+        let col = VecColumn::from(&vals[..]);
+        serialize(col, &mut buffer, &[FastFieldCodecType::Bitpacked]).unwrap();
+        // Values are stored over 3 bits.
+        assert_eq!(buffer.len(), 3 + 7 + (3 * 80 / 8) + 7 + 4 + 2);
+    }
+}
--- a/ownedbytes/Cargo.toml
+++ b/ownedbytes/Cargo.toml
@@ -1,10 +1,14 @@
 [package]
 authors = ["Paul Masurel <paul@quickwit.io>", "Pascal Seitz <pascal@quickwit.io>"]
 name = "ownedbytes"
-version = "0.3.0"
+version = "0.5.0"
 edition = "2021"
 description = "Expose data as static slice"
 license = "MIT"
+documentation = "https://docs.rs/ownedbytes/"
+homepage = "https://github.com/quickwit-oss/tantivy"
+repository = "https://github.com/quickwit-oss/tantivy"
+
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]
--- a/ownedbytes/src/lib.rs
+++ b/ownedbytes/src/lib.rs
@@ -3,10 +3,10 @@ use std::ops::{Deref, Range};
 use std::sync::Arc;
 use std::{fmt, io, mem};

-use stable_deref_trait::StableDeref;
+pub use stable_deref_trait::StableDeref;

 /// An OwnedBytes simply wraps an object that owns a slice of data and exposes
-/// this data as a static slice.
+/// this data as a slice.
 ///
 /// The backing object is required to be `StableDeref`.
 #[derive(Clone)]
@@ -21,7 +21,7 @@ impl OwnedBytes {
        OwnedBytes::new(&[][..])
    }

-    /// Creates an `OwnedBytes` intance given a `StableDeref` object.
+    /// Creates an `OwnedBytes` instance given a `StableDeref` object.
    pub fn new<T: StableDeref + Deref<Target = [u8]> + 'static + Send + Sync>(
        data_holder: T,
    ) -> OwnedBytes {
@@ -80,6 +80,21 @@ impl OwnedBytes {
        (left, right)
    }

+    /// Splits the OwnedBytes into two OwnedBytes `(left, right)`.
+    ///
+    /// Right will hold `split_len` bytes.
+    ///
+    /// This operation is cheap and does not require to copy any memory.
+    /// On the other hand, both `left` and `right` retain a handle over
+    /// the entire slice of memory. In other words, the memory will only
+    /// be released when both left and right are dropped.
+    #[inline]
+    #[must_use]
+    pub fn rsplit(self, split_len: usize) -> (OwnedBytes, OwnedBytes) {
+        let data_len = self.data.len();
+        self.split(data_len - split_len)
+    }
+
    /// Splits the right part of the `OwnedBytes` at the given offset.
    ///
    /// `self` is truncated to `split_len`, left with the remaining bytes.
--- a/query-grammar/Cargo.toml
+++ b/query-grammar/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "tantivy-query-grammar"
-version = "0.18.0"
+version = "0.19.0"
 authors = ["Paul Masurel <paul.masurel@gmail.com>"]
 license = "MIT"
 categories = ["database-implementations", "data-structures"]
--- a/query-grammar/src/lib.rs
+++ b/query-grammar/src/lib.rs
@@ -1,3 +1,5 @@
+#![allow(clippy::derive_partial_eq_without_eq)]
+
 mod occur;
 mod query_grammar;
 mod user_input_ast;
--- a/query-grammar/src/query_grammar.rs
+++ b/query-grammar/src/query_grammar.rs
@@ -5,7 +5,8 @@ use combine::parser::range::{take_while, take_while1};
 use combine::parser::repeat::escaped;
 use combine::parser::Parser;
 use combine::{
-    attempt, choice, eof, many, many1, one_of, optional, parser, satisfy, skip_many1, value,
+    attempt, between, choice, eof, many, many1, one_of, optional, parser, satisfy, sep_by,
+    skip_many1, value,
 };
 use once_cell::sync::Lazy;
 use regex::Regex;
@@ -23,7 +24,7 @@ const ESCAPED_SPECIAL_CHARS_PATTERN: &str = r#"\\(\+|\^|`|:|\{|\}|"|\[|\]|\(|\)|
 /// Parses a field_name
 /// A field name must have at least one character and be followed by a colon.
 /// All characters are allowed including special characters `SPECIAL_CHARS`, but these
-/// need to be escaped with a backslack character '\'.
+/// need to be escaped with a backslash character '\'.
 fn field_name<'a>() -> impl Parser<&'a str, Output = String> {
    static ESCAPED_SPECIAL_CHARS_RE: Lazy<Regex> =
        Lazy::new(|| Regex::new(ESCAPED_SPECIAL_CHARS_PATTERN).unwrap());
@@ -62,13 +63,27 @@ fn word<'a>() -> impl Parser<&'a str, Output = String> {
        })
 }

+// word variant that allows more characters, e.g. for range queries that don't allow field
+// specifier
+fn relaxed_word<'a>() -> impl Parser<&'a str, Output = String> {
+    (
+        satisfy(|c: char| {
+            !c.is_whitespace() && !['`', '{', '}', '"', '[', ']', '(', ')'].contains(&c)
+        }),
+        many(satisfy(|c: char| {
+            !c.is_whitespace() && !['{', '}', '"', '[', ']', '(', ')'].contains(&c)
+        })),
+    )
+        .map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
+}
+
 /// Parses a date time according to rfc3339
 /// 2015-08-02T18:54:42+02
 /// 2021-04-13T19:46:26.266051969+00:00
 ///
 /// NOTE: also accepts 999999-99-99T99:99:99.266051969+99:99
-/// We delegate rejecting such invalid dates to the logical AST compuation code
-/// which invokes time::OffsetDateTime::parse(..., &Rfc3339) on the value to actually parse
+/// We delegate rejecting such invalid dates to the logical AST computation code
+/// which invokes `time::OffsetDateTime::parse(..., &Rfc3339)` on the value to actually parse
 /// it (instead of merely extracting the datetime value as string as done here).
 fn date_time<'a>() -> impl Parser<&'a str, Output = String> {
    let two_digits = || recognize::<String, _, _>((digit(), digit()));
@@ -181,8 +196,8 @@ fn spaces1<'a>() -> impl Parser<&'a str, Output = ()> {
 fn range<'a>() -> impl Parser<&'a str, Output = UserInputLeaf> {
    let range_term_val = || {
        attempt(date_time())
-            .or(word())
            .or(negative_number())
+            .or(relaxed_word())
            .or(char('*').with(value("*".to_string())))
    };

@@ -250,6 +265,17 @@ fn range<'a>() -> impl Parser<&'a str, Output = UserInputLeaf> {
    })
 }

+/// Function that parses a set out of a Stream
+/// Supports ranges like: `IN [val1 val2 val3]`
+fn set<'a>() -> impl Parser<&'a str, Output = UserInputLeaf> {
+    let term_list = between(char('['), char(']'), sep_by(term_val(), spaces()));
+
+    let set_content = ((string("IN"), spaces()), term_list).map(|(_, elements)| elements);
+
+    (optional(attempt(field_name().skip(spaces()))), set_content)
+        .map(|(field, elements)| UserInputLeaf::Set { field, elements })
+}
+
 fn negate(expr: UserInputAst) -> UserInputAst {
    expr.unary(Occur::MustNot)
 }
@@ -264,6 +290,7 @@ fn leaf<'a>() -> impl Parser<&'a str, Output = UserInputAst> {
                string("NOT").skip(spaces1()).with(leaf()).map(negate),
            ))
            .or(attempt(range().map(UserInputAst::from)))
+            .or(attempt(set().map(UserInputAst::from)))
            .or(literal().map(UserInputAst::from))
            .parse_stream(input)
            .into_result()
@@ -649,6 +676,34 @@ mod test {
            .expect("Cannot parse date range")
            .0;
        assert_eq!(res6, expected_flexible_dates);
+        // IP Range Unbounded
+        let expected_weight = UserInputLeaf::Range {
+            field: Some("ip".to_string()),
+            lower: UserInputBound::Inclusive("::1".to_string()),
+            upper: UserInputBound::Unbounded,
+        };
+        let res1 = range()
+            .parse("ip: >=::1")
+            .expect("Cannot parse ip v6 format")
+            .0;
+        let res2 = range()
+            .parse("ip:[::1 TO *}")
+            .expect("Cannot parse ip v6 format")
+            .0;
+        assert_eq!(res1, expected_weight);
+        assert_eq!(res2, expected_weight);
+
+        // IP Range Bounded
+        let expected_weight = UserInputLeaf::Range {
+            field: Some("ip".to_string()),
+            lower: UserInputBound::Inclusive("::0.0.0.50".to_string()),
+            upper: UserInputBound::Exclusive("::0.0.0.52".to_string()),
+        };
+        let res1 = range()
+            .parse("ip:[::0.0.0.50 TO ::0.0.0.52}")
+            .expect("Cannot parse ip v6 format")
+            .0;
+        assert_eq!(res1, expected_weight);
    }

    #[test]
@@ -705,6 +760,14 @@ mod test {
        test_parse_query_to_ast_helper("+(a b) +d", "(+(*\"a\" *\"b\") +\"d\")");
    }

+    #[test]
+    fn test_parse_test_query_set() {
+        test_parse_query_to_ast_helper("abc: IN [a b c]", r#""abc": IN ["a" "b" "c"]"#);
+        test_parse_query_to_ast_helper("abc: IN [1]", r#""abc": IN ["1"]"#);
+        test_parse_query_to_ast_helper("abc: IN []", r#""abc": IN []"#);
+        test_parse_query_to_ast_helper("IN [1 2]", r#"IN ["1" "2"]"#);
+    }
+
    #[test]
    fn test_parse_test_query_other() {
        test_parse_query_to_ast_helper("(+a +b) d", "(*(+\"a\" +\"b\") *\"d\")");
--- a/query-grammar/src/user_input_ast.rs
+++ b/query-grammar/src/user_input_ast.rs
@@ -12,6 +12,10 @@ pub enum UserInputLeaf {
        lower: UserInputBound,
        upper: UserInputBound,
    },
+    Set {
+        field: Option<String>,
+        elements: Vec<String>,
+    },
 }

 impl Debug for UserInputLeaf {
@@ -31,6 +35,19 @@ impl Debug for UserInputLeaf {
                upper.display_upper(formatter)?;
                Ok(())
            }
+            UserInputLeaf::Set { field, elements } => {
+                if let Some(ref field) = field {
+                    write!(formatter, "\"{}\": ", field)?;
+                }
+                write!(formatter, "IN [")?;
+                for (i, element) in elements.iter().enumerate() {
+                    if i != 0 {
+                        write!(formatter, " ")?;
+                    }
+                    write!(formatter, "\"{}\"", element)?;
+                }
+                write!(formatter, "]")
+            }
            UserInputLeaf::All => write!(formatter, "*"),
        }
    }
--- a/src/aggregation/agg_req.rs
+++ b/src/aggregation/agg_req.rs
@@ -1,7 +1,7 @@
 //! Contains the aggregation request tree. Used to build an
-//! [AggregationCollector](super::AggregationCollector).
+//! [`AggregationCollector`](super::AggregationCollector).
 //!
-//! [Aggregations] is the top level entry point to create a request, which is a `HashMap<String,
+//! [`Aggregations`] is the top level entry point to create a request, which is a `HashMap<String,
 //! Aggregation>`.
 //!
 //! Requests are compatible with the json format of elasticsearch.
@@ -54,8 +54,8 @@ use super::bucket::{HistogramAggregation, TermsAggregation};
 use super::metric::{AverageAggregation, StatsAggregation};
 use super::VecWithNames;

-/// The top-level aggregation request structure, which contains [Aggregation] and their user defined
-/// names. It is also used in [buckets](BucketAggregation) to define sub-aggregations.
+/// The top-level aggregation request structure, which contains [`Aggregation`] and their user
+/// defined names. It is also used in [buckets](BucketAggregation) to define sub-aggregations.
 ///
 /// The key is the user defined name of the aggregation.
 pub type Aggregations = HashMap<String, Aggregation>;
@@ -139,15 +139,15 @@ pub fn get_fast_field_names(aggs: &Aggregations) -> HashSet<String> {
    fast_field_names
 }

-/// Aggregation request of [BucketAggregation] or [MetricAggregation].
+/// Aggregation request of [`BucketAggregation`] or [`MetricAggregation`].
 ///
 /// An aggregation is either a bucket or a metric.
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 #[serde(untagged)]
 pub enum Aggregation {
-    /// Bucket aggregation, see [BucketAggregation] for details.
+    /// Bucket aggregation, see [`BucketAggregation`] for details.
    Bucket(BucketAggregation),
-    /// Metric aggregation, see [MetricAggregation] for details.
+    /// Metric aggregation, see [`MetricAggregation`] for details.
    Metric(MetricAggregation),
 }

--- a/src/aggregation/agg_req_with_accessor.rs
+++ b/src/aggregation/agg_req_with_accessor.rs
@@ -4,14 +4,14 @@ use std::rc::Rc;
 use std::sync::atomic::AtomicU32;
 use std::sync::Arc;

+use fastfield_codecs::Column;
+
 use super::agg_req::{Aggregation, Aggregations, BucketAggregationType, MetricAggregation};
 use super::bucket::{HistogramAggregation, RangeAggregation, TermsAggregation};
 use super::metric::{AverageAggregation, StatsAggregation};
 use super::segment_agg_result::BucketCount;
 use super::VecWithNames;
-use crate::fastfield::{
-    type_and_cardinality, DynamicFastFieldReader, FastType, MultiValuedFastFieldReader,
-};
+use crate::fastfield::{type_and_cardinality, MultiValuedFastFieldReader};
 use crate::schema::{Cardinality, Type};
 use crate::{InvertedIndexReader, SegmentReader, TantivyError};

@@ -37,10 +37,16 @@ impl AggregationsWithAccessor {
 #[derive(Clone)]
 pub(crate) enum FastFieldAccessor {
    Multi(MultiValuedFastFieldReader<u64>),
-    Single(DynamicFastFieldReader<u64>),
+    Single(Arc<dyn Column<u64>>),
 }
 impl FastFieldAccessor {
-    pub fn as_single(&self) -> Option<&DynamicFastFieldReader<u64>> {
+    pub fn as_single(&self) -> Option<&dyn Column<u64>> {
+        match self {
+            FastFieldAccessor::Multi(_) => None,
+            FastFieldAccessor::Single(reader) => Some(&**reader),
+        }
+    }
+    pub fn into_single(self) -> Option<Arc<dyn Column<u64>>> {
        match self {
            FastFieldAccessor::Multi(_) => None,
            FastFieldAccessor::Single(reader) => Some(reader),
@@ -118,7 +124,7 @@ impl BucketAggregationWithAccessor {
 pub struct MetricAggregationWithAccessor {
    pub metric: MetricAggregation,
    pub field_type: Type,
-    pub accessor: DynamicFastFieldReader<u64>,
+    pub accessor: Arc<dyn Column>,
 }

 impl MetricAggregationWithAccessor {
@@ -134,9 +140,8 @@ impl MetricAggregationWithAccessor {

                Ok(MetricAggregationWithAccessor {
                    accessor: accessor
-                        .as_single()
-                        .expect("unexpected fast field cardinality")
-                        .clone(),
+                        .into_single()
+                        .expect("unexpected fast field cardinality"),
                    field_type,
                    metric: metric.clone(),
                })
@@ -189,13 +194,7 @@ fn get_ff_reader_and_validate(
        .ok_or_else(|| TantivyError::FieldNotFound(field_name.to_string()))?;
    let field_type = reader.schema().get_field_entry(field).field_type();

-    if let Some((ff_type, field_cardinality)) = type_and_cardinality(field_type) {
-        if ff_type == FastType::Date {
-            return Err(TantivyError::InvalidArgument(
-                "Unsupported field type date in aggregation".to_string(),
-            ));
-        }
-
+    if let Some((_ff_type, field_cardinality)) = type_and_cardinality(field_type) {
        if cardinality != field_cardinality {
            return Err(TantivyError::InvalidArgument(format!(
                "Invalid field cardinality on field {} expected {:?}, but got {:?}",
--- a/src/aggregation/agg_result.rs
+++ b/src/aggregation/agg_result.rs
@@ -4,9 +4,7 @@
 //! intermediate average results, which is the sum and the number of values. The actual average is
 //! calculated on the step from intermediate to final aggregation result tree.

-use std::collections::HashMap;
-
-use fnv::FnvHashMap;
+use rustc_hash::FxHashMap;
 use serde::{Deserialize, Serialize};

 use super::agg_req::BucketAggregationInternal;
@@ -14,11 +12,12 @@ use super::bucket::GetDocCount;
 use super::intermediate_agg_result::{IntermediateBucketResult, IntermediateMetricResult};
 use super::metric::{SingleMetricResult, Stats};
 use super::Key;
+use crate::schema::Schema;
 use crate::TantivyError;

 #[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
 /// The final aggegation result.
-pub struct AggregationResults(pub HashMap<String, AggregationResult>);
+pub struct AggregationResults(pub FxHashMap<String, AggregationResult>);

 impl AggregationResults {
    pub(crate) fn get_value_from_aggregation(
@@ -57,8 +56,7 @@ impl AggregationResult {
        match self {
            AggregationResult::BucketResult(_bucket) => Err(TantivyError::InternalError(
                "Tried to retrieve value from bucket aggregation. This is not supported and \
-                 should not happen during collection phase, but should be catched during \
-                 validation"
+                 should not happen during collection phase, but should be caught during validation"
                    .to_string(),
            )),
            AggregationResult::MetricResult(metric) => metric.get_value(agg_property),
@@ -114,14 +112,14 @@ pub enum BucketResult {
        ///
        /// If there are holes depends on the request, if min_doc_count is 0, then there are no
        /// holes between the first and last bucket.
-        /// See [HistogramAggregation](super::bucket::HistogramAggregation)
+        /// See [`HistogramAggregation`](super::bucket::HistogramAggregation)
        buckets: BucketEntries<BucketEntry>,
    },
    /// This is the term result
    Terms {
        /// The buckets.
        ///
-        /// See [TermsAggregation](super::bucket::TermsAggregation)
+        /// See [`TermsAggregation`](super::bucket::TermsAggregation)
        buckets: Vec<BucketEntry>,
        /// The number of documents that didn’t make it into to TOP N due to shard_size or size
        sum_other_doc_count: u64,
@@ -132,9 +130,12 @@ pub enum BucketResult {
 }

 impl BucketResult {
-    pub(crate) fn empty_from_req(req: &BucketAggregationInternal) -> crate::Result<Self> {
+    pub(crate) fn empty_from_req(
+        req: &BucketAggregationInternal,
+        schema: &Schema,
+    ) -> crate::Result<Self> {
        let empty_bucket = IntermediateBucketResult::empty_from_req(&req.bucket_agg);
-        empty_bucket.into_final_bucket_result(req)
+        empty_bucket.into_final_bucket_result(req, schema)
    }
 }

@@ -146,7 +147,7 @@ pub enum BucketEntries<T> {
    /// Vector format bucket entries
    Vec(Vec<T>),
    /// HashMap format bucket entries
-    HashMap(FnvHashMap<String, T>),
+    HashMap(FxHashMap<String, T>),
 }

 /// This is the default entry for a bucket, which contains a key, count, and optionally
@@ -177,6 +178,9 @@ pub enum BucketEntries<T> {
 /// ```
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 pub struct BucketEntry {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    /// The string representation of the bucket.
+    pub key_as_string: Option<String>,
    /// The identifier of the bucket.
    pub key: Key,
    /// Number of documents in the bucket.
@@ -235,10 +239,16 @@ pub struct RangeBucketEntry {
    #[serde(flatten)]
    /// sub-aggregations in this bucket.
    pub sub_aggregation: AggregationResults,
-    /// The from range of the bucket. Equals f64::MIN when None.
+    /// The from range of the bucket. Equals `f64::MIN` when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub from: Option<f64>,
-    /// The to range of the bucket. Equals f64::MAX when None.
+    /// The to range of the bucket. Equals `f64::MAX` when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub to: Option<f64>,
+    /// The optional string representation for the `from` range.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub from_as_string: Option<String>,
+    /// The optional string representation for the `to` range.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub to_as_string: Option<String>,
 }
--- a/src/aggregation/bucket/histogram/histogram.rs
+++ b/src/aggregation/bucket/histogram/histogram.rs
@@ -1,6 +1,7 @@
 use std::cmp::Ordering;
 use std::fmt::Display;

+use fastfield_codecs::Column;
 use itertools::Itertools;
 use serde::{Deserialize, Serialize};

@@ -9,13 +10,12 @@ use crate::aggregation::agg_req_with_accessor::{
    AggregationsWithAccessor, BucketAggregationWithAccessor,
 };
 use crate::aggregation::agg_result::BucketEntry;
-use crate::aggregation::f64_from_fastfield_u64;
 use crate::aggregation::intermediate_agg_result::{
    IntermediateAggregationResults, IntermediateBucketResult, IntermediateHistogramBucketEntry,
 };
 use crate::aggregation::segment_agg_result::SegmentAggregationResultsCollector;
-use crate::fastfield::{DynamicFastFieldReader, FastFieldReader};
-use crate::schema::Type;
+use crate::aggregation::{f64_from_fastfield_u64, format_date};
+use crate::schema::{Schema, Type};
 use crate::{DocId, TantivyError};

 /// Histogram is a bucket aggregation, where buckets are created dynamically for given `interval`.
@@ -37,14 +37,14 @@ use crate::{DocId, TantivyError};
 /// [hard_bounds](HistogramAggregation::hard_bounds).
 ///
 /// # Result
-/// Result type is [BucketResult](crate::aggregation::agg_result::BucketResult) with
-/// [BucketEntry](crate::aggregation::agg_result::BucketEntry) on the
-/// AggregationCollector.
+/// Result type is [`BucketResult`](crate::aggregation::agg_result::BucketResult) with
+/// [`BucketEntry`](crate::aggregation::agg_result::BucketEntry) on the
+/// `AggregationCollector`.
 ///
 /// Result type is
-/// [crate::aggregation::intermediate_agg_result::IntermediateBucketResult] with
-/// [crate::aggregation::intermediate_agg_result::IntermediateHistogramBucketEntry] on the
-/// DistributedAggregationCollector.
+/// [`IntermediateBucketResult`](crate::aggregation::intermediate_agg_result::IntermediateBucketResult) with
+/// [`IntermediateHistogramBucketEntry`](crate::aggregation::intermediate_agg_result::IntermediateHistogramBucketEntry) on the
+/// `DistributedAggregationCollector`.
 ///
 /// # Limitations/Compatibility
 ///
@@ -61,7 +61,7 @@ use crate::{DocId, TantivyError};
 /// ```
 ///
 /// Response
-/// See [BucketEntry](crate::aggregation::agg_result::BucketEntry)
+/// See [`BucketEntry`](crate::aggregation::agg_result::BucketEntry)

 #[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
 pub struct HistogramAggregation {
@@ -70,7 +70,7 @@ pub struct HistogramAggregation {
    /// The interval to chunk your data range. Each bucket spans a value range of [0..interval).
    /// Must be a positive value.
    pub interval: f64,
-    /// Intervals implicitely defines an absolute grid of buckets `[interval * k, interval * (k +
+    /// Intervals implicitly defines an absolute grid of buckets `[interval * k, interval * (k +
    /// 1))`.
    ///
    /// Offset makes it possible to shift this grid into
@@ -206,6 +206,7 @@ pub struct SegmentHistogramCollector {
    field_type: Type,
    interval: f64,
    offset: f64,
+    min_doc_count: u64,
    first_bucket_num: i64,
    bounds: HistogramBounds,
 }
@@ -215,6 +216,30 @@ impl SegmentHistogramCollector {
        self,
        agg_with_accessor: &BucketAggregationWithAccessor,
    ) -> crate::Result<IntermediateBucketResult> {
+        // Compute the number of buckets to validate against max num buckets
+        // Note: We use min_doc_count here, but it's only an lowerbound here, since were are on the
+        // intermediate level and after merging the number of documents of a bucket could exceed
+        // `min_doc_count`.
+        {
+            let cut_off_buckets_front = self
+                .buckets
+                .iter()
+                .take_while(|bucket| bucket.doc_count <= self.min_doc_count)
+                .count();
+            let cut_off_buckets_back = self.buckets[cut_off_buckets_front..]
+                .iter()
+                .rev()
+                .take_while(|bucket| bucket.doc_count <= self.min_doc_count)
+                .count();
+            let estimate_num_buckets =
+                self.buckets.len() - cut_off_buckets_front - cut_off_buckets_back;
+
+            agg_with_accessor
+                .bucket_count
+                .add_count(estimate_num_buckets as u32);
+            agg_with_accessor.bucket_count.validate_bucket_count()?;
+        }
+
        let mut buckets = Vec::with_capacity(
            self.buckets
                .iter()
@@ -251,11 +276,6 @@ impl SegmentHistogramCollector {
            );
        };

-        agg_with_accessor
-            .bucket_count
-            .add_count(buckets.len() as u32);
-        agg_with_accessor.bucket_count.validate_bucket_count()?;
-
        Ok(IntermediateBucketResult::Histogram { buckets })
    }

@@ -263,7 +283,7 @@ impl SegmentHistogramCollector {
        req: &HistogramAggregation,
        sub_aggregation: &AggregationsWithAccessor,
        field_type: Type,
-        accessor: &DynamicFastFieldReader<u64>,
+        accessor: &dyn Column<u64>,
    ) -> crate::Result<Self> {
        req.validate()?;
        let min = f64_from_fastfield_u64(accessor.min_value(), &field_type);
@@ -308,6 +328,7 @@ impl SegmentHistogramCollector {
            first_bucket_num,
            bounds,
            sub_aggregations,
+            min_doc_count: req.min_doc_count(),
        })
    }

@@ -331,10 +352,10 @@ impl SegmentHistogramCollector {
            .expect("unexpected fast field cardinatility");
        let mut iter = doc.chunks_exact(4);
        for docs in iter.by_ref() {
-            let val0 = self.f64_from_fastfield_u64(accessor.get(docs[0]));
-            let val1 = self.f64_from_fastfield_u64(accessor.get(docs[1]));
-            let val2 = self.f64_from_fastfield_u64(accessor.get(docs[2]));
-            let val3 = self.f64_from_fastfield_u64(accessor.get(docs[3]));
+            let val0 = self.f64_from_fastfield_u64(accessor.get_val(docs[0]));
+            let val1 = self.f64_from_fastfield_u64(accessor.get_val(docs[1]));
+            let val2 = self.f64_from_fastfield_u64(accessor.get_val(docs[2]));
+            let val3 = self.f64_from_fastfield_u64(accessor.get_val(docs[3]));

            let bucket_pos0 = get_bucket_num(val0);
            let bucket_pos1 = get_bucket_num(val1);
@@ -370,8 +391,8 @@ impl SegmentHistogramCollector {
                &bucket_with_accessor.sub_aggregation,
            )?;
        }
-        for doc in iter.remainder() {
-            let val = f64_from_fastfield_u64(accessor.get(*doc), &self.field_type);
+        for &doc in iter.remainder() {
+            let val = f64_from_fastfield_u64(accessor.get_val(doc), &self.field_type);
            if !bounds.contains(val) {
                continue;
            }
@@ -380,9 +401,9 @@ impl SegmentHistogramCollector {

            debug_assert_eq!(
                self.buckets[bucket_pos].key,
-                get_bucket_val(val, self.interval, self.offset) as f64
+                get_bucket_val(val, self.interval, self.offset)
            );
-            self.increment_bucket(bucket_pos, *doc, &bucket_with_accessor.sub_aggregation)?;
+            self.increment_bucket(bucket_pos, doc, &bucket_with_accessor.sub_aggregation)?;
        }
        if force_flush {
            if let Some(sub_aggregations) = self.sub_aggregations.as_mut() {
@@ -407,7 +428,7 @@ impl SegmentHistogramCollector {
        if bounds.contains(val) {
            debug_assert_eq!(
                self.buckets[bucket_pos].key,
-                get_bucket_val(val, self.interval, self.offset) as f64
+                get_bucket_val(val, self.interval, self.offset)
            );

            self.increment_bucket(bucket_pos, doc, bucket_with_accessor)?;
@@ -425,7 +446,7 @@ impl SegmentHistogramCollector {
        let bucket = &mut self.buckets[bucket_pos];
        bucket.doc_count += 1;
        if let Some(sub_aggregation) = self.sub_aggregations.as_mut() {
-            (&mut sub_aggregation[bucket_pos]).collect(doc, bucket_with_accessor)?;
+            sub_aggregation[bucket_pos].collect(doc, bucket_with_accessor)?;
        }
        Ok(())
    }
@@ -451,8 +472,9 @@ fn intermediate_buckets_to_final_buckets_fill_gaps(
    buckets: Vec<IntermediateHistogramBucketEntry>,
    histogram_req: &HistogramAggregation,
    sub_aggregation: &AggregationsInternal,
+    schema: &Schema,
 ) -> crate::Result<Vec<BucketEntry>> {
-    // Generate the the full list of buckets without gaps.
+    // Generate the full list of buckets without gaps.
    //
    // The bounds are the min max from the current buckets, optionally extended by
    // extended_bounds from the request
@@ -491,7 +513,9 @@ fn intermediate_buckets_to_final_buckets_fill_gaps(
                sub_aggregation: empty_sub_aggregation.clone(),
            },
        })
-        .map(|intermediate_bucket| intermediate_bucket.into_final_bucket_entry(sub_aggregation))
+        .map(|intermediate_bucket| {
+            intermediate_bucket.into_final_bucket_entry(sub_aggregation, schema)
+        })
        .collect::<crate::Result<Vec<_>>>()
 }

@@ -500,25 +524,48 @@ pub(crate) fn intermediate_histogram_buckets_to_final_buckets(
    buckets: Vec<IntermediateHistogramBucketEntry>,
    histogram_req: &HistogramAggregation,
    sub_aggregation: &AggregationsInternal,
+    schema: &Schema,
 ) -> crate::Result<Vec<BucketEntry>> {
-    if histogram_req.min_doc_count() == 0 {
+    let mut buckets = if histogram_req.min_doc_count() == 0 {
        // With min_doc_count != 0, we may need to add buckets, so that there are no
        // gaps, since intermediate result does not contain empty buckets (filtered to
        // reduce serialization size).

-        intermediate_buckets_to_final_buckets_fill_gaps(buckets, histogram_req, sub_aggregation)
+        intermediate_buckets_to_final_buckets_fill_gaps(
+            buckets,
+            histogram_req,
+            sub_aggregation,
+            schema,
+        )?
    } else {
        buckets
            .into_iter()
            .filter(|histogram_bucket| histogram_bucket.doc_count >= histogram_req.min_doc_count())
-            .map(|histogram_bucket| histogram_bucket.into_final_bucket_entry(sub_aggregation))
-            .collect::<crate::Result<Vec<_>>>()
+            .map(|histogram_bucket| {
+                histogram_bucket.into_final_bucket_entry(sub_aggregation, schema)
+            })
+            .collect::<crate::Result<Vec<_>>>()?
+    };
+
+    // If we have a date type on the histogram buckets, we add the `key_as_string` field as rfc339
+    let field = schema
+        .get_field(&histogram_req.field)
+        .ok_or_else(|| TantivyError::FieldNotFound(histogram_req.field.to_string()))?;
+    if schema.get_field_entry(field).field_type().is_date() {
+        for bucket in buckets.iter_mut() {
+            if let crate::aggregation::Key::F64(val) = bucket.key {
+                let key_as_string = format_date(val as i64)?;
+                bucket.key_as_string = Some(key_as_string);
+            }
+        }
    }
+
+    Ok(buckets)
 }

 /// Applies req extended_bounds/hard_bounds on the min_max value
 ///
-/// May return (f64::MAX, f64::MIN), if there is no range.
+/// May return `(f64::MAX, f64::MIN)`, if there is no range.
 fn get_req_min_max(req: &HistogramAggregation, min_max: Option<(f64, f64)>) -> (f64, f64) {
    let (mut min, mut max) = min_max.unwrap_or((f64::MAX, f64::MIN));

@@ -1372,6 +1419,63 @@ mod tests {
        Ok(())
    }

+    #[test]
+    fn histogram_date_test_single_segment() -> crate::Result<()> {
+        histogram_date_test_with_opt(true)
+    }
+
+    #[test]
+    fn histogram_date_test_multi_segment() -> crate::Result<()> {
+        histogram_date_test_with_opt(false)
+    }
+
+    fn histogram_date_test_with_opt(merge_segments: bool) -> crate::Result<()> {
+        let index = get_test_index_2_segments(merge_segments)?;
+
+        let agg_req: Aggregations = vec![(
+            "histogram".to_string(),
+            Aggregation::Bucket(BucketAggregation {
+                bucket_agg: BucketAggregationType::Histogram(HistogramAggregation {
+                    field: "date".to_string(),
+                    interval: 86400000000.0, // one day in microseconds
+                    ..Default::default()
+                }),
+                sub_aggregation: Default::default(),
+            }),
+        )]
+        .into_iter()
+        .collect();
+
+        let agg_res = exec_request(agg_req, &index)?;
+
+        let res: Value = serde_json::from_str(&serde_json::to_string(&agg_res)?)?;
+
+        assert_eq!(res["histogram"]["buckets"][0]["key"], 1546300800000000.0);
+        assert_eq!(
+            res["histogram"]["buckets"][0]["key_as_string"],
+            "2019-01-01T00:00:00Z"
+        );
+        assert_eq!(res["histogram"]["buckets"][0]["doc_count"], 1);
+
+        assert_eq!(res["histogram"]["buckets"][1]["key"], 1546387200000000.0);
+        assert_eq!(
+            res["histogram"]["buckets"][1]["key_as_string"],
+            "2019-01-02T00:00:00Z"
+        );
+
+        assert_eq!(res["histogram"]["buckets"][1]["doc_count"], 5);
+
+        assert_eq!(res["histogram"]["buckets"][2]["key"], 1546473600000000.0);
+        assert_eq!(
+            res["histogram"]["buckets"][2]["key_as_string"],
+            "2019-01-03T00:00:00Z"
+        );
+
+        assert_eq!(res["histogram"]["buckets"][3], Value::Null);
+
+        Ok(())
+    }
+
    #[test]
    fn histogram_invalid_request() -> crate::Result<()> {
        let index = get_test_index_2_segments(true)?;
@@ -1438,4 +1542,36 @@ mod tests {

        Ok(())
    }
+
+    #[test]
+    fn histogram_test_max_buckets_segments() -> crate::Result<()> {
+        let values = vec![0.0, 70000.0];
+
+        let index = get_test_index_from_values(true, &values)?;
+
+        let agg_req: Aggregations = vec![(
+            "my_interval".to_string(),
+            Aggregation::Bucket(BucketAggregation {
+                bucket_agg: BucketAggregationType::Histogram(HistogramAggregation {
+                    field: "score_f64".to_string(),
+                    interval: 1.0,
+                    ..Default::default()
+                }),
+                sub_aggregation: Default::default(),
+            }),
+        )]
+        .into_iter()
+        .collect();
+
+        let res = exec_request(agg_req, &index);
+
+        assert_eq!(
+            res.unwrap_err().to_string(),
+            "An invalid argument was passed: 'Aborting aggregation because too many buckets were \
+             created'"
+                .to_string()
+        );
+
+        Ok(())
+    }
 }
--- a/src/aggregation/bucket/mod.rs
+++ b/src/aggregation/bucket/mod.rs
@@ -1,11 +1,11 @@
 //! Module for all bucket aggregations.
 //!
 //! BucketAggregations create buckets of documents
-//! [BucketAggregation](super::agg_req::BucketAggregation).
+//! [`BucketAggregation`](super::agg_req::BucketAggregation).
 //!
-//! Results of final buckets are [BucketResult](super::agg_result::BucketResult).
+//! Results of final buckets are [`BucketResult`](super::agg_result::BucketResult).
 //! Results of intermediate buckets are
-//! [IntermediateBucketResult](super::intermediate_agg_result::IntermediateBucketResult)
+//! [`IntermediateBucketResult`](super::intermediate_agg_result::IntermediateBucketResult)

 mod histogram;
 mod range;
--- a/src/aggregation/bucket/range.rs
+++ b/src/aggregation/bucket/range.rs
@@ -1,7 +1,8 @@
 use std::fmt::Debug;
 use std::ops::Range;

-use fnv::FnvHashMap;
+use fastfield_codecs::MonotonicallyMappableToU64;
+use rustc_hash::FxHashMap;
 use serde::{Deserialize, Serialize};

 use crate::aggregation::agg_req_with_accessor::{
@@ -11,8 +12,9 @@ use crate::aggregation::intermediate_agg_result::{
    IntermediateBucketResult, IntermediateRangeBucketEntry, IntermediateRangeBucketResult,
 };
 use crate::aggregation::segment_agg_result::{BucketCount, SegmentAggregationResultsCollector};
-use crate::aggregation::{f64_from_fastfield_u64, f64_to_fastfield_u64, Key, SerializedKey};
-use crate::fastfield::FastFieldReader;
+use crate::aggregation::{
+    f64_from_fastfield_u64, f64_to_fastfield_u64, format_date, Key, SerializedKey,
+};
 use crate::schema::Type;
 use crate::{DocId, TantivyError};

@@ -23,14 +25,14 @@ use crate::{DocId, TantivyError};
 /// against each bucket range. Note that this aggregation includes the from value and excludes the
 /// to value for each range.
 ///
-/// Result type is [BucketResult](crate::aggregation::agg_result::BucketResult) with
-/// [RangeBucketEntry](crate::aggregation::agg_result::RangeBucketEntry) on the
-/// AggregationCollector.
+/// Result type is [`BucketResult`](crate::aggregation::agg_result::BucketResult) with
+/// [`RangeBucketEntry`](crate::aggregation::agg_result::RangeBucketEntry) on the
+/// `AggregationCollector`.
 ///
 /// Result type is
-/// [crate::aggregation::intermediate_agg_result::IntermediateBucketResult] with
-/// [crate::aggregation::intermediate_agg_result::IntermediateRangeBucketEntry] on the
-/// DistributedAggregationCollector.
+/// [`IntermediateBucketResult`](crate::aggregation::intermediate_agg_result::IntermediateBucketResult) with
+/// [`IntermediateRangeBucketEntry`](crate::aggregation::intermediate_agg_result::IntermediateRangeBucketEntry) on the
+/// `DistributedAggregationCollector`.
 ///
 /// # Limitations/Compatibility
 /// Overlapping ranges are not yet supported.
@@ -68,11 +70,11 @@ pub struct RangeAggregationRange {
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub key: Option<String>,
    /// The from range value, which is inclusive in the range.
-    /// None equals to an open ended interval.
+    /// `None` equals to an open ended interval.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub from: Option<f64>,
    /// The to range value, which is not inclusive in the range.
-    /// None equals to an open ended interval.
+    /// `None` equals to an open ended interval.
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub to: Option<f64>,
 }
@@ -102,7 +104,7 @@ impl From<Range<f64>> for RangeAggregationRange {
 pub(crate) struct InternalRangeAggregationRange {
    /// Custom key for the range bucket
    key: Option<String>,
-    /// u64 range value
+    /// `u64` range value
    range: Range<u64>,
 }

@@ -132,9 +134,9 @@ pub(crate) struct SegmentRangeBucketEntry {
    pub key: Key,
    pub doc_count: u64,
    pub sub_aggregation: Option<SegmentAggregationResultsCollector>,
-    /// The from range of the bucket. Equals f64::MIN when None.
+    /// The from range of the bucket. Equals `f64::MIN` when `None`.
    pub from: Option<f64>,
-    /// The to range of the bucket. Equals f64::MAX when None. Open interval, `to` is not
+    /// The to range of the bucket. Equals `f64::MAX` when `None`. Open interval, `to` is not
    /// inclusive.
    pub to: Option<f64>,
 }
@@ -177,12 +179,12 @@ impl SegmentRangeCollector {
    ) -> crate::Result<IntermediateBucketResult> {
        let field_type = self.field_type;

-        let buckets: FnvHashMap<SerializedKey, IntermediateRangeBucketEntry> = self
+        let buckets: FxHashMap<SerializedKey, IntermediateRangeBucketEntry> = self
            .buckets
            .into_iter()
            .map(move |range_bucket| {
                Ok((
-                    range_to_string(&range_bucket.range, &field_type),
+                    range_to_string(&range_bucket.range, &field_type)?,
                    range_bucket
                        .bucket
                        .into_intermediate_bucket_entry(&agg_with_accessor.sub_aggregation)?,
@@ -210,8 +212,8 @@ impl SegmentRangeCollector {
                let key = range
                    .key
                    .clone()
-                    .map(|key| Key::Str(key))
-                    .unwrap_or(range_to_key(&range.range, &field_type));
+                    .map(|key| Ok(Key::Str(key)))
+                    .unwrap_or_else(|| range_to_key(&range.range, &field_type))?;
                let to = if range.range.end == u64::MAX {
                    None
                } else {
@@ -229,6 +231,7 @@ impl SegmentRangeCollector {
                        sub_aggregation,
                    )?)
                };
+
                Ok(SegmentRangeAndBucketEntry {
                    range: range.range.clone(),
                    bucket: SegmentRangeBucketEntry {
@@ -262,12 +265,12 @@ impl SegmentRangeCollector {
        let accessor = bucket_with_accessor
            .accessor
            .as_single()
-            .expect("unexpected fast field cardinatility");
+            .expect("unexpected fast field cardinality");
        for docs in iter.by_ref() {
-            let val1 = accessor.get(docs[0]);
-            let val2 = accessor.get(docs[1]);
-            let val3 = accessor.get(docs[2]);
-            let val4 = accessor.get(docs[3]);
+            let val1 = accessor.get_val(docs[0]);
+            let val2 = accessor.get_val(docs[1]);
+            let val3 = accessor.get_val(docs[2]);
+            let val4 = accessor.get_val(docs[3]);
            let bucket_pos1 = self.get_bucket_pos(val1);
            let bucket_pos2 = self.get_bucket_pos(val2);
            let bucket_pos3 = self.get_bucket_pos(val3);
@@ -278,10 +281,10 @@ impl SegmentRangeCollector {
            self.increment_bucket(bucket_pos3, docs[2], &bucket_with_accessor.sub_aggregation)?;
            self.increment_bucket(bucket_pos4, docs[3], &bucket_with_accessor.sub_aggregation)?;
        }
-        for doc in iter.remainder() {
-            let val = accessor.get(*doc);
+        for &doc in iter.remainder() {
+            let val = accessor.get_val(doc);
            let bucket_pos = self.get_bucket_pos(val);
-            self.increment_bucket(bucket_pos, *doc, &bucket_with_accessor.sub_aggregation)?;
+            self.increment_bucket(bucket_pos, doc, &bucket_with_accessor.sub_aggregation)?;
        }
        if force_flush {
            for bucket in &mut self.buckets {
@@ -324,8 +327,8 @@ impl SegmentRangeCollector {
 /// Converts the user provided f64 range value to fast field value space.
 ///
 /// Internally fast field values are always stored as u64.
-/// If the fast field has u64 [1,2,5], these values are stored as is in the fast field.
-/// A fast field with f64 [1.0, 2.0, 5.0] is converted to u64 space, using a
+/// If the fast field has u64 `[1, 2, 5]`, these values are stored as is in the fast field.
+/// A fast field with f64 `[1.0, 2.0, 5.0]` is converted to u64 space, using a
 /// monotonic mapping function, so the order is preserved.
 ///
 /// Consequently, a f64 user range 1.0..3.0 needs to be converted to fast field value space using
@@ -403,33 +406,45 @@ fn extend_validate_ranges(
    Ok(converted_buckets)
 }

-pub(crate) fn range_to_string(range: &Range<u64>, field_type: &Type) -> String {
+pub(crate) fn range_to_string(range: &Range<u64>, field_type: &Type) -> crate::Result<String> {
    // is_start is there for malformed requests, e.g. ig the user passes the range u64::MIN..0.0,
    // it should be rendered as "*-0" and not "*-*"
    let to_str = |val: u64, is_start: bool| {
        if (is_start && val == u64::MIN) || (!is_start && val == u64::MAX) {
-            "*".to_string()
+            Ok("*".to_string())
+        } else if *field_type == Type::Date {
+            let val = i64::from_u64(val);
+            format_date(val)
        } else {
-            f64_from_fastfield_u64(val, field_type).to_string()
+            Ok(f64_from_fastfield_u64(val, field_type).to_string())
        }
    };

-    format!("{}-{}", to_str(range.start, true), to_str(range.end, false))
+    Ok(format!(
+        "{}-{}",
+        to_str(range.start, true)?,
+        to_str(range.end, false)?
+    ))
 }

-pub(crate) fn range_to_key(range: &Range<u64>, field_type: &Type) -> Key {
-    Key::Str(range_to_string(range, field_type))
+pub(crate) fn range_to_key(range: &Range<u64>, field_type: &Type) -> crate::Result<Key> {
+    Ok(Key::Str(range_to_string(range, field_type)?))
 }

 #[cfg(test)]
 mod tests {

+    use fastfield_codecs::MonotonicallyMappableToU64;
+    use serde_json::Value;
+
    use super::*;
    use crate::aggregation::agg_req::{
        Aggregation, Aggregations, BucketAggregation, BucketAggregationType,
    };
-    use crate::aggregation::tests::{exec_request_with_query, get_test_index_with_num_docs};
-    use crate::fastfield::FastValue;
+    use crate::aggregation::tests::{
+        exec_request, exec_request_with_query, get_test_index_2_segments,
+        get_test_index_with_num_docs,
+    };

    pub fn get_collector_from_ranges(
        ranges: Vec<RangeAggregationRange>,
@@ -567,6 +582,77 @@ mod tests {
        Ok(())
    }

+    #[test]
+    fn range_date_test_single_segment() -> crate::Result<()> {
+        range_date_test_with_opt(true)
+    }
+
+    #[test]
+    fn range_date_test_multi_segment() -> crate::Result<()> {
+        range_date_test_with_opt(false)
+    }
+
+    fn range_date_test_with_opt(merge_segments: bool) -> crate::Result<()> {
+        let index = get_test_index_2_segments(merge_segments)?;
+
+        let agg_req: Aggregations = vec![(
+            "date_ranges".to_string(),
+            Aggregation::Bucket(BucketAggregation {
+                bucket_agg: BucketAggregationType::Range(RangeAggregation {
+                    field: "date".to_string(),
+                    ranges: vec![
+                        RangeAggregationRange {
+                            key: None,
+                            from: None,
+                            to: Some(1546300800000000.0f64),
+                        },
+                        RangeAggregationRange {
+                            key: None,
+                            from: Some(1546300800000000.0f64),
+                            to: Some(1546387200000000.0f64),
+                        },
+                    ],
+                    keyed: false,
+                }),
+                sub_aggregation: Default::default(),
+            }),
+        )]
+        .into_iter()
+        .collect();
+
+        let agg_res = exec_request(agg_req, &index)?;
+
+        let res: Value = serde_json::from_str(&serde_json::to_string(&agg_res)?)?;
+
+        assert_eq!(
+            res["date_ranges"]["buckets"][0]["from_as_string"],
+            Value::Null
+        );
+        assert_eq!(
+            res["date_ranges"]["buckets"][0]["key"],
+            "*-2019-01-01T00:00:00Z"
+        );
+        assert_eq!(
+            res["date_ranges"]["buckets"][1]["from_as_string"],
+            "2019-01-01T00:00:00Z"
+        );
+        assert_eq!(
+            res["date_ranges"]["buckets"][1]["to_as_string"],
+            "2019-01-02T00:00:00Z"
+        );
+
+        assert_eq!(
+            res["date_ranges"]["buckets"][2]["from_as_string"],
+            "2019-01-02T00:00:00Z"
+        );
+        assert_eq!(
+            res["date_ranges"]["buckets"][2]["to_as_string"],
+            Value::Null
+        );
+
+        Ok(())
+    }
+
    #[test]
    fn range_custom_key_keyed_buckets_test() -> crate::Result<()> {
        let index = get_test_index_with_num_docs(false, 100)?;
--- a/src/aggregation/bucket/term_agg.rs
+++ b/src/aggregation/bucket/term_agg.rs
@@ -1,7 +1,7 @@
 use std::fmt::Debug;

-use fnv::FnvHashMap;
 use itertools::Itertools;
+use rustc_hash::FxHashMap;
 use serde::{Deserialize, Serialize};

 use super::{CustomOrder, Order, OrderTarget};
@@ -17,7 +17,11 @@ use crate::fastfield::MultiValuedFastFieldReader;
 use crate::schema::Type;
 use crate::{DocId, TantivyError};

-/// Creates a bucket for every unique term
+/// Creates a bucket for every unique term and counts the number of occurences.
+/// Note that doc_count in the response buckets equals term count here.
+///
+/// If the text is untokenized and single value, that means one term per document and therefore it
+/// is in fact doc count.
 ///
 /// ### Terminology
 /// Shard parameters are supposed to be equivalent to elasticsearch shard parameter.
@@ -31,7 +35,7 @@ use crate::{DocId, TantivyError};
 ///
 /// Even with a larger `segment_size` value, doc_count values for a terms aggregation may be
 /// approximate. As a result, any sub-aggregations on the terms aggregation may also be approximate.
-/// `sum_other_doc_count` is the number of documents that didn’t make it into the the top size
+/// `sum_other_doc_count` is the number of documents that didn’t make it into the top size
 /// terms. If this is greater than 0, you can be sure that the terms agg had to throw away some
 /// buckets, either because they didn’t fit into size on the root node or they didn’t fit into
 /// `segment_size` on the segment node.
@@ -42,14 +46,14 @@ use crate::{DocId, TantivyError};
 /// each segment. It’s the sum of the size of the largest bucket on each segment that didn’t fit
 /// into segment_size.
 ///
-/// Result type is [BucketResult](crate::aggregation::agg_result::BucketResult) with
-/// [TermBucketEntry](crate::aggregation::agg_result::BucketEntry) on the
-/// AggregationCollector.
+/// Result type is [`BucketResult`](crate::aggregation::agg_result::BucketResult) with
+/// [`TermBucketEntry`](crate::aggregation::agg_result::BucketEntry) on the
+/// `AggregationCollector`.
 ///
 /// Result type is
-/// [crate::aggregation::intermediate_agg_result::IntermediateBucketResult] with
-/// [crate::aggregation::intermediate_agg_result::IntermediateTermBucketEntry] on the
-/// DistributedAggregationCollector.
+/// [`IntermediateBucketResult`](crate::aggregation::intermediate_agg_result::IntermediateBucketResult) with
+/// [`IntermediateTermBucketEntry`](crate::aggregation::intermediate_agg_result::IntermediateTermBucketEntry) on the
+/// `DistributedAggregationCollector`.
 ///
 /// # Limitations/Compatibility
 ///
@@ -64,6 +68,25 @@ use crate::{DocId, TantivyError};
 ///     }
 /// }
 /// ```
+///
+/// /// # Response JSON Format
+/// ```json
+/// {
+///     ...
+///     "aggregations": {
+///         "genres": {
+///             "doc_count_error_upper_bound": 0,   
+///             "sum_other_doc_count": 0,           
+///             "buckets": [                        
+///                 { "key": "drumnbass", "doc_count": 6 },
+///                 { "key": "raggae", "doc_count": 4 },
+///                 { "key": "jazz", "doc_count": 2 }
+///             ]
+///         }
+///     }
+/// }
+/// ```
+
 #[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
 pub struct TermsAggregation {
    /// The field to aggregate on.
@@ -110,8 +133,8 @@ pub struct TermsAggregation {
    /// Set the order. `String` is here a target, which is either "_count", "_key", or the name of
    /// a metric sub_aggregation.
    ///
-    /// Single value metrics like average can be adressed by its name.
-    /// Multi value metrics like stats are required to adress their field by name e.g.
+    /// Single value metrics like average can be addressed by its name.
+    /// Multi value metrics like stats are required to address their field by name e.g.
    /// "stats.avg"
    ///
    /// Examples in JSON format:
@@ -176,7 +199,7 @@ impl TermsAggregationInternal {
 #[derive(Clone, Debug, PartialEq)]
 /// Container to store term_ids and their buckets.
 struct TermBuckets {
-    pub(crate) entries: FnvHashMap<u32, TermBucketEntry>,
+    pub(crate) entries: FxHashMap<u32, TermBucketEntry>,
    blueprint: Option<SegmentAggregationResultsCollector>,
 }

@@ -374,7 +397,7 @@ impl SegmentTermCollector {
            .expect("internal error: inverted index not loaded for term aggregation");
        let term_dict = inverted_index.terms();

-        let mut dict: FnvHashMap<String, IntermediateTermBucketEntry> = Default::default();
+        let mut dict: FxHashMap<String, IntermediateTermBucketEntry> = Default::default();
        let mut buffer = vec![];
        for (term_id, entry) in entries {
            term_dict
@@ -1106,9 +1129,9 @@ mod tests {

        assert_eq!(res["my_texts"]["buckets"][0]["key"], "terma");
        assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 4);
-        assert_eq!(res["my_texts"]["buckets"][1]["key"], "termb");
+        assert_eq!(res["my_texts"]["buckets"][1]["key"], "termc");
        assert_eq!(res["my_texts"]["buckets"][1]["doc_count"], 0);
-        assert_eq!(res["my_texts"]["buckets"][2]["key"], "termc");
+        assert_eq!(res["my_texts"]["buckets"][2]["key"], "termb");
        assert_eq!(res["my_texts"]["buckets"][2]["doc_count"], 0);
        assert_eq!(res["my_texts"]["sum_other_doc_count"], 0);
        assert_eq!(res["my_texts"]["doc_count_error_upper_bound"], 0);
@@ -1206,11 +1229,43 @@ mod tests {
        .collect();

        let res = exec_request_with_query(agg_req, &index, None);
+
        assert!(res.is_err());

        Ok(())
    }

+    #[test]
+    fn terms_aggregation_multi_token_per_doc() -> crate::Result<()> {
+        let terms = vec!["Hello Hello", "Hallo Hallo"];
+
+        let index = get_test_index_from_terms(true, &[terms])?;
+
+        let agg_req: Aggregations = vec![(
+            "my_texts".to_string(),
+            Aggregation::Bucket(BucketAggregation {
+                bucket_agg: BucketAggregationType::Terms(TermsAggregation {
+                    field: "text_id".to_string(),
+                    min_doc_count: Some(0),
+                    ..Default::default()
+                }),
+                sub_aggregation: Default::default(),
+            }),
+        )]
+        .into_iter()
+        .collect();
+
+        let res = exec_request_with_query(agg_req, &index, None).unwrap();
+
+        assert_eq!(res["my_texts"]["buckets"][0]["key"], "hello");
+        assert_eq!(res["my_texts"]["buckets"][0]["doc_count"], 2);
+
+        assert_eq!(res["my_texts"]["buckets"][1]["key"], "hallo");
+        assert_eq!(res["my_texts"]["buckets"][1]["doc_count"], 2);
+
+        Ok(())
+    }
+
    #[test]
    fn test_json_format() -> crate::Result<()> {
        let agg_req: Aggregations = vec![(
--- a/src/aggregation/collector.rs
+++ b/src/aggregation/collector.rs
@@ -7,6 +7,7 @@ use super::intermediate_agg_result::IntermediateAggregationResults;
 use super::segment_agg_result::SegmentAggregationResultsCollector;
 use crate::aggregation::agg_req_with_accessor::get_aggs_with_accessor_and_validate;
 use crate::collector::{Collector, SegmentCollector};
+use crate::schema::Schema;
 use crate::{SegmentReader, TantivyError};

 /// The default max bucket count, before the aggregation fails.
@@ -16,6 +17,7 @@ pub const MAX_BUCKET_COUNT: u32 = 65000;
 ///
 /// The collector collects all aggregations by the underlying aggregation request.
 pub struct AggregationCollector {
+    schema: Schema,
    agg: Aggregations,
    max_bucket_count: u32,
 }
@@ -25,8 +27,9 @@ impl AggregationCollector {
    ///
    /// Aggregation fails when the total bucket count is higher than max_bucket_count.
    /// max_bucket_count will default to `MAX_BUCKET_COUNT` (65000) when unset
-    pub fn from_aggs(agg: Aggregations, max_bucket_count: Option<u32>) -> Self {
+    pub fn from_aggs(agg: Aggregations, max_bucket_count: Option<u32>, schema: Schema) -> Self {
        Self {
+            schema,
            agg,
            max_bucket_count: max_bucket_count.unwrap_or(MAX_BUCKET_COUNT),
        }
@@ -39,7 +42,7 @@ impl AggregationCollector {
 ///
 /// # Purpose
 /// AggregationCollector returns `IntermediateAggregationResults` and not the final
-/// `AggregationResults`, so that results from differenct indices can be merged and then converted
+/// `AggregationResults`, so that results from different indices can be merged and then converted
 /// into the final `AggregationResults` via the `into_final_result()` method.
 pub struct DistributedAggregationCollector {
    agg: Aggregations,
@@ -113,7 +116,7 @@ impl Collector for AggregationCollector {
        segment_fruits: Vec<<Self::Child as SegmentCollector>::Fruit>,
    ) -> crate::Result<Self::Fruit> {
        let res = merge_fruits(segment_fruits)?;
-        res.into_final_bucket_result(self.agg.clone())
+        res.into_final_bucket_result(self.agg.clone(), &self.schema)
    }
 }

@@ -131,7 +134,7 @@ fn merge_fruits(
    }
 }

-/// AggregationSegmentCollector does the aggregation collection on a segment.
+/// `AggregationSegmentCollector` does the aggregation collection on a segment.
 pub struct AggregationSegmentCollector {
    aggs_with_accessor: AggregationsWithAccessor,
    result: SegmentAggregationResultsCollector,
@@ -139,8 +142,8 @@ pub struct AggregationSegmentCollector {
 }

 impl AggregationSegmentCollector {
-    /// Creates an AggregationSegmentCollector from an [Aggregations] request and a segment reader.
-    /// Also includes validation, e.g. checking field types and existence.
+    /// Creates an `AggregationSegmentCollector from` an [`Aggregations`] request and a segment
+    /// reader. Also includes validation, e.g. checking field types and existence.
    pub fn from_agg_req_and_reader(
        agg: &Aggregations,
        reader: &SegmentReader,
--- a/src/aggregation/date.rs
+++ b/src/aggregation/date.rs
@@ -0,0 +1,18 @@
+use time::format_description::well_known::Rfc3339;
+use time::OffsetDateTime;
+
+use crate::TantivyError;
+
+pub(crate) fn format_date(val: i64) -> crate::Result<String> {
+    let datetime =
+        OffsetDateTime::from_unix_timestamp_nanos(1_000 * (val as i128)).map_err(|err| {
+            TantivyError::InvalidArgument(format!(
+                "Could not convert {:?} to OffsetDateTime, err {:?}",
+                val, err
+            ))
+        })?;
+    let key_as_string = datetime
+        .format(&Rfc3339)
+        .map_err(|_err| TantivyError::InvalidArgument("Could not serialize date".to_string()))?;
+    Ok(key_as_string)
+}
--- a/src/aggregation/intermediate_agg_result.rs
+++ b/src/aggregation/intermediate_agg_result.rs
@@ -3,15 +3,14 @@
 //! indices.

 use std::cmp::Ordering;
-use std::collections::HashMap;

-use fnv::FnvHashMap;
 use itertools::Itertools;
+use rustc_hash::FxHashMap;
 use serde::{Deserialize, Serialize};

 use super::agg_req::{
    Aggregations, AggregationsInternal, BucketAggregationInternal, BucketAggregationType,
-    MetricAggregation,
+    MetricAggregation, RangeAggregation,
 };
 use super::agg_result::{AggregationResult, BucketResult, RangeBucketEntry};
 use super::bucket::{
@@ -20,9 +19,11 @@ use super::bucket::{
 };
 use super::metric::{IntermediateAverage, IntermediateStats};
 use super::segment_agg_result::SegmentMetricResultCollector;
-use super::{Key, SerializedKey, VecWithNames};
+use super::{format_date, Key, SerializedKey, VecWithNames};
 use crate::aggregation::agg_result::{AggregationResults, BucketEntries, BucketEntry};
 use crate::aggregation::bucket::TermsAggregationInternal;
+use crate::schema::Schema;
+use crate::TantivyError;

 /// Contains the intermediate aggregation result, which is optimized to be merged with other
 /// intermediate results.
@@ -36,29 +37,34 @@ pub struct IntermediateAggregationResults {

 impl IntermediateAggregationResults {
    /// Convert intermediate result and its aggregation request to the final result.
-    pub fn into_final_bucket_result(self, req: Aggregations) -> crate::Result<AggregationResults> {
-        self.into_final_bucket_result_internal(&(req.into()))
+    pub fn into_final_bucket_result(
+        self,
+        req: Aggregations,
+        schema: &Schema,
+    ) -> crate::Result<AggregationResults> {
+        self.into_final_bucket_result_internal(&(req.into()), schema)
    }

    /// Convert intermediate result and its aggregation request to the final result.
    ///
    /// Internal function, AggregationsInternal is used instead Aggregations, which is optimized
-    /// for internal processing, by splitting metric and buckets into seperate groups.
+    /// for internal processing, by splitting metric and buckets into separate groups.
    pub(crate) fn into_final_bucket_result_internal(
        self,
        req: &AggregationsInternal,
+        schema: &Schema,
    ) -> crate::Result<AggregationResults> {
        // Important assumption:
        // When the tree contains buckets/metric, we expect it to have all buckets/metrics from the
        // request
-        let mut results: HashMap<String, AggregationResult> = HashMap::new();
+        let mut results: FxHashMap<String, AggregationResult> = FxHashMap::default();

        if let Some(buckets) = self.buckets {
-            convert_and_add_final_buckets_to_result(&mut results, buckets, &req.buckets)?
+            convert_and_add_final_buckets_to_result(&mut results, buckets, &req.buckets, schema)?
        } else {
            // When there are no buckets, we create empty buckets, so that the serialized json
            // format is constant
-            add_empty_final_buckets_to_result(&mut results, &req.buckets)?
+            add_empty_final_buckets_to_result(&mut results, &req.buckets, schema)?
        };

        if let Some(metrics) = self.metrics {
@@ -108,10 +114,10 @@ impl IntermediateAggregationResults {
        Self { metrics, buckets }
    }

-    /// Merge an other intermediate aggregation result into this result.
+    /// Merge another intermediate aggregation result into this result.
    ///
    /// The order of the values need to be the same on both results. This is ensured when the same
-    /// (key values) are present on the underlying VecWithNames struct.
+    /// (key values) are present on the underlying `VecWithNames` struct.
    pub fn merge_fruits(&mut self, other: IntermediateAggregationResults) {
        if let (Some(buckets_left), Some(buckets_right)) = (&mut self.buckets, other.buckets) {
            for (bucket_left, bucket_right) in
@@ -132,7 +138,7 @@ impl IntermediateAggregationResults {
 }

 fn convert_and_add_final_metrics_to_result(
-    results: &mut HashMap<String, AggregationResult>,
+    results: &mut FxHashMap<String, AggregationResult>,
    metrics: VecWithNames<IntermediateMetricResult>,
 ) {
    results.extend(
@@ -143,7 +149,7 @@ fn convert_and_add_final_metrics_to_result(
 }

 fn add_empty_final_metrics_to_result(
-    results: &mut HashMap<String, AggregationResult>,
+    results: &mut FxHashMap<String, AggregationResult>,
    req_metrics: &VecWithNames<MetricAggregation>,
 ) -> crate::Result<()> {
    results.extend(req_metrics.iter().map(|(key, req)| {
@@ -157,27 +163,30 @@ fn add_empty_final_metrics_to_result(
 }

 fn add_empty_final_buckets_to_result(
-    results: &mut HashMap<String, AggregationResult>,
+    results: &mut FxHashMap<String, AggregationResult>,
    req_buckets: &VecWithNames<BucketAggregationInternal>,
+    schema: &Schema,
 ) -> crate::Result<()> {
    let requested_buckets = req_buckets.iter();
    for (key, req) in requested_buckets {
-        let empty_bucket = AggregationResult::BucketResult(BucketResult::empty_from_req(req)?);
+        let empty_bucket =
+            AggregationResult::BucketResult(BucketResult::empty_from_req(req, schema)?);
        results.insert(key.to_string(), empty_bucket);
    }
    Ok(())
 }

 fn convert_and_add_final_buckets_to_result(
-    results: &mut HashMap<String, AggregationResult>,
+    results: &mut FxHashMap<String, AggregationResult>,
    buckets: VecWithNames<IntermediateBucketResult>,
    req_buckets: &VecWithNames<BucketAggregationInternal>,
+    schema: &Schema,
 ) -> crate::Result<()> {
    assert_eq!(buckets.len(), req_buckets.len());

    let buckets_with_request = buckets.into_iter().zip(req_buckets.values());
    for ((key, bucket), req) in buckets_with_request {
-        let result = AggregationResult::BucketResult(bucket.into_final_bucket_result(req)?);
+        let result = AggregationResult::BucketResult(bucket.into_final_bucket_result(req, schema)?);
        results.insert(key, result);
    }
    Ok(())
@@ -267,13 +276,21 @@ impl IntermediateBucketResult {
    pub(crate) fn into_final_bucket_result(
        self,
        req: &BucketAggregationInternal,
+        schema: &Schema,
    ) -> crate::Result<BucketResult> {
        match self {
            IntermediateBucketResult::Range(range_res) => {
                let mut buckets: Vec<RangeBucketEntry> = range_res
                    .buckets
-                    .into_iter()
-                    .map(|(_, bucket)| bucket.into_final_bucket_entry(&req.sub_aggregation))
+                    .into_values()
+                    .map(|bucket| {
+                        bucket.into_final_bucket_entry(
+                            &req.sub_aggregation,
+                            schema,
+                            req.as_range()
+                                .expect("unexpected aggregation, expected histogram aggregation"),
+                        )
+                    })
                    .collect::<crate::Result<Vec<_>>>()?;

                buckets.sort_by(|left, right| {
@@ -288,7 +305,7 @@ impl IntermediateBucketResult {
                    .keyed;
                let buckets = if is_keyed {
                    let mut bucket_map =
-                        FnvHashMap::with_capacity_and_hasher(buckets.len(), Default::default());
+                        FxHashMap::with_capacity_and_hasher(buckets.len(), Default::default());
                    for bucket in buckets {
                        bucket_map.insert(bucket.key.to_string(), bucket);
                    }
@@ -304,11 +321,12 @@ impl IntermediateBucketResult {
                    req.as_histogram()
                        .expect("unexpected aggregation, expected histogram aggregation"),
                    &req.sub_aggregation,
+                    schema,
                )?;

                let buckets = if req.as_histogram().unwrap().keyed {
                    let mut bucket_map =
-                        FnvHashMap::with_capacity_and_hasher(buckets.len(), Default::default());
+                        FxHashMap::with_capacity_and_hasher(buckets.len(), Default::default());
                    for bucket in buckets {
                        bucket_map.insert(bucket.key.to_string(), bucket);
                    }
@@ -322,6 +340,7 @@ impl IntermediateBucketResult {
                req.as_term()
                    .expect("unexpected aggregation, expected term aggregation"),
                &req.sub_aggregation,
+                schema,
            ),
        }
    }
@@ -396,13 +415,13 @@ impl IntermediateBucketResult {
 #[derive(Default, Clone, Debug, PartialEq, Serialize, Deserialize)]
 /// Range aggregation including error counts
 pub struct IntermediateRangeBucketResult {
-    pub(crate) buckets: FnvHashMap<SerializedKey, IntermediateRangeBucketEntry>,
+    pub(crate) buckets: FxHashMap<SerializedKey, IntermediateRangeBucketEntry>,
 }

 #[derive(Default, Clone, Debug, PartialEq, Serialize, Deserialize)]
 /// Term aggregation including error counts
 pub struct IntermediateTermBucketResult {
-    pub(crate) entries: FnvHashMap<String, IntermediateTermBucketEntry>,
+    pub(crate) entries: FxHashMap<String, IntermediateTermBucketEntry>,
    pub(crate) sum_other_doc_count: u64,
    pub(crate) doc_count_error_upper_bound: u64,
 }
@@ -412,6 +431,7 @@ impl IntermediateTermBucketResult {
        self,
        req: &TermsAggregation,
        sub_aggregation_req: &AggregationsInternal,
+        schema: &Schema,
    ) -> crate::Result<BucketResult> {
        let req = TermsAggregationInternal::from_req(req);
        let mut buckets: Vec<BucketEntry> = self
@@ -420,11 +440,12 @@ impl IntermediateTermBucketResult {
            .filter(|bucket| bucket.1.doc_count >= req.min_doc_count)
            .map(|(key, entry)| {
                Ok(BucketEntry {
+                    key_as_string: None,
                    key: Key::Str(key),
                    doc_count: entry.doc_count,
                    sub_aggregation: entry
                        .sub_aggregation
-                        .into_final_bucket_result_internal(sub_aggregation_req)?,
+                        .into_final_bucket_result_internal(sub_aggregation_req, schema)?,
                })
            })
            .collect::<crate::Result<_>>()?;
@@ -499,8 +520,8 @@ trait MergeFruits {
 }

 fn merge_maps<V: MergeFruits + Clone>(
-    entries_left: &mut FnvHashMap<SerializedKey, V>,
-    mut entries_right: FnvHashMap<SerializedKey, V>,
+    entries_left: &mut FxHashMap<SerializedKey, V>,
+    mut entries_right: FxHashMap<SerializedKey, V>,
 ) {
    for (name, entry_left) in entries_left.iter_mut() {
        if let Some(entry_right) = entries_right.remove(name) {
@@ -529,13 +550,15 @@ impl IntermediateHistogramBucketEntry {
    pub(crate) fn into_final_bucket_entry(
        self,
        req: &AggregationsInternal,
+        schema: &Schema,
    ) -> crate::Result<BucketEntry> {
        Ok(BucketEntry {
+            key_as_string: None,
            key: Key::F64(self.key),
            doc_count: self.doc_count,
            sub_aggregation: self
                .sub_aggregation
-                .into_final_bucket_result_internal(req)?,
+                .into_final_bucket_result_internal(req, schema)?,
        })
    }
 }
@@ -560,10 +583,10 @@ pub struct IntermediateRangeBucketEntry {
    pub doc_count: u64,
    /// The sub_aggregation in this bucket.
    pub sub_aggregation: IntermediateAggregationResults,
-    /// The from range of the bucket. Equals f64::MIN when None.
+    /// The from range of the bucket. Equals `f64::MIN` when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub from: Option<f64>,
-    /// The to range of the bucket. Equals f64::MAX when None.
+    /// The to range of the bucket. Equals `f64::MAX` when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub to: Option<f64>,
 }
@@ -572,16 +595,38 @@ impl IntermediateRangeBucketEntry {
    pub(crate) fn into_final_bucket_entry(
        self,
        req: &AggregationsInternal,
+        schema: &Schema,
+        range_req: &RangeAggregation,
    ) -> crate::Result<RangeBucketEntry> {
-        Ok(RangeBucketEntry {
+        let mut range_bucket_entry = RangeBucketEntry {
            key: self.key,
            doc_count: self.doc_count,
            sub_aggregation: self
                .sub_aggregation
-                .into_final_bucket_result_internal(req)?,
+                .into_final_bucket_result_internal(req, schema)?,
            to: self.to,
            from: self.from,
-        })
+            to_as_string: None,
+            from_as_string: None,
+        };
+
+        // If we have a date type on the histogram buckets, we add the `key_as_string` field as
+        // rfc339
+        let field = schema
+            .get_field(&range_req.field)
+            .ok_or_else(|| TantivyError::FieldNotFound(range_req.field.to_string()))?;
+        if schema.get_field_entry(field).field_type().is_date() {
+            if let Some(val) = range_bucket_entry.to {
+                let key_as_string = format_date(val as i64)?;
+                range_bucket_entry.to_as_string = Some(key_as_string);
+            }
+            if let Some(val) = range_bucket_entry.from {
+                let key_as_string = format_date(val as i64)?;
+                range_bucket_entry.from_as_string = Some(key_as_string);
+            }
+        }
+
+        Ok(range_bucket_entry)
    }
 }

@@ -626,7 +671,7 @@ mod tests {

    fn get_sub_test_tree(data: &[(String, u64)]) -> IntermediateAggregationResults {
        let mut map = HashMap::new();
-        let mut buckets = FnvHashMap::default();
+        let mut buckets = FxHashMap::default();
        for (key, doc_count) in data {
            buckets.insert(
                key.to_string(),
@@ -653,7 +698,7 @@ mod tests {
        data: &[(String, u64, String, u64)],
    ) -> IntermediateAggregationResults {
        let mut map = HashMap::new();
-        let mut buckets: FnvHashMap<_, _> = Default::default();
+        let mut buckets: FxHashMap<_, _> = Default::default();
        for (key, doc_count, sub_aggregation_key, sub_aggregation_count) in data {
            buckets.insert(
                key.to_string(),
--- a/src/aggregation/metric/average.rs
+++ b/src/aggregation/metric/average.rs
@@ -1,9 +1,9 @@
 use std::fmt::Debug;

+use fastfield_codecs::Column;
 use serde::{Deserialize, Serialize};

 use crate::aggregation::f64_from_fastfield_u64;
-use crate::fastfield::{DynamicFastFieldReader, FastFieldReader};
 use crate::schema::Type;
 use crate::DocId;

@@ -57,13 +57,13 @@ impl SegmentAverageCollector {
            data: Default::default(),
        }
    }
-    pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &DynamicFastFieldReader<u64>) {
+    pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &dyn Column<u64>) {
        let mut iter = doc.chunks_exact(4);
        for docs in iter.by_ref() {
-            let val1 = field.get(docs[0]);
-            let val2 = field.get(docs[1]);
-            let val3 = field.get(docs[2]);
-            let val4 = field.get(docs[3]);
+            let val1 = field.get_val(docs[0]);
+            let val2 = field.get_val(docs[1]);
+            let val3 = field.get_val(docs[2]);
+            let val4 = field.get_val(docs[3]);
            let val1 = f64_from_fastfield_u64(val1, &self.field_type);
            let val2 = f64_from_fastfield_u64(val2, &self.field_type);
            let val3 = f64_from_fastfield_u64(val3, &self.field_type);
@@ -73,8 +73,8 @@ impl SegmentAverageCollector {
            self.data.collect(val3);
            self.data.collect(val4);
        }
-        for doc in iter.remainder() {
-            let val = field.get(*doc);
+        for &doc in iter.remainder() {
+            let val = field.get_val(doc);
            let val = f64_from_fastfield_u64(val, &self.field_type);
            self.data.collect(val);
        }
--- a/src/aggregation/metric/stats.rs
+++ b/src/aggregation/metric/stats.rs
@@ -1,14 +1,14 @@
+use fastfield_codecs::Column;
 use serde::{Deserialize, Serialize};

 use crate::aggregation::f64_from_fastfield_u64;
-use crate::fastfield::{DynamicFastFieldReader, FastFieldReader};
 use crate::schema::Type;
 use crate::{DocId, TantivyError};

 /// A multi-value metric aggregation that computes stats of numeric values that are
 /// extracted from the aggregated documents.
-/// Supported field types are u64, i64, and f64.
-/// See [Stats] for returned statistics.
+/// Supported field types are `u64`, `i64`, and `f64`.
+/// See [`Stats`] for returned statistics.
 ///
 /// # JSON Format
 /// ```json
@@ -43,13 +43,13 @@ pub struct Stats {
    pub count: usize,
    /// The sum of the fast field values.
    pub sum: f64,
-    /// The standard deviation of the fast field values. None for count == 0.
+    /// The standard deviation of the fast field values. `None` for count == 0.
    pub standard_deviation: Option<f64>,
    /// The min value of the fast field values.
    pub min: Option<f64>,
    /// The max value of the fast field values.
    pub max: Option<f64>,
-    /// The average of the values. None for count == 0.
+    /// The average of the values. `None` for count == 0.
    pub avg: Option<f64>,
 }

@@ -70,7 +70,7 @@ impl Stats {
    }
 }

-/// IntermediateStats contains the mergeable version for stats.
+/// `IntermediateStats` contains the mergeable version for stats.
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 pub struct IntermediateStats {
    count: usize,
@@ -163,13 +163,13 @@ impl SegmentStatsCollector {
            stats: IntermediateStats::default(),
        }
    }
-    pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &DynamicFastFieldReader<u64>) {
+    pub(crate) fn collect_block(&mut self, doc: &[DocId], field: &dyn Column<u64>) {
        let mut iter = doc.chunks_exact(4);
        for docs in iter.by_ref() {
-            let val1 = field.get(docs[0]);
-            let val2 = field.get(docs[1]);
-            let val3 = field.get(docs[2]);
-            let val4 = field.get(docs[3]);
+            let val1 = field.get_val(docs[0]);
+            let val2 = field.get_val(docs[1]);
+            let val3 = field.get_val(docs[2]);
+            let val4 = field.get_val(docs[3]);
            let val1 = f64_from_fastfield_u64(val1, &self.field_type);
            let val2 = f64_from_fastfield_u64(val2, &self.field_type);
            let val3 = f64_from_fastfield_u64(val3, &self.field_type);
@@ -179,8 +179,8 @@ impl SegmentStatsCollector {
            self.stats.collect(val3);
            self.stats.collect(val4);
        }
-        for doc in iter.remainder() {
-            let val = field.get(*doc);
+        for &doc in iter.remainder() {
+            let val = field.get_val(doc);
            let val = f64_from_fastfield_u64(val, &self.field_type);
            self.stats.collect(val);
        }
@@ -222,7 +222,7 @@ mod tests {
        .into_iter()
        .collect();

-        let collector = AggregationCollector::from_aggs(agg_req_1, None);
+        let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

        let reader = index.reader()?;
        let searcher = reader.searcher();
@@ -300,7 +300,7 @@ mod tests {
        .into_iter()
        .collect();

-        let collector = AggregationCollector::from_aggs(agg_req_1, None);
+        let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

        let searcher = reader.searcher();
        let agg_res: AggregationResults = searcher.search(&term_query, &collector).unwrap();
--- a/src/aggregation/mod.rs
+++ b/src/aggregation/mod.rs
@@ -10,20 +10,19 @@
 //!
 //! There are two categories: [Metrics](metric) and [Buckets](bucket).
 //!
-//! # Usage
-//!
+//! ## Prerequisite
+//! Currently aggregations work only on [fast fields](`crate::fastfield`). Single value fast fields
+//! of type `u64`, `f64`, `i64`, `date` and fast fields on text fields.
 //!
+//! ## Usage
 //! To use aggregations, build an aggregation request by constructing
-//! [Aggregations](agg_req::Aggregations).
-//! Create an [AggregationCollector] from this request. AggregationCollector implements the
-//! `Collector` trait and can be passed as collector into `searcher.search()`.
+//! [`Aggregations`](agg_req::Aggregations).
+//! Create an [`AggregationCollector`] from this request. `AggregationCollector` implements the
+//! [`Collector`](crate::collector::Collector) trait and can be passed as collector into
+//! [`Searcher::search()`](crate::Searcher::search).
 //!
-//! #### Limitations
 //!
-//! Currently aggregations work only on single value fast fields of type u64, f64, i64 and
-//! fast fields on text fields.
-//!
-//! # JSON Format
+//! ## JSON Format
 //! Aggregations request and result structures de/serialize into elasticsearch compatible JSON.
 //!
 //! ```verbatim
@@ -34,7 +33,7 @@
 //! let json_response_string: String = &serde_json::to_string(&agg_res)?;
 //! ```
 //!
-//! # Supported Aggregations
+//! ## Supported Aggregations
 //! - [Bucket](bucket)
 //!     - [Histogram](bucket::HistogramAggregation)
 //!     - [Range](bucket::RangeAggregation)
@@ -44,8 +43,8 @@
 //!     - [Stats](metric::StatsAggregation)
 //!
 //! # Example
-//! Compute the average metric, by building [agg_req::Aggregations], which is built from an (String,
-//! [agg_req::Aggregation]) iterator.
+//! Compute the average metric, by building [`agg_req::Aggregations`], which is built from an
+//! `(String, agg_req::Aggregation)` iterator.
 //!
 //! ```
 //! use tantivy::aggregation::agg_req::{Aggregations, Aggregation, MetricAggregation};
@@ -54,9 +53,10 @@
 //! use tantivy::query::AllQuery;
 //! use tantivy::aggregation::agg_result::AggregationResults;
 //! use tantivy::IndexReader;
+//! use tantivy::schema::Schema;
 //!
 //! # #[allow(dead_code)]
-//! fn aggregate_on_index(reader: &IndexReader) {
+//! fn aggregate_on_index(reader: &IndexReader, schema: Schema) {
 //!     let agg_req: Aggregations = vec![
 //!     (
 //!             "average".to_string(),
@@ -68,7 +68,7 @@
 //!     .into_iter()
 //!     .collect();
 //!
-//!     let collector = AggregationCollector::from_aggs(agg_req, None);
+//!     let collector = AggregationCollector::from_aggs(agg_req, None, schema);
 //!
 //!     let searcher = reader.searcher();
 //!     let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap();
@@ -143,25 +143,25 @@
 //! ```
 //!
 //! # Distributed Aggregation
-//! When the data is distributed on different [crate::Index] instances, the
-//! [DistributedAggregationCollector] provides functionality to merge data between independent
+//! When the data is distributed on different [`Index`](crate::Index) instances, the
+//! [`DistributedAggregationCollector`] provides functionality to merge data between independent
 //! search calls by returning
-//! [IntermediateAggregationResults](intermediate_agg_result::IntermediateAggregationResults).
-//! IntermediateAggregationResults provides the
-//! [merge_fruits](intermediate_agg_result::IntermediateAggregationResults::merge_fruits) method to
-//! merge multiple results. The merged result can then be converted into
-//! [agg_result::AggregationResults] via the
-//! [agg_result::AggregationResults::from_intermediate_and_req] method.
+//! [`IntermediateAggregationResults`](intermediate_agg_result::IntermediateAggregationResults).
+//! `IntermediateAggregationResults` provides the
+//! [`merge_fruits`](intermediate_agg_result::IntermediateAggregationResults::merge_fruits) method
+//! to merge multiple results. The merged result can then be converted into
+//! [`AggregationResults`](agg_result::AggregationResults) via the
+//! [`into_final_bucket_result`](intermediate_agg_result::IntermediateAggregationResults::into_final_bucket_result) method.

 pub mod agg_req;
 mod agg_req_with_accessor;
 pub mod agg_result;
 pub mod bucket;
 mod collector;
+mod date;
 pub mod intermediate_agg_result;
 pub mod metric;
 mod segment_agg_result;
-
 use std::collections::HashMap;
 use std::fmt::Display;

@@ -169,10 +169,11 @@ pub use collector::{
    AggregationCollector, AggregationSegmentCollector, DistributedAggregationCollector,
    MAX_BUCKET_COUNT,
 };
+pub(crate) use date::format_date;
+use fastfield_codecs::MonotonicallyMappableToU64;
 use itertools::Itertools;
 use serde::{Deserialize, Serialize};

-use crate::fastfield::FastValue;
 use crate::schema::Type;

 /// Represents an associative array `(key => values)` in a very efficient manner.
@@ -260,7 +261,7 @@ impl<T: Clone> VecWithNames<T> {
    }
 }

-/// The serialized key is used in a HashMap.
+/// The serialized key is used in a `HashMap`.
 pub type SerializedKey = String;

 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize, PartialOrd)]
@@ -269,7 +270,7 @@ pub type SerializedKey = String;
 pub enum Key {
    /// String key
    Str(String),
-    /// f64 key
+    /// `f64` key
    F64(f64),
 }

@@ -282,14 +283,14 @@ impl Display for Key {
    }
 }

-/// Invert of to_fastfield_u64. Used to convert to f64 for metrics.
+/// Inverse of `to_fastfield_u64`. Used to convert to `f64` for metrics.
 ///
 /// # Panics
-/// Only u64, f64, i64 is supported
+/// Only `u64`, `f64`, `date`, and `i64` are supported.
 pub(crate) fn f64_from_fastfield_u64(val: u64, field_type: &Type) -> f64 {
    match field_type {
        Type::U64 => val as f64,
-        Type::I64 => i64::from_u64(val) as f64,
+        Type::I64 | Type::Date => i64::from_u64(val) as f64,
        Type::F64 => f64::from_u64(val),
        _ => {
            panic!("unexpected type {:?}. This should not happen", field_type)
@@ -297,20 +298,19 @@ pub(crate) fn f64_from_fastfield_u64(val: u64, field_type: &Type) -> f64 {
    }
 }

-/// Converts the f64 value to fast field value space.
+/// Converts the `f64` value to fast field value space, which is always u64.
 ///
-/// If the fast field has u64, values are stored as u64 in the fast field.
-/// A f64 value of e.g. 2.0 therefore needs to be converted to 1u64
+/// If the fast field has `u64`, values are stored unchanged as `u64` in the fast field.
 ///
-/// If the fast field has f64 values are converted and stored to u64 using a
+/// If the fast field has `f64` values are converted and stored to `u64` using a
 /// monotonic mapping.
-/// A f64 value of e.g. 2.0 needs to be converted using the same monotonic
-/// conversion function, so that the value matches the u64 value stored in the fast
+/// A `f64` value of e.g. `2.0` needs to be converted using the same monotonic
+/// conversion function, so that the value matches the `u64` value stored in the fast
 /// field.
 pub(crate) fn f64_to_fastfield_u64(val: f64, field_type: &Type) -> Option<u64> {
    match field_type {
        Type::U64 => Some(val as u64),
-        Type::I64 => Some((val as i64).to_u64()),
+        Type::I64 | Type::Date => Some((val as i64).to_u64()),
        Type::F64 => Some(val.to_u64()),
        _ => None,
    }
@@ -319,6 +319,7 @@ pub(crate) fn f64_to_fastfield_u64(val: f64, field_type: &Type) -> Option<u64> {
 #[cfg(test)]
 mod tests {
    use serde_json::Value;
+    use time::OffsetDateTime;

    use super::agg_req::{Aggregation, Aggregations, BucketAggregation};
    use super::bucket::RangeAggregation;
@@ -334,7 +335,7 @@ mod tests {
    use crate::aggregation::DistributedAggregationCollector;
    use crate::query::{AllQuery, TermQuery};
    use crate::schema::{Cardinality, IndexRecordOption, Schema, TextFieldIndexing, FAST, STRING};
-    use crate::{Index, Term};
+    use crate::{DateTime, Index, Term};

    fn get_avg_req(field_name: &str) -> Aggregation {
        Aggregation::Metric(MetricAggregation::Average(
@@ -360,7 +361,7 @@ mod tests {
        index: &Index,
        query: Option<(&str, &str)>,
    ) -> crate::Result<Value> {
-        let collector = AggregationCollector::from_aggs(agg_req, None);
+        let collector = AggregationCollector::from_aggs(agg_req, None, index.schema());

        let reader = index.reader()?;
        let searcher = reader.searcher();
@@ -450,9 +451,9 @@ mod tests {
                        text_field_id => term.to_string(),
                        string_field_id => term.to_string(),
                        score_field => i as u64,
-                        score_field_f64 => i as f64,
+                        score_field_f64 => i,
                        score_field_i64 => i as i64,
-                        fraction_field => i as f64/100.0,
+                        fraction_field => i/100.0,
                    ))?;
                }
                index_writer.commit()?;
@@ -554,10 +555,10 @@ mod tests {
            let searcher = reader.searcher();
            let intermediate_agg_result = searcher.search(&AllQuery, &collector).unwrap();
            intermediate_agg_result
-                .into_final_bucket_result(agg_req)
+                .into_final_bucket_result(agg_req, &index.schema())
                .unwrap()
        } else {
-            let collector = AggregationCollector::from_aggs(agg_req, None);
+            let collector = AggregationCollector::from_aggs(agg_req, None, index.schema());

            let searcher = reader.searcher();
            searcher.search(&AllQuery, &collector).unwrap()
@@ -650,6 +651,7 @@ mod tests {
            .set_fast()
            .set_stored();
        let text_field = schema_builder.add_text_field("text", text_fieldtype);
+        let date_field = schema_builder.add_date_field("date", FAST);
        schema_builder.add_text_field("dummy_text", STRING);
        let score_fieldtype =
            crate::schema::NumericOptions::default().set_fast(Cardinality::SingleValue);
@@ -667,6 +669,7 @@ mod tests {
            // writing the segment
            index_writer.add_document(doc!(
                text_field => "cool",
+                date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800).unwrap()),
                score_field => 1u64,
                score_field_f64 => 1f64,
                score_field_i64 => 1i64,
@@ -675,6 +678,7 @@ mod tests {
            ))?;
            index_writer.add_document(doc!(
                text_field => "cool",
+                date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400).unwrap()),
                score_field => 3u64,
                score_field_f64 => 3f64,
                score_field_i64 => 3i64,
@@ -683,18 +687,21 @@ mod tests {
            ))?;
            index_writer.add_document(doc!(
                text_field => "cool",
+                date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400).unwrap()),
                score_field => 5u64,
                score_field_f64 => 5f64,
                score_field_i64 => 5i64,
            ))?;
            index_writer.add_document(doc!(
                text_field => "nohit",
+                date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400).unwrap()),
                score_field => 6u64,
                score_field_f64 => 6f64,
                score_field_i64 => 6i64,
            ))?;
            index_writer.add_document(doc!(
                text_field => "cool",
+                date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400).unwrap()),
                score_field => 7u64,
                score_field_f64 => 7f64,
                score_field_i64 => 7i64,
@@ -702,12 +709,14 @@ mod tests {
            index_writer.commit()?;
            index_writer.add_document(doc!(
                text_field => "cool",
+                date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400).unwrap()),
                score_field => 11u64,
                score_field_f64 => 11f64,
                score_field_i64 => 11i64,
            ))?;
            index_writer.add_document(doc!(
                text_field => "cool",
+                date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400 + 86400).unwrap()),
                score_field => 14u64,
                score_field_f64 => 14f64,
                score_field_i64 => 14i64,
@@ -715,6 +724,7 @@ mod tests {

            index_writer.add_document(doc!(
                text_field => "cool",
+                date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400 + 86400).unwrap()),
                score_field => 44u64,
                score_field_f64 => 44.5f64,
                score_field_i64 => 44i64,
@@ -725,6 +735,7 @@ mod tests {
            // no hits segment
            index_writer.add_document(doc!(
                text_field => "nohit",
+                date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400 + 86400).unwrap()),
                score_field => 44u64,
                score_field_f64 => 44.5f64,
                score_field_i64 => 44i64,
@@ -797,7 +808,7 @@ mod tests {
        .into_iter()
        .collect();

-        let collector = AggregationCollector::from_aggs(agg_req_1, None);
+        let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

        let searcher = reader.searcher();
        let agg_res: AggregationResults = searcher.search(&term_query, &collector).unwrap();
@@ -997,9 +1008,10 @@ mod tests {
            // Test de/serialization roundtrip on intermediate_agg_result
            let res: IntermediateAggregationResults =
                serde_json::from_str(&serde_json::to_string(&res).unwrap()).unwrap();
-            res.into_final_bucket_result(agg_req.clone()).unwrap()
+            res.into_final_bucket_result(agg_req.clone(), &index.schema())
+                .unwrap()
        } else {
-            let collector = AggregationCollector::from_aggs(agg_req.clone(), None);
+            let collector = AggregationCollector::from_aggs(agg_req.clone(), None, index.schema());

            let searcher = reader.searcher();
            searcher.search(&term_query, &collector).unwrap()
@@ -1057,7 +1069,7 @@ mod tests {
        );

        // Test empty result set
-        let collector = AggregationCollector::from_aggs(agg_req, None);
+        let collector = AggregationCollector::from_aggs(agg_req, None, index.schema());
        let searcher = reader.searcher();
        searcher.search(&query_with_no_hits, &collector).unwrap();

@@ -1122,7 +1134,7 @@ mod tests {
            .into_iter()
            .collect();

-            let collector = AggregationCollector::from_aggs(agg_req_1, None);
+            let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

            let searcher = reader.searcher();

@@ -1235,7 +1247,7 @@ mod tests {
                .into_iter()
                .collect();

-                let collector = AggregationCollector::from_aggs(agg_req_1, None);
+                let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

                let searcher = reader.searcher();
                let agg_res: AggregationResults =
@@ -1266,7 +1278,7 @@ mod tests {
                .into_iter()
                .collect();

-                let collector = AggregationCollector::from_aggs(agg_req_1, None);
+                let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

                let searcher = reader.searcher();
                let agg_res: AggregationResults =
@@ -1297,7 +1309,7 @@ mod tests {
                .into_iter()
                .collect();

-                let collector = AggregationCollector::from_aggs(agg_req_1, None);
+                let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

                let searcher = reader.searcher();
                let agg_res: AggregationResults =
@@ -1336,7 +1348,7 @@ mod tests {
                .into_iter()
                .collect();

-                let collector = AggregationCollector::from_aggs(agg_req_1, None);
+                let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

                let searcher = reader.searcher();
                let agg_res: AggregationResults =
@@ -1365,7 +1377,7 @@ mod tests {
                .into_iter()
                .collect();

-                let collector = AggregationCollector::from_aggs(agg_req, None);
+                let collector = AggregationCollector::from_aggs(agg_req, None, index.schema());

                let searcher = reader.searcher();
                let agg_res: AggregationResults =
@@ -1394,7 +1406,7 @@ mod tests {
                .into_iter()
                .collect();

-                let collector = AggregationCollector::from_aggs(agg_req, None);
+                let collector = AggregationCollector::from_aggs(agg_req, None, index.schema());

                let searcher = reader.searcher();
                let agg_res: AggregationResults =
@@ -1431,7 +1443,7 @@ mod tests {
                .into_iter()
                .collect();

-                let collector = AggregationCollector::from_aggs(agg_req_1, None);
+                let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

                let searcher = reader.searcher();
                let agg_res: AggregationResults =
@@ -1466,7 +1478,7 @@ mod tests {
                .into_iter()
                .collect();

-                let collector = AggregationCollector::from_aggs(agg_req_1, None);
+                let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

                let searcher = reader.searcher();
                let agg_res: AggregationResults =
@@ -1505,7 +1517,7 @@ mod tests {
                .into_iter()
                .collect();

-                let collector = AggregationCollector::from_aggs(agg_req_1, None);
+                let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

                let searcher = reader.searcher();
                let agg_res: AggregationResults =
@@ -1535,7 +1547,7 @@ mod tests {
                .into_iter()
                .collect();

-                let collector = AggregationCollector::from_aggs(agg_req_1, None);
+                let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

                let searcher = reader.searcher();
                let agg_res: AggregationResults =
@@ -1592,7 +1604,7 @@ mod tests {
                .into_iter()
                .collect();

-                let collector = AggregationCollector::from_aggs(agg_req_1, None);
+                let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema());

                let searcher = reader.searcher();
                let agg_res: AggregationResults =
--- a/src/aggregation/segment_agg_result.rs
+++ b/src/aggregation/segment_agg_result.rs
@@ -185,10 +185,10 @@ impl SegmentMetricResultCollector {
    pub(crate) fn collect_block(&mut self, doc: &[DocId], metric: &MetricAggregationWithAccessor) {
        match self {
            SegmentMetricResultCollector::Average(avg_collector) => {
-                avg_collector.collect_block(doc, &metric.accessor);
+                avg_collector.collect_block(doc, &*metric.accessor);
            }
            SegmentMetricResultCollector::Stats(stats_collector) => {
-                stats_collector.collect_block(doc, &metric.accessor);
+                stats_collector.collect_block(doc, &*metric.accessor);
            }
        }
    }
@@ -305,7 +305,7 @@ impl BucketCount {
    }
    pub(crate) fn add_count(&self, count: u32) {
        self.bucket_count
-            .fetch_add(count as u32, std::sync::atomic::Ordering::Relaxed);
+            .fetch_add(count, std::sync::atomic::Ordering::Relaxed);
    }
    pub(crate) fn get_count(&self) -> u32 {
        self.bucket_count.load(std::sync::atomic::Ordering::Relaxed)
--- a/src/collector/custom_score_top_collector.rs
+++ b/src/collector/custom_score_top_collector.rs
@@ -24,7 +24,7 @@ where TScore: Clone + PartialOrd
 /// A custom segment scorer makes it possible to define any kind of score
 /// for a given document belonging to a specific segment.
 ///
-/// It is the segment local version of the [`CustomScorer`](./trait.CustomScorer.html).
+/// It is the segment local version of the [`CustomScorer`].
 pub trait CustomSegmentScorer<TScore>: 'static {
    /// Computes the score of a specific `doc`.
    fn score(&mut self, doc: DocId) -> TScore;
@@ -36,9 +36,9 @@ pub trait CustomSegmentScorer<TScore>: 'static {
 /// Instead, it helps constructing `Self::Child` instances that will compute
 /// the score at a segment scale.
 pub trait CustomScorer<TScore>: Sync {
-    /// Type of the associated [`CustomSegmentScorer`](./trait.CustomSegmentScorer.html).
+    /// Type of the associated [`CustomSegmentScorer`].
    type Child: CustomSegmentScorer<TScore>;
-    /// Builds a child scorer for a specific segment. The child scorer is associated to
+    /// Builds a child scorer for a specific segment. The child scorer is associated with
    /// a specific segment.
    fn segment_scorer(&self, segment_reader: &SegmentReader) -> crate::Result<Self::Child>;
 }
--- a/src/collector/facet_collector.rs
+++ b/src/collector/facet_collector.rs
@@ -67,10 +67,10 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
 /// (e.g. `/category/fiction`, `/category/biography`, `/category/personal_development`).
 ///
 /// Once collection is finished, you can harvest its results in the form
-/// of a `FacetCounts` object, and extract your face                t counts from it.
+/// of a [`FacetCounts`] object, and extract your facet counts from it.
 ///
 /// This implementation assumes you are working with a number of facets that
-/// is much hundreds of time lower than your number of documents.
+/// is many hundreds of times smaller than your number of documents.
 ///
 ///
 /// ```rust
@@ -91,7 +91,7 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
 ///     let index = Index::create_in_ram(schema);
 ///     {
 ///         let mut index_writer = index.writer(3_000_000)?;
-///         // a document can be associated to any number of facets
+///         // a document can be associated with any number of facets
 ///         index_writer.add_document(doc!(
 ///             title => "The Name of the Wind",
 ///             facet => Facet::from("/lang/en"),
@@ -231,7 +231,7 @@ impl FacetCollector {
    ///
    /// Adding two facets within which one is the prefix of the other is forbidden.
    /// If you need the correct number of unique documents for two such facets,
-    /// just add them in separate `FacetCollector`.
+    /// just add them in a separate `FacetCollector`.
    pub fn add_facet<T>(&mut self, facet_from: T)
    where Facet: From<T> {
        let facet = Facet::from(facet_from);
@@ -338,11 +338,7 @@ impl SegmentCollector for FacetSegmentCollector {
        let mut previous_collapsed_ord: usize = usize::MAX;
        for &facet_ord in &self.facet_ords_buf {
            let collapsed_ord = self.collapse_mapping[facet_ord as usize];
-            self.counts[collapsed_ord] += if collapsed_ord == previous_collapsed_ord {
-                0
-            } else {
-                1
-            };
+            self.counts[collapsed_ord] += u64::from(collapsed_ord != previous_collapsed_ord);
            previous_collapsed_ord = collapsed_ord;
        }
    }
@@ -361,7 +357,7 @@ impl SegmentCollector for FacetSegmentCollector {
            let mut facet = vec![];
            let facet_ord = self.collapse_facet_ords[collapsed_facet_ord];
            // TODO handle errors.
-            if facet_dict.ord_to_term(facet_ord as u64, &mut facet).is_ok() {
+            if facet_dict.ord_to_term(facet_ord, &mut facet).is_ok() {
                if let Ok(facet) = Facet::from_encoded(facet) {
                    facet_counts.insert(facet, count);
                }
@@ -391,7 +387,7 @@ impl<'a> Iterator for FacetChildIterator<'a> {

 impl FacetCounts {
    /// Returns an iterator over all of the facet count pairs inside this result.
-    /// See the documentation for [FacetCollector] for a usage example.
+    /// See the documentation for [`FacetCollector`] for a usage example.
    pub fn get<T>(&self, facet_from: T) -> FacetChildIterator<'_>
    where Facet: From<T> {
        let facet = Facet::from(facet_from);
@@ -410,7 +406,7 @@ impl FacetCounts {
    }

    /// Returns a vector of top `k` facets with their counts, sorted highest-to-lowest by counts.
-    /// See the documentation for [FacetCollector] for a usage example.
+    /// See the documentation for [`FacetCollector`] for a usage example.
    pub fn top_k<T>(&self, facet: T, k: usize) -> Vec<(&Facet, u64)>
    where Facet: From<T> {
        let mut heap = BinaryHeap::with_capacity(k);
@@ -620,7 +616,7 @@ mod tests {
            .map(|mut doc| {
                doc.add_facet(
                    facet_field,
-                    &format!("/facet/{}", thread_rng().sample(&uniform)),
+                    &format!("/facet/{}", thread_rng().sample(uniform)),
                );
                doc
            })
--- a/src/collector/filter_collector_wrapper.rs
+++ b/src/collector/filter_collector_wrapper.rs
@@ -10,9 +10,12 @@
 // ---
 // Importing tantivy...
 use std::marker::PhantomData;
+use std::sync::Arc;
+
+use fastfield_codecs::Column;

 use crate::collector::{Collector, SegmentCollector};
-use crate::fastfield::{DynamicFastFieldReader, FastFieldReader, FastValue};
+use crate::fastfield::FastValue;
 use crate::schema::Field;
 use crate::{Score, SegmentReader, TantivyError};

@@ -158,7 +161,7 @@ where
    TPredicate: 'static,
    TPredicateValue: FastValue,
 {
-    fast_field_reader: DynamicFastFieldReader<TPredicateValue>,
+    fast_field_reader: Arc<dyn Column<TPredicateValue>>,
    segment_collector: TSegmentCollector,
    predicate: TPredicate,
    t_predicate_value: PhantomData<TPredicateValue>,
@@ -174,7 +177,7 @@ where
    type Fruit = TSegmentCollector::Fruit;

    fn collect(&mut self, doc: u32, score: Score) {
-        let value = self.fast_field_reader.get(doc);
+        let value = self.fast_field_reader.get_val(doc);
        if (self.predicate)(value) {
            self.segment_collector.collect(doc, score)
        }
--- a/src/collector/histogram_collector.rs
+++ b/src/collector/histogram_collector.rs
@@ -1,7 +1,10 @@
+use std::sync::Arc;
+
 use fastdivide::DividerU64;
+use fastfield_codecs::Column;

 use crate::collector::{Collector, SegmentCollector};
-use crate::fastfield::{DynamicFastFieldReader, FastFieldReader, FastValue};
+use crate::fastfield::FastValue;
 use crate::schema::{Field, Type};
 use crate::{DocId, Score};

@@ -34,7 +37,7 @@ impl HistogramCollector {
    /// The scale/range of the histogram is not dynamic. It is required to
    /// define it by supplying following parameter:
    ///  - `min_value`: the minimum value that can be recorded in the histogram.
-    ///  - `bucket_width`: the length of the interval that is associated to each buckets.
+    ///  - `bucket_width`: the length of the interval that is associated with each buckets.
    ///  - `num_buckets`: The overall number of buckets.
    ///
    /// Together, this parameters define a partition of `[min_value, min_value + num_buckets *
@@ -84,14 +87,14 @@ impl HistogramComputer {
 }
 pub struct SegmentHistogramCollector {
    histogram_computer: HistogramComputer,
-    ff_reader: DynamicFastFieldReader<u64>,
+    ff_reader: Arc<dyn Column<u64>>,
 }

 impl SegmentCollector for SegmentHistogramCollector {
    type Fruit = Vec<u64>;

    fn collect(&mut self, doc: DocId, _score: Score) {
-        let value = self.ff_reader.get(doc);
+        let value = self.ff_reader.get_val(doc);
        self.histogram_computer.add_value(value);
    }

--- a/src/collector/mod.rs
+++ b/src/collector/mod.rs
@@ -4,13 +4,13 @@
 //! In tantivy jargon, we call this information your search "fruit".
 //!
 //! Your fruit could for instance be :
-//! - [the count of matching documents](./struct.Count.html)
-//! - [the top 10 documents, by relevancy or by a fast field](./struct.TopDocs.html)
-//! - [facet counts](./struct.FacetCollector.html)
+//! - [the count of matching documents](crate::collector::Count)
+//! - [the top 10 documents, by relevancy or by a fast field](crate::collector::TopDocs)
+//! - [facet counts](FacetCollector)
 //!
-//! At one point in your code, you will trigger the actual search operation by calling
-//! [the `search(...)` method of your `Searcher` object](../struct.Searcher.html#method.search).
-//! This call will look like this.
+//! At some point in your code, you will trigger the actual search operation by calling
+//! [`Searcher::search()`](crate::Searcher::search).
+//! This call will look like this:
 //!
 //! ```verbatim
 //! let fruit = searcher.search(&query, &collector)?;
@@ -64,7 +64,7 @@
 //!
 //! The `Collector` trait is implemented for up to 4 collectors.
 //! If you have more than 4 collectors, you can either group them into
-//! tuples of tuples `(a,(b,(c,d)))`, or rely on [`MultiCollector`](./struct.MultiCollector.html).
+//! tuples of tuples `(a,(b,(c,d)))`, or rely on [`MultiCollector`].
 //!
 //! # Combining several collectors dynamically
 //!
@@ -74,7 +74,7 @@
 //!
 //! Unfortunately it requires you to know at compile time your collector types.
 //! If on the other hand, the collectors depend on some query parameter,
-//! you can rely on `MultiCollector`'s.
+//! you can rely on [`MultiCollector`]'s.
 //!
 //!
 //! # Implementing your own collectors.
@@ -142,7 +142,7 @@ pub trait Collector: Sync + Send {
    /// e.g. `usize` for the `Count` collector.
    type Fruit: Fruit;

-    /// Type of the `SegmentCollector` associated to this collector.
+    /// Type of the `SegmentCollector` associated with this collector.
    type Child: SegmentCollector;

    /// `set_segment` is called before beginning to enumerate
@@ -156,7 +156,7 @@ pub trait Collector: Sync + Send {
    /// Returns true iff the collector requires to compute scores for documents.
    fn requires_scoring(&self) -> bool;

-    /// Combines the fruit associated to the collection of each segments
+    /// Combines the fruit associated with the collection of each segments
    /// into one fruit.
    fn merge_fruits(
        &self,
@@ -170,19 +170,35 @@ pub trait Collector: Sync + Send {
        segment_ord: u32,
        reader: &SegmentReader,
    ) -> crate::Result<<Self::Child as SegmentCollector>::Fruit> {
-        let mut segment_collector = self.for_segment(segment_ord as u32, reader)?;
+        let mut segment_collector = self.for_segment(segment_ord, reader)?;

-        if let Some(alive_bitset) = reader.alive_bitset() {
-            weight.for_each(reader, &mut |doc, score| {
-                if alive_bitset.is_alive(doc) {
+        match (reader.alive_bitset(), self.requires_scoring()) {
+            (Some(alive_bitset), true) => {
+                weight.for_each(reader, &mut |doc, score| {
+                    if alive_bitset.is_alive(doc) {
+                        segment_collector.collect(doc, score);
+                    }
+                })?;
+            }
+            (Some(alive_bitset), false) => {
+                weight.for_each_no_score(reader, &mut |doc| {
+                    if alive_bitset.is_alive(doc) {
+                        segment_collector.collect(doc, 0.0);
+                    }
+                })?;
+            }
+            (None, true) => {
+                weight.for_each(reader, &mut |doc, score| {
                    segment_collector.collect(doc, score);
-                }
-            })?;
-        } else {
-            weight.for_each(reader, &mut |doc, score| {
-                segment_collector.collect(doc, score);
-            })?;
+                })?;
+            }
+            (None, false) => {
+                weight.for_each_no_score(reader, &mut |doc| {
+                    segment_collector.collect(doc, 0.0);
+                })?;
+            }
        }
+
        Ok(segment_collector.harvest())
    }
 }
--- a/src/collector/tests.rs
+++ b/src/collector/tests.rs
@@ -1,7 +1,11 @@
+use std::sync::Arc;
+
+use fastfield_codecs::Column;
+
 use super::*;
 use crate::collector::{Count, FilterCollector, TopDocs};
 use crate::core::SegmentReader;
-use crate::fastfield::{BytesFastFieldReader, DynamicFastFieldReader, FastFieldReader};
+use crate::fastfield::BytesFastFieldReader;
 use crate::query::{AllQuery, QueryParser};
 use crate::schema::{Field, Schema, FAST, TEXT};
 use crate::time::format_description::well_known::Rfc3339;
@@ -156,7 +160,7 @@ pub struct FastFieldTestCollector {

 pub struct FastFieldSegmentCollector {
    vals: Vec<u64>,
-    reader: DynamicFastFieldReader<u64>,
+    reader: Arc<dyn Column<u64>>,
 }

 impl FastFieldTestCollector {
@@ -197,7 +201,7 @@ impl SegmentCollector for FastFieldSegmentCollector {
    type Fruit = Vec<u64>;

    fn collect(&mut self, doc: DocId, _score: Score) {
-        let val = self.reader.get(doc);
+        let val = self.reader.get_val(doc);
        self.vals.push(val);
    }

--- a/src/collector/top_score_collector.rs
+++ b/src/collector/top_score_collector.rs
@@ -1,6 +1,9 @@
 use std::collections::BinaryHeap;
 use std::fmt;
 use std::marker::PhantomData;
+use std::sync::Arc;
+
+use fastfield_codecs::Column;

 use super::Collector;
 use crate::collector::custom_score_top_collector::CustomScoreTopCollector;
@@ -9,7 +12,7 @@ use crate::collector::tweak_score_top_collector::TweakedScoreTopCollector;
 use crate::collector::{
    CustomScorer, CustomSegmentScorer, ScoreSegmentTweaker, ScoreTweaker, SegmentCollector,
 };
-use crate::fastfield::{DynamicFastFieldReader, FastFieldReader, FastValue};
+use crate::fastfield::FastValue;
 use crate::query::Weight;
 use crate::schema::Field;
 use crate::{DocAddress, DocId, Score, SegmentOrdinal, SegmentReader, TantivyError};
@@ -129,12 +132,12 @@ impl fmt::Debug for TopDocs {
 }

 struct ScorerByFastFieldReader {
-    ff_reader: DynamicFastFieldReader<u64>,
+    ff_reader: Arc<dyn Column<u64>>,
 }

 impl CustomSegmentScorer<u64> for ScorerByFastFieldReader {
    fn score(&mut self, doc: DocId) -> u64 {
-        self.ff_reader.get(doc)
+        self.ff_reader.get_val(doc)
    }
 }

@@ -284,7 +287,7 @@ impl TopDocs {
    /// # See also
    ///
    /// To comfortably work with `u64`s, `i64`s, `f64`s, or `date`s, please refer to
-    /// [.order_by_fast_field(...)](#method.order_by_fast_field) method.
+    /// the [.order_by_fast_field(...)](TopDocs::order_by_fast_field) method.
    pub fn order_by_u64_field(
        self,
        field: Field,
@@ -381,7 +384,7 @@ impl TopDocs {
    ///
    /// This method offers a convenient way to tweak or replace
    /// the documents score. As suggested by the prototype you can
-    /// manually define your own [`ScoreTweaker`](./trait.ScoreTweaker.html)
+    /// manually define your own [`ScoreTweaker`]
    /// and pass it as an argument, but there is a much simpler way to
    /// tweak your score: you can use a closure as in the following
    /// example.
@@ -398,7 +401,7 @@ impl TopDocs {
    /// In the following example will will tweak our ranking a bit by
    /// boosting popular products a notch.
    ///
-    /// In more serious application, this tweaking could involved running a
+    /// In more serious application, this tweaking could involve running a
    /// learning-to-rank model over various features
    ///
    /// ```rust
@@ -407,7 +410,6 @@ impl TopDocs {
    /// # use tantivy::query::QueryParser;
    /// use tantivy::SegmentReader;
    /// use tantivy::collector::TopDocs;
-    /// use tantivy::fastfield::FastFieldReader;
    /// use tantivy::schema::Field;
    ///
    /// fn create_schema() -> Schema {
@@ -456,7 +458,7 @@ impl TopDocs {
    ///
    ///             // We can now define our actual scoring function
    ///             move |doc: DocId, original_score: Score| {
-    ///                 let popularity: u64 = popularity_reader.get(doc);
+    ///                 let popularity: u64 = popularity_reader.get_val(doc);
    ///                 // Well.. For the sake of the example we use a simple logarithm
    ///                 // function.
    ///                 let popularity_boost_score = ((2u64 + popularity) as Score).log2();
@@ -472,7 +474,7 @@ impl TopDocs {
    /// ```
    ///
    /// # See also
-    /// [custom_score(...)](#method.custom_score).
+    /// - [custom_score(...)](TopDocs::custom_score)
    pub fn tweak_score<TScore, TScoreSegmentTweaker, TScoreTweaker>(
        self,
        score_tweaker: TScoreTweaker,
@@ -489,8 +491,7 @@ impl TopDocs {
    ///
    /// This method offers a convenient way to use a different score.
    ///
-    /// As suggested by the prototype you can manually define your
-    /// own [`CustomScorer`](./trait.CustomScorer.html)
+    /// As suggested by the prototype you can manually define your own [`CustomScorer`]
    /// and pass it as an argument, but there is a much simpler way to
    /// tweak your score: you can use a closure as in the following
    /// example.
@@ -499,7 +500,7 @@ impl TopDocs {
    ///
    /// This method only makes it possible to compute the score from a given
    /// `DocId`, fastfield values for the doc and any information you could
-    /// have precomputed beforehands. It does not make it possible for instance
+    /// have precomputed beforehand. It does not make it possible for instance
    /// to compute something like TfIdf as it does not have access to the list of query
    /// terms present in the document, nor the term frequencies for the different terms.
    ///
@@ -515,7 +516,6 @@ impl TopDocs {
    /// use tantivy::SegmentReader;
    /// use tantivy::collector::TopDocs;
    /// use tantivy::schema::Field;
-    /// use tantivy::fastfield::FastFieldReader;
    ///
    /// # fn create_schema() -> Schema {
    /// #    let mut schema_builder = Schema::builder();
@@ -567,8 +567,8 @@ impl TopDocs {
    ///
    ///             // We can now define our actual scoring function
    ///             move |doc: DocId| {
-    ///                 let popularity: u64 = popularity_reader.get(doc);
-    ///                 let boosted: u64 = boosted_reader.get(doc);
+    ///                 let popularity: u64 = popularity_reader.get_val(doc);
+    ///                 let boosted: u64 = boosted_reader.get_val(doc);
    ///                 // Score do not have to be `f64` in tantivy.
    ///                 // Here we return a couple to get lexicographical order
    ///                 // for free.
@@ -587,7 +587,7 @@ impl TopDocs {
    /// ```
    ///
    /// # See also
-    /// [tweak_score(...)](#method.tweak_score).
+    /// - [tweak_score(...)](TopDocs::tweak_score)
    pub fn custom_score<TScore, TCustomSegmentScorer, TCustomScorer>(
        self,
        custom_score: TCustomScorer,
@@ -693,7 +693,7 @@ impl Collector for TopDocs {
    }
 }

-/// Segment Collector associated to `TopDocs`.
+/// Segment Collector associated with `TopDocs`.
 pub struct TopScoreSegmentCollector(TopSegmentCollector<Score>);

 impl SegmentCollector for TopScoreSegmentCollector {
--- a/src/collector/tweak_score_top_collector.rs
+++ b/src/collector/tweak_score_top_collector.rs
@@ -24,7 +24,7 @@ where TScore: Clone + PartialOrd
 /// A `ScoreSegmentTweaker` makes it possible to modify the default score
 /// for a given document belonging to a specific segment.
 ///
-/// It is the segment local version of the [`ScoreTweaker`](./trait.ScoreTweaker.html).
+/// It is the segment local version of the [`ScoreTweaker`].
 pub trait ScoreSegmentTweaker<TScore>: 'static {
    /// Tweak the given `score` for the document `doc`.
    fn score(&mut self, doc: DocId, score: Score) -> TScore;
@@ -37,10 +37,10 @@ pub trait ScoreSegmentTweaker<TScore>: 'static {
 /// Instead, it helps constructing `Self::Child` instances that will compute
 /// the score at a segment scale.
 pub trait ScoreTweaker<TScore>: Sync {
-    /// Type of the associated [`ScoreSegmentTweaker`](./trait.ScoreSegmentTweaker.html).
+    /// Type of the associated [`ScoreSegmentTweaker`].
    type Child: ScoreSegmentTweaker<TScore>;

-    /// Builds a child tweaker for a specific segment. The child scorer is associated to
+    /// Builds a child tweaker for a specific segment. The child scorer is associated with
    /// a specific segment.
    fn segment_tweaker(&self, segment_reader: &SegmentReader) -> Result<Self::Child>;
 }
--- a/src/core/index.rs
+++ b/src/core/index.rs
@@ -7,6 +7,7 @@ use std::sync::Arc;

 use super::segment::Segment;
 use super::IndexSettings;
+use crate::core::single_segment_index_writer::SingleSegmentIndexWriter;
 use crate::core::{
    Executor, IndexMeta, SegmentId, SegmentMeta, SegmentMetaInventory, META_FILEPATH,
 };
@@ -16,9 +17,9 @@ use crate::directory::MmapDirectory;
 use crate::directory::{Directory, ManagedDirectory, RamDirectory, INDEX_WRITER_LOCK};
 use crate::error::{DataCorruption, TantivyError};
 use crate::indexer::index_writer::{MAX_NUM_THREAD, MEMORY_ARENA_NUM_BYTES_MIN};
-use crate::indexer::segment_updater::save_new_metas;
+use crate::indexer::segment_updater::save_metas;
 use crate::reader::{IndexReader, IndexReaderBuilder};
-use crate::schema::{Field, FieldType, Schema};
+use crate::schema::{Cardinality, Field, FieldType, Schema};
 use crate::tokenizer::{TextAnalyzer, TokenizerManager};
 use crate::IndexWriter;

@@ -47,10 +48,38 @@ fn load_metas(
        .map_err(From::from)
 }

+/// Save the index meta file.
+/// This operation is atomic :
+/// Either
+///  - it fails, in which case an error is returned,
+/// and the `meta.json` remains untouched,
+/// - it succeeds, and `meta.json` is written
+/// and flushed.
+///
+/// This method is not part of tantivy's public API
+fn save_new_metas(
+    schema: Schema,
+    index_settings: IndexSettings,
+    directory: &dyn Directory,
+) -> crate::Result<()> {
+    save_metas(
+        &IndexMeta {
+            index_settings,
+            segments: Vec::new(),
+            schema,
+            opstamp: 0u64,
+            payload: None,
+        },
+        directory,
+    )?;
+    directory.sync_directory()?;
+    Ok(())
+}
+
 /// IndexBuilder can be used to create an index.
 ///
-/// Use in conjunction with `SchemaBuilder`. Global index settings
-/// can be configured with `IndexSettings`
+/// Use in conjunction with [`SchemaBuilder`][crate::schema::SchemaBuilder].
+/// Global index settings can be configured with [`IndexSettings`].
 ///
 /// # Examples
 ///
@@ -68,7 +97,13 @@ fn load_metas(
 /// );
 ///
 /// let schema = schema_builder.build();
-/// let settings = IndexSettings{sort_by_field: Some(IndexSortByField{field:"number".to_string(), order:Order::Asc}), ..Default::default()};
+/// let settings = IndexSettings{
+///     sort_by_field: Some(IndexSortByField{
+///         field: "number".to_string(),
+///         order: Order::Asc
+///     }),
+///     ..Default::default()
+/// };
 /// let index = Index::builder().schema(schema).settings(settings).create_in_ram();
 /// ```
 pub struct IndexBuilder {
@@ -111,21 +146,21 @@ impl IndexBuilder {
        self
    }

-    /// Creates a new index using the `RAMDirectory`.
+    /// Creates a new index using the [`RamDirectory`].
    ///
    /// The index will be allocated in anonymous memory.
-    /// This should only be used for unit tests.
+    /// This is useful for indexing small set of documents
+    /// for instances like unit test or temporary in memory index.
    pub fn create_in_ram(self) -> Result<Index, TantivyError> {
        let ram_directory = RamDirectory::create();
-        Ok(self
-            .create(ram_directory)
-            .expect("Creating a RAMDirectory should never fail"))
+        self.create(ram_directory)
    }

    /// Creates a new index in a given filepath.
-    /// The index will use the `MMapDirectory`.
+    /// The index will use the [`MmapDirectory`].
    ///
-    /// If a previous index was in this directory, it returns an `IndexAlreadyExists` error.
+    /// If a previous index was in this directory, it returns an
+    /// [`TantivyError::IndexAlreadyExists`] error.
    #[cfg(feature = "mmap")]
    pub fn create_in_dir<P: AsRef<Path>>(self, directory_path: P) -> crate::Result<Index> {
        let mmap_directory: Box<dyn Directory> = Box::new(MmapDirectory::open(directory_path)?);
@@ -135,14 +170,34 @@ impl IndexBuilder {
        self.create(mmap_directory)
    }

+    /// Dragons ahead!!!
+    ///
+    /// The point of this API is to let users create a simple index with a single segment
+    /// and without starting any thread.
+    ///
+    /// Do not use this method if you are not sure what you are doing.
+    ///
+    /// It expects an originally empty directory, and will not run any GC operation.
+    #[doc(hidden)]
+    pub fn single_segment_index_writer(
+        self,
+        dir: impl Into<Box<dyn Directory>>,
+        mem_budget: usize,
+    ) -> crate::Result<SingleSegmentIndexWriter> {
+        let index = self.create(dir)?;
+        let index_simple_writer = SingleSegmentIndexWriter::new(index, mem_budget)?;
+        Ok(index_simple_writer)
+    }
+
    /// Creates a new index in a temp directory.
    ///
-    /// The index will use the `MMapDirectory` in a newly created directory.
-    /// The temp directory will be destroyed automatically when the `Index` object
+    /// The index will use the [`MmapDirectory`] in a newly created directory.
+    /// The temp directory will be destroyed automatically when the [`Index`] object
    /// is destroyed.
    ///
-    /// The temp directory is only used for testing the `MmapDirectory`.
-    /// For other unit tests, prefer the `RAMDirectory`, see: `create_in_ram`.
+    /// The temp directory is only used for testing the [`MmapDirectory`].
+    /// For other unit tests, prefer the [`RamDirectory`], see:
+    /// [`IndexBuilder::create_in_ram()`].
    #[cfg(feature = "mmap")]
    pub fn create_from_tempdir(self) -> crate::Result<Index> {
        let mmap_directory: Box<dyn Directory> = Box::new(MmapDirectory::create_from_tempdir()?);
@@ -172,10 +227,44 @@ impl IndexBuilder {
            ))
        }
    }
+
+    fn validate(&self) -> crate::Result<()> {
+        if let Some(schema) = self.schema.as_ref() {
+            if let Some(sort_by_field) = self.index_settings.sort_by_field.as_ref() {
+                let schema_field = schema.get_field(&sort_by_field.field).ok_or_else(|| {
+                    TantivyError::InvalidArgument(format!(
+                        "Field to sort index {} not found in schema",
+                        sort_by_field.field
+                    ))
+                })?;
+                let entry = schema.get_field_entry(schema_field);
+                if !entry.is_fast() {
+                    return Err(TantivyError::InvalidArgument(format!(
+                        "Field {} is no fast field. Field needs to be a single value fast field \
+                         to be used to sort an index",
+                        sort_by_field.field
+                    )));
+                }
+                if entry.field_type().fastfield_cardinality() != Some(Cardinality::SingleValue) {
+                    return Err(TantivyError::InvalidArgument(format!(
+                        "Only single value fast field Cardinality supported for sorting index {}",
+                        sort_by_field.field
+                    )));
+                }
+            }
+            Ok(())
+        } else {
+            Err(TantivyError::InvalidArgument(
+                "no schema passed".to_string(),
+            ))
+        }
+    }
+
    /// Creates a new index given an implementation of the trait `Directory`.
    ///
    /// If a directory previously existed, it will be erased.
    fn create<T: Into<Box<dyn Directory>>>(self, dir: T) -> crate::Result<Index> {
+        self.validate()?;
        let dir = dir.into();
        let directory = ManagedDirectory::wrap(dir)?;
        save_new_metas(
@@ -238,7 +327,7 @@ impl Index {
        self.set_multithread_executor(default_num_threads)
    }

-    /// Creates a new index using the `RamDirectory`.
+    /// Creates a new index using the [`RamDirectory`].
    ///
    /// The index will be allocated in anonymous memory.
    /// This is useful for indexing small set of documents
@@ -248,9 +337,10 @@ impl Index {
    }

    /// Creates a new index in a given filepath.
-    /// The index will use the `MMapDirectory`.
+    /// The index will use the [`MmapDirectory`].
    ///
-    /// If a previous index was in this directory, then it returns  an `IndexAlreadyExists` error.
+    /// If a previous index was in this directory, then it returns
+    /// a [`TantivyError::IndexAlreadyExists`] error.
    #[cfg(feature = "mmap")]
    pub fn create_in_dir<P: AsRef<Path>>(
        directory_path: P,
@@ -272,12 +362,13 @@ impl Index {

    /// Creates a new index in a temp directory.
    ///
-    /// The index will use the `MMapDirectory` in a newly created directory.
-    /// The temp directory will be destroyed automatically when the `Index` object
+    /// The index will use the [`MmapDirectory`] in a newly created directory.
+    /// The temp directory will be destroyed automatically when the [`Index`] object
    /// is destroyed.
    ///
-    /// The temp directory is only used for testing the `MmapDirectory`.
-    /// For other unit tests, prefer the `RamDirectory`, see: `create_in_ram`.
+    /// The temp directory is only used for testing the [`MmapDirectory`].
+    /// For other unit tests, prefer the [`RamDirectory`],
+    /// see: [`IndexBuilder::create_in_ram()`].
    #[cfg(feature = "mmap")]
    pub fn create_from_tempdir(schema: Schema) -> crate::Result<Index> {
        IndexBuilder::new().schema(schema).create_from_tempdir()
@@ -297,7 +388,7 @@ impl Index {
        builder.create(dir)
    }

-    /// Creates a new index given a directory and an `IndexMeta`.
+    /// Creates a new index given a directory and an [`IndexMeta`].
    fn open_from_metas(
        directory: ManagedDirectory,
        metas: &IndexMeta,
@@ -324,7 +415,7 @@ impl Index {
        &self.tokenizers
    }

-    /// Helper to access the tokenizer associated to a specific field.
+    /// Get the tokenizer associated with a specific field.
    pub fn tokenizer_for_field(&self, field: Field) -> crate::Result<TextAnalyzer> {
        let field_entry = self.schema.get_field_entry(field);
        let field_type = field_entry.field_type();
@@ -356,14 +447,14 @@ impl Index {
            })
    }

-    /// Create a default `IndexReader` for the given index.
+    /// Create a default [`IndexReader`] for the given index.
    ///
-    /// See [`Index.reader_builder()`](#method.reader_builder).
+    /// See [`Index.reader_builder()`].
    pub fn reader(&self) -> crate::Result<IndexReader> {
        self.reader_builder().try_into()
    }

-    /// Create a `IndexReader` for the given index.
+    /// Create a [`IndexReader`] for the given index.
    ///
    /// Most project should create at most one reader for a given index.
    /// This method is typically called only once per `Index` instance.
@@ -580,10 +671,12 @@ impl fmt::Debug for Index {

 #[cfg(test)]
 mod tests {
+    use crate::collector::Count;
    use crate::directory::{RamDirectory, WatchCallback};
-    use crate::schema::{Field, Schema, INDEXED, TEXT};
+    use crate::query::TermQuery;
+    use crate::schema::{Field, IndexRecordOption, Schema, INDEXED, TEXT};
    use crate::tokenizer::TokenizerManager;
-    use crate::{Directory, Index, IndexBuilder, IndexReader, IndexSettings, ReloadPolicy};
+    use crate::{Directory, Index, IndexBuilder, IndexReader, IndexSettings, ReloadPolicy, Term};

    #[test]
    fn test_indexer_for_field() {
@@ -720,7 +813,7 @@ mod tests {
            let field = schema.get_field("num_likes").unwrap();
            let tempdir = TempDir::new().unwrap();
            let tempdir_path = PathBuf::from(tempdir.path());
-            let index = Index::create_in_dir(&tempdir_path, schema).unwrap();
+            let index = Index::create_in_dir(tempdir_path, schema).unwrap();
            let reader = index
                .reader_builder()
                .reload_policy(ReloadPolicy::OnCommit)
@@ -849,4 +942,28 @@ mod tests {
        );
        Ok(())
    }
+
+    #[test]
+    fn test_single_segment_index_writer() -> crate::Result<()> {
+        let mut schema_builder = Schema::builder();
+        let text_field = schema_builder.add_text_field("text", TEXT);
+        let schema = schema_builder.build();
+        let directory = RamDirectory::default();
+        let mut single_segment_index_writer = Index::builder()
+            .schema(schema)
+            .single_segment_index_writer(directory, 10_000_000)?;
+        for _ in 0..10 {
+            let doc = doc!(text_field=>"hello");
+            single_segment_index_writer.add_document(doc)?;
+        }
+        let index = single_segment_index_writer.finalize()?;
+        let searcher = index.reader()?.searcher();
+        let term_query = TermQuery::new(
+            Term::from_field_text(text_field, "hello"),
+            IndexRecordOption::Basic,
+        );
+        let count = searcher.search(&term_query, &Count)?;
+        assert_eq!(count, 10);
+        Ok(())
+    }
 }
--- a/src/core/index_meta.rs
+++ b/src/core/index_meta.rs
@@ -130,10 +130,10 @@ impl SegmentMeta {
    /// Returns the relative path of a component of our segment.
    ///
    /// It just joins the segment id with the extension
-    /// associated to a segment component.
+    /// associated with a segment component.
    pub fn relative_path(&self, component: SegmentComponent) -> PathBuf {
        let mut path = self.id().uuid_string();
-        path.push_str(&*match component {
+        path.push_str(&match component {
            SegmentComponent::Postings => ".idx".to_string(),
            SegmentComponent::Positions => ".pos".to_string(),
            SegmentComponent::Terms => ".term".to_string(),
@@ -235,6 +235,14 @@ impl InnerSegmentMeta {
    }
 }

+fn return_true() -> bool {
+    true
+}
+
+fn is_true(val: &bool) -> bool {
+    *val
+}
+
 /// Search Index Settings.
 ///
 /// Contains settings which are applied on the whole
@@ -248,6 +256,12 @@ pub struct IndexSettings {
    /// The `Compressor` used to compress the doc store.
    #[serde(default)]
    pub docstore_compression: Compressor,
+    /// If set to true, docstore compression will happen on a dedicated thread.
+    /// (defaults: true)
+    #[doc(hidden)]
+    #[serde(default = "return_true")]
+    #[serde(skip_serializing_if = "is_true")]
+    pub docstore_compress_dedicated_thread: bool,
    #[serde(default = "default_docstore_blocksize")]
    /// The size of each block that will be compressed and written to disk
    pub docstore_blocksize: usize,
@@ -264,6 +278,7 @@ impl Default for IndexSettings {
            sort_by_field: None,
            docstore_compression: Compressor::default(),
            docstore_blocksize: default_docstore_blocksize(),
+            docstore_compress_dedicated_thread: true,
        }
    }
 }
@@ -311,13 +326,13 @@ pub struct IndexMeta {
    /// `IndexSettings` to configure index options.
    #[serde(default)]
    pub index_settings: IndexSettings,
-    /// List of `SegmentMeta` informations associated to each finalized segment of the index.
+    /// List of `SegmentMeta` information associated with each finalized segment of the index.
    pub segments: Vec<SegmentMeta>,
    /// Index `Schema`
    pub schema: Schema,
-    /// Opstamp associated to the last `commit` operation.
+    /// Opstamp associated with the last `commit` operation.
    pub opstamp: Opstamp,
-    /// Payload associated to the last commit.
+    /// Payload associated with the last commit.
    ///
    /// Upon commit, clients can optionally add a small `String` payload to their commit
    /// to help identify this commit.
@@ -395,7 +410,7 @@ mod tests {
    use super::IndexMeta;
    use crate::core::index_meta::UntrackedIndexMeta;
    use crate::schema::{Schema, TEXT};
-    use crate::store::ZstdCompressor;
+    use crate::store::{Compressor, ZstdCompressor};
    use crate::{IndexSettings, IndexSortByField, Order};

    #[test]
@@ -447,6 +462,7 @@ mod tests {
                    compression_level: Some(4),
                }),
                docstore_blocksize: 1_000_000,
+                docstore_compress_dedicated_thread: true,
            },
            segments: Vec::new(),
            schema,
@@ -485,4 +501,47 @@ mod tests {
            "unknown zstd option \"bla\" at line 1 column 103".to_string()
        );
    }
+
+    #[test]
+    #[cfg(feature = "lz4-compression")]
+    fn test_index_settings_default() {
+        let mut index_settings = IndexSettings::default();
+        assert_eq!(
+            index_settings,
+            IndexSettings {
+                sort_by_field: None,
+                docstore_compression: Compressor::default(),
+                docstore_compress_dedicated_thread: true,
+                docstore_blocksize: 16_384
+            }
+        );
+        {
+            let index_settings_json = serde_json::to_value(&index_settings).unwrap();
+            assert_eq!(
+                index_settings_json,
+                serde_json::json!({
+                    "docstore_compression": "lz4",
+                    "docstore_blocksize": 16384
+                })
+            );
+            let index_settings_deser: IndexSettings =
+                serde_json::from_value(index_settings_json).unwrap();
+            assert_eq!(index_settings_deser, index_settings);
+        }
+        {
+            index_settings.docstore_compress_dedicated_thread = false;
+            let index_settings_json = serde_json::to_value(&index_settings).unwrap();
+            assert_eq!(
+                index_settings_json,
+                serde_json::json!({
+                    "docstore_compression": "lz4",
+                    "docstore_blocksize": 16384,
+                    "docstore_compress_dedicated_thread": false,
+                })
+            );
+            let index_settings_deser: IndexSettings =
+                serde_json::from_value(index_settings_json).unwrap();
+            assert_eq!(index_settings_deser, index_settings);
+        }
+    }
 }
--- a/src/core/inverted_index_reader.rs
+++ b/src/core/inverted_index_reader.rs
@@ -9,18 +9,17 @@ use crate::schema::{IndexRecordOption, Term};
 use crate::termdict::TermDictionary;

 /// The inverted index reader is in charge of accessing
-/// the inverted index associated to a specific field.
+/// the inverted index associated with a specific field.
 ///
 /// # Note
 ///
-/// It is safe to delete the segment associated to
+/// It is safe to delete the segment associated with
 /// an `InvertedIndexReader`. As long as it is open,
-/// the `FileSlice` it is relying on should
+/// the [`FileSlice`] it is relying on should
 /// stay available.
 ///
-///
 /// `InvertedIndexReader` are created by calling
-/// the `SegmentReader`'s [`.inverted_index(...)`] method
+/// [`SegmentReader::inverted_index()`](crate::SegmentReader::inverted_index).
 pub struct InvertedIndexReader {
    termdict: TermDictionary,
    postings_file_slice: FileSlice,
@@ -30,7 +29,7 @@ pub struct InvertedIndexReader {
 }

 impl InvertedIndexReader {
-    #[cfg_attr(feature = "cargo-clippy", allow(clippy::needless_pass_by_value))] // for symmetry
+    #[allow(clippy::needless_pass_by_value)] // for symmetry
    pub(crate) fn new(
        termdict: TermDictionary,
        postings_file_slice: FileSlice,
@@ -75,7 +74,7 @@ impl InvertedIndexReader {
    ///
    /// This is useful for enumerating through a list of terms,
    /// and consuming the associated posting lists while avoiding
-    /// reallocating a `BlockSegmentPostings`.
+    /// reallocating a [`BlockSegmentPostings`].
    ///
    /// # Warning
    ///
@@ -96,7 +95,7 @@ impl InvertedIndexReader {
    /// Returns a block postings given a `Term`.
    /// This method is for an advanced usage only.
    ///
-    /// Most user should prefer using `read_postings` instead.
+    /// Most users should prefer using [`Self::read_postings()`] instead.
    pub fn read_block_postings(
        &self,
        term: &Term,
@@ -110,7 +109,7 @@ impl InvertedIndexReader {
    /// Returns a block postings given a `term_info`.
    /// This method is for an advanced usage only.
    ///
-    /// Most user should prefer using `read_postings` instead.
+    /// Most users should prefer using [`Self::read_postings()`] instead.
    pub fn read_block_postings_from_terminfo(
        &self,
        term_info: &TermInfo,
@@ -130,7 +129,7 @@ impl InvertedIndexReader {
    /// Returns a posting object given a `term_info`.
    /// This method is for an advanced usage only.
    ///
-    /// Most user should prefer using `read_postings` instead.
+    /// Most users should prefer using [`Self::read_postings()`] instead.
    pub fn read_postings_from_terminfo(
        &self,
        term_info: &TermInfo,
@@ -164,12 +163,12 @@ impl InvertedIndexReader {
    /// or `None` if the term has never been encountered and indexed.
    ///
    /// If the field was not indexed with the indexing options that cover
-    /// the requested options, the returned `SegmentPostings` the method does not fail
+    /// the requested options, the returned [`SegmentPostings`] the method does not fail
    /// and returns a `SegmentPostings` with as much information as possible.
    ///
-    /// For instance, requesting `IndexRecordOption::Freq` for a
-    /// `TextIndexingOptions` that does not index position will return a `SegmentPostings`
-    /// with `DocId`s and frequencies.
+    /// For instance, requesting [`IndexRecordOption::WithFreqs`] for a
+    /// [`TextOptions`](crate::schema::TextOptions) that does not index position
+    /// will return a [`SegmentPostings`] with `DocId`s and frequencies.
    pub fn read_postings(
        &self,
        term: &Term,
@@ -201,23 +200,16 @@ impl InvertedIndexReader {

 #[cfg(feature = "quickwit")]
 impl InvertedIndexReader {
-    pub(crate) async fn get_term_info_async(
-        &self,
-        term: &Term,
-    ) -> crate::AsyncIoResult<Option<TermInfo>> {
+    pub(crate) async fn get_term_info_async(&self, term: &Term) -> io::Result<Option<TermInfo>> {
        self.termdict.get_async(term.value_bytes()).await
    }

    /// Returns a block postings given a `Term`.
    /// This method is for an advanced usage only.
    ///
-    /// Most user should prefer using `read_postings` instead.
-    pub async fn warm_postings(
-        &self,
-        term: &Term,
-        with_positions: bool,
-    ) -> crate::AsyncIoResult<()> {
-        let term_info_opt = self.get_term_info_async(term).await?;
+    /// Most users should prefer using [`Self::read_postings()`] instead.
+    pub async fn warm_postings(&self, term: &Term, with_positions: bool) -> io::Result<()> {
+        let term_info_opt: Option<TermInfo> = self.get_term_info_async(term).await?;
        if let Some(term_info) = term_info_opt {
            self.postings_file_slice
                .read_bytes_slice_async(term_info.postings_range.clone())
@@ -230,4 +222,25 @@ impl InvertedIndexReader {
        }
        Ok(())
    }
+
+    /// Read the block postings for all terms.
+    /// This method is for an advanced usage only.
+    ///
+    /// If you know which terms to pre-load, prefer using [`Self::warm_postings`] instead.
+    pub async fn warm_postings_full(&self, with_positions: bool) -> io::Result<()> {
+        self.postings_file_slice.read_bytes_async().await?;
+        if with_positions {
+            self.positions_file_slice.read_bytes_async().await?;
+        }
+        Ok(())
+    }
+
+    /// Returns the number of documents containing the term asynchronously.
+    pub async fn doc_freq_async(&self, term: &Term) -> io::Result<u32> {
+        Ok(self
+            .get_term_info_async(term)
+            .await?
+            .map(|term_info| term_info.doc_freq)
+            .unwrap_or(0u32))
+    }
 }
--- a/src/core/mod.rs
+++ b/src/core/mod.rs
@@ -7,6 +7,7 @@ mod segment;
 mod segment_component;
 mod segment_id;
 mod segment_reader;
+mod single_segment_index_writer;

 use std::path::Path;

@@ -23,6 +24,7 @@ pub use self::segment::Segment;
 pub use self::segment_component::SegmentComponent;
 pub use self::segment_id::SegmentId;
 pub use self::segment_reader::SegmentReader;
+pub use self::single_segment_index_writer::SingleSegmentIndexWriter;

 /// The meta file contains all the information about the list of segments and the schema
 /// of the index.
--- a/src/core/searcher.rs
+++ b/src/core/searcher.rs
@@ -4,18 +4,18 @@ use std::{fmt, io};

 use crate::collector::Collector;
 use crate::core::{Executor, SegmentReader};
-use crate::query::Query;
+use crate::query::{EnableScoring, Query};
 use crate::schema::{Document, Schema, Term};
 use crate::space_usage::SearcherSpaceUsage;
 use crate::store::{CacheStats, StoreReader};
 use crate::{DocAddress, Index, Opstamp, SegmentId, TrackedObject};

-/// Identifies the searcher generation accessed by a [Searcher].
+/// Identifies the searcher generation accessed by a [`Searcher`].
 ///
-/// While this might seem redundant, a [SearcherGeneration] contains
+/// While this might seem redundant, a [`SearcherGeneration`] contains
 /// both a `generation_id` AND a list of `(SegmentId, DeleteOpstamp)`.
 ///
-/// This is on purpose. This object is used by the `Warmer` API.
+/// This is on purpose. This object is used by the [`Warmer`](crate::reader::Warmer) API.
 /// Having both information makes it possible to identify which
 /// artifact should be refreshed or garbage collected.
 ///
@@ -69,20 +69,20 @@ pub struct Searcher {
 }

 impl Searcher {
-    /// Returns the `Index` associated to the `Searcher`
+    /// Returns the `Index` associated with the `Searcher`
    pub fn index(&self) -> &Index {
        &self.inner.index
    }

-    /// [SearcherGeneration] which identifies the version of the snapshot held by this `Searcher`.
+    /// [`SearcherGeneration`] which identifies the version of the snapshot held by this `Searcher`.
    pub fn generation(&self) -> &SearcherGeneration {
        self.inner.generation.as_ref()
    }

-    /// Fetches a document from tantivy's store given a `DocAddress`.
+    /// Fetches a document from tantivy's store given a [`DocAddress`].
    ///
    /// The searcher uses the segment ordinal to route the
-    /// the request to the right `Segment`.
+    /// request to the right `Segment`.
    pub fn doc(&self, doc_address: DocAddress) -> crate::Result<Document> {
        let store_reader = &self.inner.store_readers[doc_address.segment_ord as usize];
        store_reader.get(doc_address.doc_id)
@@ -108,7 +108,7 @@ impl Searcher {
        store_reader.get_async(doc_address.doc_id).await
    }

-    /// Access the schema associated to the index of this searcher.
+    /// Access the schema associated with the index of this searcher.
    pub fn schema(&self) -> &Schema {
        &self.inner.schema
    }
@@ -134,6 +134,19 @@ impl Searcher {
        Ok(total_doc_freq)
    }

+    /// Return the overall number of documents containing
+    /// the given term in an asynchronous manner.
+    #[cfg(feature = "quickwit")]
+    pub async fn doc_freq_async(&self, term: &Term) -> crate::Result<u64> {
+        let mut total_doc_freq = 0;
+        for segment_reader in &self.inner.segment_readers {
+            let inverted_index = segment_reader.inverted_index(term.field())?;
+            let doc_freq = inverted_index.doc_freq_async(term).await?;
+            total_doc_freq += u64::from(doc_freq);
+        }
+        Ok(total_doc_freq)
+    }
+
    /// Return the list of segment readers
    pub fn segment_readers(&self) -> &[SegmentReader] {
        &self.inner.segment_readers
@@ -148,11 +161,11 @@ impl Searcher {
    ///
    /// Search works as follows :
    ///
-    ///  First the weight object associated to the query is created.
+    ///  First the weight object associated with the query is created.
    ///
    ///  Then, the query loops over the segments and for each segment :
    ///  - setup the collector and informs it that the segment being processed has changed.
-    ///  - creates a SegmentCollector for collecting documents associated to the segment
+    ///  - creates a SegmentCollector for collecting documents associated with the segment
    ///  - creates a `Scorer` object associated for this segment
    ///  - iterate through the matched documents and push them to the segment collector.
    ///
@@ -167,7 +180,7 @@ impl Searcher {
        self.search_with_executor(query, collector, executor)
    }

-    /// Same as [`search(...)`](#method.search) but multithreaded.
+    /// Same as [`search(...)`](Searcher::search) but multithreaded.
    ///
    /// The current implementation is rather naive :
    /// multithreading is by splitting search into as many task
@@ -186,7 +199,12 @@ impl Searcher {
        executor: &Executor,
    ) -> crate::Result<C::Fruit> {
        let scoring_enabled = collector.requires_scoring();
-        let weight = query.weight(self, scoring_enabled)?;
+        let enabled_scoring = if scoring_enabled {
+            EnableScoring::Enabled(self)
+        } else {
+            EnableScoring::Disabled(self.schema())
+        };
+        let weight = query.weight(enabled_scoring)?;
        let segment_readers = self.segment_readers();
        let fruits = executor.map(
            |(segment_ord, segment_reader)| {
@@ -234,6 +252,14 @@ impl SearcherInner {
        generation: TrackedObject<SearcherGeneration>,
        doc_store_cache_size: usize,
    ) -> io::Result<SearcherInner> {
+        assert_eq!(
+            &segment_readers
+                .iter()
+                .map(|reader| (reader.segment_id(), reader.delete_opstamp()))
+                .collect::<BTreeMap<_, _>>(),
+            generation.segments(),
+            "Set of segments referenced by this Searcher and its SearcherGeneration must match"
+        );
        let store_readers: Vec<StoreReader> = segment_readers
            .iter()
            .map(|segment_reader| segment_reader.get_store_reader(doc_store_cache_size))
--- a/src/core/segment.rs
+++ b/src/core/segment.rs
@@ -70,7 +70,7 @@ impl Segment {
    /// Returns the relative path of a component of our segment.
    ///
    /// It just joins the segment id with the extension
-    /// associated to a segment component.
+    /// associated with a segment component.
    pub fn relative_path(&self, component: SegmentComponent) -> PathBuf {
        self.meta.relative_path(component)
    }
--- a/Show More
+++ b/Show More